smallearth
  • 浏览: 34156 次
  • 性别: Icon_minigender_1
社区版块
存档分类
最新评论

Lucene小练十五(过滤器)

 
阅读更多
package Java.se.lucene;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Builds the Lucene index that the search examples read.
 *
 * <p>Each regular file directly inside {@code F:/lucene/lucenes} is turned into one document
 * and written to the index directory {@code f:/lucene/Index07}. The index directory is opened
 * once in a static initializer and shared through {@link #getDirectory()}.
 */
public class Test_Index {

	private Analyzer analyzer=new IKAnalyzer();
	private static Directory directory=null;
	static{
		try {
			directory=FSDirectory.open(new File("f:/lucene/Index07"));
		} catch (IOException e) {
			// Opening failed: directory stays null and the failure is only printed here.
			e.printStackTrace();
		}
	}

	/**
	 * Returns the shared index directory.
	 *
	 * @return the directory opened by the static initializer, or {@code null} if opening failed
	 */
	public static Directory getDirectory()
	{
		return directory;
	}

	/**
	 * Indexes every regular file found directly inside the source folder.
	 *
	 * <p>Sub-directories and other non-regular entries are skipped. If the source folder
	 * cannot be listed, a message is printed and nothing is added. I/O failures are printed
	 * and end the run; the writer is always closed.
	 *
	 * @param hasNew when {@code true}, all existing documents are deleted before indexing
	 */
	public void index(boolean hasNew)
	{
		IndexWriter writer=null;
		try {
			writer=new IndexWriter(directory,new IndexWriterConfig
					(Version.LUCENE_36, analyzer));
			if(hasNew)
			{
				writer.deleteAll();
			}
			File sourceDir=new File("F:/lucene/lucenes");
			// listFiles() yields null when the path is missing, unreadable or not a directory.
			File[] entries=sourceDir.listFiles();
			if(entries==null)
			{
				System.err.println("Cannot list source directory: "
						+sourceDir.getAbsolutePath());
				return;
			}
			for(File f:entries)
			{
				// A FileReader cannot be opened on a directory; skip anything that is not a file.
				if(!f.isFile())
				{
					continue;
				}
				addFile(writer, f);
			}
		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (LockObtainFailedException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}finally
		{
			if(writer!=null)
			{
				try {
					writer.close();
				} catch (CorruptIndexException e) {
					e.printStackTrace();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}

	/**
	 * Adds one file to the index as a document with the fields
	 * {@code content}, {@code filename}, {@code path}, {@code date} and {@code size}.
	 *
	 * @param writer the open writer that receives the document
	 * @param f      the regular file to index
	 * @throws IOException if the file cannot be read or the document cannot be written
	 */
	private void addFile(IndexWriter writer,File f) throws IOException
	{
		FileReader content=new FileReader(f);
		try {
			Document doc=new Document();
			// File body: tokenized from the reader, not stored.
			doc.add(new Field("content",content));
			// File name: stored verbatim as a single term.
			doc.add(new Field("filename",f.getName(),Field.Store.YES,
					Field.Index.NOT_ANALYZED));
			doc.add(new Field("path",f.getAbsolutePath(),Field.Store.YES,
					Field.Index.NOT_ANALYZED));
			doc.add(new NumericField("date",Field.Store.YES, true).setLongValue
					(f.lastModified()));
			// "size" stays an int field; clamp so a huge file does not wrap to a negative value.
			int size=(int)Math.min(f.length(),(long)Integer.MAX_VALUE);
			doc.add(new NumericField("size",Field.Store.YES,true).setIntValue(size));
			writer.addDocument(doc);
		} finally {
			// Make sure the reader is released even when addDocument fails part-way.
			content.close();
		}
	}

}
package Java.se.lucene;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.wltea.analyzer.lucene.IKQueryParser;
import org.wltea.analyzer.lucene.IKSimilarity;

/**
 * Runs keyword searches against the index built by {@code Test_Index} and prints the hits.
 *
 * <p>One {@link IndexReader} is kept in a static field and refreshed only when the index has
 * changed, so repeated searches do not accumulate open readers. The class does no
 * synchronization of its own around that shared reader.
 */
public class Test_Search {

	/** Maximum number of hits requested from every search. */
	private static final int MAX_HITS=50;

	static IndexReader reader=null;
	static{
		try {
			reader=IndexReader.open(Test_Index.getDirectory());
		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Returns a searcher over an up-to-date view of the index.
	 *
	 * <p>If no reader is open yet one is opened; otherwise the current reader is replaced
	 * only when {@code IndexReader.openIfChanged} reports a newer index, and the replaced
	 * reader is closed. Failures while opening or refreshing are printed and the previous
	 * reader, if any, keeps being used.
	 *
	 * @return a new searcher wrapping the shared reader
	 * @throws IllegalStateException if no reader could be opened at all
	 */
	public IndexSearcher getSearcher()
	{
		try {
			if(reader==null)
			{
				reader=IndexReader.open(Test_Index.getDirectory());
			}else {
				// Returns null when the index is unchanged, so the current reader is reused.
				IndexReader refreshed=IndexReader.openIfChanged(reader);
				if(refreshed!=null)
				{
					reader.close();
					reader=refreshed;
				}
			}
		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		if(reader==null)
		{
			throw new IllegalStateException("Index reader could not be opened");
		}
		return new IndexSearcher(reader);
	}

	/**
	 * Searches the {@code content} field and prints up to 50 hits in the given order.
	 *
	 * @param keyword the text to search for
	 * @param sort    the sort order, or {@code null} for the default relevance order
	 */
	public void Searcher(String keyword,Sort sort)
	{
		search(keyword, sort, null);
	}

	/**
	 * Searches the {@code content} field and prints up to 50 hits accepted by the filter.
	 *
	 * @param keyword the text to search for
	 * @param filter  the filter restricting the hits, or {@code null} for no filtering
	 */
	public void Searcher(String keyword,Filter filter)
	{
		search(keyword, null, filter);
	}

	/**
	 * Shared implementation of both public search methods; at most one of
	 * {@code sort} and {@code filter} is non-null.
	 */
	private void search(String keyword,Sort sort,Filter filter)
	{
		IndexSearcher searcher=null;
		try {
			searcher=getSearcher();
			// Compute scores even for sorted searches so they can be printed.
			searcher.setDefaultFieldSortScoring(true, false);
			// Use the IK similarity when scoring.
			searcher.setSimilarity(new IKSimilarity());
			Query query=IKQueryParser.parse("content",keyword);
			TopDocs topDocs;
			if(sort!=null)
			{
				topDocs=searcher.search(query, MAX_HITS, sort);
			}else if(filter!=null)
			{
				topDocs=searcher.search(query, filter, MAX_HITS);
			}else {
				topDocs=searcher.search(query, MAX_HITS);
			}
			printHits(searcher, topDocs);
		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			if(searcher!=null)
			{
				try {
					// Releases the searcher only; the shared reader stays open for later calls.
					searcher.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}

	/**
	 * Prints one line per hit: doc id, file name, path, size, score and modification time.
	 */
	private void printHits(IndexSearcher searcher,TopDocs topDocs) throws IOException
	{
		// A fresh formatter per call; SimpleDateFormat must not be shared between threads.
		SimpleDateFormat sdf=new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
		for(ScoreDoc hit:topDocs.scoreDocs)
		{
			Document document=searcher.doc(hit.doc);
			long modified=Long.parseLong(document.get("date"));
			System.out.println(hit.doc+"-->"+document.get("filename")
					+"【"+document.get("path")+"】"+"["+document.get("size")
					+"]"+"("+hit.score+")"+"-->"+sdf.format(new Date(modified)));
		}
	}
}
package Java.se.lucene;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermRangeFilter;
import org.apache.lucene.search.WildcardQuery;
import org.junit.Before;
import org.junit.Test;

/**
 * JUnit entry points that build the index and exercise sorted and filtered searches.
 *
 * <p>The search tests read the index written by {@link #test_index()}.
 */
public class Test_All {
	Test_Index index=null;
	Test_Search search =null;

	/** Creates fresh indexer and searcher objects before every test. */
	@Before
	public void init()
	{
		 index=new Test_Index();
		 search =new Test_Search();
	}

	/** Rebuilds the index from scratch. */
	@Test
	public void test_index()
	{
		index.index(true);
	}

	/**
	 * Searches for "java" with a multi-field sort. The commented lines are alternative
	 * sort orders that can be swapped in.
	 */
	@Test
	public void test_search01()
	{
		// No explicit sort (the cast selects the Sort overload):
//		search.Searcher("java", (Sort) null);
		// Sort by the default relevance score:
//		search.Searcher("java", Sort.RELEVANCE);
		// Sort by document id (index order):
//		search.Searcher("java", Sort.INDEXORDER);
		// Sort by file size:
//		search.Searcher("java", new Sort(new SortField("size",SortField.INT)));
		// Sort by file date:
//		search.Searcher("java", new Sort(new SortField("date",SortField.LONG)));
		// Sort by file name; the third argument selects the direction (true = descending):
//		search.Searcher("java", new Sort(new SortField("filename",SortField.STRING
//			,true)));
		// Multi-criteria sort: file name first, then score.
		search.Searcher("java",new Sort(new SortField("filename",SortField.STRING
				),SortField.FIELD_SCORE));
	}

	/**
	 * Searches for "java" restricted to file names ending in {@code .kk}. The commented
	 * lines are alternative filters that can be swapped in; only one filter is applied
	 * per search.
	 */
	@Test
	public void test_search02()
	{
		// Alternative: file names within an inclusive term range.
//		Filter filter=new TermRangeFilter("filename", "Tomcat环境变量配置.kk",
//				"Tomcat环境变量配置.txt.kk", true,true);
		// Alternative: file size within an inclusive numeric range.
//		Filter filter=NumericRangeFilter.newIntRange("size", 2, 100000,
//				true, true);
		// Filter through a query: keep only documents whose file name matches *.kk.
		Filter filter=new QueryWrapperFilter(new WildcardQuery(new Term("filename","*.kk")));
		search.Searcher("java", filter);
	}

}


分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics