package Java.se.lucene;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Builds a Lucene (3.6) index over the files found in {@code F:/lucene/lucenes},
 * storing each file's name, absolute path, last-modified time and size, and
 * tokenizing its content with the IK Chinese analyzer.
 *
 * <p>The index directory is opened once, eagerly, in a static initializer and
 * shared process-wide via {@link #getDirectory()}.
 */
public class Test_Index {
    /** Analyzer used to tokenize document content (IK Chinese analyzer). */
    private Analyzer analyzer = new IKAnalyzer();

    /** Shared on-disk index directory; null if opening it failed at class load. */
    private static Directory directory = null;

    static {
        try {
            directory = FSDirectory.open(new File("f:/lucene/Index07"));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** @return the shared index directory (may be null if the static open failed) */
    public static Directory getDirectory() {
        return directory;
    }

    /**
     * Indexes every file directly under {@code F:/lucene/lucenes}.
     *
     * @param hasNew when true, all existing documents are deleted first so the
     *               index is rebuilt from scratch
     */
    public void index(boolean hasNew) {
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(directory,
                    new IndexWriterConfig(Version.LUCENE_36, analyzer));
            if (hasNew) {
                writer.deleteAll();
            }
            File dataDir = new File("F:/lucene/lucenes");
            // listFiles() returns null when the path does not exist or is not a
            // directory; the original code would have thrown a NullPointerException.
            File[] files = dataDir.listFiles();
            if (files == null) {
                System.err.println("Not a readable directory: " + dataDir.getAbsolutePath());
                return;
            }
            for (File f : files) {
                Document doc = new Document();
                // File body: tokenized via the analyzer, not stored (Reader-based field).
                doc.add(new Field("content", new FileReader(f)));
                // File name: stored verbatim, not analyzed, so it can be sorted/filtered on.
                doc.add(new Field("filename", f.getName(), Field.Store.YES,
                        Field.Index.NOT_ANALYZED));
                // Absolute path: stored for display in search results.
                doc.add(new Field("path", f.getAbsolutePath(), Field.Store.YES,
                        Field.Index.NOT_ANALYZED));
                // Last-modified time (epoch millis) as an indexed numeric field.
                doc.add(new NumericField("date", Field.Store.YES, true)
                        .setLongValue(f.lastModified()));
                // File size in bytes, truncated to int (files > 2 GB would wrap).
                doc.add(new NumericField("size", Field.Store.YES, true)
                        .setIntValue((int) f.length()));
                writer.addDocument(doc);
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (CorruptIndexException e) {
                    e.printStackTrace();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
package Java.se.lucene;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.wltea.analyzer.lucene.IKQueryParser;
import org.wltea.analyzer.lucene.IKSimilarity;
/**
 * Searches the index built by {@link Test_Index}, scoring with IKSimilarity
 * and printing each hit's id, filename, path, size, score and formatted
 * last-modified date.
 */
public class Test_Search {
    /** Shared reader over the index; refreshed lazily in {@link #getSearcher()}. */
    static IndexReader reader = null;

    static {
        try {
            reader = IndexReader.open(Test_Index.getDirectory());
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns a searcher over an up-to-date view of the index.
     *
     * <p>Fix: the original opened a brand-new {@code IndexReader} on every call
     * and never closed the previous one, leaking file handles. We now reopen
     * only when the index has changed, and close the stale reader.
     *
     * @return a new {@code IndexSearcher} wrapping the shared reader
     */
    public IndexSearcher getSearcher() {
        try {
            if (reader == null) {
                reader = IndexReader.open(Test_Index.getDirectory());
            } else {
                // openIfChanged returns null when the index is unchanged,
                // otherwise a fresh reader; the old one must be closed by us.
                IndexReader changed = IndexReader.openIfChanged(reader);
                if (changed != null) {
                    reader.close();
                    reader = changed;
                }
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return new IndexSearcher(reader);
    }

    /**
     * Runs a keyword query over the "content" field and prints the top 50 hits,
     * optionally ordered by the given sort.
     *
     * @param keyword query text, parsed with the IK query parser
     * @param sort    result order; null means default relevance ranking
     */
    public void Searcher(String keyword, Sort sort) {
        try {
            IndexSearcher searcher = getSearcher();
            // Keep scores computed even when sorting by a field.
            searcher.setDefaultFieldSortScoring(true, false);
            // Use the IK similarity implementation for scoring.
            searcher.setSimilarity(new IKSimilarity());
            Query query = IKQueryParser.parse("content", keyword);
            TopDocs topDocs = (sort != null)
                    ? searcher.search(query, 50, sort)
                    : searcher.search(query, 50);
            printHits(searcher, topDocs);
            searcher.close();
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs a keyword query over the "content" field and prints the top 50 hits,
     * optionally restricted by the given filter.
     *
     * @param keyword query text, parsed with the IK query parser
     * @param filter  hit filter; null means no filtering
     */
    public void Searcher(String keyword, Filter filter) {
        try {
            IndexSearcher searcher = getSearcher();
            // Keep scores computed even when sorting by a field.
            searcher.setDefaultFieldSortScoring(true, false);
            // Use the IK similarity implementation for scoring.
            searcher.setSimilarity(new IKSimilarity());
            Query query = IKQueryParser.parse("content", keyword);
            TopDocs topDocs = (filter != null)
                    ? searcher.search(query, filter, 50)
                    : searcher.search(query, 50);
            printHits(searcher, topDocs);
            searcher.close();
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints one line per hit: doc id, filename, path, size, score and the
     * "date" field formatted as yyyy-MM-dd HH:mm:ss. Extracted from the two
     * {@code Searcher} overloads, which previously duplicated this loop.
     */
    private void printHits(IndexSearcher searcher, TopDocs topDocs)
            throws CorruptIndexException, IOException {
        // SimpleDateFormat is not thread-safe; a fresh local instance is safe here.
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        for (ScoreDoc sds : topDocs.scoreDocs) {
            Document document = searcher.doc(sds.doc);
            System.out.println(sds.doc + "-->" + document.get("filename")
                    + "【" + document.get("path") + "】" + "[" + document.get("size")
                    + "]" + "(" + sds.score + ")" + "-->" + sdf.format
                    (new Date(Long.valueOf(document.get("date")))));
        }
    }
}
package Java.se.lucene;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermRangeFilter;
import org.apache.lucene.search.WildcardQuery;
import org.junit.Before;
import org.junit.Test;
/**
 * JUnit driver for {@link Test_Index} and {@link Test_Search}: rebuilds the
 * index, then exercises sorted and filtered searches for the keyword "java".
 */
public class Test_All {
Test_Index index=null;
Test_Search search =null;
@Before
public void init()
{
index=new Test_Index();
search =new Test_Search();
}
/** Rebuilds the index from scratch (deleteAll + re-add every file). */
@Test
public void test_index()
{
index.index(true);
}
/** Sorted searches; earlier variants are kept commented out for reference. */
@Test
public void test_search01()
{
// search.Searcher("java", null);
// //sort by default relevance score
// search.Searcher("java", new Sort().RELEVANCE);
// //sort by document id (index order)
// search.Searcher("java", new Sort().INDEXORDER);
// //sort by file size
// search.Searcher("java", new Sort(new SortField("size",SortField.INT)));
//sort by file date
// search.Searcher("java", new Sort(new SortField("date",SortField.LONG)));
//sort by file name; the third argument sets direction (true = descending)
// search.Searcher("java", new Sort(new SortField("filename",SortField.STRING
// ,true)));
//multi-key sort: by filename, then by score
search.Searcher("java",new Sort(new SortField("filename",SortField.STRING
),SortField.FIELD_SCORE));
}
/** Filtered search: only the LAST assignment to trf is actually used. */
@Test
public void test_search02()
{
// NOTE(review): the first two filters below are built and then immediately
// overwritten; only the QueryWrapperFilter reaches the search call.
Filter trf=new TermRangeFilter("filename", "Tomcat环境变量配置.kk",
"Tomcat环境变量配置.txt.kk", true,true);
trf=NumericRangeFilter.newIntRange("size", 2, 100000,
true, true);
//filter via a wrapped wildcard query on the filename field
trf=new QueryWrapperFilter(new WildcardQuery(new Term("filename","*.kk")));
search.Searcher("java", trf);
}
}
分享到:
相关推荐
NULL 博文链接:https://iamyida.iteye.com/blog/2199368
Lucene索引器实例Lucene索引器实例Lucene索引器实例Lucene索引器实例
lucene实例lucene实例lucene实例lucene实例lucene实例lucene实例lucene实例lucene实例lucene实例
lucene入门小例子
本课程由浅入深的介绍了Lucene4的发展历史,开发环境搭建,分析lucene4的中文分词原理,深入讲了lucenne4的系统架构,分析lucene4索引实现原理及性能优化,了解关于lucene4的搜索算法优化及利用java结合lucene4实现...
为了对文档进行索引,Lucene 提供了五个基础的类 public class IndexWriter org.apache.lucene.index.IndexWriter public abstract class Directory org.apache.lucene.store.Directory public abstract class ...
lucene的排序过滤和分页,lucene开发技术,lucene下载
lucene3.0 lucene3.0 lucene3.0 lucene3.0 lucene3.0
lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习lucene学习...
Lucene中文分词器组件,不错的。
lucene 小资源,只是接触lucene的经验积累过程,从中学习一些有意义的经验。
基于Lucene小型搜索引擎的研究与实现
lucene3.0 中文分词器, 庖丁解牛
lucene 索引库查看器 5.3.0 版本,欢迎大家一起学习交流。
关于搜索的一个小案例,使用的是lucene,框架为ibatis,spring,stuts2
lucene相关十几个包。
Lucene3.0特性Lucene3.0特性
lucene相关jar+查询分析器jar,具体使用可参见资源,类似于map查询吧
来自“猎图网 www.richmap.cn”基于IKAnalyzer分词算法的准商业化Lucene中文分词器。 1. 正向全切分算法,42万汉字字符/每秒的处理能力(IBM ThinkPad 酷睿I 1.6G 1G内存 WinXP) 2. 对数量词、地名、路名的...
lucene