`
smallearth
  • 浏览: 34554 次
  • 性别: Icon_minigender_1
社区版块
存档分类
最新评论

Lucene小练八(实现了索引和搜索)

 
阅读更多
// Main class (source file: index.java)

package Java.se.lucene;

import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.StaleReaderException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;


/**
 * Small Lucene 3.6 demo: builds an on-disk index from six hard-coded sample
 * records and offers helpers to inspect, delete, undelete, merge, update and
 * search that index.
 *
 * <p>All helpers report I/O failures with {@code printStackTrace()} and return
 * normally, so a failed operation never propagates a checked exception to the
 * caller. The class is not designed for concurrent use.
 */
public class index {
	/** Location of the on-disk index used by every operation. */
	private static final String INDEX_PATH="f:/lucene/Index04";
	/** Pattern of the date literals parsed in {@link #setDate()}. */
	private static final String DATE_PATTERN="yyyy-MM-dd";
	/** Boost applied to documents whose e-mail domain has no entry in {@link #scores}. */
	private static final float DEFAULT_BOOST=0.5f;

	private final String[] ids={"1","2","3","4","5","6"};
	private final String[] emails={"aa@aa.com","bb@bb.com",
			"cc@cc.com","dd@dd.com","ee@ee.com","ff@ff.com"};
	private final String[] contents={"i like  gdsfgfds","i like fsdfs","i like fdsfsd",
			"i like fdsfsd","i like like fdfs","i like like like fsefsdfg"};
	private final int[] attachs={1,2,3,4,5,6};
	private final String[] names={"liwu","zhangsan","xiaoqinag","laona",
			"dabao","lisi"};
	private Directory directory=null;
	private Date[] dates=null;
	/** Document boost per e-mail domain. */
	private final Map<String,Float> scores=new HashMap<String,Float>();
	/** Reader shared by {@link #getSearcher()}; refreshed only when the index changed. */
	private static IndexReader reader=null;

	/**
	 * Prepares the sample data, opens the index directory and builds the index
	 * if the directory does not contain one yet.
	 *
	 * <p>The directory is opened BEFORE any indexing is attempted; an existing
	 * index is left untouched so that deletions made through one instance stay
	 * visible to the next one. Call {@link #Index()} to rebuild explicitly.
	 */
	public index()
	{
		setDate();
		scores.put("aa.com", 2.0f);
		scores.put("bb.com", 1.0f);
		scores.put("cc.com", 3.0f);
		scores.put("dd.com", 4.0f);
		scores.put("ee.com", 5.0f);
		scores.put("ff.com", 6.0f);
		try {
			// directory=new RAMDirectory(); // alternative: keep the index in memory
			directory=FSDirectory.open(new File(INDEX_PATH));
			if(!IndexReader.indexExists(directory))
			{
				rebuildIndex();
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Rebuilds the index from scratch: removes every existing document and
	 * adds one document per sample record.
	 */
	public void Index()
	{
		rebuildIndex();
	}

	/** Does the work of {@link #Index()}; private so the constructor can call it safely. */
	private void rebuildIndex()
	{
		IndexWriter writer=null;
		try {
			writer=openWriter();
			writer.deleteAll();
			for(int i=0;i<ids.length;i++)
			{
				writer.addDocument(buildDocument(i));
			}
			// one commit for the whole batch instead of one per document
			writer.commit();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeWriter(writer);
		}
	}

	/** Creates the Lucene document for sample record {@code i}, including its boost. */
	private Document buildDocument(int i)
	{
		Document document=new Document();
		document.add(new Field("id", ids[i],
				Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
		document.add(new Field("email", emails[i],
				Field.Store.YES, Field.Index.NOT_ANALYZED));
		document.add(new Field("content", contents[i],
				Field.Store.YES, Field.Index.ANALYZED));
		document.add(new Field("name", names[i],
				Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
		// numeric field for the attachment count
		document.add(new NumericField("attach", Field.Store.YES, true)
				.setIntValue(attachs[i]));
		// dates are indexed as epoch milliseconds, hence getTime()
		document.add(new NumericField("date", Field.Store.YES, true)
				.setLongValue(dates[i].getTime()));

		// boost by e-mail domain (the part after the last '@')
		String domain=emails[i].substring(emails[i].lastIndexOf("@")+1);
		System.out.println(domain);
		Float boost=scores.get(domain);
		document.setBoost(boost!=null ? boost.floatValue() : DEFAULT_BOOST);
		return document;
	}

	/** Prints the live, maximum and deleted document counts of the index. */
	public void query()
	{
		IndexReader statsReader=null;
		try {
			statsReader=IndexReader.open(requireDirectory());
			System.out.println("numdocs:"+statsReader.numDocs());
			System.out.println("maxDocs:"+statsReader.maxDoc());
			System.out.println("detelemaxDocs:"+statsReader.numDeletedDocs());
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeReader(statsReader);
		}
	}

	/**
	 * Deletes the document whose id is "1" through an IndexWriter. The
	 * document is only marked as deleted and can be restored with
	 * {@link #undelete()} until the deletions are merged away.
	 */
	public void delete01()
	{
		IndexWriter writer=null;
		try {
			writer=openWriter();
			writer.deleteDocuments(new Term("id","1"));
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeWriter(writer);
		}
	}

	/** Restores all documents that are marked as deleted, using a writable reader. */
	@SuppressWarnings("deprecation")
	public void undelete()
	{
		IndexReader writableReader=null;
		try {
			// readOnly=false: the reader must be allowed to modify the index
			writableReader=IndexReader.open(requireDirectory(),false);
			writableReader.undeleteAll();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// closing the reader is what persists the undelete
			closeReader(writableReader);
		}
	}

	/** Empties the "recycle bin": merges away documents marked as deleted. */
	public void forceDelete()
	{
		IndexWriter writer=null;
		try {
			writer=openWriter();
			writer.forceMergeDeletes();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeWriter(writer);
		}
	}

	/** Forces the index to be merged down to at most three segments. */
	public void forceMerge()
	{
		IndexWriter writer=null;
		try {
			writer=openWriter();
			writer.forceMerge(3);
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeWriter(writer);
		}
	}

	/**
	 * Replaces the document whose id is "1" with a new document whose id is
	 * "11". Lucene has no in-place update: {@code updateDocument} deletes the
	 * documents matching the term and then adds the new one, so it is called
	 * exactly once (repeating it would only add duplicate "11" documents).
	 */
	public void update()
	{
		IndexWriter writer=null;
		try {
			writer=openWriter();
			Document document=new Document();
			document.add(new Field("id", "11",
					Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
			document.add(new Field("email", emails[0],
					Field.Store.YES, Field.Index.ANALYZED));
			// NOTE(review): unlike Index(), "content" is neither analyzed nor
			// stored here, so a TermQuery for "like" will not match this
			// document - confirm this is intended.
			document.add(new Field("content", contents[0],
					Field.Store.NO, Field.Index.NOT_ANALYZED));
			document.add(new Field("name", names[0],
					Field.Store.YES, Field.Index.NOT_ANALYZED));
			writer.updateDocument(new Term("id","1"), document);
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeWriter(writer);
		}
	}

	/**
	 * Searches the "content" field for the term "like" with a reader that is
	 * opened and closed just for this call, and prints up to ten hits.
	 */
	public void search01()
	{
		IndexReader searchReader=null;
		IndexSearcher searcher=null;
		try {
			searchReader=IndexReader.open(requireDirectory());
			searcher=new IndexSearcher(searchReader);
			printLikeHits(searcher);
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeSearcher(searcher);
			closeReader(searchReader);
		}
	}

	/**
	 * Same search as {@link #search01()}, but on the shared reader managed by
	 * {@link #getSearcher()}. Only the searcher is closed afterwards; the
	 * shared reader stays open for the next call.
	 */
	public void search02()
	{
		IndexSearcher searcher=null;
		try {
			searcher=getSearcher();
			printLikeHits(searcher);
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeSearcher(searcher);
		}
	}

	/** Runs the TermQuery content:like (top ten) and prints one line per hit. */
	private void printLikeHits(IndexSearcher searcher) throws IOException
	{
		TermQuery query=new TermQuery(new Term("content","like"));
		TopDocs hits=searcher.search(query, 10);
		for(ScoreDoc hit:hits.scoreDocs)
		{
			Document document=searcher.doc(hit.doc);
			System.out.println("("+hit.doc+")"+document.get("name")+"["+document.get("email")+
					"]-->"+document.get("id")+"..."+document.get("attach")+"..."+document.get("date"));
		}
	}

	/**
	 * Fills {@link #dates} with one date per sample record.
	 *
	 * @throws IllegalStateException if one of the built-in literals cannot be
	 *         parsed, which would be a programming error
	 */
	public void setDate()
	{
		// "yyyy-MM-dd": MM is the month and dd the day of month
		// (lower-case mm would be minutes and kk the hour of day)
		SimpleDateFormat sdf=new SimpleDateFormat(DATE_PATTERN);
		String[] literals={"2010-08-17","2011-02-17","2012-03-17",
				"2011-04-17","2012-05-17","2011-07-17"};
		Date[] parsed=new Date[ids.length];
		try {
			for(int i=0;i<parsed.length && i<literals.length;i++)
			{
				parsed[i]=sdf.parse(literals[i]);
			}
		} catch (java.text.ParseException e) {
			throw new IllegalStateException("invalid built-in date literal", e);
		}
		dates=parsed;
	}

	/**
	 * Returns a searcher on the shared reader. The reader is opened on first
	 * use and afterwards replaced only when the index has changed, in which
	 * case the outdated reader is closed. If refreshing fails, the failure is
	 * printed and the previous reader keeps being used.
	 *
	 * @return a new searcher; closing it does not close the shared reader
	 * @throws IllegalStateException if no reader could be opened at all
	 */
	public IndexSearcher getSearcher()
	{
		try {
			if(reader==null)
			{
				reader=IndexReader.open(requireDirectory());
			}else{
				IndexReader refreshed=IndexReader.openIfChanged(reader);
				if(refreshed!=null)
				{
					reader.close();
					reader=refreshed;
				}
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
		if(reader==null)
		{
			throw new IllegalStateException("no index reader available for "+INDEX_PATH);
		}
		return new IndexSearcher(reader);
	}

	/** Returns the opened directory or fails with an IOException if it could not be opened. */
	private Directory requireDirectory() throws IOException
	{
		if(directory==null)
		{
			throw new IOException("index directory "+INDEX_PATH+" is not open");
		}
		return directory;
	}

	/** Opens a writer on the index directory with the standard analyzer. */
	private IndexWriter openWriter() throws IOException
	{
		return new IndexWriter(requireDirectory(), new IndexWriterConfig(Version.LUCENE_36,
				new StandardAnalyzer(Version.LUCENE_36)));
	}

	/** Closes the writer if it was opened; a close failure is only printed. */
	private static void closeWriter(IndexWriter writer)
	{
		if(writer==null)
		{
			return;
		}
		try {
			writer.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/** Closes the reader if it was opened; a close failure is only printed. */
	private static void closeReader(IndexReader toClose)
	{
		if(toClose==null)
		{
			return;
		}
		try {
			toClose.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/** Closes the searcher if it was created; a close failure is only printed. */
	private static void closeSearcher(IndexSearcher searcher)
	{
		if(searcher==null)
		{
			return;
		}
		try {
			searcher.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}
// Test class (separate source file: Test_Index.java)

package Java.se.lucene;


import org.junit.Test;

/**
 * Manual JUnit drivers for the {@code index} demo class. Each test creates a
 * fresh {@code index} instance and runs exactly one operation; the results are
 * printed to the console rather than asserted.
 */
public class Test_Index {
	/** Rebuilds the index. */
	@Test
	public void test_index()
	{
		index ind=new index();
		ind.Index();
	}
	/** Prints the document counts of the index. */
	@Test
	public void test_query()
	{
		index ind=new index();
		ind.query();
	}
	/** Deletes a document through the writer. */
	@Test
	public void test_delete01()
	{
		index ind=new index();
		ind.delete01();
	}

	/** Restores deleted documents. */
	@Test
	public void test_undelete()
	{
		index ind=new index();
		ind.undelete();
	}
	/** Empties the recycle bin (merges away deleted documents). */
	@Test
	public void test_forceDelete()
	{
		index ind=new index();
		ind.forceDelete();
	}
	/** Forces a segment merge. */
	@Test
	public void test_forceMerge()
	{
		index ind=new index();
		ind.forceMerge();
	}
	/** Updates a document. */
	@Test
	public void test_update()
	{
		index ind=new index();
		ind.update();
	}
	/** Searches with a reader opened just for the call. */
	@Test
	public void test_search01()
	{
		index ind=new index();
		ind.search01();
	}
	/**
	 * Runs the shared-reader search five times, pausing five seconds between
	 * runs. Stops early if the thread is interrupted while pausing.
	 */
	@Test
	public void test_search02()
	{
		index ind=new index();
		for(int i=0;i<5;i++)
		{
			ind.search02();
			System.out.println("------------------------");
			try {
				Thread.sleep(5000);
			} catch (InterruptedException e) {
				// restore the interrupt flag and stop: further sleeps would
				// fail immediately anyway
				Thread.currentThread().interrupt();
				return;
			}
		}
	}


/*	public void check() throws IOException{						// checks that the index was built correctly (prints it)
		Directory directory = FSDirectory.open(new File("f:/lucene/Index04/"));// creates the directory (stored on disk)
		IndexReader reader = IndexReader.open(directory);
		for(int i = 0;i<reader.numDocs();i++){
			System.out.println(reader.document(i));
		}
		reader.close();
	}
		public static void main(String[] args) throws IOException {
			new index().check();
		}*/
}

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics