`
smallearth
  • 浏览: 34558 次
  • 性别: Icon_minigender_1
社区版块
存档分类
最新评论

Lucene小练四——为数字和日期添加索引

阅读更多
//主程序
package org.se.lucene;


import java.io.File;
import java.io.IOException;
//import java.sql.Date;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;

/**
 * Lucene 3.6 exercise: indexes six sample records (string, numeric and date
 * fields) into a file-system index and offers helpers to inspect, delete,
 * restore, merge, rebuild and search that index.
 *
 * <p>Every operation is best-effort: I/O failures are reported with
 * {@code printStackTrace()} and the method then returns normally.
 */
public class lucene_index {
	private final String[] ids={"1","2","3","4","5","6"};
	private final String[] emails={"welcometotyu","hellowboy",
			"higirl","howareyou","googluck","badgosh"};
	// One entry per id. The original was missing a comma ("I like 4" + "I like 5"),
	// which left only four entries and overflowed the array while indexing.
	private final String[] contents={"I like 1","I like 2","I like 3","I like 4",
			"I like 5","I like 6"};
	private final int[] attachs={1,2,3,4,5,6};
	private final String[] names={"liwu","zhangsan","xiaoqinag","laona",
			"dabao","lisi"};
	private Date[] dates=null;
	private Directory directory=null;
	// Optional per-domain boosts keyed by the text after the last '@' of an email.
	private final Map<String,Float> scores=new HashMap<String, Float>();

	/**
	 * Prepares the sample dates and opens the index directory
	 * {@code f:/lucene/index02}.
	 */
	public lucene_index()
	{
		setDate();
		try {
			directory=FSDirectory.open(new File("f:/lucene/index02"));
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Fills {@link #dates} with one date per id, parsed as {@code yyyy-MM-dd}.
	 */
	public void setDate()
	{
		// 'MM' = month and 'dd' = day of month. The former "yyyy-mm-kk" read the
		// month as minutes and the day as hour-of-day, producing wrong dates.
		SimpleDateFormat sdf=new SimpleDateFormat("yyyy-MM-dd");
		String[] raw={"2010-08-17","2011-02-17","2012-03-17",
				"2011-04-17","2012-05-17","2011-07-17"};
		try {
			Date[] parsed=new Date[ids.length];
			for(int i=0;i<parsed.length;i++)
			{
				parsed[i]=sdf.parse(raw[i]);
			}
			dates=parsed;
		} catch (java.text.ParseException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Adds one document per sample record to the index, printing the email
	 * domain used for the boost lookup of each record.
	 */
	public void index()
	{
		IndexWriter writer=null;
		try {
			writer=newWriter();
			for(int i=0;i<ids.length;i++)
			{
				System.out.println(emailDomain(i));
				writer.addDocument(buildDocument(i));
			}
		} catch (IOException e) {
			// Also covers CorruptIndexException and LockObtainFailedException,
			// which are IOException subclasses.
			e.printStackTrace();
		} finally {
			closeAndReport(writer);
		}
	}

	/**
	 * Prints the live, maximum and deleted document counts of the index.
	 */
	public void quary()
	{
		IndexReader reader=null;
		try {
			reader=IndexReader.open(directory);
			System.out.println("numdocs"+reader.numDocs());
			System.out.println("maxDocs"+reader.maxDoc());
			System.out.println("detelemaxDocs"+reader.numDeletedDocs());
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeAndReport(reader);
		}
	}

	/**
	 * Restores every document that is marked deleted but not yet merged away.
	 */
	@SuppressWarnings("deprecation")
	public void undelete()
	{
		IndexReader reader=null;
		try {
			// Restoring requires a reader opened with readOnly = false.
			reader=IndexReader.open(directory,false);
			reader.undeleteAll();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeAndReport(reader);
		}
	}

	/**
	 * Empties the "recycle bin": forces a merge of the segments that hold
	 * deleted documents so that those documents are physically removed.
	 */
	public void forceDelete()
	{
		IndexWriter writer=null;
		try {
			writer=newWriter();
			writer.forceMergeDeletes();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeAndReport(writer);
		}
	}

	/**
	 * Forces the index to be merged down to at most two segments.
	 */
	public void merge()
	{
		IndexWriter writer=null;
		try {
			writer=newWriter();
			writer.forceMerge(2);
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeAndReport(writer);
		}
	}

	/**
	 * Marks the document whose {@code id} is {@code "1"} as deleted.
	 */
	public void delete()
	{
		IndexWriter writer=null;
		try {
			writer=newWriter();
			// deleteDocuments accepts a Query or a Term (an exact-match value).
			// The document is only marked as deleted at this point and can
			// still be restored until the deletions are merged away.
			writer.deleteDocuments(new Term("id","1"));
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeAndReport(writer);
		}
	}

	/**
	 * Rebuilds the index. Lucene has no in-place update, so each record is
	 * replaced by deleting the document with the same id and adding a fresh one.
	 */
	public void update()
	{
		IndexWriter writer=null;
		try {
			writer=newWriter();
			writer.deleteAll();
			for(int i=0;i<ids.length;i++)
			{
				// Key the replacement on this record's own id; a fixed "1" made
				// every iteration delete the document added just before it.
				writer.updateDocument(new Term("id",ids[i]), buildDocument(i));
			}
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeAndReport(writer);
		}
	}

	/**
	 * Runs a term query for {@code like} on the {@code contents} field and
	 * prints the top ten hits with their stored fields.
	 */
	public void serch()
	{
		IndexReader reader=null;
		IndexSearcher searcher=null;
		try {
			reader=IndexReader.open(directory);
			searcher=new IndexSearcher(reader);
			TermQuery query=new TermQuery(new Term("contents","like"));
			TopDocs tds=searcher.search(query, 10);

			for(ScoreDoc sd:tds.scoreDocs)
			{
				Document doc=searcher.doc(sd.doc);
				// NOTE(review): the boost of a document loaded from the index is
				// presumably not the index-time value - confirm against Lucene docs.
				System.out.println("("+sd.doc+"-"+doc.getBoost()+"-"+sd.score+")"+doc.get("name")+"["+doc.get("email")+"]-->"
						+doc.get("id")+","+doc.get("attachs")+","+doc.get("date"));
			}
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// The searcher does not own a reader that was handed to it, so both
			// are closed explicitly.
			closeAndReport(searcher);
			closeAndReport(reader);
		}
	}

	/** Opens a writer on the index directory using the standard analyzer. */
	private IndexWriter newWriter() throws IOException
	{
		return new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_36,
				new StandardAnalyzer(Version.LUCENE_36)));
	}

	/** Returns the text after the last '@' of record i's email (the whole value if there is none). */
	private String emailDomain(int i)
	{
		return emails[i].substring(emails[i].lastIndexOf("@")+1);
	}

	/** Builds the complete document (text, numeric and date fields plus boost) for record i. */
	private Document buildDocument(int i)
	{
		Document doc=new Document();
		doc.add(new Field("id",ids[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
		doc.add(new Field("email",emails[i],Field.Store.YES,Field.Index.NOT_ANALYZED));
		doc.add(new Field("contents",contents[i],Field.Store.YES,Field.Index.ANALYZED));
		doc.add(new Field("name",names[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
		// Numbers are stored through NumericField.
		doc.add(new NumericField("attachs",Field.Store.YES,true).setIntValue(attachs[i]));
		// Dates are stored as their epoch-millisecond value.
		doc.add(new NumericField("date",Field.Store.YES,true).setLongValue(dates[i].getTime()));

		String domain=emailDomain(i);
		if(scores.containsKey(domain))
		{
			doc.setBoost(scores.get(domain));
		}
		else {
			doc.setBoost(0.5f);
		}
		return doc;
	}

	/** Closes the resource if it is non-null, reporting (not propagating) any failure. */
	private static void closeAndReport(java.io.Closeable resource)
	{
		if(resource==null)
		{
			return;
		}
		try {
			resource.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

}
//测试类(单独的源文件 test.java)
package org.se.lucene;

import org.junit.Test;

/**
 * JUnit driver for {@code lucene_index}: each test creates a fresh instance
 * and invokes exactly one of its operations.
 */
public class test {

	/** Invokes {@code index()} on a fresh instance. */
	@Test
	public void testIndex()
	{
		new lucene_index().index();
	}

	/** Invokes {@code quary()} on a fresh instance. */
	@Test
	public void testquary()
	{
		new lucene_index().quary();
	}

	/** Invokes {@code delete()} on a fresh instance. */
	@Test
	public void testDelete()
	{
		new lucene_index().delete();
	}

	/** Invokes {@code undelete()} on a fresh instance. */
	@Test
	public void testunDelete()
	{
		new lucene_index().undelete();
	}

	/** Invokes {@code forceDelete()} on a fresh instance. */
	@Test
	public void testForceDelete()
	{
		new lucene_index().forceDelete();
	}

	/** Invokes {@code merge()} on a fresh instance. */
	@Test
	public void testmerge()
	{
		new lucene_index().merge();
	}

	/** Invokes {@code update()} on a fresh instance. */
	@Test
	public void upDate()
	{
		new lucene_index().update();
	}

	/** Invokes {@code serch()} on a fresh instance. */
	@Test
	public void testSercher()
	{
		new lucene_index().serch();
	}
}


分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics