Lucene -- HelloWorld

2012-07-08

Lucene -- HelloWorld：资料：《Lucene实战》（Lucene in Action），lucene父亲著。Lucene+3.0+原理与代码分析完……

Lucene -- HelloWorld

资料：《Lucene实战》（Lucene in Action），lucene父亲著。

Lucene+3.0+原理与代码分析完整版.pdf：http://dl.dbank.com/c0b9qk1dth （理论部分，很容易理解。）

以下代码照抄自 Lucene in Action。

package cn.my.a;import java.io.File;import java.io.FileFilter;import java.io.FileReader;import java.io.IOException;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.index.CorruptIndexException;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.store.LockObtainFailedException;import org.apache.lucene.util.Version;public class Indexer {public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException {String indexDir = "E:/devlop_lucene/index";//生成索引目录String dataDir  = "E:/devlop_lucene/doc";//源文件目录 long start = System.currentTimeMillis();Indexer indexer = new Indexer(indexDir);int numIndexed = 0;try {numIndexed = indexer.index(dataDir, new TextFilesFilter());} catch (Exception e) {e.printStackTrace();}finally{indexer.close();}long end = System.currentTimeMillis();System.out.println("indexing " + numIndexed +"  time:"+(end-start));}private IndexWriter writer;public Indexer(String indexDir) throws IOException {Directory dir = FSDirectory.open(new File(indexDir));writer = new IndexWriter(dir,new StandardAnalyzer(Version.LUCENE_36),true,IndexWriter.MaxFieldLength.UNLIMITED);}public void close() throws CorruptIndexException, IOException{writer.close();}public int index(String dataDir,FileFilter filter) throws IOException{File[] files = new File(dataDir).listFiles();for(File f : files){if(!f.isDirectory()&&!f.isHidden()&&f.exists()&&f.canRead()&&(filter==null||filter.accept(f))){indexFile(f);}}return writer.numDocs();}private void indexFile(File f) throws CorruptIndexException, IOException {System.out.println("Indexing  "+f.getCanonicalPath());Document doc = getDocument(f);writer.addDocument(doc);}private Document getDocument(File f) throws IOException { Document doc = new Document(); doc.add(new 
Field("contents",new FileReader(f))); doc.add(new Field("filename",f.getName(),Field.Store.YES,Field.Index.NOT_ANALYZED)); doc.add(new Field("fullpath",f.getCanonicalPath(),Field.Store.YES,Field.Index.NOT_ANALYZED)); return doc;}}class TextFilesFilter implements  FileFilter{@Overridepublic boolean accept(File pathname) {return pathname.getName().toLowerCase().endsWith(".txt");}}

?package cn.my.a;

import java.io.File;import java.io.IOException;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.queryParser.ParseException;import org.apache.lucene.queryParser.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;public class Searcher {public static void main(String[] args) throws ParseException {String indexDir = "E:/devlop_lucene/index";//索引目录String q  = "jsp";//要查询的关键词try {search(indexDir,q);} catch (IOException e) {e.printStackTrace();}}private static void search(String indexDir, String q) throws IOException, ParseException {Directory dir = FSDirectory.open(new File(indexDir));IndexSearcher is = new IndexSearcher(dir);QueryParser parser = new QueryParser(Version.LUCENE_36,"contents",new StandardAnalyzer(Version.LUCENE_36));Query query = parser.parse(q);long start = System.currentTimeMillis();TopDocs hits = is.search(query, 10);long end = System.currentTimeMillis();System.out.println("found "+ hits.totalHits + " \t time :" +(end-start));for(ScoreDoc scoreDoc : hits.scoreDocs){Document doc = is.doc(scoreDoc.doc);System.out.println(doc.get("fullpath"));}is.close();}}

热点排行

开源软件

Lucene -- HelloWorld