基于 Lucene 3.1 开发的 demo。
package com.search.crawler;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Small Lucene 3.1 demo: builds a file-system index from a directory of
 * documents and then runs a query against that index.
 */
public class IndexProcess {

    /** Directory where the generated index is stored. */
    private static String indexPath = "src/indexFiles";

    /**
     * Builds a fresh index under {@code indexPath} from every readable file
     * found below {@code docsPath}.
     *
     * <p>If {@code docsPath} does not exist or is not readable, a message is
     * printed and the JVM exits with status 1. I/O errors during indexing are
     * printed to standard error; the writer and directory are closed either way.
     *
     * @param docsPath path of the file or directory to index
     */
    public static void createIndex(String docsPath) {
        File docDir = new File(docsPath);
        if (!docDir.exists() || !docDir.canRead()) {
            System.out.println("Document directory '"
                    + docDir.getAbsolutePath()
                    + "' does not exist or is not readable, please check the path");
            System.exit(1);
        }

        System.out.println("Indexing to directory '" + indexPath + "'...");
        Directory dir = null;
        IndexWriter writer = null;
        try {
            dir = FSDirectory.open(new File(indexPath));
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
            // create == true: any index already present at indexPath is replaced.
            writer = new IndexWriter(dir, analyzer, true,
                    new IndexWriter.MaxFieldLength(25000));
            indexDocs(writer, docDir);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close in finally so the writer and directory are released even
            // when indexing fails part-way through.
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (dir != null) {
                try {
                    dir.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Adds {@code file} to the index; directories are walked recursively.
     * Unreadable entries are skipped silently, and a file that cannot be
     * opened is reported and skipped.
     *
     * @param writer index writer that receives one document per file
     * @param file   file or directory to index
     * @throws IOException if adding a document to the index fails
     */
    static void indexDocs(IndexWriter writer, File file) throws IOException {
        // do not try to index files that cannot be read
        if (!file.canRead()) {
            return;
        }

        if (file.isDirectory()) {
            File[] files = file.listFiles();
            // listFiles() returns null when the directory cannot be listed
            if (files != null) {
                for (File child : files) {
                    indexDocs(writer, child);
                }
            }
            return;
        }

        FileReader reader;
        try {
            reader = new FileReader(file);
        } catch (FileNotFoundException fnfe) {
            // The file passed canRead() but still could not be opened:
            // report it and skip it instead of failing later on a null handle.
            fnfe.printStackTrace();
            return;
        }

        try {
            // make a new, empty document
            Document doc = new Document();
            // index the path, stored so search results can display it
            doc.add(new Field("path", file.getPath(),
                    Field.Store.YES, Field.Index.ANALYZED));
            // index the content, read through the reader opened above
            doc.add(new Field("content", reader));
            writer.addDocument(doc);
        } finally {
            // Release the file handle even when addDocument throws.
            reader.close();
        }
    }

    /**
     * Searches the index under {@code indexPath} and prints each hit together
     * with its stored {@code path} field, followed by the elapsed time.
     *
     * <p>A query that cannot be parsed, or an I/O error, is printed to standard
     * error and no results are listed; the elapsed time is still printed.
     *
     * @param key   name of the default field to search
     * @param value query string, in QueryParser syntax
     */
    static void search(String key, String value) {
        Date startTime = new Date();
        Directory dir = null;
        IndexSearcher searcher = null;
        try {
            dir = FSDirectory.open(new File(indexPath));
            searcher = new IndexSearcher(dir, true); // true: open read-only
            QueryParser par = new QueryParser(Version.LUCENE_31, key,
                    new StandardAnalyzer(Version.LUCENE_31));
            Query query = par.parse(value);

            // no filter, at most 1000 hits
            TopDocs topDocs = searcher.search(query, null, 1000);
            ScoreDoc[] scores = topDocs.scoreDocs;
            for (ScoreDoc soc : scores) {
                System.out.println(soc + "\t" + searcher.doc(soc.doc).get("path"));
            }
        } catch (ParseException e) {
            // An unparsable query must not reach searcher.search() as null.
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (searcher != null) {
                try {
                    searcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (dir != null) {
                try {
                    dir.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        Date endTime = new Date();
        System.out.println("总共花了" + (endTime.getTime() - startTime.getTime()) + "毫秒时间");
    }

    /**
     * Demo entry point: indexes {@code src/index}, then searches the
     * {@code content} field for {@code revision}.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        IndexProcess.createIndex("src/index");
        System.out.println("search starting :");
        IndexProcess.search("content", "revision");
    }
}
?