首页 诗词 字典 板报 句子 名言 友答 励志 学校 网站地图
当前位置: 首页 > 教程频道 > 网站开发 > Web前端 >

【转】lucene3.0入门范例

2012-11-23 
【转】lucene3.0入门实例/** * @author ht * 预处理 * */public class FilePreprocess {public static void

【转】lucene3.0入门实例

    /**
     * Pre-processing step: splits one large text file into a series of small
     * text files so that each small file can later be indexed as one document.
     *
     * @author ht
     */
    public class FilePreprocess {

        /** A chunk is flushed once its encoded size reaches this many bytes. */
        private static final int MAX_SIZE = 10240;

        /** Base name of every generated chunk file ({@code output0.txt}, {@code output1.txt}, ...). */
        private static final String OUTPUT_NAME = "output";

        public static void main(String[] arg) {
            // Directory that receives the small files; must end with a separator
            // because chunk paths are built by plain string concatenation.
            String outputpath = "D:\\test\\small\\";
            // Location of the original (large) file.
            String filename = "D:\\test\\三国演义.txt";
            File outputDir = new File(outputpath);
            if (!outputDir.exists()) {
                outputDir.mkdirs();
            }
            splitToSmallFiles(new File(filename), outputpath);
        }

        /**
         * Splits a large file into small ones.
         *
         * <p>The input is read line by line; every line is re-terminated with
         * {@code "\r\n"}. As soon as the accumulated text reaches {@link #MAX_SIZE}
         * bytes it is written to {@code outputpath + "output" + n + ".txt"} and a
         * new chunk is started. Whatever remains after the last line is written to
         * one final chunk (which may be empty). Reading and writing use the
         * platform default charset.
         *
         * <p>I/O failures are reported on the console and do not propagate.
         *
         * @param file       the large file to split
         * @param outputpath path prefix for the chunk files; pass a directory path
         *                   that ends with a file separator
         */
        public static void splitToSmallFiles(File file, String outputpath) {
            int filePointer = 0;
            BufferedReader reader = null;
            try {
                reader = new BufferedReader(new FileReader(file));
                StringBuilder buffer = new StringBuilder();
                String line = reader.readLine();
                while (line != null) {
                    buffer.append(line).append("\r\n");
                    if (buffer.toString().getBytes().length >= MAX_SIZE) {
                        writeChunk(outputpath + OUTPUT_NAME + filePointer + ".txt", buffer.toString());
                        filePointer++;
                        buffer = new StringBuilder();
                    }
                    line = reader.readLine();
                }
                // Flush the tail that never reached MAX_SIZE.
                writeChunk(outputpath + OUTPUT_NAME + filePointer + ".txt", buffer.toString());
                System.out.println("The file hava splited to small files !");
            } catch (FileNotFoundException e) {
                System.out.println("file not found !");
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                // The reader must be released on every path, including failures.
                if (reader != null) {
                    try {
                        reader.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }
        }

        /** Writes {@code content} to {@code path}, always closing the writer. */
        private static void writeChunk(String path, String content) throws IOException {
            BufferedWriter writer = new BufferedWriter(new FileWriter(path));
            try {
                writer.write(content);
            } finally {
                writer.close();
            }
        }
    }
      /** * @author ht * 索引生成 * */public class Indexer { private static String INDEX_DIR = "D:\\test\\index";//索引存放目录 private static String DATA_DIR = "D:\\test\\small\";//小文件存放的目录 public static void main(String[] args) throws Exception { long start = new Date().getTime(); int numIndexed = index(new File(INDEX_DIR), new File(DATA_DIR));//调用index方法 long end = new Date().getTime(); System.out.println("Indexing " + numIndexed + " files took " + (end - start) + " milliseconds"); } /**索引dataDir下的.txt文件,并储存在indexDir下,返回索引的文件数量 * @param indexDir * @param dataDir * @return int * @throws IOException */public static int index(File indexDir, File dataDir) throws IOException { if (!dataDir.exists() || !dataDir.isDirectory()) { throw new IOException(dataDir + " does not exist or is not a directory"); } IndexWriter writer = new IndexWriter(FSDirectory.open(indexDir), new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);//有变化的地方 indexDirectory(writer, dataDir); int numIndexed = writer.numDocs(); writer.optimize(); writer.close(); return numIndexed; } /**循环遍历目录下的所有.txt文件并进行索引 * @param writer * @param dir * @throws IOException */private static void indexDirectory(IndexWriter writer, File dir) throws IOException { File[] files = dir.listFiles(); for (int i = 0; i < files.length; i++) { File f = files[i]; if (f.isDirectory()) { indexDirectory(writer, f); // recurse } else if (f.getName().endsWith(".txt")) { indexFile(writer, f); } } } /**对单个txt文件进行索引 * @param writer * @param f * @throws IOException */private static void indexFile(IndexWriter writer, File f) throws IOException { if (f.isHidden() || !f.exists() || !f.canRead()) { return; } System.out.println("Indexing " + f.getCanonicalPath()); Document doc = new Document(); doc.add(new Field("contents",new FileReader(f)));//有变化的地方 doc.add(new Field("filename",f.getCanonicalPath(),Field.Store.YES, Field.Index.ANALYZED));//有变化的地方 writer.addDocument(doc); }}
        /** * @author ht * 查询 * */public class Searcher { private static String INDEX_DIR = "D:\\test\\index\";//索引所在的路径 private static String KEYWORD = "玄德";//关键词 private static int TOP_NUM = 100;//显示前100条结果 public static void main(String[] args) throws Exception {File indexDir = new File(INDEX_DIR); if (!indexDir.exists() || !indexDir.isDirectory()) { throw new Exception(indexDir + " does not exist or is not a directory."); } search(indexDir, KEYWORD);//调用search方法进行查询 }/**查询 * @param indexDir * @param q * @throws Exception */ public static void search(File indexDir, String q) throws Exception { IndexSearcher is = new IndexSearcher(FSDirectory.open(indexDir),true);//read-only String field = "contents"; QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, field, new StandardAnalyzer(Version.LUCENE_CURRENT));//有变化的地方 Query query = parser.parse(q); TopScoreDocCollector collector = TopScoreDocCollector.create(TOP_NUM , false);//有变化的地方 long start = new Date().getTime();// start time is.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; System.out.println(hits.length); for (int i = 0; i < hits.length; i++) { Document doc = is.doc(hits[i].doc);//new method is.doc() System.out.println(doc.getField("filename")+" "+hits[i].toString()+" ");} long end = new Date().getTime();//end time System.out.println("Found " + collector.getTotalHits() + " document(s) (in " + (end - start) + " milliseconds) that matched query '" + q + "':"); }}

        4.结果就不贴啦,反正能运行就是啦 lucene3.0入门实例源码.rar (3 KB)下载次数: 146 1 楼 libingyang 2010-05-11   很好很强大

热点排行