Lucene学习之构建简单通用的搜索查询接口
?
为了便于大家使用,在此处将全部源码公开。
DAO的基类LuceneDao提供常用的增删改查方法,并将“根据资料生成Document”和“展示查询结果”这两个扩展点抽象出来,子类可以根据不同的资料类型进行扩展实现:
package com.hsdl.lucene;import java.io.File;import java.io.IOException;import java.util.List;import java.util.Map;import java.util.Set;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.StringField;import org.apache.lucene.index.CorruptIndexException;import org.apache.lucene.index.DirectoryReader;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.IndexWriterConfig.OpenMode;import org.apache.lucene.index.Term;import org.apache.lucene.queryparser.classic.ParseException;import org.apache.lucene.queryparser.classic.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.TermQuery;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;import org.wltea.analyzer.lucene.IKAnalyzer;public abstract class LuceneDao {private Analyzer analyzer = new IKAnalyzer(true);private String indexPath = "D:/work/lucene/tika/index";public void add(Stuff stuff) throws Exception {createIndex(stuff);}public void batchAdd(List<Stuff> stuffs) throws Exception {createIndexs(stuffs);}/*** * * 删除方法 * * */public void delete(String fieldName, String fieldVaule) {try {IndexWriter writer = getIndexWrite();Query q = new TermQuery(new Term(fieldName, fieldVaule));writer.deleteDocuments(q);// 删除指定条件的Documentwriter.commit();// 提交writer.close();// 关闭System.out.println("删除" + fieldName + "为" + fieldVaule + "的记录成功");} catch (Exception e) {e.printStackTrace();}}/** * 批量删除 * * @param fieldMap * @throws Exception */public void batchDelete(Map<String, String> fieldMap) throws Exception {IndexWriter writer = getIndexWrite();for (String fieldName : fieldMap.keySet()) {Query q = new TermQuery(new Term(fieldName, 
fieldMap.get(fieldName)));writer.deleteDocuments(q);// 删除指定条件的DocumentSystem.out.println("删除" + fieldName + "为" + fieldMap.get(fieldName)+ "的记录成功");}writer.commit();// 提交writer.close();// 关闭}protected abstract Document getDocument(Stuff stuff) throws Exception;/** * * @param fieldName * @param fieldVaule * @param stuff * @throws Exception */public void update(String fieldName, String fieldVaule, Stuff stuff)throws Exception {try {IndexWriter writer = getIndexWrite();Document doc = getDocument(stuff);writer.updateDocument(new Term(fieldName, fieldVaule), doc);writer.commit();writer.close();// 关闭System.out.println("更新" + fieldName + "为" + fieldVaule + "的记录成功");} catch (Exception e) {throw e;}}public void setAnalyzer(Analyzer analyzer) {this.analyzer = analyzer;}/** * 设置索引文件的目录 * * @param indexPath */public void setIndexPath(String indexPath) {this.indexPath = indexPath;}/** * 创建索引 * * @param analyzer * @param indexPath * @param docPath * @throws Exception */protected void createIndex(Stuff stuff) throws Exception {IndexWriter iwriter = getIndexWrite();indexDoc(iwriter, stuff);iwriter.commit();iwriter.close();}protected void indexDoc(IndexWriter iwriter, Stuff stuff) throws Exception {Document doc = getDocument(stuff);iwriter.addDocument(doc);}/** * 批量创建索引 * * @param analyzer * @param indexPath * @param docPath * @throws Exception */protected void createIndexs(List<Stuff> stuffs) throws Exception {IndexWriter iwriter = getIndexWrite();for (Stuff stuff : stuffs) {indexDoc(iwriter, stuff);}iwriter.close();}/** * 获取IndexWrite实例 * * @param analyzer * @param indexPath * @return * @throws IOException */protected IndexWriter getIndexWrite() throws IOException {IndexWriter iwriter;Directory directory = FSDirectory.open(new File(indexPath));// 配置IndexWriterConfigIndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_45,analyzer);iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);iwriter = new IndexWriter(directory, iwConfig);return iwriter;}/** * 搜索 * * @param 
searchField * 搜索域 * @param indexPath * 索引目录 * @param topCount * 返回搜索相似度最高的条数 * @throws CorruptIndexException * @throws IOException * @throws ParseException */public Document[] search(String searchField, String searchKeyStr,int topCount) throws CorruptIndexException, IOException,ParseException {Directory directory = FSDirectory.open(new File(indexPath));// 搜索过程**********************************// 实例化搜索器IndexReader ireader = DirectoryReader.open(directory);IndexSearcher isearcher = new IndexSearcher(ireader);// 使用QueryParser查询分析器构造Query对象QueryParser qp = new QueryParser(Version.LUCENE_45, searchField,analyzer);qp.setDefaultOperator(QueryParser.AND_OPERATOR);Query query = qp.parse(searchKeyStr);// 搜索相似度最高的topCount条记录TopDocs topDocs = isearcher.search(query, topCount);// 输出结果Document[] docs=new Document[topDocs.scoreDocs.length];for(int i=0;i<docs.length;i++){docs[i]=isearcher.doc(topDocs.scoreDocs[i].doc);}return docs;}public void displaySearchResult(Document[] docs) {System.out.println("开始显示搜索查询结果....\n返回查询条数:"+docs.length);}/** * 为索引文档添加附加的数据,一般为数据库存储相关记录的主键,便于在搜索后查询该文档其它的信息 * * @param attachData * @param doc */protected void addAttacheData(Document doc, Map<String, String> attachData) {if (attachData != null) {Set<String> keys = attachData.keySet();for (String key : keys) {doc.add(new StringField(key, attachData.get(key),Field.Store.YES));}}}}
?
文档库资料对象的基类Stuff,我们将资料内容之外的其他数据放入到Map中,作为附加数据。
?
package com.hsdl.lucene;import java.util.Map;/** * 文档库资料对象的基类 * @author alex * */public class Stuff {private Map<String,String> attacheData;public Map<String,String> getAttacheData() {return attacheData;}public void setAttacheData(Map<String,String> attacheData) {this.attacheData = attacheData;};}
??
文件资料对象FileStuff,在这个类中有文件路径以及代表文件内容的域的名字,在构建索引和搜索时使用:
package com.hsdl.lucene;

/**
 * A library item backed by a file on disk. Holds the file's path and the
 * name of the index field that represents the file's content.
 *
 * @author alex
 */
public class FileStuff extends Stuff {

    /** Path of the file (or directory) this item refers to. */
    private String filePath;

    /** Name of the index field representing the file content. */
    private String contentFieldName;

    /**
     * @return the path last set through {@link #setFilePath(String)}
     */
    public String getFilePath() {
        return this.filePath;
    }

    /**
     * @param filePath path of the file this item refers to
     */
    public void setFilePath(String filePath) {
        this.filePath = filePath;
    }

    /**
     * @return the content field name last set through
     *         {@link #setContentFieldName(String)}
     */
    public String getContentFieldName() {
        return this.contentFieldName;
    }

    /**
     * @param contentFieldName name of the index field representing the content
     */
    public void setContentFieldName(String contentFieldName) {
        this.contentFieldName = contentFieldName;
    }
}
?
知识问答资料 AskAnswerStuff:
package com.hsdl.lucene;

/**
 * A question-and-answer library item: one question text, one answer text,
 * and the name of the index field representing the content.
 *
 * @author alex
 */
public class AskAnswerStuff extends Stuff {

    /** The question text. */
    private String ask;

    /** The answer text. */
    private String answer;

    /** Name of the index field representing the content. */
    private String contentFieldName;

    /**
     * @return the question text
     */
    public String getAsk() {
        return this.ask;
    }

    /**
     * @param ask the question text
     */
    public void setAsk(String ask) {
        this.ask = ask;
    }

    /**
     * @return the answer text
     */
    public String getAnswer() {
        return this.answer;
    }

    /**
     * @param answer the answer text
     */
    public void setAnswer(String answer) {
        this.answer = answer;
    }

    /**
     * @return the content field name last set through
     *         {@link #setContentFieldName(String)}
     */
    public String getContentFieldName() {
        return this.contentFieldName;
    }

    /**
     * @param contentFieldName name of the index field representing the content
     */
    public void setContentFieldName(String contentFieldName) {
        this.contentFieldName = contentFieldName;
    }
}
?
?
文档库访问之文件对象实现LuceneDaoFileImpl:
package com.hsdl.lucene;import java.io.File;import java.io.IOException;import java.util.Map;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.StringField;import org.apache.lucene.document.TextField;import org.apache.lucene.index.IndexWriter;import org.apache.tika.Tika;/** * 文档库访问之文件对象实现 * @author alex * */public class LuceneDaoFileImpl extends LuceneDao{private static String contentFieldName = "content";private static Tika tika = new Tika();protected void indexDoc(IndexWriter iwriter, Stuff stuff) throws Exception {FileStuff fileStuff=(FileStuff)stuff;File file=new File(fileStuff.getFilePath());if(file.isDirectory()){indexDocByFileDir(iwriter,new File(fileStuff.getFilePath()),stuff.getAttacheData());}else{super.indexDoc(iwriter,stuff);}}/** * 根据指定存放内容的目录创建索引 * * @param iwriter * @param file * @throws IOException */private void indexDocByFileDir(IndexWriter iwriter, File file,Map<String,String> attachData) throws IOException {if (file.canRead()){if (file.isDirectory()) {String[] files = file.list();if (files != null)for (int i = 0; i < files.length; i++)indexDocByFileDir(iwriter, new File(file, files[i]),attachData);} else {Document doc = getDocument(file,attachData);iwriter.addDocument(doc);}}}protected Document getDocument(File file,Map<String,String> attachData) throws IOException {Document doc = new Document();addAttacheData(doc,attachData );// 此处添加文件内容时,需要根据tika获取Reader对象doc.add(new TextField(contentFieldName, tika.parse(file)));doc.add(new StringField("fileName", file.getName(),Field.Store.YES));doc.add(new StringField("path", file.getAbsolutePath(),Field.Store.YES));return doc;}public void displaySearchResult(Document[] docs) {super.displaySearchResult(docs);for (int i = 0; i < docs.length; i++) {System.out.println("内容:" + docs[i].toString());System.out.println(docs[i].get("fileName") + "["+ docs[i].get("path") + "]");}}@Overrideprotected Document getDocument(Stuff stuff) throws IOException 
{FileStuff fileStuff=(FileStuff)stuff;File file=new File(fileStuff.getFilePath());Document doc = new Document();addAttacheData(doc,stuff.getAttacheData() );// 此处添加文件内容时,需要根据tika获取Reader对象doc.add(new TextField(contentFieldName, tika.parse(file)));doc.add(new StringField("fileName", file.getName(),Field.Store.YES));doc.add(new StringField("path", file.getAbsolutePath(),Field.Store.YES));return doc;}}
?
?
文档库访问之知识问答实现LuceneDaoAskAnswerImpl:
package com.hsdl.lucene;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.StringField;import org.apache.lucene.document.TextField;/** * 文档库访问之知识问答实现 * @author alex * */public class LuceneDaoAskAnswerImpl extends LuceneDao{@Overrideprotected Document getDocument(Stuff stuff) throws Exception {AskAnswerStuff fileStuff=(AskAnswerStuff)stuff;Document doc = new Document();addAttacheData(doc,stuff.getAttacheData() );// 此处添加文件内容时,需要根据tika获取Reader对象doc.add(new TextField("ask",fileStuff.getAsk(),Field.Store.YES));doc.add(new StringField("answer", fileStuff.getAnswer(),Field.Store.YES));return doc;}public void displaySearchResult(Document[] docs) {super.displaySearchResult(docs);for (int i = 0; i < docs.length; i++) {System.out.println("内容:" + docs[i].toString());System.out.println(docs[i].get("ask") + ":["+ docs[i].get("answer") + "]");}}}
下面我们来编写两个测试类,分别测试文件库以及知识问答库的访问:
LuceneDaoFileTest
package com.hsdl.lucene;import java.util.HashMap;import java.util.Map;import org.apache.lucene.document.Document;/** * 测试文件索引与搜索 * @author alex * */public class LuceneDaoFileTest {public static void main(String[] args) {LuceneDao luceneDao=new LuceneDaoFileImpl();luceneDao.setIndexPath("D:/work/lucene/filetest/index");FileStuff fileStuff=new FileStuff();fileStuff.setFilePath("D:/work/lucene/filetest/doc/test.txt");Map<String,String> attacheData=new HashMap<String,String>();attacheData.put("ID", "001");fileStuff.setAttacheData(attacheData);fileStuff.setContentFieldName("content");try {Document[] docs;//添加测试System.err.println("------------开始添加测试------------");luceneDao.add(fileStuff);docs=luceneDao.search(fileStuff.getContentFieldName(),"微信收费",10);luceneDao.displaySearchResult(docs);docs=luceneDao.search(fileStuff.getContentFieldName(),"网站收费",10);luceneDao.displaySearchResult(docs);//删除测试System.err.println("------------开始删除测试------------");luceneDao.delete("ID", "001");docs=luceneDao.search(fileStuff.getContentFieldName(),"微信收费",10);luceneDao.displaySearchResult(docs);//更新测试fileStuff.setFilePath("D:/work/lucene/filetest/doc/test.xls");luceneDao.update("ID", "001",fileStuff);System.err.println("------------开始更新测试------------");docs=luceneDao.search(fileStuff.getContentFieldName(),"微信收费",10);luceneDao.displaySearchResult(docs);docs=luceneDao.search(fileStuff.getContentFieldName(),"网站费用",10);luceneDao.displaySearchResult(docs);} catch (Exception e) {e.printStackTrace();}}}
?
?
LuceneDaoAskAnswerTest
package com.hsdl.lucene;import java.util.HashMap;import java.util.Map;import org.apache.lucene.document.Document;/** * 测试问答索引与搜索 * @author alex * */public class LuceneDaoAskAnswerTest {public static void main(String[] args){//测试问答知识的索引与搜索LuceneDao luceneDao=new LuceneDaoAskAnswerImpl();luceneDao.setIndexPath("D:/work/lucene/askanswer/index");AskAnswerStuff askAnswerStuff=new AskAnswerStuff();askAnswerStuff.setAsk("微信营销怎么收费?");askAnswerStuff.setAnswer("3000元每年,10年25000");Map<String,String> attacheData=new HashMap<String,String>();attacheData.put("ID", "001");askAnswerStuff.setAttacheData(attacheData);try {Document[] docs;//添加测试System.err.println("------------开始添加测试------------");luceneDao.add(askAnswerStuff);docs=luceneDao.search("ask","微信收费",10);luceneDao.displaySearchResult(docs);docs=luceneDao.search("ask","网站收费",10);luceneDao.displaySearchResult(docs);//删除测试System.err.println("------------开始删除测试------------");luceneDao.delete("ID", "001");docs=luceneDao.search("ask","微信收费",10);luceneDao.displaySearchResult(docs);//更新测试askAnswerStuff.setAsk("网站建设怎么收费?");askAnswerStuff.setAnswer("普通企业网站6000,商城网站10000,其他网站价格面议!");luceneDao.update("ID", "001",askAnswerStuff);System.err.println("------------开始更新测试------------");docs=luceneDao.search("ask","微信收费",10);luceneDao.displaySearchResult(docs);docs=luceneDao.search("ask","网站收费",10);luceneDao.displaySearchResult(docs);} catch (Exception e) {e.printStackTrace();}}}
?