Lucene3.0搜索结果排序和高亮及前缀通配符
原文地址: http://www.yuqindieyuan.cn/Detail.html?id=29
本示例是对Lucene查询,对结果进行了一些处理:
1、支持前缀通配符(即后缀匹配)搜索,如*国,可以搜索出包含中国、美国等以国字结尾的词的内容:
// 支持后缀匹配,如*国 则可以搜索中国、美国等以国字结尾的词,*:*可以查询所有索引。
parser.setAllowLeadingWildcard(true);
2、搜索时在有通配符时不自动将查询词转换为小写(即通配符查询区分大小写):
// 有通配符时不转换大小写
parser.setLowercaseExpandedTerms(false);
3、结果进行多字段排序,详细见代码排序部分;
4、结果高亮显示,详细见代码高亮部分。
package cn.test.gxg.engine.query;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
/**
 * Demo that builds a small Lucene index and then queries it, showing
 * leading-wildcard support, multi-field sorting and result highlighting.
 *
 * @createTime: Feb 22, 2010 3:02:28 PM
 * @author: <a href="mailto:leader1212@sina.com.cn">Tianya (天涯)</a>
 * @version: 0.1
 * @lastVersion: 0.1
 * @updateTime:
 * @updateAuthor: <a href="mailto:leader1212@sina.com.cn">Tianya (天涯)</a>
 * @changesSum:
 *
 */
public class QueryTest {

    /** Index directory used when no path is supplied on the command line. */
    private static final String DEFAULT_INDEX_PATH =
            "D:\\workspace\\code\\java\\TestLucene3\\index\\txt\\test";

    /** Names of the stored fields whose values are highlighted before printing. */
    private static final String[] HIGHLIGHT_FIELDS = {"Content", "Name"};

    /**
     * Creates the sample index and then runs the sample search against it.
     *
     * @param args optional; when present, {@code args[0]} is used as the index
     *             directory instead of {@link #DEFAULT_INDEX_PATH}
     */
    public static void main(String[] args) {
        String indexPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INDEX_PATH;
        createIndex(indexPath);
        search(indexPath);
    }

    /**
     * (Re)creates the index at {@code indexPath} and fills it with ten sample
     * documents. Any failure is printed to stderr; nothing is thrown to the caller.
     *
     * @param indexPath file-system directory that holds the index
     */
    public static void createIndex(String indexPath) {
        // The same analyzer must be used at query time, otherwise the query
        // terms may not line up with the indexed terms.
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
        IndexWriter writer = null;
        try {
            // create == true: any existing index in the directory is overwritten.
            writer = new IndexWriter(FSDirectory.open(new File(indexPath)),
                    analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            for (int i = 0; i < 10; i++) {
                writer.addDocument(buildDocument(i));
            }
            writer.close();
            // Mark as closed so the finally block does not close it a second time.
            writer = null;
            System.out.println("Indexed success!");
        } catch (Exception e) {
            // Best-effort demo: report the failure and let the caller continue.
            e.printStackTrace();
        } finally {
            if (writer != null) {
                // Reached only when indexing failed part-way; make sure the
                // writer (and its write lock) is released.
                try {
                    writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Builds the i-th sample document.
     *
     * @param i sequence number used to derive the field values
     * @return a document with Code, Id, Name, Content, Type, Price and Sex fields
     */
    private static Document buildDocument(int i) {
        Document doc = new Document();
        doc.add(new Field("Code", "feinnocdb_App_info" + i, Field.Store.YES,
                Field.Index.ANALYZED));
        doc.add(new NumericField("Id", Store.YES, true).setIntValue(i % 3));
        // The text field itself is added here (the numeric "Id" field used to be
        // added a second time by mistake, leaving "Name" out of the index).
        doc.add(new Field("Name", "国家名字-" + i, Field.Store.YES,
                Field.Index.ANALYZED));
        doc.add(new Field("Content", "中国中华人民共和国—" + i, Field.Store.YES,
                Field.Index.ANALYZED));
        doc.add(new NumericField("Type", Store.YES, true).setIntValue(i % 10));
        doc.add(new NumericField("Price", Store.YES, true).setFloatValue(i % 3));
        doc.add(new NumericField("Sex", Store.YES, true).setIntValue(i % 2));
        return doc;
    }

    /**
     * Runs the sample query against the index at {@code indexPath}, sorts the
     * hits by Id (descending) then Sex (ascending), and prints every stored
     * field of every hit, with the fields in {@link #HIGHLIGHT_FIELDS}
     * highlighted. Failures are printed to stderr; nothing is thrown.
     *
     * @param indexPath file-system directory that holds the index
     */
    public static void search(String indexPath) {
        // Must be the same kind of analyzer that was used to build the index.
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
        QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "Content", analyzer);
        // Parser options have to be set BEFORE parse() to affect the query.
        // Allow a wildcard as the first character of a term (e.g. "*国"),
        // which the parser rejects by default.
        parser.setAllowLeadingWildcard(true);
        // Do not lowercase the terms of wildcard queries.
        parser.setLowercaseExpandedTerms(false);

        Query query;
        // Query used for highlighting. Normally this is the search query itself;
        // a different one is used here purely for demonstration.
        Query highlightQuery;
        try {
            query = parser.parse("Content:国");
            highlightQuery = parser.parse("Content:中");
        } catch (ParseException e) {
            // Without a valid query there is nothing to search for.
            e.printStackTrace();
            return;
        }

        // Multi-field sort: earlier entries take precedence.
        Sort sort = new Sort(new SortField[] {
            new SortField("Id", SortField.INT, true),
            new SortField("Sex", SortField.INT, false)
        });

        IndexReader reader = null;
        Searcher searcher = null;
        try {
            reader = IndexReader.open(FSDirectory.open(new File(indexPath)));
            searcher = new IndexSearcher(reader);
            // For an unsorted search use: searcher.search(query, 100)
            TopDocs topDocs = searcher.search(query, null, 1000, sort);
            System.out.println("查询语句为:" + query.toString());
            System.out.println("查询到数据条数为:" + topDocs.totalHits);
            if (topDocs.totalHits != 0) {
                printHits(searcher, topDocs, analyzer, highlightQuery);
            }
        } catch (IOException e) {
            // Also covers CorruptIndexException and LockObtainFailedException,
            // which are IOException subclasses.
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        } finally {
            if (searcher != null) {
                try {
                    searcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Prints one line per hit, listing every stored field as {@code name:value}.
     * Values of the fields in {@link #HIGHLIGHT_FIELDS} are highlighted first.
     */
    private static void printHits(Searcher searcher, TopDocs topDocs, Analyzer analyzer,
            Query highlightQuery) throws IOException, InvalidTokenOffsetsException {
        // Text wrapped around every highlighted term.
        SimpleHTMLFormatter formatter =
                new SimpleHTMLFormatter(" <FONT COLOR='RED'>", " </FONT>");
        Highlighter highlighter = new Highlighter(formatter, new QueryScorer(highlightQuery));
        // Size of each returned fragment.
        highlighter.setTextFragmenter(new SimpleFragmenter(1000));

        for (ScoreDoc hit : topDocs.scoreDocs) {
            Document document = searcher.doc(hit.doc);
            for (Fieldable stored : document.getFields()) {
                String name = stored.name();
                String value = document.get(name);
                if (value != null && isHighlightField(name)) {
                    value = highlight(highlighter, analyzer, name, value);
                }
                System.out.print(name + ":" + value + " ");
            }
            System.out.println();
        }
    }

    /** Returns true when {@code fieldName} is one of {@link #HIGHLIGHT_FIELDS}. */
    private static boolean isHighlightField(String fieldName) {
        for (String candidate : HIGHLIGHT_FIELDS) {
            if (candidate.equals(fieldName)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns {@code text} with the highlighter's markup applied, or
     * {@code text} unchanged when the highlighter produces no fragment.
     */
    private static String highlight(Highlighter highlighter, Analyzer analyzer,
            String fieldName, String text) throws IOException, InvalidTokenOffsetsException {
        // Tokenize under the field's own name rather than a fixed "Content".
        TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
        String fragment = highlighter.getBestFragment(tokenStream, text);
        // getBestFragment yields null when no term matched; keep the raw value
        // so the output never shows "null".
        return fragment != null ? fragment : text;
    }
}