Lucene分词器（搜寻关键字解析器）

2012-09-25

Lucene分词器（搜索关键字解析器）：import java.io.StringReader; import java.text.SimpleDateFormat; import …

Lucene分词器（搜索关键字解析器）

import java.io.StringReader;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;

/**
 * Demo that feeds the same sample strings through several Lucene analyzers
 * and prints the tokens each analyzer produces, so their tokenization can be
 * compared side by side.
 */
public class luncens {

    /**
     * Tokenizes {@code text} with {@code analyzer} and prints the analyzer's
     * class followed by one line per token term to standard output.
     *
     * @param analyzer the analyzer used to build the token stream
     * @param text     the text to tokenize
     * @throws Exception if the token stream fails while being consumed
     */
    public static void analyze(Analyzer analyzer, String text) throws Exception {
        System.out.println("分词器：" + analyzer.getClass());
        TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(text));
        try {
            // addAttribute returns the shared attribute instance; fetch it once
            // instead of looking it up again for every token.
            TermAttribute termAttribute = tokenStream.addAttribute(TermAttribute.class);
            // Follow the documented consumer workflow:
            // reset -> incrementToken* -> end -> close.
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                System.out.println(termAttribute.term());
            }
            tokenStream.end();
        } finally {
            // Always release the stream (and the reader behind it), even when
            // tokenization fails part-way through.
            tokenStream.close();
        }
    }

    /**
     * Runs an English and a Chinese sample string through each analyzer and
     * finally prints the current hour of the day.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if any analyzer fails while tokenizing
     */
    public static void main(String[] args) throws Exception {
        String enText = "China is a great country!JC-46C(16tt)dd";
        // Alternative Chinese sample:
        // String chText = "昨天，一阵雷阵雨后，空气异常地清新!46C";
        String chText = "SICAO新潮半导体电子酒柜JC-46C（16支）";

        SimpleAnalyzer analyzer1 = new SimpleAnalyzer();
        analyze(analyzer1, enText);
        analyze(analyzer1, chText);

        StopAnalyzer analyzer2 = new StopAnalyzer(Version.LUCENE_CURRENT);
        analyze(analyzer2, enText);
        analyze(analyzer2, chText);

        StandardAnalyzer analyzer3 = new StandardAnalyzer(Version.LUCENE_CURRENT);
        analyze(analyzer3, enText);
        analyze(analyzer3, chText); // Chinese text is split character by character

        WhitespaceAnalyzer analyzer4 = new WhitespaceAnalyzer();
        analyze(analyzer4, enText);
        analyze(analyzer4, chText);

        KeywordAnalyzer analyzer5 = new KeywordAnalyzer();
        analyze(analyzer5, enText);
        analyze(analyzer5, chText);

        // Print the current hour of day (two digits, 24-hour clock). Formatting
        // with "HH" directly yields the same text the full timestamp had at
        // positions 11-13, without the fragile substring arithmetic.
        SimpleDateFormat format = new SimpleDateFormat("HH");
        String endtime = format.format(new Date());
        System.out.println("时间=" + endtime);
    }
}

热点排行

互联网

Lucene分词器（搜寻关键字解析器）