// Lucene分词器(搜索关键字解析器)
import java.io.StringReader;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;
/**
 * Small demo that feeds the same sample texts through several built-in Lucene
 * analyzers and prints the terms each analyzer emits, so their tokenization
 * can be compared side by side.
 */
public class luncens {

    /**
     * Tokenizes {@code text} with {@code analyzer} and prints every term on its
     * own line, preceded by a header line naming the analyzer class.
     *
     * <p>The token stream is consumed following the TokenStream consumer workflow
     * (reset, incrementToken until exhausted, end, close) and is always closed,
     * even if reading fails part-way through.
     *
     * @param analyzer analyzer used to create the token stream
     * @param text     text to tokenize
     * @throws Exception if the token stream fails while being read or closed
     */
    public static void analyze(Analyzer analyzer, String text) throws Exception {
        System.out.println("分词器:" + analyzer.getClass());
        TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(text));
        try {
            // addAttribute returns the attribute instance, which the stream updates in
            // place on each incrementToken(); fetch it once instead of once per token.
            TermAttribute termAttribute = tokenStream.addAttribute(TermAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                System.out.println(termAttribute.term());
            }
            tokenStream.end();
        } finally {
            tokenStream.close();
        }
    }

    /**
     * Runs an English and a Chinese/mixed sample through each analyzer in turn,
     * then prints the current hour of day as two digits.
     *
     * @param args unused
     * @throws Exception if any analyzer fails while tokenizing
     */
    public static void main(String[] args) throws Exception {
        String enText = "China is a great country!JC-46C(16tt)dd";
        // Alternative Chinese sample kept from the original: "昨天,一阵雷阵雨后,空气异常 地清新!46C"
        String chText = "SICAO新潮半导体电子酒柜JC-46C(16支)";

        // Same order as the original demo. Original author's note on StandardAnalyzer:
        // it splits the Chinese text character by character.
        Analyzer[] analyzers = {
            new SimpleAnalyzer(),
            new StopAnalyzer(Version.LUCENE_CURRENT),
            new StandardAnalyzer(Version.LUCENE_CURRENT),
            new WhitespaceAnalyzer(),
            new KeywordAnalyzer()
        };
        for (Analyzer analyzer : analyzers) {
            try {
                analyze(analyzer, enText);
                analyze(analyzer, chText);
            } finally {
                analyzer.close();
            }
        }

        // Current hour of day (00-23); formatted directly rather than sliced out of
        // a full "yyyy-MM-dd HH:mm:ss" timestamp by character position.
        String endtime = new SimpleDateFormat("HH").format(new Date());
        System.out.println("时间=" + endtime);
    }
}