Handling Large Data Volumes with Lucene
One machine runs 60 applications, split across 4 WebLogic domains, that expose a webservice query; lookups are done by ENTITYID. The total data volume is around 1 billion documents, and the index files add up to several hundred GB. Queries fail with an out-of-memory error even though I have already raised each domain's heap to 2048 MB.
The concurrent query load is around 20.
Looking for good ideas on how to solve this. The stack trace:
Caused by: java.lang.OutOfMemoryError: Java heap space
at org.apache.lucene.index.SegmentNorms.bytes(SegmentNorms.java:156)
at org.apache.lucene.index.SegmentReader.norms(SegmentReader.java:575)
at org.apache.lucene.search.PhraseQuery$PhraseWeight.scorer(PhraseQuery.java:251)
at org.apache.lucene.search.BooleanQuery$BooleanWeight.scorer(BooleanQuery.java:298)
at org.apache.lucene.search.IndexSearcher.search(IndexSearcher.java:577)
at org.apache.lucene.search.IndexSearcher.search(IndexSearcher.java:383)
at com.xinyi.tech.se.server.lucene.service.ISearchIKServiceImpl.countSearch(ISearchIKServiceImpl.java:100)
at sun.reflect.GeneratedMethodAccessor32.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.codehaus.xfire.service.invoker.AbstractInvoker.invoke(AbstractInvoker.java:59)
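The trace is the key clue: SegmentNorms.bytes() is Lucene 3.x loading a norms array, which costs one byte per document for every indexed field that has norms enabled, and the PhraseWeight frame shows a phrase query pulling the norms for its field. With roughly 1 billion documents, a single normed field needs about 1 GB of heap per open reader, so one query can nearly exhaust a 2048 MB heap on its own, and every freshly opened reader pays that cost again. If these fields do not need index-time length normalization for scoring, one common mitigation is to omit norms when indexing. A minimal sketch against the Lucene 3.x Field API (writer, nameValue, and the field names here are placeholders, not from the original code):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

// Sketch: index fields without norms so SegmentNorms never has to
// allocate the per-document byte[] for them at search time.
Document doc = new Document();
// Tokenized for full-text search, but no norms byte per document
doc.add(new Field("NAME", nameValue, Field.Store.NO,
        Field.Index.ANALYZED_NO_NORMS));
// ID-like fields: neither analyzed nor normed
doc.add(new Field("ENTITYID", entityId, Field.Store.YES,
        Field.Index.NOT_ANALYZED_NO_NORMS));
writer.addDocument(doc);

Note that existing segments keep their norms until the data is reindexed, so this requires rebuilding the index. For reference, the countSearch code from the trace: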
this.entityId = entityId;
// Resolve the searchable field names for this entity
String[] fields = this.getFields(entityId);
EntityBean entity = this.context.entityContext.getEntityBean(entityId);
// Map the query mode to per-field Occur flags
Occur[] occur = this.getOccur(queryMode, fields.length);
IndexBean indexBean = this.context.indexContext
        .getIndexBeanByEntityId(entityId);
// Root directory of the index on this server
String rootDir = this.context.serverContext.getServerBean(
        indexBean.getServerId()).getDirRoot();
// Entity name and its Chinese display name
String entityName = entity.getEntityName();
String entityNameCN = entity.getEntityNameCN();
// Result format: entity id + entity name + hit count
String result = entityId + ":" + entityNameCN + ":";
IndexSearcher searcher = null;
// Multi-directory reader over all index directories for this entity
MultiReader multiReader = this.getMultiReader(rootDir, entityName);
if (multiReader == null) {
    return result + "0";
}
TotalHitCountCollector collector = new TotalHitCountCollector();
try {
    searcher = new IndexSearcher(multiReader);
    // Use IK's similarity implementation for scoring
    searcher.setSimilarity(new IKSimilarity());
    // Tokenize the query string with IKAnalyzer
    List<String> termStrLst = new ArrayList<String>();
    TokenStream ts = new IKAnalyzer().tokenStream("FIELD_NULL",
            new StringReader(queryValue));
    CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class);
    try {
        ts.reset(); // honor the TokenStream consumer contract
        while (ts.incrementToken()) {
            termStrLst.add(termAttr.toString());
        }
        ts.end();
        ts.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
    Query query = null;
    // Query strings longer than 3 characters go through IK's own multi-field parser
    if (queryValue.length() > 3) {
        query = IKQueryParser.parseMultiField(fields, queryValue, occur);
    } else {
        // Phrase-style query, typically used when searching person names
        query = toParserQuery(termStrLst, fields, queryValue, occur);
    }
    searcher.search(query, collector);
    closeStream(multiReader);
    closeStream(searcher);
    multiReader = null;
    searcher = null;
    return result + collector.getTotalHits();
} catch (Exception e) {
    e.printStackTrace();
    return result + "0";
} finally {
    if (multiReader != null)
        closeStream(multiReader);
    if (searcher != null)
        closeStream(searcher);
}
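A second issue is visible in the code itself: every call opens a fresh MultiReader and IndexSearcher and closes them again, so under ~20 concurrent queries the norms arrays and other per-reader state can be resident in the heap many times over. The usual Lucene pattern is to open one searcher per index and share it across requests, since IndexSearcher is thread-safe for concurrent searches. A minimal sketch, assuming one long-lived searcher per entity and deliberately ignoring reopening after index updates (SearcherCache and openMultiReader are hypothetical names; openMultiReader stands in for the getMultiReader helper above):

import java.io.IOException;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

import org.apache.lucene.index.MultiReader;
import org.apache.lucene.search.IndexSearcher;

public abstract class SearcherCache {

    private final ConcurrentMap<String, IndexSearcher> searchers =
            new ConcurrentHashMap<String, IndexSearcher>();

    // Stand-in for getMultiReader(rootDir, entityName) in the code above
    protected abstract MultiReader openMultiReader(String rootDir,
            String entityName) throws IOException;

    public IndexSearcher acquire(String rootDir, String entityName)
            throws IOException {
        IndexSearcher cached = searchers.get(entityName);
        if (cached == null) {
            // Open the reader once per entity, not once per request, so
            // norms and other per-reader state are loaded only once.
            IndexSearcher fresh =
                    new IndexSearcher(openMultiReader(rootDir, entityName));
            IndexSearcher prev = searchers.putIfAbsent(entityName, fresh);
            if (prev == null) {
                cached = fresh;
            } else {
                // Lost a race with another thread: discard the duplicate.
                // Closing the searcher alone would not close a reader it
                // was constructed with, so close the reader explicitly.
                fresh.getIndexReader().close();
                cached = prev;
            }
        }
        return cached;
    }
}

With a shared searcher, countSearch would stop calling closeStream on the reader and searcher per request, and combined with omitted norms the per-domain heap cost stays roughly constant regardless of query concurrency.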