百度在附近谷歌做技术 mapreduce (LuceneUtil.directory

2013-03-13

百度在附近谷歌做技术 mapreduce (LuceneUtil.directory) package com.tfy.lucene; import java.io.File; impor…

百度在附近谷歌做技术 mapreduce (LuceneUtil.directory
package com.tfy.lucene;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import javax.print.Doc;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;

import com.tfy.lucene.util.DocumentUtil;
import com.tfy.lucene.util.LuceneUtil;

/**
 * JUnit walkthrough of the basic index operations for {@code Article} objects:
 * <ol>
 *   <li>put an object into the index,</li>
 *   <li>retrieve an object from the index by keyword,</li>
 *   <li>update and delete indexed objects.</li>
 * </ol>
 * All tests operate on the shared {@code LuceneUtil.directory} using
 * {@code LuceneUtil.analyzer}; conversion between {@code Article} and
 * {@code Document} is delegated to {@code DocumentUtil}.
 *
 * @author Think
 */
public class tfyArticleIndex {

    /** Field whose terms are matched by the delete and update tests. */
    private static final String NAME_FIELD = "name";

    /** Keyword used both as the search query and as the delete/update term. */
    private static final String KEYWORD = "lucene";

    /** Fields consulted by the multi-field query in {@link #testSearchIndex()}. */
    private static final String[] SEARCH_FIELDS = {"name", "content"};

    /** Maximum number of hits requested from the searcher. */
    private static final int MAX_HITS = 1;

    /**
     * Opens an {@code IndexWriter} on the shared directory with the shared analyzer.
     * The caller is responsible for closing the returned writer.
     *
     * @return a new writer configured with {@code MaxFieldLength.LIMITED}
     * @throws IOException if the writer cannot be opened
     */
    private static IndexWriter openWriter() throws IOException {
        return new IndexWriter(LuceneUtil.directory, LuceneUtil.analyzer, MaxFieldLength.LIMITED);
    }

    /**
     * Creates one {@code Article}, converts it to a {@code Document} and adds it
     * to the index.
     *
     * @throws Exception if the index cannot be opened or written
     */
    @Test
    public void testCresteIndex() throws Exception {
        Article article = new Article();
        article.setId(1L);
        article.setName("it coud 谭方勇 lucene可以做搜索引擎");
        article.setContent("在黑马程序员训练营 baidu,google都是很好的搜索引擎 ");

        IndexWriter indexWriter = openWriter();
        try {
            // Field layout (stored / analyzed flags) is decided inside DocumentUtil.
            Document document = DocumentUtil.article2Document(article);
            // Append one record to the index.
            indexWriter.addDocument(document);
        } finally {
            // Always release the writer, even when adding the document fails.
            indexWriter.close();
        }
    }

    /**
     * Searches the "name" and "content" fields for the keyword and prints the
     * content and name of every returned article.
     *
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    @Test
    public void testSearchIndex() throws Exception {
        IndexSearcher indexSearcher = new IndexSearcher(LuceneUtil.directory);
        try {
            // The analyzer is applied to the query text before matching.
            QueryParser queryParser =
                    new MultiFieldQueryParser(Version.LUCENE_30, SEARCH_FIELDS, LuceneUtil.analyzer);
            Query query = queryParser.parse(KEYWORD);

            // TopDocs holds at most MAX_HITS of the best-scoring hits.
            TopDocs topDocs = indexSearcher.search(query, MAX_HITS);

            List<Article> articles = new ArrayList<Article>();
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // scoreDoc.doc is the internal document number used to load the document.
                Document document = indexSearcher.doc(scoreDoc.doc);
                articles.add(DocumentUtil.document2Article(document));
            }

            for (Article article : articles) {
                System.out.println(article.getContent());
                System.out.println(article.getName());
            }
        } finally {
            // The original never closed the searcher; release it here.
            indexSearcher.close();
        }
    }

    /**
     * Deletes every document whose "name" field contains the term "lucene".
     * <p>
     * Per the original author's note, deletion does not remove the existing
     * index files; a separate {@code .del} file is written alongside them.
     *
     * @throws Exception if the index cannot be opened or written
     */
    @Test
    public void testDelete() throws Exception {
        IndexWriter indexWriter = openWriter();
        try {
            // A Term names a field and one keyword within that field.
            Term term = new Term(NAME_FIELD, KEYWORD);
            indexWriter.deleteDocuments(term);
        } finally {
            indexWriter.close();
        }
    }

    /**
     * Updates the index by deleting the documents matching the term and then
     * adding the replacement document.
     * <p>
     * The original built the term but only called {@code addDocument}, so the old
     * document was never removed; {@code updateDocument} performs both steps.
     *
     * @throws Exception if the index cannot be opened or written
     */
    @Test
    public void testUpdate() throws Exception {
        Term term = new Term(NAME_FIELD, KEYWORD);

        Article article = new Article();
        article.setId(1L);
        article.setName("fangzelin");
        article.setContent(" 百度在附近谷歌做技术 mapreduce");

        IndexWriter indexWriter = openWriter();
        try {
            Document document = DocumentUtil.article2Document(article);
            // Delete-then-add in one call, keyed by the term.
            indexWriter.updateDocument(term, document);
        } finally {
            indexWriter.close();
        }
    }
}

热点排行

云计算

百度在附近谷歌做技术 mapreduce (LuceneUtil.directory

百度在附近谷歌做技术 mapreduce (LuceneUtil.directory