创建索引(Lucene + paoding)
?
一、先去网上Down下来 paoding-analysis-2.0.4-beta,解压,在lib目录找到lucene-core-2.2.0.jar,lucene-highlighter-2.2.0.jar,commons-logging.jar 这几个Jar包放到项目当中。
二、添加Paoding。 在项目中使用发现,直接添加paoding-analysis.jar会发生异常。所以我们不直接添加这个Jar,而是把Src目录下的所有文件(也就是源代码) copy 到我们项目的Src目录下;
三、把dic(词典)目录也copy 到我们项目的src下,并在Src目录新建一个包data.index,主要是用来存放生成的索引文件;
四、修改 paoding-dic-home.properties 这个属性文件
#values are "system-env" or "this";
#if value is "this" , using the paoding.dic.home as dicHome if configed!
#paoding.dic.home.config-fisrt=system-env

#dictionary home (directory)
#"classpath:xxx" means dictionary home is in classpath.
#e.g "classpath:dic" means dictionaries are in "classes/dic" directory or any other classpath directory
#paoding.dic.home=dic

#seconds for dic modification detection
#paoding.dic.detector.interval=60

# dic 所在目录
paoding.dic.home=classpath:dic
# 应用当前的配置
paoding.dic.home.config-fisrt=this
五、在 package net.paoding.analysis.knife 下找到PaodingMaker类, Ctrl + S + O 定位到
private static void setDicHomeProperties(Properties p) 这个方法,修改private static void setDicHomeProperties(Properties p) { String dicHomeAbsultePath = p .getProperty("paoding.dic.home.absolute.path"); if (dicHomeAbsultePath != null) { return; } // 获取词典安装目录配置: // 如配置了PAODING_DIC_HOME环境变量,则将其作为字典的安装主目录 // 否则使用属性文件的paoding.dic.home配置 // 但是如果属性文件中强制配置paoding.dic.home.config-first=this, // 则优先考虑属性文件的paoding.dic.home配置, // 此时只有当属性文件没有配置paoding.dic.home时才会采用环境变量的配置 String dicHomeBySystemEnv = null; try { dicHomeBySystemEnv = getSystemEnv(Constants.ENV_PAODING_DIC_HOME); } catch (Error e) { log.warn("System.getenv() is not supported in JDK1.4. "); } String dicHome = getProperty(p, Constants.DIC_HOME); if (dicHomeBySystemEnv != null) { String first = getProperty(p, Constants.DIC_HOME_CONFIG_FIRST); if (first != null && first.equalsIgnoreCase("this")) { if (dicHome == null) { dicHome = dicHomeBySystemEnv; } } else { dicHome = dicHomeBySystemEnv; } } // 如果环境变量和属性文件都没有配置词典安转目录 // 则尝试在当前目录和类路径下寻找是否有dic目录, // 若有,则采纳他为paoding.dic.home // 如果尝试后均失败,则抛出PaodingAnalysisException异常 if (dicHome == null) { File f = new File("dic"); if (f.exists()) { dicHome = "dic/"; } else { URL url = PaodingMaker.class.getClassLoader() .getResource("dic"); if (url != null) { dicHome = "classpath:dic/"; } } } if (dicHome == null) { throw new PaodingAnalysisException( "please set a system env PAODING_DIC_HOME or Config paoding.dic.home in paoding-dic-home.properties point to the dictionaries!"); } // 规范化dicHome,并设置到属性文件对象中 dicHome = dicHome.replace('\\', '/'); if (!dicHome.endsWith("/")) { dicHome = dicHome + "/"; } p.setProperty(Constants.DIC_HOME, dicHome);// writer to the properites //修改部分 String path=""; try { path = URLDecoder.decode(getFile(dicHome).getPath(), "utf-8"); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } // object // 将dicHome转化为一个系统唯一的绝对路径,记录在属性对象中 File dicHomeFile = getFile(path);//修改部分结束 if (!dicHomeFile.exists()) { throw new PaodingAnalysisException( 
"not found the dic home dirctory! " + dicHomeFile.getAbsolutePath()); } if (!dicHomeFile.isDirectory()) { throw new PaodingAnalysisException( "dic home should not be a file, but a directory!"); } p.setProperty("paoding.dic.home.absolute.path", dicHomeFile .getAbsolutePath());}?
六、创建一个Servlet:AnalyzerServlet
?
package com.lunceneTest.servlet;

import java.io.IOException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.List;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import net.paoding.analysis.analyzer.PaodingAnalyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.FSDirectory;

import com.lunceneTest.Model.News;
import com.lunceneTest.db.NewsDao;

/**
 * Servlet that rebuilds the Lucene index for all news records when it receives
 * a GET request. The index is written to the {@code /data/index} directory
 * located on the classpath, using the Paoding Chinese analyzer.
 */
public class AnalyzerServlet extends HttpServlet {

    private static final long serialVersionUID = -822638045647816348L;

    /** POST requests are accepted but do nothing. */
    public void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
    }

    /**
     * Rebuilds the index.
     *
     * @throws ServletException if the index cannot be built; the original failure
     *         is attached as the cause instead of being printed and ignored
     */
    public void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        try {
            this.createIndex();
        } catch (Exception e) {
            throw new ServletException("Failed to build the Lucene index", e);
        }
    }

    /**
     * Recreates the index from the news list returned by {@code NewsDao.getAllNews()}.
     * Each news item becomes one document with an untokenized id field and
     * tokenized title and content fields; all three values are stored.
     *
     * @throws Exception if the index directory is missing or indexing fails
     */
    public void createIndex() throws Exception {
        // Use the Chinese word-segmenting analyzer.
        Analyzer analyzer = new PaodingAnalyzer();

        URL indexUrl = AnalyzerServlet.class.getResource("/data/index");
        if (indexUrl == null) {
            throw new IllegalStateException(
                    "Index directory /data/index was not found on the classpath");
        }
        // URL.getPath() keeps the leading '/' of an absolute path. Stripping the
        // literal "file:/" prefix turned Unix paths into relative ones.
        String path = URLDecoder.decode(indexUrl.getPath(), "UTF-8");
        System.out.println(path);

        FSDirectory directory = FSDirectory.getDirectory(path);
        // true = overwrite any existing index; false would append to it instead.
        IndexWriter writer = new IndexWriter(directory, analyzer, true);
        try {
            // DAO-layer call that returns the news list from the database.
            List<News> list = NewsDao.getAllNews();
            for (News news : list) {
                Document doc = new Document();
                // Field.Index.UN_TOKENIZED: index the value without tokenizing it.
                Field idField = new Field(News.ID, String.valueOf(news.getId()),
                        Field.Store.YES, Field.Index.UN_TOKENIZED);
                Field titleField = new Field(News.TITLE, news.getTitle(),
                        Field.Store.YES, Field.Index.TOKENIZED);
                Field contentField = new Field(News.CONTENT, news.getContent(),
                        Field.Store.YES, Field.Index.TOKENIZED);
                doc.add(idField);
                doc.add(titleField);
                doc.add(contentField);
                writer.addDocument(doc);
            }
            writer.optimize();
        } finally {
            // Always close the writer, even when indexing fails part-way.
            writer.close();
        }
    }
}
到这里就基本完成了,运行后会发现data.index这个目录下多了很多文件
http://apps.hi.baidu.com/share/detail/24048347