Lucene代码示范：使用SpanQuery找到keyword在文档中第一次出现的地方

2014-07-02

Lucene代码示例：使用SpanQuery找到keyword在文档中第一次出现的地方。无干货，仅供复制。位置信息类：package player.kent.chen.temp.lucene.span; ……

Lucene代码示例：使用SpanQuery找到keyword在文档中第一次出现的地方
无干货，仅供复制

位置信息类

package player.kent.chen.temp.lucene.span;import org.apache.commons.lang.builder.ToStringBuilder;public class KeywordLocation {    private String file;    /**     * position in the token stream     */    private int    position;    private KeywordLocation() {    }    public static final KeywordLocation createInstance(String file, int position) {        KeywordLocation instance = new KeywordLocation();        instance.file = file;        instance.position = position;        return instance;    }    public String getFile() {        return file;    }    public void setFile(String file) {        this.file = file;    }    public int getPosition() {        return position;    }    public void setPosition(int position) {        this.position = position;    }    @Override    public String toString() {        return ToStringBuilder.reflectionToString(this, ToStringStyle.SHORT_PREFIX_STYLE);    }}

搜索器

package player.kent.chen.temp.lucene.span;

import java.io.File;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.Locale;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Demo: uses a {@link SpanTermQuery} to locate the first occurrence of a
 * keyword in an existing Lucene index and prints where it was found.
 */
public class FindFirstOccurenceSearcher {

    /**
     * Opens the index under the hard-coded root directory, looks up the
     * keyword "Brotherhood" and prints {@code "keyword":location} to stdout
     * (the location prints as {@code null} when there is no match).
     *
     * @param args unused
     * @throws Exception if the index cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        String rootDir = "/home/kent/diskD/home-kent-dev/workspace/kent-temp/data/lucene-sanguo";
        File id = new File(rootDir, "index");
        Directory indexDir = FSDirectory.open(id);
        try {
            IndexSearcher searcher = new IndexSearcher(indexDir);
            try {
                String keyword = "Brotherhood";
                KeywordLocation kl = findFirstOccurence(searcher, keyword);
                System.out.println(MessageFormat.format("\"{0}\":{1}", keyword, kl));
            } finally {
                // Release the searcher even when the lookup fails.
                searcher.close();
            }
        } finally {
            indexDir.close();
        }
    }

    /**
     * Finds the first span of {@code keyword} in the "contents" field.
     *
     * <p>Spans are enumerated by increasing document number and, within a
     * document, by increasing start position, so the first span is the
     * earliest occurrence in the lowest-numbered matching document.
     *
     * @param searcher searcher over the index to inspect
     * @param keyword  the word to look for; it is lower-cased before lookup
     *                 (assumes the indexed terms were lower-cased at index time)
     * @return the stored "filepath" of the matching document together with
     *         the token position of the hit, or {@code null} if the keyword
     *         does not occur
     * @throws IOException if the index cannot be read
     */
    private static KeywordLocation findFirstOccurence(IndexSearcher searcher, String keyword)
            throws IOException, CorruptIndexException {
        // Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
        Term term = new Term("contents", keyword.toLowerCase(Locale.ROOT));
        SpanTermQuery spanTermQuery = new SpanTermQuery(term);
        IndexReader indexReader = searcher.getIndexReader();
        Spans spans = spanTermQuery.getSpans(indexReader);

        // next() both advances to the first span and reports whether one exists,
        // so no separate search is needed to detect "no hits".
        if (!spans.next()) {
            return null;
        }

        Document doc = indexReader.document(spans.doc());
        return KeywordLocation.createInstance(doc.get("filepath"), spans.start());
    }
}

另附索引器

package player.kent.chen.temp.lucene.span;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Demo indexer: rebuilds a Lucene index from the files found directly in a
 * content directory.
 */
public class LearnSpanLuceneIndexer {

    /**
     * Deletes and recreates the "index" directory under the hard-coded root,
     * indexes the files in its "content" directory and prints the elapsed
     * time in milliseconds.
     *
     * @param args unused
     * @throws Exception if the index directory cannot be reset or indexing fails
     */
    public static void main(String[] args) throws Exception {
        String rootDir = "/home/kent/diskD/home-kent-dev/workspace/kent-temp/data/lucene-sanguo";
        File contentDir = new File(rootDir, "content");
        File indexDir = new File(rootDir, "index");

        FileUtils.deleteDirectory(indexDir);
        indexDir.mkdirs();

        long begin = now();
        doIndex(contentDir, indexDir);
        System.out.println("Done in miliseconds of : " + (now() - begin));
    }

    /**
     * Indexes every regular file directly inside {@code cd} into a new index
     * at {@code id}. Each document gets a "contents" field read from the file
     * and a stored "filepath" field holding the file's absolute path.
     *
     * @param cd directory whose files are indexed (sub-directories are skipped)
     * @param id directory in which the index is created
     * @throws IOException if {@code cd} cannot be listed, a file cannot be
     *                     read, or the index cannot be written
     */
    private static void doIndex(File cd, File id) throws IOException {
        // listFiles() returns null when cd is not a directory or cannot be read.
        File[] files = cd.listFiles();
        if (files == null) {
            throw new IOException("Cannot list content directory: " + cd.getAbsolutePath());
        }

        Directory indexDir = FSDirectory.open(id);
        try {
            IndexWriter writer = new IndexWriter(indexDir, new StandardAnalyzer(Version.LUCENE_30),
                    true, IndexWriter.MaxFieldLength.UNLIMITED);
            try {
                for (File file : files) {
                    if (!file.isFile()) {
                        // FileReader cannot open a directory.
                        continue;
                    }
                    System.out.println("Indexing ... " + file.getAbsolutePath());
                    addFile(writer, file);
                }
            } finally {
                // Always close the writer so the index write lock is released.
                writer.close();
            }
        } finally {
            indexDir.close();
        }
    }

    /** Adds one file to the index and closes its reader even if indexing fails. */
    private static void addFile(IndexWriter writer, File file) throws IOException {
        // NOTE(review): FileReader decodes with the platform default charset —
        // confirm that matches the encoding of the content files.
        Reader contents = new FileReader(file);
        try {
            Document doc = new Document();
            doc.add(new Field("contents", contents));
            doc.add(new Field("filepath", file.getAbsolutePath(), Field.Store.YES,
                    Field.Index.ANALYZED));
            writer.addDocument(doc);
        } finally {
            contents.close();
        }
    }

    /** @return the current wall-clock time in milliseconds */
    private static long now() {
        return System.currentTimeMillis();
    }
}

热点排行

编程

Lucene代码示范：使用SpanQuery找到keyword在文档中第一次出现的地方