Lucene中关于FSDirectory和RAMDirectory的疑问
为了验证RAMDirectory的性能总是比FSDirectory好,做了一个实验。创建两个索引,一个基于FSDirectory,另一个基于RAMDirectory,每个索引都包含了3000个具有相同内容的文档。结果发现如果先建立FSDirectory的索引,则FSDirectory的速度比较快。先建立RAMDirectory的话,RAMDirectory快。
这些测试结果
1.先建立RAMDirectory
RAMDirectory Time: 94 ms
FSDirectory Time : 531 ms
2.先建立FSDirectory
RAMDirectory Time: 359 ms
FSDirectory Time : 328 ms
哪为牛人能告知我一下原因啊。。十分感谢
这是测试代码,lucene in action中的源码
package lucenedemo.test;
import java.io.File;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.analysis.SimpleAnalyzer;
import junit.framework.TestCase;
import java.io.IOException;
import java.util.Collection;
import java.util.ArrayList;
import java.util.Iterator;
/**
*
*/
public class FSversusRAMDirectoryTest extends TestCase {
private Directory fsDir;
private Directory ramDir;
private Collection docs = loadDocuments(3000, 5);
@Override
protected void setUp() throws Exception {
ramDir = new RAMDirectory();
fsDir = FSDirectory.open(new File("d:\\index"));
}
public void testTiming() throws IOException {
//如果这两个操作调换位置,结果是不同的
long ramTiming = timeIndexWriter(ramDir);
long fsTiming = timeIndexWriter(fsDir);
System.out.println("RAMDirectory Time: " + (ramTiming) + " ms");
System.out.println("FSDirectory Time : " + (fsTiming) + " ms");
assertTrue(fsTiming > ramTiming);
}
private long timeIndexWriter(Directory dir) throws IOException {
long start = System.currentTimeMillis();
addDocuments(dir);
long stop = System.currentTimeMillis();
return (stop - start);
}
private void addDocuments(Directory dir) throws IOException {
IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(),true,IndexWriter.MaxFieldLength.UNLIMITED);
// change to adjust performance of indexing with FSDirectory
writer.setMergeFactor(100);
for (Iterator iter = docs.iterator(); iter.hasNext();) {
Document doc = new Document();
String word = (String) iter.next();
doc.add(new Field("keyword", word,Field.Store.YES,Field.Index.NOT_ANALYZED));
doc.add(new Field("unindexed", word,Field.Store.YES,Field.Index.NO));
doc.add(new Field("unstored", word,Field.Store.NO,Field.Index.ANALYZED));
doc.add(new Field("text", word,Field.Store.YES,Field.Index.ANALYZED));
writer.addDocument(doc);
}
writer.optimize();
writer.close();
}
private Collection loadDocuments(int numDocs, int wordsPerDoc) {
Collection docCollection = new ArrayList(numDocs);
for (int i = 0; i < numDocs; i++) {
StringBuilder doc = new StringBuilder(wordsPerDoc);
for (int j = 0; j < wordsPerDoc; j++) {
doc.append("Bibamus ");
}
docCollection.add(doc.toString());
}
return docCollection;
}
}
private Collection docs = loadDocuments(300000, 5);,这样你会发现将索引见到内存花费的时间总是小于把索引见到磁盘,不管你怎么颠倒代码运行顺序;