Adding Lucene test
parent 2ca702e0f3
commit 2759dc7cf3
1 changed file with 187 additions and 0 deletions
src/test/java/org/forkalsrud/album/index/LuceneTest.java (new file, +187)
@@ -0,0 +1,187 @@
/**
 *
 */
package org.forkalsrud.album.index;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;

/**
 * @author knut
 *
 */
public class LuceneTest {

    Version version = Version.LUCENE_29;

    @Test
    public void testIndexCreation() throws Exception {

        File index = new File("/Users/knut/Desktop/albumidx");
        recursiveDelete(index);
        index.mkdirs();

        Directory dir = FSDirectory.open(index);

        IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(
                Version.LUCENE_CURRENT), true,
                IndexWriter.MaxFieldLength.LIMITED);

        // System.out.println("Indexing to directory '" + index + "'...");
        indexDocs(writer, new File("photos"));
        System.out.println("Optimizing...");
        writer.optimize();
        writer.close();

        // only searching, so read-only=true
        IndexReader reader = IndexReader.open(dir, true);

        Searcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer(version);

        String field = "path";
        QueryParser parser = new QueryParser(version, field, analyzer);
        Query query = parser.parse("geiranger");
        System.out.println("Searching for: " + query.toString(field));

        Collector streamingHitCollector = new Collector() {

            private Scorer scorer;
            private int docBase;
            private IndexReader reader;

            // simply print docId and score of every matching document
            @Override
            public void collect(int docNo) throws IOException {

                int docId = docBase + docNo;
                Document doc = reader.document(docId);

                System.out.println("docId=" + docId + " score="
                        + scorer.score() + " path=" + doc.get("path"));
            }

            @Override
            public boolean acceptsDocsOutOfOrder() {
                return true;
            }

            @Override
            public void setNextReader(IndexReader reader, int docBase)
                    throws IOException {
                this.docBase = docBase;
                this.reader = reader;
            }

            @Override
            public void setScorer(Scorer scorer) throws IOException {
                this.scorer = scorer;
            }
        };

        searcher.search(query, streamingHitCollector);

        reader.close();
        dir.close();
        // recursiveDelete(index);
    }

    void indexDocs(IndexWriter writer, File file) throws IOException {
        // do not try to index files that cannot be read
        if (file.canRead()) {
            if (file.isDirectory()) {
                String[] files = file.list();
                // an IO error could occur
                if (files != null) {
                    for (int i = 0; i < files.length; i++) {
                        indexDocs(writer, new File(file, files[i]));
                    }
                }
            } else {
                System.out.println("adding " + file);
                writer.addDocument(FileDocument.Document(file));
            }
        }
    }

    static class FileDocument {

        /**
         * Makes a document for a File.
         * <p>
         * The document has three fields:
         * <ul>
         * <li><code>path</code>--containing the pathname of the file, as a
         * stored, untokenized field;
         * <li><code>modified</code>--containing the last modified date of the
         * file as a field as created by <a
         * href="lucene.document.DateTools.html">DateTools</a>; and
         * <li><code>contents</code>--containing the full contents of the file,
         * as a Reader field;
         */
        public static Document Document(File f) {

            // make a new, empty document
            Document doc = new Document();

            // Add the path of the file as a field named "path". Use a
            // field that is indexed (i.e. searchable), but don't
            // tokenize the field into words.
            doc.add(new Field("path", f.getPath()/*.replaceAll(File.separator, " ")*/, Field.Store.YES, Field.Index.ANALYZED));

            // Add the last modified date of the file a field named
            // "modified". Use a field that is indexed
            // (i.e. searchable), but don't tokenize the field into
            // words.
            doc.add(new Field("modified", DateTools.timeToString(f
                    .lastModified(), DateTools.Resolution.MINUTE),
                    Field.Store.YES, Field.Index.NOT_ANALYZED));

            // Add the contents of the file to a field named
            // "contents". Specify a Reader, so that the text of the
            // file is tokenized and indexed, but not stored. Note
            // that FileReader expects the file to be in the system's
            // default encoding. If that's not the case searching for
            // special characters will fail.

            // doc.add(new Field("contents", new FileReader(f)));

            // return the document
            return doc;
        }

        private FileDocument() {
            // not to be instantiated
        }
    }

    void recursiveDelete(File f) {
        if (f.isDirectory()) {
            for (File e : f.listFiles()) {
                recursiveDelete(e);
            }
        }
        f.delete();
    }
}
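The committed test writes its index to a hard-coded path on the author's desktop and streams hits through a custom Collector. For comparison, here is a minimal sketch of the same index-and-search flow built against the in-memory RAMDirectory the test already imports, using the TopDocs API from the same Lucene 2.9 line instead of a streaming Collector. The class name RamIndexSketch and the "albums/geiranger/1.jpg" document are made-up placeholders for illustration, not part of the commit.

package org.forkalsrud.album.index;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class RamIndexSketch {

    public static void main(String[] args) throws Exception {
        Version version = Version.LUCENE_29;

        // Build the index in memory instead of on disk.
        Directory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(version),
                true, IndexWriter.MaxFieldLength.LIMITED);

        // A placeholder document standing in for one indexed photo path.
        Document doc = new Document();
        doc.add(new Field("path", "albums/geiranger/1.jpg",
                Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
        writer.close();

        // Search with the TopDocs API rather than a streaming Collector.
        IndexSearcher searcher = new IndexSearcher(dir, true);
        QueryParser parser = new QueryParser(version, "path",
                new StandardAnalyzer(version));
        Query query = parser.parse("geiranger");

        TopDocs hits = searcher.search(query, 10);
        for (ScoreDoc sd : hits.scoreDocs) {
            Document hit = searcher.doc(sd.doc);
            System.out.println("docId=" + sd.doc + " score=" + sd.score
                    + " path=" + hit.get("path"));
        }

        searcher.close();
        dir.close();
    }
}

Assuming the surrounding project uses the standard Maven layout implied by src/test/java, the committed JUnit test itself would run with something like mvn -Dtest=LuceneTest test.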