diff --git a/src/test/java/org/forkalsrud/album/index/LuceneTest.java b/src/test/java/org/forkalsrud/album/index/LuceneTest.java new file mode 100644 index 0000000..5d54d77 --- /dev/null +++ b/src/test/java/org/forkalsrud/album/index/LuceneTest.java @@ -0,0 +1,187 @@ +/** + * + */ +package org.forkalsrud.album.index; + +import java.io.File; +import java.io.IOException; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.DateTools; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Searcher; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.Version; +import org.junit.Test; + +/** + * @author knut + * + */ +public class LuceneTest { + + Version version = Version.LUCENE_29; + + @Test + public void testIndexCreation() throws Exception { + + File index = new File("/Users/knut/Desktop/albumidx"); + recursiveDelete(index); + index.mkdirs(); + + Directory dir = FSDirectory.open(index); + + IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer( + Version.LUCENE_CURRENT), true, + IndexWriter.MaxFieldLength.LIMITED); + + // System.out.println("Indexing to directory '" + index + "'..."); + indexDocs(writer, new File("photos")); + System.out.println("Optimizing..."); + writer.optimize(); + writer.close(); + + IndexReader reader = IndexReader.open(dir, true); + // only searching, so read-only=true + + Searcher searcher = new IndexSearcher(reader); + Analyzer analyzer = new StandardAnalyzer(version); + + String field = "path"; + QueryParser parser = new QueryParser(version, field, analyzer); + Query query = parser.parse("geiranger"); + System.out.println("Searching for: " + query.toString(field)); + + Collector streamingHitCollector = new Collector() { + + private Scorer scorer; + private int docBase; + private IndexReader reader; + + // simply print docId and score of every matching document + @Override + public void collect(int docNo) throws IOException { + + int docId = docBase + docNo; + Document doc = reader.document(docId); + + System.out.println("docId=" + docId + " score=" + + scorer.score() + " path=" + doc.get("path")); + } + + @Override + public boolean acceptsDocsOutOfOrder() { + return true; + } + + @Override + public void setNextReader(IndexReader reader, int docBase) + throws IOException { + this.docBase = docBase; + this.reader = reader; + } + + @Override + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + + }; + + searcher.search(query, streamingHitCollector); + + reader.close(); + + dir.close(); + // recursiveDelete(index); + } + + void indexDocs(IndexWriter writer, File file) throws IOException { + // do not try to index files that cannot be read + if (file.canRead()) { + if (file.isDirectory()) { + String[] files = file.list(); + // an IO error could occur + if (files != null) { + for (int i = 0; i < files.length; i++) { + indexDocs(writer, new File(file, files[i])); + } + } + } else { + System.out.println("adding " + file); + writer.addDocument(FileDocument.Document(file)); + } + } + } + + static class FileDocument { + + /** + * Makes a document for a File. + *
+ * The document has three fields: + *
path--containing the pathname of the file, as a
+ * stored, untokenized field;
+ * modified--containing the last modified date of the
+ * file as a field as created by DateTools; and
+ * contents--containing the full contents of the file,
+ * as a Reader field;
+ */
+ public static Document Document(File f) {
+
+ // make a new, empty document
+ Document doc = new Document();
+
+ // Add the path of the file as a field named "path". Use a
+ // field that is indexed (i.e. searchable), but don't
+ // tokenize the field into words.
+ doc.add(new Field("path", f.getPath()/*.replaceAll(File.separator, " ")*/, Field.Store.YES, Field.Index.ANALYZED));
+
+ // Add the last modified date of the file a field named
+ // "modified". Use a field that is indexed
+ // (i.e. searchable), but don't tokenize the field into
+ // words.
+ doc.add(new Field("modified", DateTools.timeToString(f
+ .lastModified(), DateTools.Resolution.MINUTE),
+ Field.Store.YES, Field.Index.NOT_ANALYZED));
+
+ // Add the contents of the file to a field named
+ // "contents". Specify a Reader, so that the text of the
+ // file is tokenized and indexed, but not stored. Note
+ // that FileReader expects the file to be in the system's
+ // default encoding. If that's not the case searching for
+ // special characters will fail.
+
+ // doc.add(new Field("contents", new FileReader(f)));
+
+ // return the document
+ return doc;
+ }
+
+ private FileDocument() {
+ // not to be instantiated
+ }
+ }
+
+ void recursiveDelete(File f) {
+ if (f.isDirectory()) {
+ for (File e : f.listFiles()) {
+ recursiveDelete(e);
+ }
+ }
+ f.delete();
+ }
+}