Adding Lucene test

2010-03-04 18:55:01 -08:00 · 2010-03-04 18:55:01 -08:00 · 2759dc7cf3
commit 2759dc7cf3
parent 2ca702e0f3
1 changed files with 187 additions and 0 deletions
--- a/src/test/java/org/forkalsrud/album/index/LuceneTest.java
+++ b/src/test/java/org/forkalsrud/album/index/LuceneTest.java
@ -0,0 +1,187 @@
+/**
+ * 
+ */
+package org.forkalsrud.album.index;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.DateTools;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Searcher;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.Version;
+import org.junit.Test;
+
+/**
+ * @author knut
+ * 
+ */
+public class LuceneTest {
+
+    Version version = Version.LUCENE_29;
+
+    @Test
+    public void testIndexCreation() throws Exception {
+
+        File index = new File("/Users/knut/Desktop/albumidx");
+        recursiveDelete(index);
+        index.mkdirs();
+
+        Directory dir = FSDirectory.open(index);
+
+        IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(
+                Version.LUCENE_CURRENT), true,
+                IndexWriter.MaxFieldLength.LIMITED);
+
+        // System.out.println("Indexing to directory '" + index + "'...");
+        indexDocs(writer, new File("photos"));
+        System.out.println("Optimizing...");
+        writer.optimize();
+        writer.close();
+
+        IndexReader reader = IndexReader.open(dir, true);
+        // only searching, so read-only=true
+
+        Searcher searcher = new IndexSearcher(reader);
+        Analyzer analyzer = new StandardAnalyzer(version);
+
+        String field = "path";
+        QueryParser parser = new QueryParser(version, field, analyzer);
+        Query query = parser.parse("geiranger");
+        System.out.println("Searching for: " + query.toString(field));
+
+        Collector streamingHitCollector = new Collector() {
+
+            private Scorer scorer;
+            private int docBase;
+            private IndexReader reader;
+
+            // simply print docId and score of every matching document
+            @Override
+            public void collect(int docNo) throws IOException {
+
+                int docId = docBase + docNo;
+                Document doc = reader.document(docId);
+
+                System.out.println("docId=" + docId + " score="
+                        + scorer.score() + " path=" + doc.get("path"));
+            }
+
+            @Override
+            public boolean acceptsDocsOutOfOrder() {
+                return true;
+            }
+
+            @Override
+            public void setNextReader(IndexReader reader, int docBase)
+                    throws IOException {
+                this.docBase = docBase;
+                this.reader = reader;
+            }
+
+            @Override
+            public void setScorer(Scorer scorer) throws IOException {
+                this.scorer = scorer;
+            }
+
+        };
+
+        searcher.search(query, streamingHitCollector);
+
+        reader.close();
+
+        dir.close();
+        // recursiveDelete(index);
+    }
+
+    void indexDocs(IndexWriter writer, File file) throws IOException {
+        // do not try to index files that cannot be read
+        if (file.canRead()) {
+            if (file.isDirectory()) {
+                String[] files = file.list();
+                // an IO error could occur
+                if (files != null) {
+                    for (int i = 0; i < files.length; i++) {
+                        indexDocs(writer, new File(file, files[i]));
+                    }
+                }
+            } else {
+                System.out.println("adding " + file);
+                writer.addDocument(FileDocument.Document(file));
+            }
+        }
+    }
+
+    static class FileDocument {
+
+        /**
+         * Makes a document for a File.
+         * <p>
+         * The document has three fields:
+         * <ul>
+         * <li><code>path</code>--containing the pathname of the file, as a
+         * stored, untokenized field;
+         * <li><code>modified</code>--containing the last modified date of the
+         * file as a field as created by <a
+         * href="lucene.document.DateTools.html">DateTools</a>; and
+         * <li><code>contents</code>--containing the full contents of the file,
+         * as a Reader field;
+         */
+        public static Document Document(File f) {
+
+            // make a new, empty document
+            Document doc = new Document();
+
+            // Add the path of the file as a field named "path". Use a
+            // field that is indexed (i.e. searchable), but don't
+            // tokenize the field into words.
+            doc.add(new Field("path", f.getPath()/*.replaceAll(File.separator, " ")*/, Field.Store.YES, Field.Index.ANALYZED));
+
+            // Add the last modified date of the file a field named
+            // "modified". Use a field that is indexed
+            // (i.e. searchable), but don't tokenize the field into
+            // words.
+            doc.add(new Field("modified", DateTools.timeToString(f
+                    .lastModified(), DateTools.Resolution.MINUTE),
+                    Field.Store.YES, Field.Index.NOT_ANALYZED));
+
+            // Add the contents of the file to a field named
+            // "contents". Specify a Reader, so that the text of the
+            // file is tokenized and indexed, but not stored. Note
+            // that FileReader expects the file to be in the system's
+            // default encoding. If that's not the case searching for
+            // special characters will fail.
+
+            // doc.add(new Field("contents", new FileReader(f)));
+
+            // return the document
+            return doc;
+        }
+
+        private FileDocument() {
+            // not to be instantiated
+        }
+    }
+
+    void recursiveDelete(File f) {
+        if (f.isDirectory()) {
+            for (File e : f.listFiles()) {
+                recursiveDelete(e);
+            }
+        }
+        f.delete();
+    }
+}