Adding Lucene test
parent 2ca702e0f3
commit 2759dc7cf3
1 changed file with 187 additions and 0 deletions
src/test/java/org/forkalsrud/album/index/LuceneTest.java (new file, +187)
@@ -0,0 +1,187 @@
/**
 *
 */
package org.forkalsrud.album.index;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;

/**
 * @author knut
 *
 */
public class LuceneTest {

    Version version = Version.LUCENE_29;

    @Test
    public void testIndexCreation() throws Exception {

        File index = new File("/Users/knut/Desktop/albumidx");
        recursiveDelete(index);
        index.mkdirs();

        Directory dir = FSDirectory.open(index);

        IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(
                Version.LUCENE_CURRENT), true,
                IndexWriter.MaxFieldLength.LIMITED);

        // System.out.println("Indexing to directory '" + index + "'...");
        indexDocs(writer, new File("photos"));
        System.out.println("Optimizing...");
        writer.optimize();
        writer.close();

        // only searching, so read-only=true
        IndexReader reader = IndexReader.open(dir, true);

        Searcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer(version);

        String field = "path";
        QueryParser parser = new QueryParser(version, field, analyzer);
        Query query = parser.parse("geiranger");
        System.out.println("Searching for: " + query.toString(field));

        Collector streamingHitCollector = new Collector() {

            private Scorer scorer;
            private int docBase;
            private IndexReader reader;

            // simply print docId and score of every matching document
            @Override
            public void collect(int docNo) throws IOException {

                int docId = docBase + docNo;
                Document doc = reader.document(docId);

                System.out.println("docId=" + docId + " score="
                        + scorer.score() + " path=" + doc.get("path"));
            }

            @Override
            public boolean acceptsDocsOutOfOrder() {
                return true;
            }

            @Override
            public void setNextReader(IndexReader reader, int docBase)
                    throws IOException {
                this.docBase = docBase;
                this.reader = reader;
            }

            @Override
            public void setScorer(Scorer scorer) throws IOException {
                this.scorer = scorer;
            }
        };

        searcher.search(query, streamingHitCollector);

        reader.close();
        dir.close();
        // recursiveDelete(index);
    }

    void indexDocs(IndexWriter writer, File file) throws IOException {
        // do not try to index files that cannot be read
        if (file.canRead()) {
            if (file.isDirectory()) {
                String[] files = file.list();
                // an IO error could occur
                if (files != null) {
                    for (int i = 0; i < files.length; i++) {
                        indexDocs(writer, new File(file, files[i]));
                    }
                }
            } else {
                System.out.println("adding " + file);
                writer.addDocument(FileDocument.Document(file));
            }
        }
    }

    static class FileDocument {

        /**
         * Makes a document for a File.
         * <p>
         * The document has three fields:
         * <ul>
         * <li><code>path</code>--containing the pathname of the file, as a
         * stored, untokenized field;
         * <li><code>modified</code>--containing the last modified date of the
         * file as a field as created by <a
         * href="lucene.document.DateTools.html">DateTools</a>; and
         * <li><code>contents</code>--containing the full contents of the file,
         * as a Reader field;
         */
        public static Document Document(File f) {

            // make a new, empty document
            Document doc = new Document();

            // Add the path of the file as a field named "path". Use a
            // field that is indexed (i.e. searchable), but don't
            // tokenize the field into words.
            doc.add(new Field("path", f.getPath()/*.replaceAll(File.separator, " ")*/, Field.Store.YES, Field.Index.ANALYZED));

            // Add the last modified date of the file a field named
            // "modified". Use a field that is indexed
            // (i.e. searchable), but don't tokenize the field into
            // words.
            doc.add(new Field("modified", DateTools.timeToString(f
                    .lastModified(), DateTools.Resolution.MINUTE),
                    Field.Store.YES, Field.Index.NOT_ANALYZED));

            // Add the contents of the file to a field named
            // "contents". Specify a Reader, so that the text of the
            // file is tokenized and indexed, but not stored. Note
            // that FileReader expects the file to be in the system's
            // default encoding. If that's not the case searching for
            // special characters will fail.

            // doc.add(new Field("contents", new FileReader(f)));

            // return the document
            return doc;
        }

        private FileDocument() {
            // not to be instantiated
        }
    }

    void recursiveDelete(File f) {
        if (f.isDirectory()) {
            for (File e : f.listFiles()) {
                recursiveDelete(e);
            }
        }
        f.delete();
    }
}
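The committed test writes its index to a hard-coded path on the author's desktop and streams hits through a custom Collector. For comparison, here is a minimal sketch of the same index-and-search flow built against the in-memory RAMDirectory the test already imports, using the TopDocs API from the same Lucene 2.9 line instead of a streaming Collector. The class name RamIndexSketch and the "albums/geiranger/1.jpg" document are made-up placeholders for illustration, not part of the commit.

package org.forkalsrud.album.index;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class RamIndexSketch {

    public static void main(String[] args) throws Exception {
        Version version = Version.LUCENE_29;

        // Build the index in memory instead of on disk.
        Directory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(version),
                true, IndexWriter.MaxFieldLength.LIMITED);

        // A placeholder document standing in for one indexed photo path.
        Document doc = new Document();
        doc.add(new Field("path", "albums/geiranger/1.jpg",
                Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
        writer.close();

        // Search with the TopDocs API rather than a streaming Collector.
        IndexSearcher searcher = new IndexSearcher(dir, true);
        QueryParser parser = new QueryParser(version, "path",
                new StandardAnalyzer(version));
        Query query = parser.parse("geiranger");

        TopDocs hits = searcher.search(query, 10);
        for (ScoreDoc sd : hits.scoreDocs) {
            Document hit = searcher.doc(sd.doc);
            System.out.println("docId=" + sd.doc + " score=" + sd.score
                    + " path=" + hit.get("path"));
        }

        searcher.close();
        dir.close();
    }
}

Assuming the surrounding project uses the standard Maven layout implied by src/test/java, the committed JUnit test itself would run with something like mvn -Dtest=LuceneTest test.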