Updating Lucene test with Faceted search.

Knut Forkalsrud 2013-11-07 21:11:55 -08:00
parent 193c51900f
commit 0ea01af309
2 changed files with 114 additions and 140 deletions

pom.xml

@@ -172,8 +172,25 @@
     <dependency>
       <groupId>org.apache.lucene</groupId>
       <artifactId>lucene-core</artifactId>
-      <version>3.0.0</version>
+      <version>4.4.0</version>
     </dependency>
+    <dependency>
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-queryparser</artifactId>
+      <version>4.4.0</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-facet</artifactId>
+      <version>4.4.0</version>
+    </dependency>
+    <!--
+    <dependency>
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-bdb-je</artifactId>
+      <version>3.1.0</version>
+    </dependency>
+    -->
     <dependency>
       <groupId>org.slf4j</groupId>
       <artifactId>slf4j-api</artifactId>

LuceneTest.java

@@ -3,26 +3,39 @@
  */
 package org.forkalsrud.album.index;
 
-import java.io.File;
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
 
-import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.document.DateTools;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.facet.index.FacetFields;
+import org.apache.lucene.facet.params.FacetIndexingParams;
+import org.apache.lucene.facet.params.FacetSearchParams;
+import org.apache.lucene.facet.search.CountFacetRequest;
+import org.apache.lucene.facet.search.DrillDownQuery;
+import org.apache.lucene.facet.search.FacetResult;
+import org.apache.lucene.facet.search.FacetResultNode;
+import org.apache.lucene.facet.search.FacetsCollector;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
+import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.search.Collector;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.queryparser.classic.QueryParser;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopScoreDocCollector;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.Version;
 
+import org.junit.Ignore;
 import org.junit.Test;
 
 /**
@@ -31,159 +44,103 @@ import org.junit.Test;
  */
 public class LuceneTest {
 
-    Version version = Version.LUCENE_29;
+    Version version = Version.LUCENE_44;
 
+    @Ignore
     @Test
     public void testIndexCreation() throws Exception {
-        File index = new File("/Users/knut/Desktop/albumidx");
-        recursiveDelete(index);
-        index.mkdirs();
-        Directory dir = FSDirectory.open(index);
-
-        IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(
-                Version.LUCENE_CURRENT), true,
-                IndexWriter.MaxFieldLength.LIMITED);
-        // System.out.println("Indexing to directory '" + index + "'...");
-        indexDocs(writer, new File("photos"));
-        System.out.println("Optimizing...");
-        writer.optimize();
-        writer.close();
-
-        IndexReader reader = IndexReader.open(dir, true);
-        // only searching, so read-only=true
-
-        Searcher searcher = new IndexSearcher(reader);
-        Analyzer analyzer = new StandardAnalyzer(version);
-
-        String field = "path";
-        QueryParser parser = new QueryParser(version, field, analyzer);
-        Query query = parser.parse("geiranger");
-        System.out.println("Searching for: " + query.toString(field));
-
-        Collector streamingHitCollector = new Collector() {
-
-            private Scorer scorer;
-            private int docBase;
-            private IndexReader reader;
-
-            // simply print docId and score of every matching document
-            @Override
-            public void collect(int docNo) throws IOException {
-                int docId = docBase + docNo;
-                Document doc = reader.document(docId);
-
-                System.out.println("docId=" + docId + " score="
-                        + scorer.score() + " path=" + doc.get("path"));
-            }
-
-            @Override
-            public boolean acceptsDocsOutOfOrder() {
-                return true;
-            }
-
-            @Override
-            public void setNextReader(IndexReader reader, int docBase)
-                    throws IOException {
-                this.docBase = docBase;
-                this.reader = reader;
-            }
-
-            @Override
-            public void setScorer(Scorer scorer) throws IOException {
-                this.scorer = scorer;
-            }
-        };
-
-        searcher.search(query, streamingHitCollector);
-
-        searcher.close();
+        StandardAnalyzer analyzer = new StandardAnalyzer(version);
+        Directory index = new RAMDirectory();
+        Directory taxoDir = new RAMDirectory();
+        IndexWriterConfig config = new IndexWriterConfig(version, analyzer);
+        IndexWriter w = new IndexWriter(index, config);
+        DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
+        FacetFields facetFields = new FacetFields(taxoWriter);
+
+        List<CategoryPath> book1 = new ArrayList<CategoryPath>();
+        book1.add(new CategoryPath("Author", "Erik Hatcher"));
+        book1.add(new CategoryPath("Author", "Otis Gospodnetić"));
+        book1.add(new CategoryPath("Pub Date", "2004", "December", "1"));
+
+        List<CategoryPath> book2 = new ArrayList<CategoryPath>();
+        book2.add(new CategoryPath("Author", "Michael McCandless"));
+        book2.add(new CategoryPath("Author", "Erik Hatcher"));
+        book2.add(new CategoryPath("Author", "Otis Gospodnetić"));
+        book2.add(new CategoryPath("Pub Date", "2010", "July", "28"));
+
+        addDoc(w, facetFields, "Lucene in Action", "193398817", book1);
+        addDoc(w, facetFields, "Lucene for Dummies", "55320055Z", book2);
+        addDoc(w, facetFields, "Managing Gigabytes", "55063554A", null);
+        addDoc(w, facetFields, "The Art of Computer Science", "9900333X", null);
+        w.close();
+        taxoWriter.close();
+
+        IndexReader reader = DirectoryReader.open(index);
+        IndexSearcher searcher = new IndexSearcher(reader);
+        DirectoryTaxonomyReader taxor = new DirectoryTaxonomyReader(taxoDir);
+
+        FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("Author"), 10), new CountFacetRequest(new CategoryPath("Pub Date"), 10));
+        FacetsCollector facetsCollector = FacetsCollector.create(fsp, reader, taxor);
+        searcher.search(new MatchAllDocsQuery(), facetsCollector);
+        for (FacetResult fres : facetsCollector.getFacetResults()) {
+            FacetResultNode root = fres.getFacetResultNode();
+            root.value = fres.getNumValidDescendants();
+            System.out.println(root.toString());
+        }
+
+        String querystr = "lucene";
+        Query q = new QueryParser(version, "title", analyzer).parse(querystr);
+
+        showResultsForQuery(searcher, q);
+
+        DrillDownQuery ddq = new DrillDownQuery(FacetIndexingParams.DEFAULT, q);
+        ddq.add(new CategoryPath("Author", "Michael McCandless"));
+        showResultsForQuery(searcher, ddq);
+
+        // reader can only be closed when there
+        // is no need to access the documents any more.
         reader.close();
-
-        dir.close();
-        // recursiveDelete(index);
+        taxor.close();
+        index.close();
+        taxoDir.close();
     }
 
-    void indexDocs(IndexWriter writer, File file) throws IOException {
-        // do not try to index files that cannot be read
-        if (file.canRead()) {
-            if (file.isDirectory()) {
-                String[] files = file.list();
-                // an IO error could occur
-                if (files != null) {
-                    for (int i = 0; i < files.length; i++) {
-                        indexDocs(writer, new File(file, files[i]));
-                    }
-                }
-            } else {
-                System.out.println("adding " + file);
-                writer.addDocument(FileDocument.Document(file));
-            }
-        }
-    }
-
-    static class FileDocument {
-
-        /**
-         * Makes a document for a File.
-         * <p>
-         * The document has three fields:
-         * <ul>
-         * <li><code>path</code>--containing the pathname of the file, as a
-         * stored, untokenized field;
-         * <li><code>modified</code>--containing the last modified date of the
-         * file as a field as created by <a
-         * href="lucene.document.DateTools.html">DateTools</a>; and
-         * <li><code>contents</code>--containing the full contents of the file,
-         * as a Reader field;
-         */
-        public static Document Document(File f) {
-            // make a new, empty document
-            Document doc = new Document();
-
-            // Add the path of the file as a field named "path". Use a
-            // field that is indexed (i.e. searchable), but don't
-            // tokenize the field into words.
-            doc.add(new Field("path", f.getPath()/*.replaceAll(File.separator, " ")*/, Field.Store.YES, Field.Index.ANALYZED));
-
-            // Add the last modified date of the file a field named
-            // "modified". Use a field that is indexed
-            // (i.e. searchable), but don't tokenize the field into
-            // words.
-            doc.add(new Field("modified", DateTools.timeToString(f
-                    .lastModified(), DateTools.Resolution.MINUTE),
-                    Field.Store.YES, Field.Index.NOT_ANALYZED));
-
-            // Add the contents of the file to a field named
-            // "contents". Specify a Reader, so that the text of the
-            // file is tokenized and indexed, but not stored. Note
-            // that FileReader expects the file to be in the system's
-            // default encoding. If that's not the case searching for
-            // special characters will fail.
-            // doc.add(new Field("contents", new FileReader(f)));
-
-            // return the document
-            return doc;
-        }
-
-        private FileDocument() {
-            // not to be instantiated
-        }
-    }
-
-    void recursiveDelete(File f) {
-        if (f.isDirectory()) {
-            for (File e : f.listFiles()) {
-                recursiveDelete(e);
-            }
-        }
-        f.delete();
-    }
+    /**
+     * @param hitsPerPage
+     * @param searcher
+     * @param q
+     * @throws IOException
+     */
+    public void showResultsForQuery(IndexSearcher searcher,
+            Query q) throws IOException {
+        int hitsPerPage = 10;
+
+        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
+        searcher.search(q, collector);
+        ScoreDoc[] hits = collector.topDocs().scoreDocs;
+
+        System.out.println("Found " + hits.length + " hits.");
+        for(int i=0;i<hits.length;++i) {
+            int docId = hits[i].doc;
+            Document d = searcher.doc(docId);
+            System.out.println((i + 1) + ". " + d.get("isbn") + "\t" + d.get("title"));
+        }
+    }
+
+    private void addDoc(IndexWriter w, FacetFields facetFields, String title, String isbn, List<CategoryPath> categories) throws IOException {
+        Document doc = new Document();
+        doc.add(new TextField("title", title, Field.Store.YES));
+        doc.add(new StringField("isbn", isbn, Field.Store.YES));
+        if (categories != null) {
+            facetFields.addFields(doc, categories);
+        }
+        w.addDocument(doc);
+    }
 }
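
Usage note (not part of the commit): assuming the project's standard Maven Surefire setup, and after removing the @Ignore annotation the commit adds to the test, the updated test could be run on its own with a command like:

    mvn test -Dtest=LuceneTest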