diff --git a/pom.xml b/pom.xml
index 3ab8d66..701dfbf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -172,8 +172,25 @@
 <groupId>org.apache.lucene</groupId>
 <artifactId>lucene-core</artifactId>
- <version>3.0.0</version>
+ <version>4.4.0</version>
 </dependency>
+ <dependency>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-queryparser</artifactId>
+ <version>4.4.0</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-facet</artifactId>
+ <version>4.4.0</version>
+ </dependency>
 <dependency>
 <groupId>org.slf4j</groupId>
 <artifactId>slf4j-api</artifactId>
diff --git a/src/test/java/org/forkalsrud/album/index/LuceneTest.java b/src/test/java/org/forkalsrud/album/index/LuceneTest.java
index 32295d9..ab35d29 100644
--- a/src/test/java/org/forkalsrud/album/index/LuceneTest.java
+++ b/src/test/java/org/forkalsrud/album/index/LuceneTest.java
@@ -3,26 +3,39 @@
*/
package org.forkalsrud.album.index;
-import java.io.File;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
-import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.facet.index.FacetFields;
+import org.apache.lucene.facet.params.FacetIndexingParams;
+import org.apache.lucene.facet.params.FacetSearchParams;
+import org.apache.lucene.facet.search.CountFacetRequest;
+import org.apache.lucene.facet.search.DrillDownQuery;
+import org.apache.lucene.facet.search.FacetResult;
+import org.apache.lucene.facet.search.FacetResultNode;
+import org.apache.lucene.facet.search.FacetsCollector;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
+import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.search.Collector;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
-import org.junit.Ignore;
import org.junit.Test;
/**
@@ -31,159 +44,103 @@ import org.junit.Test;
*/
public class LuceneTest {
- Version version = Version.LUCENE_29;
+ Version version = Version.LUCENE_44;
- @Ignore
@Test
public void testIndexCreation() throws Exception {
- File index = new File("/Users/knut/Desktop/albumidx");
- recursiveDelete(index);
- index.mkdirs();
+ StandardAnalyzer analyzer = new StandardAnalyzer(version);
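+ // in-memory directories keep the test self-contained; nothing is written to disk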
+ Directory index = new RAMDirectory();
+ Directory taxoDir = new RAMDirectory();
- Directory dir = FSDirectory.open(index);
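+ // Lucene 4.x: writer options are supplied through IndexWriterConfig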
+ IndexWriterConfig config = new IndexWriterConfig(version, analyzer);
+ IndexWriter w = new IndexWriter(index, config);
- IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(
- Version.LUCENE_CURRENT), true,
- IndexWriter.MaxFieldLength.LIMITED);
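+ // facet categories live in a separate taxonomy index, maintained by the taxonomy writer;
+ // FacetFields turns CategoryPaths into the document fields that make drill-down possible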
+ DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
+ FacetFields facetFields = new FacetFields(taxoWriter);
- // System.out.println("Indexing to directory '" + index + "'...");
- indexDocs(writer, new File("photos"));
- System.out.println("Optimizing...");
- writer.optimize();
- writer.close();
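+ // a CategoryPath is one facet value: a dimension ("Author") plus its components ("Erik Hatcher")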
+ List<CategoryPath> book1 = new ArrayList<CategoryPath>();
+ book1.add(new CategoryPath("Author", "Erik Hatcher"));
+ book1.add(new CategoryPath("Author", "Otis Gospodnetić"));
+ book1.add(new CategoryPath("Pub Date", "2004", "December", "1"));
- IndexReader reader = IndexReader.open(dir, true);
- // only searching, so read-only=true
+ List<CategoryPath> book2 = new ArrayList<CategoryPath>();
+ book2.add(new CategoryPath("Author", "Michael McCandless"));
+ book2.add(new CategoryPath("Author", "Erik Hatcher"));
+ book2.add(new CategoryPath("Author", "Otis Gospodnetić"));
+ book2.add(new CategoryPath("Pub Date", "2010", "July", "28"));
+
+ addDoc(w, facetFields, "Lucene in Action", "193398817", book1);
+ addDoc(w, facetFields, "Lucene for Dummies", "55320055Z", book2);
+ addDoc(w, facetFields, "Managing Gigabytes", "55063554A", null);
+ addDoc(w, facetFields, "The Art of Computer Science", "9900333X", null);
+ w.close();
+ taxoWriter.close();
- Searcher searcher = new IndexSearcher(reader);
- Analyzer analyzer = new StandardAnalyzer(version);
- String field = "path";
- QueryParser parser = new QueryParser(version, field, analyzer);
- Query query = parser.parse("geiranger");
- System.out.println("Searching for: " + query.toString(field));
+ IndexReader reader = DirectoryReader.open(index);
+ IndexSearcher searcher = new IndexSearcher(reader);
- Collector streamingHitCollector = new Collector() {
+ DirectoryTaxonomyReader taxor = new DirectoryTaxonomyReader(taxoDir);
- private Scorer scorer;
- private int docBase;
- private IndexReader reader;
- // simply print docId and score of every matching document
- @Override
- public void collect(int docNo) throws IOException {
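+ // request the top-10 counts under the "Author" and "Pub Date" dimensions;
+ // the FacetsCollector accumulates the category counts while the query runs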
+ FacetSearchParams fsp = new FacetSearchParams(
+ new CountFacetRequest(new CategoryPath("Author"), 10),
+ new CountFacetRequest(new CategoryPath("Pub Date"), 10));
+ FacetsCollector facetsCollector = FacetsCollector.create(fsp, reader, taxor);
+ searcher.search(new MatchAllDocsQuery(), facetsCollector);
- int docId = docBase + docNo;
- Document doc = reader.document(docId);
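+ // print one result tree per facet request; the root's value is replaced with the
+ // number of valid descendant nodes before printing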
+ for (FacetResult fres : facetsCollector.getFacetResults()) {
+ FacetResultNode root = fres.getFacetResultNode();
+ root.value = fres.getNumValidDescendants();
+ System.out.println(root.toString());
+ }
- System.out.println("docId=" + docId + " score="
- + scorer.score() + " path=" + doc.get("path"));
- }
+ String querystr = "lucene";
+ Query q = new QueryParser(version, "title", analyzer).parse(querystr);
+
+ showResultsForQuery(searcher, q);
- @Override
- public boolean acceptsDocsOutOfOrder() {
- return true;
- }
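+ // drill down: restrict the text query to documents that also carry the given facet value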
+ DrillDownQuery ddq = new DrillDownQuery(FacetIndexingParams.DEFAULT, q);
+ ddq.add(new CategoryPath("Author", "Michael McCandless"));
+ showResultsForQuery(searcher, ddq);
- @Override
- public void setNextReader(IndexReader reader, int docBase)
- throws IOException {
- this.docBase = docBase;
- this.reader = reader;
- }
-
- @Override
- public void setScorer(Scorer scorer) throws IOException {
- this.scorer = scorer;
- }
-
- };
-
- searcher.search(query, streamingHitCollector);
-
- searcher.close();
+
+ // reader can only be closed when there
+ // is no need to access the documents any more.
reader.close();
-
- dir.close();
- // recursiveDelete(index);
+ taxor.close();
+ index.close();
+ taxoDir.close();
}
- void indexDocs(IndexWriter writer, File file) throws IOException {
- // do not try to index files that cannot be read
- if (file.canRead()) {
- if (file.isDirectory()) {
- String[] files = file.list();
- // an IO error could occur
- if (files != null) {
- for (int i = 0; i < files.length; i++) {
- indexDocs(writer, new File(file, files[i]));
- }
- }
- } else {
- System.out.println("adding " + file);
- writer.addDocument(FileDocument.Document(file));
- }
+ /**
+ * Runs the given query and prints the top hits to stdout.
+ *
+ * @param searcher searcher over the index
+ * @param q the query to run
+ * @throws IOException if the index cannot be read
+ */
+ public void showResultsForQuery(IndexSearcher searcher, Query q) throws IOException {
+ int hitsPerPage = 10;
+
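+ // keep the top hits sorted by score; 'true' lets the collector assume in-order doc collection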
+ TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
+ searcher.search(q, collector);
+ ScoreDoc[] hits = collector.topDocs().scoreDocs;
+
+ System.out.println("Found " + hits.length + " hits.");
+ for (int i = 0; i < hits.length; ++i) {
+ int docId = hits[i].doc;
+ Document d = searcher.doc(docId);
+ System.out.println((i + 1) + ". " + d.get("isbn") + "\t" + d.get("title"));
+ }
+ }
- static class FileDocument {
-
- /**
- * Makes a document for a File.
- * <p>
- * The document has three fields:
- * <ul>
- * <li><code>path</code>--containing the pathname of the file, as a
- * stored, untokenized field;
- * <li><code>modified</code>--containing the last modified date of the
- * file as a field as created by DateTools; and
- * <li><code>contents</code>--containing the full contents of the file,
- * as a Reader field;
- * </ul>
- */
- public static Document Document(File f) {
-
- // make a new, empty document
- Document doc = new Document();
-
- // Add the path of the file as a field named "path". Use a
- // field that is indexed (i.e. searchable), but don't
- // tokenize the field into words.
- doc.add(new Field("path", f.getPath()/*.replaceAll(File.separator, " ")*/, Field.Store.YES, Field.Index.ANALYZED));
-
- // Add the last modified date of the file a field named
- // "modified". Use a field that is indexed
- // (i.e. searchable), but don't tokenize the field into
- // words.
- doc.add(new Field("modified", DateTools.timeToString(f
- .lastModified(), DateTools.Resolution.MINUTE),
- Field.Store.YES, Field.Index.NOT_ANALYZED));
-
- // Add the contents of the file to a field named
- // "contents". Specify a Reader, so that the text of the
- // file is tokenized and indexed, but not stored. Note
- // that FileReader expects the file to be in the system's
- // default encoding. If that's not the case searching for
- // special characters will fail.
-
- // doc.add(new Field("contents", new FileReader(f)));
-
- // return the document
- return doc;
+ private void addDoc(IndexWriter w, FacetFields facetFields, String title, String isbn, List<CategoryPath> categories) throws IOException {
+ Document doc = new Document();
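+ // TextField is tokenized for full-text search; StringField keeps the ISBN as one untokenized term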
+ doc.add(new TextField("title", title, Field.Store.YES));
+ doc.add(new StringField("isbn", isbn, Field.Store.YES));
+ if (categories != null) {
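+ // adds the facet fields for these categories to the document; new paths are recorded in the taxonomy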
+ facetFields.addFields(doc, categories);
}
-
- private FileDocument() {
- // not to be instantiated
- }
- }
-
- void recursiveDelete(File f) {
- if (f.isDirectory()) {
- for (File e : f.listFiles()) {
- recursiveDelete(e);
- }
- }
- f.delete();
+ w.addDocument(doc);
}
}