diff --git a/pom.xml b/pom.xml
index 3ab8d66..701dfbf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -172,8 +172,25 @@
         <dependency>
             <groupId>org.apache.lucene</groupId>
             <artifactId>lucene-core</artifactId>
-            <version>3.0.0</version>
+            <version>4.4.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.lucene</groupId>
+            <artifactId>lucene-queryparser</artifactId>
+            <version>4.4.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.lucene</groupId>
+            <artifactId>lucene-facet</artifactId>
+            <version>4.4.0</version>
         </dependency>
         <dependency>
             <groupId>org.slf4j</groupId>
             <artifactId>slf4j-api</artifactId>
diff --git a/src/test/java/org/forkalsrud/album/index/LuceneTest.java b/src/test/java/org/forkalsrud/album/index/LuceneTest.java
index 32295d9..ab35d29 100644
--- a/src/test/java/org/forkalsrud/album/index/LuceneTest.java
+++ b/src/test/java/org/forkalsrud/album/index/LuceneTest.java
@@ -3,26 +3,39 @@
  */
 package org.forkalsrud.album.index;
 
-import java.io.File;
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
 
-import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.document.DateTools;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.facet.index.FacetFields;
+import org.apache.lucene.facet.params.FacetIndexingParams;
+import org.apache.lucene.facet.params.FacetSearchParams;
+import org.apache.lucene.facet.search.CountFacetRequest;
+import org.apache.lucene.facet.search.DrillDownQuery;
+import org.apache.lucene.facet.search.FacetResult;
+import org.apache.lucene.facet.search.FacetResultNode;
+import org.apache.lucene.facet.search.FacetsCollector;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
+import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.search.Collector;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.queryparser.classic.QueryParser;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopScoreDocCollector;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.Version;
-import org.junit.Ignore;
 import org.junit.Test;
 
 /**
@@ -31,159 +44,103 @@ import org.junit.Test;
  */
 public class LuceneTest {
 
-    Version version = Version.LUCENE_29;
+    Version version = Version.LUCENE_44;
 
-    @Ignore
     @Test
     public void testIndexCreation() throws Exception {
 
-        File index = new File("/Users/knut/Desktop/albumidx");
-        recursiveDelete(index);
-        index.mkdirs();
+        StandardAnalyzer analyzer = new StandardAnalyzer(version);
+        Directory index = new RAMDirectory();
+        Directory taxoDir = new RAMDirectory();
 
-        Directory dir = FSDirectory.open(index);
+        IndexWriterConfig config = new IndexWriterConfig(version, analyzer);
+        IndexWriter w = new IndexWriter(index, config);
 
-        IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(
-                Version.LUCENE_CURRENT), true,
-                IndexWriter.MaxFieldLength.LIMITED);
+        // facet categories live in a separate taxonomy index, next to the main index
+        DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
+        FacetFields facetFields = new FacetFields(taxoWriter);
 
-        // System.out.println("Indexing to directory '" + index + "'...");
-        indexDocs(writer, new File("photos"));
-        System.out.println("Optimizing...");
-        writer.optimize();
-        writer.close();
+        List<CategoryPath> book1 = new ArrayList<CategoryPath>();
+        book1.add(new CategoryPath("Author", "Erik Hatcher"));
+        book1.add(new CategoryPath("Author", "Otis Gospodnetić"));
+        book1.add(new CategoryPath("Pub Date", "2004", "December", "1"));
 
-        IndexReader reader = IndexReader.open(dir, true);
-        // only searching, so read-only=true
+        List<CategoryPath> book2 = new ArrayList<CategoryPath>();
+        book2.add(new CategoryPath("Author", "Michael McCandless"));
+        book2.add(new CategoryPath("Author", "Erik Hatcher"));
+        book2.add(new CategoryPath("Author", "Otis Gospodnetić"));
+        book2.add(new CategoryPath("Pub Date", "2010", "July", "28"));
+
+        addDoc(w, facetFields, "Lucene in Action", "193398817", book1);
+        addDoc(w, facetFields, "Lucene for Dummies", "55320055Z", book2);
+        addDoc(w, facetFields, "Managing Gigabytes", "55063554A", null);
+        addDoc(w, facetFields, "The Art of Computer Science", "9900333X", null);
+        w.close();
+        taxoWriter.close();
 
-        Searcher searcher = new IndexSearcher(reader);
-        Analyzer analyzer = new StandardAnalyzer(version);
-        String field = "path";
-        QueryParser parser = new QueryParser(version, field, analyzer);
-        Query query = parser.parse("geiranger");
-        System.out.println("Searching for: " + query.toString(field));
+        IndexReader reader = DirectoryReader.open(index);
+        IndexSearcher searcher = new IndexSearcher(reader);
 
-        Collector streamingHitCollector = new Collector() {
+        DirectoryTaxonomyReader taxor = new DirectoryTaxonomyReader(taxoDir);
 
-            private Scorer scorer;
-            private int docBase;
-            private IndexReader reader;
-
-            // simply print docId and score of every matching document
-            @Override
-            public void collect(int docNo) throws IOException {
+        // count the top 10 values under "Author" and "Pub Date" across all documents
+        FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("Author"), 10), new CountFacetRequest(new CategoryPath("Pub Date"), 10));
+        FacetsCollector facetsCollector = FacetsCollector.create(fsp, reader, taxor);
+        searcher.search(new MatchAllDocsQuery(), facetsCollector);
 
-                int docId = docBase + docNo;
-                Document doc = reader.document(docId);
-
-                System.out.println("docId=" + docId + " score="
-                        + scorer.score() + " path=" + doc.get("path"));
-            }
+        for (FacetResult fres : facetsCollector.getFacetResults()) {
+            FacetResultNode root = fres.getFacetResultNode();
+            root.value = fres.getNumValidDescendants();
+            System.out.println(root.toString());
+        }
 
-            @Override
-            public boolean acceptsDocsOutOfOrder() {
-                return true;
-            }
+        String querystr = "lucene";
+        Query q = new QueryParser(version, "title", analyzer).parse(querystr);
+
+        showResultsForQuery(searcher, q);
 
-            @Override
-            public void setNextReader(IndexReader reader, int docBase)
-                    throws IOException {
-                this.docBase = docBase;
-                this.reader = reader;
-            }
-
-            @Override
-            public void setScorer(Scorer scorer) throws IOException {
-                this.scorer = scorer;
-            }
-
-        };
-
-        searcher.search(query, streamingHitCollector);
-
-        searcher.close();
+        // drill down: restrict the title query to a single author facet
+        DrillDownQuery ddq = new DrillDownQuery(FacetIndexingParams.DEFAULT, q);
+        ddq.add(new CategoryPath("Author", "Michael McCandless"));
+        showResultsForQuery(searcher, ddq);
 
+        // reader can only be closed when there
+        // is no need to access the documents any more.
         reader.close();
-
-        dir.close();
-        // recursiveDelete(index);
+        taxor.close();
+        index.close();
+        taxoDir.close();
     }
 
-    void indexDocs(IndexWriter writer, File file) throws IOException {
-        // do not try to index files that cannot be read
-        if (file.canRead()) {
-            if (file.isDirectory()) {
-                String[] files = file.list();
-                // an IO error could occur
-                if (files != null) {
-                    for (int i = 0; i < files.length; i++) {
-                        indexDocs(writer, new File(file, files[i]));
-                    }
-                }
-            } else {
-                System.out.println("adding " + file);
-                writer.addDocument(FileDocument.Document(file));
-            }
-        }
-    }
+    /**
+     * Print the top hits for a query, at most ten of them.
+     *
+     * @param searcher the searcher to run the query against
+     * @param q the query to run
+     * @throws IOException if the index cannot be read
+     */
+    public void showResultsForQuery(IndexSearcher searcher,
+            Query q) throws IOException {
+        int hitsPerPage = 10;
+
+        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
+        searcher.search(q, collector);
+        ScoreDoc[] hits = collector.topDocs().scoreDocs;
+
+        System.out.println("Found " + hits.length + " hits.");
+        for(int i=0;i<hits.length;++i) {
+            int docId = hits[i].doc;
+            Document d = searcher.doc(docId);
+            System.out.println((i + 1) + ". " + d.get("isbn") + "\t" + d.get("title"));
+        }
+    }
 
-    static class FileDocument {
-
-        /**
-         * Makes a document for a File.
-         * <p>
-         * The document has three fields:
-         * <ul>
-         * <li><code>path</code>--containing the pathname of the file, as a
-         * stored, untokenized field;
-         * <li><code>modified</code>--containing the last modified date of the
-         * file as a field as created by DateTools; and
-         * <li><code>contents</code>--containing the full contents of the file,
-         * as a Reader field;
-         */
-        public static Document Document(File f) {
-
-            // make a new, empty document
-            Document doc = new Document();
-
-            // Add the path of the file as a field named "path". Use a
-            // field that is indexed (i.e. searchable), but don't
-            // tokenize the field into words.
-            doc.add(new Field("path", f.getPath()/*.replaceAll(File.separator, " ")*/, Field.Store.YES, Field.Index.ANALYZED));
-
-            // Add the last modified date of the file a field named
-            // "modified". Use a field that is indexed
-            // (i.e. searchable), but don't tokenize the field into
-            // words.
-            doc.add(new Field("modified", DateTools.timeToString(f
-                    .lastModified(), DateTools.Resolution.MINUTE),
-                    Field.Store.YES, Field.Index.NOT_ANALYZED));
-
-            // Add the contents of the file to a field named
-            // "contents". Specify a Reader, so that the text of the
-            // file is tokenized and indexed, but not stored. Note
-            // that FileReader expects the file to be in the system's
-            // default encoding. If that's not the case searching for
-            // special characters will fail.
-
-            // doc.add(new Field("contents", new FileReader(f)));
-
-            // return the document
-            return doc;
+    private void addDoc(IndexWriter w, FacetFields facetFields, String title, String isbn, List<CategoryPath> categories) throws IOException {
+        Document doc = new Document();
+        // "title" is tokenized for full-text search; "isbn" is indexed as a single term
+        doc.add(new TextField("title", title, Field.Store.YES));
+        doc.add(new StringField("isbn", isbn, Field.Store.YES));
+        if (categories != null) {
+            facetFields.addFields(doc, categories);
         }
-
-        private FileDocument() {
-            // not to be instantiated
-        }
-    }
-
-    void recursiveDelete(File f) {
-        if (f.isDirectory()) {
-            for (File e : f.listFiles()) {
-                recursiveDelete(e);
-            }
-        }
-        f.delete();
+        w.addDocument(doc);
     }
 }
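
Given the four documents indexed above, the base query "lucene" matches two titles, and drilling down on Author/Michael McCandless should narrow that to "Lucene for Dummies" alone. A minimal sketch of how the printed results could be turned into assertions, reusing the version, analyzer and searcher set up in the test and assuming a static import of org.junit.Assert.assertEquals (not part of the commit):

    Query q = new QueryParser(version, "title", analyzer).parse("lucene");
    TopScoreDocCollector collector = TopScoreDocCollector.create(10, true);
    searcher.search(q, collector);
    // "Lucene in Action" and "Lucene for Dummies" both match the title query
    assertEquals(2, collector.topDocs().totalHits);

    // drill down to one author; only book2 carries this category
    DrillDownQuery ddq = new DrillDownQuery(FacetIndexingParams.DEFAULT, q);
    ddq.add(new CategoryPath("Author", "Michael McCandless"));
    collector = TopScoreDocCollector.create(10, true);
    searcher.search(ddq, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    assertEquals(1, hits.length);
    assertEquals("Lucene for Dummies", searcher.doc(hits[0].doc).get("title"));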