java - 使用 IntField 的 Lucene 搜索查询在文档更新后不起作用

我正在尝试根据文档的 ID 和一个 IntField 的值，对 Lucene 中的两个文档运行一个简单的查询。在添加这两个文档之后，查询能够立即正确地返回它们。接着我取出检索到的文档，修改其中的 CONTEXT_FIELD（查询中并未用到该字段），然后更新索引中的文档。

有趣的是，此后对该文档的搜索不再返回任何结果，无论是旧版本还是更新后的版本。如果我只在查询中使用 METHOD_NAME 字段，一切都按预期工作，所以问题似乎出在 NUMBER_OF_ARGUMENTS 这个 IntField 上。

为什么会这样？

示例代码:

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Minimal reproduction case: indexes two documents, looks each one up with a
 * query that combines two text clauses and a numeric range clause, then
 * updates the first document twice using the {@link Document} instance that
 * was read back from the index, searching again after each update.
 */
public class LuceneDemo {

// Document ids in the form "<class>#<method>"; findDocument splits them on '#'.
private static final String ID1 = "Great#text";
private static final String ID2 = "Another#bonus";

    // Names of the three fields every document carries (see createDocument).
    private static final String METHOD_NAME_FIELD = "method_name";
    private static final String NUMBER_OF_ARGUMENTS = "number_of_arguments";
    private static final String CONTEXT_FIELD = "context";

    /** Parser used to parse queries; its default field is METHOD_NAME_FIELD. */
    private static QueryParser parser = new QueryParser(Version.LUCENE_43,
            METHOD_NAME_FIELD, createDefaultAnalyzer());

    /**
     * Runs the scenario: add two documents, search, update doc1 (Section 1),
     * search, update doc1 again (Section 2), search, close the writer.
     *
     * @param args unused
     * @throws IOException    if the index cannot be written or read
     * @throws ParseException if a query string cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException {
        IndexWriter luceneIndexWriter = new IndexWriter(
                FSDirectory.open(new File("/tmp/test")), createWriterConfig(64));
        Document doc1 = createDocument(ID1, "context1", 1);
        luceneIndexWriter.addDocument(doc1);
        Document doc2 = createDocument(ID2, "context2", 2);
        luceneIndexWriter.addDocument(doc2);

        System.out.println("Found doc1: "
                + findDocument(ID1, 1, luceneIndexWriter));
        System.out.println("Found doc2: "
                + findDocument(ID2, 2, luceneIndexWriter));
        // From here on doc1 is the instance read back from the index, not
        // the one built by createDocument.
        doc1 = findDocument(ID1, 1, luceneIndexWriter);

        // Section 1
        // Replace only the context field; the other fields of the retrieved
        // document are written back exactly as they were returned.
        doc1.removeField(CONTEXT_FIELD);
        doc1.add(new TextField(CONTEXT_FIELD, "context1_changed",
                Field.Store.YES));
        // Replaces the documents matching method_name:text with doc1.
        // NOTE(review): "text" is presumably the token the analyzer produces
        // from the part of ID1 after '#' - confirm.
        luceneIndexWriter.updateDocument(new Term(METHOD_NAME_FIELD, "text"),
                doc1);

        System.out.println("Found doc1: "
                + findDocument(ID1, 1, luceneIndexWriter));
        System.out.println("Found doc2: "
                + findDocument(ID2, 2, luceneIndexWriter));

        // Section 2
        // The lookup below is disabled because it returns null at this point,
        // so the doc1 instance from Section 1 is reused.
        // doc1 = findDocument(ID1, 1, luceneIndexWriter); <- null
        doc1.removeField(CONTEXT_FIELD);
        doc1.add(new TextField(CONTEXT_FIELD, "context1_changed2",
                Field.Store.YES));
        luceneIndexWriter.updateDocument(new Term(METHOD_NAME_FIELD, "text"),
                doc1);

        System.out.println("Found doc1: "
                + findDocument(ID1, 1, luceneIndexWriter));
        System.out.println("Found doc2: "
                + findDocument(ID2, 2, luceneIndexWriter));

        luceneIndexWriter.close();
    }

    /**
     * Builds a document with a stored text id, a stored text context and a
     * stored IntField.
     *
     * @param id      value of METHOD_NAME_FIELD
     * @param context value of CONTEXT_FIELD
     * @param value   value of NUMBER_OF_ARGUMENTS
     * @return the new, not yet indexed document
     */
    private static Document createDocument(String id, String context, int value) {
        Document doc = new Document();
        doc.add(new TextField(METHOD_NAME_FIELD, id, Field.Store.YES));
        doc.add(new TextField(CONTEXT_FIELD, context, Field.Store.YES));
        doc.add(new IntField(NUMBER_OF_ARGUMENTS, value, Field.Store.YES));
        return doc;
    }

    /**
     * Searches for the document whose id is {@code id} and whose
     * NUMBER_OF_ARGUMENTS equals {@code value}.
     *
     * @param id                id of the form "a#b"; both halves are parsed as
     *                          separate queries against the parser's default field
     * @param value             required numeric value
     * @param luceneIndexWriter writer from which a reader is opened
     * @return the stored document of the top hit if its stored
     *         METHOD_NAME_FIELD equals {@code id}; otherwise {@code null}
     *         (a message is printed to stderr when there are no hits at all)
     * @throws IOException    if reading the index fails
     * @throws ParseException if either half of the id cannot be parsed
     */
    private static Document findDocument(String id, int value,
            IndexWriter luceneIndexWriter) throws IOException, ParseException {
        // NOTE(review): a new reader is opened on every call and is not
        // closed anywhere in this method.
        DirectoryReader reader = DirectoryReader.open(luceneIndexWriter, true);
        IndexSearcher searcher = new IndexSearcher(reader);
        String[] split = id.split("#");
        Query methodQuery = parser.parse(split[1]);
        Query classQuery = parser.parse(split[0]);
        // Exact numeric match written as the inclusive range [value, value];
        // the second argument (1) is the precision step.
        NumericRangeQuery<Integer> range = NumericRangeQuery.newIntRange(
                NUMBER_OF_ARGUMENTS, 1, value, value, true, true);
        // All three clauses are mandatory.
        BooleanQuery query = new BooleanQuery();
        query.add(methodQuery, Occur.MUST);
        query.add(classQuery, Occur.MUST);
        query.add(range, Occur.MUST);
        // Only the single best hit is requested.
        TopDocs result = searcher.search(query, 1);
        if (result.totalHits == 0) {
            System.err.println("Problem, nothing found (Method: " + id + ")");
            return null;
        }
        Document document = searcher.doc(result.scoreDocs[0].doc);
        // Accept the hit only when its stored id is exactly the requested id.
        if (document.get(METHOD_NAME_FIELD).equals(id)) {
            return document;
        }
        return null;
    }

    /**
     * Creates the analyzer used for both indexing and query parsing:
     * KeywordAnalyzer for NUMBER_OF_ARGUMENTS, SimpleAnalyzer for every other
     * field.
     */
    private static Analyzer createDefaultAnalyzer() {
        Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
        analyzerPerField.put(NUMBER_OF_ARGUMENTS, new KeywordAnalyzer());
        PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(
                new SimpleAnalyzer(Version.LUCENE_43), analyzerPerField);
        return analyzer;
    }

    /**
     * Creates the configuration used for writing.
     *
     * @param ramBufferSizeMB value passed to setRAMBufferSizeMB
     * @return a config using the default analyzer and CREATE_OR_APPEND mode
     */
    public static IndexWriterConfig createWriterConfig(double ramBufferSizeMB) {
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43,
                createDefaultAnalyzer());
        config.setRAMBufferSizeMB(ramBufferSizeMB);
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);
        // The two small buffer limits below were the author's attempt to get
        // changes written out immediately.
        config.setMaxBufferedDeleteTerms(1); // desperate try at
        config.setMaxBufferedDocs(2); // storing everything correctly right away
        // config.setInfoStream(System.out); <- set this for more output
        return config;
    }
}

输出:

Found doc1: Document<stored,indexed,tokenized<method_name:Great#text> stored,indexed,tokenized<context:context1> stored<number_of_arguments:1>>
Found doc2: Document<stored,indexed,tokenized<method_name:Another#bonus> stored,indexed,tokenized<context:context2> stored<number_of_arguments:2>>
Problem, nothing found (Method: Great#text)
Found doc1: null
Found doc2: Document<stored,indexed,tokenized<method_name:Another#bonus> stored,indexed,tokenized<context:context2> stored<number_of_arguments:2>>
Problem, nothing found (Method: Great#text)
Found doc1: null
Found doc2: Document<stored,indexed,tokenized<method_name:Another#bonus> stored,indexed,tokenized<context:context2> stored<number_of_arguments:2>>

包括 config.setInfoStream(System.out) 的输出:

http://bpaste.net/show/ko8kkxeFxZFE26NuecZc/ (太长了，抱歉)

最佳答案

问题在于：您把 NUMBER_OF_ARGUMENTS 字段作为 IntField 编入索引，但从索引中取回的文档里，该字段只是一个 StoredField。当您把取回的文档重新写入索引时，这个字段不再作为 IntField 被索引，因此 NumericRangeQuery 得不到任何结果。要确认问题确实出在该字段上，只需把查询中针对 NUMBER_OF_ARGUMENTS 的子句改为 Occur.SHOULD 即可。

一种解决方案是手动将该字段重新添加到文档中，例如:

/**
 * Same scenario as the original demo, but before every update the
 * NUMBER_OF_ARGUMENTS field of the retrieved document is replaced by a real
 * IntField, so the numeric range query keeps matching after the update.
 *
 * @param args unused
 * @throws IOException    if the index cannot be written or read
 * @throws ParseException if a query string cannot be parsed
 */
public static void main(String[] args) throws IOException, ParseException {
    IndexWriter luceneIndexWriter = new IndexWriter(
            FSDirectory.open(new File("/tmp/test")), createWriterConfig(64));
    Document doc1 = createDocument(ID1, "context1", 1);
    luceneIndexWriter.addDocument(doc1);
    Document doc2 = createDocument(ID2, "context2", 2);
    luceneIndexWriter.addDocument(doc2);

    System.out.println("Found doc1: "
            + findDocument(ID1, 1, luceneIndexWriter));
    System.out.println("Found doc2: "
            + findDocument(ID2, 2, luceneIndexWriter));
    doc1 = findDocument(ID1, 1, luceneIndexWriter);

    // Section 1
    doc1.removeField(CONTEXT_FIELD);
    doc1.add(new TextField(CONTEXT_FIELD, "context1_changed",
            Field.Store.YES));

    //re-adding the IntField here
    reAddIntField(doc1);

    luceneIndexWriter.updateDocument(new Term(METHOD_NAME_FIELD, "text"),
            doc1);

    System.out.println("Found doc1: "
            + findDocument(ID1, 1, luceneIndexWriter));
    System.out.println("Found doc2: "
            + findDocument(ID2, 2, luceneIndexWriter));

    // Section 2
    doc1 = findDocument(ID1, 1, luceneIndexWriter);
    doc1.removeField(CONTEXT_FIELD);
    doc1.add(new TextField(CONTEXT_FIELD, "context1_changed2",
            Field.Store.YES));

    // The IntField must be restored BEFORE the update; writing the retrieved
    // document first would only index a copy without the numeric field that
    // then has to be replaced again.
    reAddIntField(doc1);

    luceneIndexWriter.updateDocument(new Term(METHOD_NAME_FIELD, "text"),
            doc1);

    System.out.println("Found doc1: "
            + findDocument(ID1, 1, luceneIndexWriter));
    System.out.println("Found doc2: "
            + findDocument(ID2, 2, luceneIndexWriter));

    luceneIndexWriter.close();
}

/**
 * Replaces the NUMBER_OF_ARGUMENTS field of a document read back from the
 * index with a new stored IntField carrying the same value.
 *
 * @param doc document that contains a NUMBER_OF_ARGUMENTS field with a
 *            numeric value
 */
private static void reAddIntField(Document doc) {
    Number num = doc.getField(NUMBER_OF_ARGUMENTS).numericValue();
    doc.removeField(NUMBER_OF_ARGUMENTS);
    doc.add(new IntField(NUMBER_OF_ARGUMENTS, num.intValue(),
            Field.Store.YES));
}

更安全的方法是构建一个全新的文档来替换旧文档，而不是修改从索引中取出的文档再把它写回去。从索引中检索到的文档只是其存储（stored）版本，必然缺少大量关于各字段应如何被索引的信息。

旁注，在创建构建小型索引的测试函数时，我会使用:

// Start from an empty index on every run instead of appending to an existing one.
config.setOpenMode(OpenMode.CREATE);

而不是 CREATE_OR_APPEND。这允许您从一个空索引开始，因此结果更容易预测，并且您可以在每次构建新索引时查看索引的内容，以进行调试，例如:

/**
 * Debugging aid: prints the stored fields of every document slot in the
 * index, followed by the number of deleted documents.
 *
 * @param writer writer from which a reader is opened
 * @throws IOException if the index cannot be read
 */
public static void outputTheWholeThing(IndexWriter writer) throws IOException {
    DirectoryReader reader = DirectoryReader.open(writer, true);
    try {
        // Walks every slot up to maxDoc().
        // NOTE(review): slots of deleted documents are not skipped here -
        // confirm this is acceptable for the debugging use case.
        for (int i = 0; i < reader.maxDoc(); i++) {
            Document doc = reader.document(i);
            System.out.println(doc);
        }
        System.out.println("Pending deletions:" + reader.numDeletedDocs());
    } finally {
        // Release the reader even when reading a document fails.
        reader.close();
    }
}

关于java - 使用 IntField 的 Lucene 搜索查询在文档更新后不起作用，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/17109371/

java - 使用 IntField 的 Lucene 搜索查询在文档更新后不起作用

上一篇：java - 我们应该使用什么方法来代替 Hazelcast.getMap ("Map")？

下一篇：java - 使用 Java 按 HashMap 类中的值排序