Using Lucene 9.10.0 MemoryIndex in Java to ingest and search IntField and use rangequery

46 views Asked by At

I am trying to overcome integer range query problem with MemoryIndex. ingesting 2 fields status which is string field and portno which is int field. If the port range is within 50 to 90, input should match. When I am using String representation of portno, getting unwanted results due to number indexed as String. But when I try to use IntField for portno, it is not matching valid numbers within range too. Code and output as follows:

Criteria: status: Open AND portno: [50 TO 90]

portno: 4 Criteria matched: false

portno: 5 Criteria matched: false

portno: 6 Criteria matched: true

portno: 7 Criteria matched: true

portno: 8 Criteria matched: true

portno: 9 Criteria matched: true

portno: 49 Criteria matched: false

portno: 50 Criteria matched: true

portno: 89 Criteria matched: true

portno: 90 Criteria matched: true

portno: 91 Criteria matched: false

portno: 100 Criteria matched: false

=>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

Criteria: status: Open AND portno: [50 TO 90]

portno: 4 Criteria matched: false

portno: 5 Criteria matched: false

portno: 6 Criteria matched: false

portno: 7 Criteria matched: false

portno: 8 Criteria matched: false

portno: 9 Criteria matched: false

portno: 49 Criteria matched: false

portno: 50 Criteria matched: false

portno: 89 Criteria matched: false

portno: 90 Criteria matched: false

portno: 91 Criteria matched: false

portno: 100 Criteria matched: false


import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;

import java.util.HashMap;
import java.util.List;
import java.util.Map;


public class CriteriaEvaluatorTest {

    private static void indexNumberAsString() {
        List<Integer> ilist = List.of(4, 5, 6, 7, 8, 9, 49, 50, 89, 90, 91, 100);
        String criteria = "status: Open AND portno: [50 TO 90]";
        System.out.println("Criteria: " + criteria);
        ilist.forEach(number -> {
                    MemoryIndex memoryIndex = new MemoryIndex();
                    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
                    Map<String, Object> fmap = new HashMap<>();
                    fmap.put("status", "Open");
                    fmap.put("portno", String.valueOf(number));
                    for (Map.Entry<String, Object> entry : fmap.entrySet()) {
                        String fldName = entry.getKey();
                        String fldValue = entry.getValue().toString();
                        memoryIndex.addField(fldName, fldValue, analyzer);
                    }
                    IndexSearcher indexSearcher = memoryIndex.createSearcher();
                    String[] fldArray = new String[3];
                    int j = 0;
                    for (String fld : fmap.keySet()) {
                        fldArray[j++] = fld;
                    }
                    MultiFieldQueryParser multiFieldQueryParser = new MultiFieldQueryParser(fldArray, analyzer);
                    try {
                        Query query = multiFieldQueryParser.parse(criteria);
                        TopDocs topDocs = indexSearcher.search(query, 1);
                        boolean result = topDocs.totalHits.value > 0;
                        System.out.println("portno: " + number + " Criteria matched: " + result);
                    } catch (Exception e) {
                        String message = e.getMessage();
                    }
                }
        );
    }

    private static void indexNumberAsIntPoint() {
        List<Integer> ilist = List.of(4, 5, 6, 7, 8, 9, 49, 50, 89, 90, 91, 100);
        String criteria = "status: Open AND portno: [50 TO 90]";
        System.out.println("Criteria: " + criteria);
        ilist.forEach(number -> {
                    MemoryIndex memoryIndex = new MemoryIndex(true, true);
                    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
                    Map<String, Object> fmap = new HashMap<>();
                    fmap.put("status", "Open");
                    fmap.put("portno", number);
                    for (Map.Entry<String, Object> entry : fmap.entrySet()) {
                        String fldName = entry.getKey();
                        String fldValue = entry.getValue().toString();
                        if (fldName.equals("portno")) {
                            IntField intField = new IntField("portno", number, Field.Store.YES);
                            memoryIndex.addField(intField, analyzer);
                        } else {
                            memoryIndex.addField(fldName, fldValue, analyzer);
                        }
                    }
                    IndexSearcher indexSearcher = memoryIndex.createSearcher();
                    String[] fldArray = new String[3];
                    int j = 0;
                    for (String fld : fmap.keySet()) {
                        fldArray[j++] = fld;
                    }
                    MultiFieldQueryParser multiFieldQueryParser = new MultiFieldQueryParser(fldArray, analyzer);
                    try {
                        Query query = multiFieldQueryParser.parse(criteria);
                        TopDocs topDocs = indexSearcher.search(query, 1);
                        boolean result = topDocs.totalHits.value > 0;
                        System.out.println("portno: " + number + " Criteria matched: " + result);
                    } catch (Exception e) {
                        String message = e.getMessage();
                    }
                }
        );
    }

    public static void main(String args[]) {
        indexNumberAsString();
        System.out.println("=>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");
        indexNumberAsIntPoint();
    }
1

There are 1 answers

0
ParagJ On

Found solution:

  1. While adding fields in MemoryIndex, add as LongField (or IntField):
if(LONG_FIELDS.contains(fldName)) {
  LongField longField = new LongField(fldName, Long.valueOf(fldValue), Field.Store.YES);
  memoryIndex.addField(longField, analyzer);
} else {
  memoryIndex.addField(fldName, fldValue, analyzer);
}
  1. Add a custom parser: So for specific Long/Int Fields, range queries are working properly.
public class CustomCriteriaParser extends MultiFieldQueryParser {
  public CustomCriteriaParser(String[] f, List<String> lflist, Analyzer a) {
    super(f, a);
    if (Utils.notNullAndEmpty(lflist)) {
      this.longFields.addAll(lflist);
    }
  }
            
  private List<String> longFields = new ArrayList<>();
            
  protected Query getFieldQuery(String field, String queryText, boolean quoted) {
    return newRangeQuery(field, queryText, queryText, true, true);
  }
            
  protected Query newRangeQuery(String field, String fromValue, String toValue, boolean startInclusive, boolean endInclusive) {
    if (this.longFields.contains(field)) {
      return LongField.newRangeQuery(field, Long.valueOf(fromValue), Long.valueOf(toValue));
    }
    return (TermRangeQuery) super.newRangeQuery(field, fromValue, toValue, startInclusive, endInclusive);
  }
}