Java Lucene 4.5: how to search case-insensitively

4.7k views Asked by At

We have implemented a search engine with Java Lucene 4.5. I am trying to make the search match regardless of the letter case of the field value (e.g., if I search for a city named "Banglore" I get a result, but when I search for "banglore" I get 0 results).

I have used StandardAnalyzer for analyzing the data and WildcardQuery to match a Like condition (I tried as mentioned here without success).

I am not sure where I have gone wrong. I appreciate any guidance on fixing this case sensitivity problem.

public SearchHelper
{
    Analyzer analyzer;

    Directory index;
    public IndexSearcher searcher = null;
    public IndexWriter indexWriter = null;
    public QueryParser parser = null;
    private static int hitsPerPage = 100;

    /**
     * @param indexFileLocation
     * @throws IOException
     */
    public SearchHelper(String indexFileLocation) throws IOException
    {
//        this.analyzer =new StandardAnalyzer();
        this.analyzer = new CaseStandardAnalyzer();
//        analyzer = new ThaiAnalyzer();
        this.index = FSDirectory.open(java.nio.file.Paths.get(indexFileLocation));
    }

    /**
     * @param create
     * @return
     * @throws IOException
     */
    public IndexWriter getIndexWriter(boolean create) throws IOException
    {
        if (indexWriter == null)
        {
            IndexWriterConfig iwc = new IndexWriterConfig(this.analyzer);
            this.indexWriter = new IndexWriter(this.index, iwc);
        }
        return this.indexWriter;
    } //End of getIndexWriter

    /**
     * @throws IOException
     */
    public void closeIndexWriter() throws IOException
    {
        if (this.indexWriter != null)
        {
             this.indexWriter.commit();//optimize(); LUCENE_36
             this.indexWriter.close();
        }
    } //End closeIndexWriter

    /**
     * @param indexFileLocation
     * @throws CorruptIndexException
     * @throws IOException
     */
    public void startSearch(String indexFileLocation) throws CorruptIndexException, IOException
    {
//        searcher = new IndexSearcher(FSDirectory.open(new File(indexFileLocation)));

        IndexReader reader = DirectoryReader.open(FSDirectory.open(java.nio.file.Paths.get(indexFileLocation)));
//        IndexReader.open(this.index);
//        open(getIndexWriter(true), true);
        this.searcher = new IndexSearcher(reader);
    }

    /**
     * @param fieldNames
     * @param fieldValues
     * @return
     * @throws IOException
     * @throws ParseException
     * 
     * <p></p>
     * https://stackoverflow.com/questions/2005084/how-to-specify-two-fields-in-lucene-queryparser
     */
    public ScoreDoc[] searchSEO(String[] fieldNames, String[] fieldValues, int limitSize) throws IOException, ParseException
    {
        this.analyzer = new StandardAnalyzer();
        int searchFieldSize = (null == fieldNames) ? 0 : fieldNames.length;

        BooleanQuery booleanQuery = new BooleanQuery();

        for (int i = 0; i < searchFieldSize; i++)
        {
             Query query1 = searchIndexWithWildcardQuery(fieldNames[i], fieldValues[i]);                
             addQueries(booleanQuery, query1, 2);               
        }

        TopScoreDocCollector collector = null; // Or use by default hitsPerPage instead limitSize

        if (limitSize > 0)
        {
            collector = TopScoreDocCollector.create(limitSize);
        } else {
            collector = TopScoreDocCollector.create(hitsPerPage);
        }

        this.searcher.search(booleanQuery,collector);

        return  collector.topDocs().scoreDocs;
    }

    /**
     * @param whichField
     * @param searchString
     * @return
     * @throws IOException
     * @throws ParseException
     */
    public Query searchIndexWithWildcardQuery(String whichField, String searchString) throws IOException, ParseException
    {
        Term term = addTerm(whichField, "*" + searchString + "*");
        Query query = new WildcardQuery(term);
        return query;
    }

    /**
     * @param whichField
     * @param searchString
     * @return
     */
    public Term addTerm(String whichField, String searchString)
    {
        Term term = new Term(whichField, searchString);
        return term;
    }

    /**
     * @param searchString
     * @param operation
     * @return
     * @throws ParseException
     */
    public Query addConditionOpertaion(String searchString, String operation) throws ParseException
    {
        Query query = null;
        if ("and".equals(operation))
        {
            parser.setDefaultOperator(QueryParser.AND_OPERATOR);
        } else if("or".equals(operation)) {
            parser.setDefaultOperator(QueryParser.AND_OPERATOR);
        }

        query = parser.parse(searchString);
        return query;
    }

    /**
     * @param booleanQuery <code>BooleanQuery</code>
     * @param q <code>Query</code>
     * @param type <code>int</code> , 1--> Must, 2-->Should, 3 --> Must Not
     */
    public void addQueries(BooleanQuery booleanQuery, Query q, int type)
    {
        switch(type)
        {
            case 1: booleanQuery.add(q, Occur.MUST);
                    break;
            case 2: booleanQuery.add(q, Occur.SHOULD);
                    break;
            default:booleanQuery.add(q, Occur.MUST_NOT);
                    break;
        } //End of switch
    }

    public QueryParser getParser()
    {
        return parser;
    }

    public void setParser(String fieldName)
    {
        this.parser = new QueryParser(fieldName, this.analyzer);
    }

    public void getDefaultByStatus(int status)
    {
        this.analyzer = new StandardAnalyzer();
        this.parser = new QueryParser("status", this.analyzer);
    }

    protected void doClear(File dir,boolean deleteSubDir)
    {
        for (File file: dir.listFiles())
        {
            if (file.isDirectory() && deleteSubDir)
            {
                doClear(file,deleteSubDir);
            }
            file.delete();
        }
    } //End of doClear();

    protected void doClose() throws IOException
    {
        this.searcher.getIndexReader().close();
    }

    public boolean add(Object Obj) throws Exception
    {
        User currentUser = (User)Obj;
        boolean isAdded = false;

        org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
        luceneDoc.add(new IntField("oid", currentUser.getOid(), Field.Store.YES));
        luceneDoc.add(new IntField("status", currentUser.getStatus(), Field.Store.YES));
        luceneDoc.add(new StringField("login", currentUser.getLogin(), Field.Store.YES));
        luceneDoc.add(new StringField("fName", currentUser.getFirstName(), Field.Store.YES));
        luceneDoc.add(new StringField("lName", currentUser.getLastName(), Field.Store.NO));
        luceneDoc.add(new StringField("email", currentUser.getEmailId(), Field.Store.YES));
        luceneDoc.add(new StringField("city", currentUser.getCity(), Field.Store.YES));

//        addRelatedFields(luceneDoc,city.getStateCode());

        IndexWriter writer = getIndexWriter(false);
        writer.addDocument(luceneDoc);

        closeIndexWriter();

        isAdded = true;
        System.out.println(isAdded);
        return isAdded;
    } // End of add

    public boolean update(Object Obj) throws Exception
    {
        boolean isUpdated = false;
        User currentUser = (User) Obj;

        org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
//        luceneDoc.add(new IntField("oid", currentUser.getOid(), Field.Store.YES));
        luceneDoc.add(new IntField("oid", currentUser.getOid(), Field.Store.YES));
        luceneDoc.add(new StringField("login", currentUser.getLogin(), Field.Store.YES));
        luceneDoc.add(new IntField("status", currentUser.getStatus(), Field.Store.YES));
        luceneDoc.add(new StringField("fName", currentUser.getFirstName(), Field.Store.YES));
        luceneDoc.add(new StringField("lName", currentUser.getLastName(), Field.Store.NO));
        luceneDoc.add(new StringField("email", currentUser.getEmailId(), Field.Store.YES));
        luceneDoc.add(new StringField("city", currentUser.getCity(), Field.Store.YES));

//        addRelatedFields(luceneDoc,city.getStateCode());

        IndexWriter writer = getIndexWriter(false);
        writer.updateDocument(new Term("login", currentUser.getLogin()),luceneDoc); 
        closeIndexWriter();

        isUpdated = true;
        return isUpdated;
    } // End of update

    public boolean delete(Object Obj) throws Exception
    {
        boolean isDeleted = false;
        User currentUser = (User) Obj;      

        Term deleteTerm = new Term("login", currentUser.getLogin());

        IndexWriter writer = getIndexWriter(false);
        writer.deleteDocuments(deleteTerm); // Or use Query
        writer.forceMergeDeletes();
        closeIndexWriter();

        isDeleted = true;

        return isDeleted;
    } // End of delete

    @Override
    public Object search(String[] fieldNames, String[] fieldValues, int returnType, int limit) throws Exception
    {
        Object obj = null;
        org.apache.lucene.search.ScoreDoc[] hits =  searchSEO(fieldNames,fieldValues,  limit);
        int hitSize = (null == hits) ? 0 : hits.length;

        System.out.println("total:" + hitSize);

        doClose();
        return obj;
    } // End of search

    public void addThreadUser()
    {
        User user = new User();
        addUserPojo(user);    
        add(user);
    }

    public void updateThreadUser()
    {
        User user = new User();
        addUserPojo(user);
        update(user);
    }

    public void deleteThreadUser()
    {
        User user = new User();
        addUserPojo(user);   
        delete(user);
    }

    private void addUserPojo(User user)
    {
        user.setOid(3);
        user.setLogin("senthil");
        user.setFirstName("Semthil");
        user.setLastName("Semthil");
        user.setStatus(1);
        user.setCity("Combiatore");
        user.setEmailId("[email protected]");
    }

    public void searchUser()
    {
        searchUser(new String[] {"login"}, new String[] {"Se"}, null);
    }

    public static void main(String[] args)
    {
        SearchHelper test = new SearchHelper();
        test.searchUser();
    }
}
3

There are 3 answers

1
Parker On

Use the LowerCaseFilter as the post you referenced suggests:

    TokenStream stream = new StandardFilter(Version.LUCENE_CURRENT, tokenizer);
    stream = new LowerCaseFilter(Version.LUCENE_CURRENT, stream);

A more complete example is in this post.

3
knutwalker On

You are using StringField to index your data, but this field bypasses the analyzer chain and always indexes your term verbatim as one token, regardless of your analyzer. You should use TextField if you want your data to be analyzed; the StandardAnalyzer already does lower-casing. Beyond that, WildcardQuery does not analyze its term, so if you search for Banglore, it won't match the now-lower-case banglore from the index. You have to lowercase the search term yourself (or run an analyzer on it).

0
Birbal Singh On

You can use a custom comparator class:

/**
 * Field comparator for string values that orders by length first and, for
 * values of equal length, by case-insensitive lexicographic order.
 *
 * <p>Collected values are kept in an array with one entry per hit slot; the
 * per-document values are read from the field cache of the current reader.
 */
class CaseIgonreCompare extends FieldComparator<String> {

    /** Name of the field whose values are compared. */
    private final String field;
    /** One collected value per hit slot. */
    private final String[] values;
    /** Value of the slot last passed to {@link #setBottom(int)}. */
    private String bottom;
    /** Value last passed to {@link #setTopValue(String)}. */
    private String topValue;
    /** Per-document terms of the reader set by {@link #setNextReader}. */
    private BinaryDocValues cache;

    /**
     * @param field   name of the field to read values from
     * @param numHits number of slots to allocate
     */
    public CaseIgonreCompare(String field, int numHits) {
        this.values = new String[numHits];
        this.field = field;
    }

    /** Reads the cached term of a document and decodes it as UTF-8 text. */
    private String termText(int doc) {
        return cache.get(doc).utf8ToString();
    }

    /** Compares the values stored in two slots. */
    @Override
    public int compare(int arg0, int arg1) {
        String left = values[arg0];
        String right = values[arg1];
        return compareValues(left, right);
    }

    /** Compares the bottom value against the value of document {@code arg0}. */
    @Override
    public int compareBottom(int arg0) throws IOException {
        return compareValues(bottom, termText(arg0));
    }

    /** Compares the top value against the value of document {@code arg0}. */
    @Override
    public int compareTop(int arg0) throws IOException {
        return compareValues(topValue, termText(arg0));
    }

    /**
     * Orders two strings by length; equal-length strings are ordered ignoring
     * case.
     *
     * @return the length difference when the lengths differ, otherwise the
     *         result of {@code first.compareToIgnoreCase(second)}
     */
    public int compareValues(String first, String second) {
        int lengthGap = first.length() - second.length();
        if (lengthGap != 0) {
            return lengthGap;
        }
        return first.compareToIgnoreCase(second);
    }

    /** Stores the value of document {@code arg1} into slot {@code arg0}. */
    @Override
    public void copy(int arg0, int arg1) throws IOException {
        values[arg0] = termText(arg1);
    }

    /** Remembers the value held in slot {@code arg0} as the bottom value. */
    @Override
    public void setBottom(int arg0) {
        bottom = values[arg0];
    }

    /** Loads the field's terms for the next reader and keeps using this comparator. */
    @Override
    public FieldComparator<String> setNextReader(AtomicReaderContext arg0)
            throws IOException {
        BinaryDocValues terms = FieldCache.DEFAULT.getTerms(arg0.reader(), field, true);
        this.cache = terms;
        return this;
    }

    /** Remembers the given value as the top value. */
    @Override
    public void setTopValue(String arg0) {
        topValue = arg0;
    }

    /** Returns the value stored in slot {@code arg0}. */
    @Override
    public String value(int arg0) {
        return values[arg0];
    }
}