ElasticSearch synonym and word delimiter analyzer are not compatible

1.5k views Asked by At

I have the mapping document below. To be precise, it applies a word delimiter analyzer, at both index and search time, only to the model field, and it defines a synonym analyzer that performs search-time analysis on the search string.

Mapping

POST /stackoverflow
{
"settings":{
    "analysis":{
        "analyzer":{
            "keyword_analyzer":{
                "tokenizer":"keyword",
                "filter":[
                    "lowercase",
                    "asciifolding"
                ]
            },
            "synonym_analyzer":{
                "tokenizer":"standard",
                "filter":[
                    "lowercase",
                    "synonym"
                ],
                "expand":false,
                "ignore_case":true
            },
            "word_delimiter_analyzer":{
                "tokenizer":"whitespace",
                "filter":[
                    "lowercase",
                    "word_delimiter"

                ],
                "ignore_case":true
            }
        },
        "filter":{
            "synonym":{
                "type":"synonym",
                "synonyms_path":"synonyms.txt"
            },
            "word_delimiter":{
              "type":"word_delimiter",
              "generate_word_parts":true,
              "preserve_original": true
            }
        }
    }
},
"mappings":{
    "vehicles":{
        "dynamic":"false",
        "dynamic_templates":[
            {
                "no_index_template":{
                    "match":"*",
                    "mapping":{
                        "index":"no",
                        "include_in_all":false
                    }
                }
            }
        ],
        "_all":{
            "enabled":false
        },
        "properties":{
            "id":{
                "type":"long",
                "ignore_malformed":true
            },
            "model":{
                "type":"nested",
                "include_in_root":true,
                "properties":{
                    "label":{
                        "type":"string",
                        "analyzer": "word_delimiter_analyzer"
                    }
                }
            },
            "make":{
                "type":"String",
                "analyzer":"keyword_analyzer"
            }
        }
    }
}
}

Here is some sample data:

POST /stackoverflow/vehicles/6
{

    "make" : "chevrolet",
    "model" : {
       "label" : "Silverado 2500HD"
    }
}

Below is the search query:

GET /stackoverflow/_search?explain
{  
   "from":0,
   "size":10,
   "query":{  
       "filtered":{  
         "query":{ 
         "multi_match":{  
            "query":"HD2500",
             "fields":[  
                "make","model.label"
              ],
            "type":"cross_fields","operator" : "OR",
            "analyzer" : "synonym_analyzer"
          }
       }
    }
   }
 }

The above search query does not work; however, if I remove the synonym_analyzer from the search query, it works perfectly fine. I really don't understand how the synonym analyzer is tampering with the result.

In my synonyms.txt file I don't have any reference to HD2500. All the synonym analyzer does is split the input into tokens on whitespace, convert them to lowercase, try to match a synonym string, and then pass the result to the field-level analyzers, so I am confused about where it is breaking.

Any help is highly appreciated

0

There are 0 answers