Elasticsearch Aggregations with two nested fields

35 views Asked by At

I have an Elasticsearch index for testing with the following mapping, which has two nested fields:

test/
{
  "mappings": {
    "dynamic": "strict",
    "properties": {
      "observation_session_id": {
        "type": "long"
      },
      "hazard_categories": {
        "type": "nested",
        "include_in_root": true,
        "properties": {
          "sentence": {
            "type": "keyword"
          },
          "entities": {
            "type": "nested",
            "include_in_root": true,
            "properties": {
              "key": {
                "type": "keyword"
              },
              "value": {
                "type": "byte"
              }
            }
          },
          "category": {
            "type": "nested",
            "include_in_root": true,
            "properties": {
              "key": {
                "type": "keyword"
              },
              "value": {
                "type": "byte"
              }
            }
          }
        }
      }
    }
  }
}

And I am indexing the following documents:

test/_doc/1
{
  "observation_session_id": 1,
  "hazard_categories": [{
    "sentence": "Test Sentence 1",
    "entities": [
      {
        "key": "spilled",
        "value": 1
      },
      {
        "key": "clean up",
        "value": 1
      }
    ],
    "category": [
      {
        "key": "Housekeeping",
        "value": 1
      },
      {
        "key": "Controls",
        "value": 1
      }
    ]
  },
 {
    "sentence": "Test Sentence 1.1",
    "category": [
      {
        "key": "PPE",
        "value": 1
      },
      {
        "key": "Housekeeping",
        "value": 1
      }
    ]
  }]
}

test/_doc/2
{
  "observation_session_id": 2,
  "hazard_categories": [{
    "sentence": "Test Sentence 2",
    "entities": [
      {
        "key": "spilled",
        "value": 1
      }
    ],
    "category": [
      {
        "key": "Housekeeping",
        "value": 1
      }
    ]
  },
 {
    "sentence": "Test Sentence 2.1",
    "entities": [
      {
        "key": "monitoring",
        "value": 1
      }
    ],
    "category": [
      {
        "key": "Controls",
        "value": 1
      }
    ]
  },
  {
    "sentence": "Test Sentence 2.2",
    "entities": [
      {
        "key": "gloves",
        "value": 1
      },
       {
        "key": "ppe kit",
        "value": 1
      }
    ],
    "category": [
      {
        "key": "PPE",
        "value": 1
      }
    ]
  }]
}

test/_doc/3
{
  "observation_session_id": 3,
  "hazard_categories": [{
    "sentence": "Test Sentence 3",
   
    "category": [
      {
        "key": "Housekeeping",
        "value": 1
      },
      {
        "key": "Controls",
        "value": 1
      },
      {
        "key": "PPE",
        "value": 1
      },
      {
        "key": "Care",
        "value": 1
      }
    ]
  }]
}

test/_doc/4
{
  "observation_session_id": 4,
  "hazard_categories": [{
    "sentence": "Test Sentence 4",
    "entities": [
      {
        "key": "cover",
        "value": 1
      },
       {
        "key": "wire",
        "value": 1
      }
    ],
    "category": [
      {
        "key": "Mobile Equipment",
        "value": 1
      },
      {
        "key": "Electricals",
        "value": 1
      },
      {
        "key": "Lifting",
        "value": 1
      }
    ]
  }]
}

I want to get the list of entities for a matching category.

Using the query below, I get the matching documents, but the aggregation also counts entities from non-matching `hazard_categories` entries in those same documents. I want to retain only the entities that belong to the matching category.

{
  "query": {
    "nested": {
      "path": "hazard_categories",
      "query": {
        "bool": {
          "must": [
            {
              "nested": {
                "path": "hazard_categories.category",
                "query": {
                  "bool": {
                    "must": [
                      {
                        "match": {
                          "hazard_categories.category.key": "Housekeeping"
                        }
                      }
                    ]
                  }
                }
              }
            }
          ]
        }
      }
    }
  },
  "aggs": {
    "hazard_categories": {
      "nested": {
        "path": "hazard_categories.entities"
      },
      "aggs": {
        "total_entities": {
          "terms": {
            "field": "hazard_categories.entities.key"
          }
        }
      }
    }
  }
}

It returns:

{
  "buckets": [
    {
      "key": "spilled",
      "doc_count": 2
    },
    {
      "key": "clean up",
      "doc_count": 1
    },
    {
      "key": "gloves",
      "doc_count": 1
    },
    {
      "key": "monitoring",
      "doc_count": 1
    },
    {
      "key": "ppe kit",
      "doc_count": 1
    }
  ]
}

whereas I want to get:

{
  "buckets": [
    {
      "key": "spilled",
      "doc_count": 2
    },
    {
      "key": "clean up",
      "doc_count": 1
    }
  ]
}

0

There are 0 answers