I have an Elasticsearch index for testing with the following mapping, which has two nested fields (`entities` and `category`) inside the nested `hazard_categories` field:
test/
{
"mappings": {
"dynamic": "strict",
"properties": {
"observation_session_id": {
"type": "long"
},
"hazard_categories": {
"type": "nested",
"include_in_root": true,
"properties": {
"sentence": {
"type": "keyword"
},
"entities": {
"type": "nested",
"include_in_root": true,
"properties": {
"key": {
"type": "keyword"
},
"value": {
"type": "byte"
}
}
},
"category": {
"type": "nested",
"include_in_root": true,
"properties": {
"key": {
"type": "keyword"
},
"value": {
"type": "byte"
}
}
}
}
}
}
}
}
And I am indexing the following documents:
test/_doc/1
{
"observation_session_id": 1,
"hazard_categories": [{
"sentence": "Test Sentence 1",
"entities": [
{
"key": "spilled",
"value": 1
},
{
"key": "clean up",
"value": 1
}
],
"category": [
{
"key": "Housekeeping",
"value": 1
},
{
"key": "Controls",
"value": 1
}
]
},
{
"sentence": "Test Sentence 1.1",
"category": [
{
"key": "PPE",
"value": 1
},
{
"key": "Housekeeping",
"value": 1
}
]
}]
}
test/_doc/2
{
"observation_session_id": 2,
"hazard_categories": [{
"sentence": "Test Sentence 2",
"entities": [
{
"key": "spilled",
"value": 1
}
],
"category": [
{
"key": "Housekeeping",
"value": 1
}
]
},
{
"sentence": "Test Sentence 2.1",
"entities": [
{
"key": "monitoring",
"value": 1
}
],
"category": [
{
"key": "Controls",
"value": 1
}
]
},
{
"sentence": "Test Sentence 2.2",
"entities": [
{
"key": "gloves",
"value": 1
},
{
"key": "ppe kit",
"value": 1
}
],
"category": [
{
"key": "PPE",
"value": 1
}
]
}]
}
test/_doc/3
{
"observation_session_id": 3,
"hazard_categories": [{
"sentence": "Test Sentence 3",
"category": [
{
"key": "Housekeeping",
"value": 1
},
{
"key": "Controls",
"value": 1
},
{
"key": "PPE",
"value": 1
},
{
"key": "Care",
"value": 1
}
]
}]
}
test/_doc/4
{
"observation_session_id": 4,
"hazard_categories": [{
"sentence": "Test Sentence 4",
"entities": [
{
"key": "cover",
"value": 1
},
{
"key": "wire",
"value": 1
}
],
"category": [
{
"key": "Mobile Equipment",
"value": 1
},
{
"key": "Electricals",
"value": 1
},
{
"key": "Lifting",
"value": 1
}
]
}]
}
I want to get the list of entities for a matching category.
Using the query below I get the matching documents, but I want to retain only the matching entities; currently the aggregation also returns the non-matching entities from the same documents.
{
"query": {
"nested": {
"path": "hazard_categories",
"query": {
"bool": {
"must": [
{
"nested": {
"path": "hazard_categories.category",
"query": {
"bool": {
"must": [
{
"match": {
"hazard_categories.category.key": "Housekeeping"
}
}
]
}
}
}
}
]
}
}
}
},
"aggs": {
"hazard_categories": {
"nested": {
"path": "hazard_categories.entities"
},
"aggs": {
"total_entities": {
"terms": {
"field": "hazard_categories.entities.key"
}
}
}
}
}
}
It returns:
{
"buckets": [
{
"key": "spilled",
"doc_count": 2
},
{
"key": "clean up",
"doc_count": 1
},
{
"key": "gloves",
"doc_count": 1
},
{
"key": "monitoring",
"doc_count": 1
},
{
"key": "ppe kit",
"doc_count": 1
}
]
}
whereas I want to get:
{
"buckets": [
{
"key": "spilled",
"doc_count": 2
},
{
"key": "clean up",
"doc_count": 1
}
]
}