I'm attempting to build a watcher alert in Elastic (hosted cloud version) that will allow me to alert on high jvm memory pressure.
The data is already in elastic, I can retrieve it if I remove the "logic portions" and reported as the following 8 values:
- memory_max_g1_survivor_space
- memory_max_g1_eden_space
- memory_max_g1_old_gen
- memory_used_g1_survivor_space
- memory_used_g1_eden_space
- memory_used_g1_old_gen
- host.name
- service.name
I can successfully retrieve these values if I only query them. But my issue is that I need to determine if it's over 90% and if so alert.
This is the current iteration, and it's giving a compile error, and I'm not sure why, or hot to triage as it will only tell me "compile error" and no other details.
If I remove the "actions" and "transform" the proper documents will retrieve from Elastic. Reading their docs, I've figured out that I need to use a transform (to manipulate the data into a usable subset), a condition (to trigger if data is found over 90%), and an action to actually fire if the condition is met.
This should retrieve the documents in question (this works), for each document retrieved check the heap pressure utilization (the query in the code below - used/max *100), and if over 90 fire an email off.
I'm brand new to this, and as it seems this is json with embedded java code (the "painless script") ... I cannot find an editor that can understand this and help me fix this. (Suggestions welcome here if there is any tooling to help detangle this)
The code giving me errors:
{
"trigger": {
"schedule": {
"interval": "1m"
}
},
"input": {
"search": {
"request": {
"indices": [
"metrics-prod-*"
],
"body": {
"aggs": {
"by_service": {
"terms": {
"field": "host.name.keyword",
"size": 100
}
}
},
"query": {
"bool": {
"must": [
{
"range": {
"@timestamp": {
"gte": "now-10m",
"lte": "now"
}
}
}
],
"filter": [
{
"match_phrase": {
"name": "jvm_memory"
}
}
]
}
}
}
}
}
},
"transform": {
"script": {
"source": """
def[] heap_pressure_values;
for (def i = 0; def mues = {{ctx.payload.hits.hits.[i]._source.memory_used_g1_eden_space}}, def muog = {{ctx.payload.hits.hits.[i]._source.memory_used_g1_old_gen}}, def muss = {{ctx.payload.hits.hits.[i]._source.memory_used_g1_survivor_space}}, def mmes = {{ctx.payload.hits.hits.[i]._source.memory_max_g1_eden_space}}, def mmog = {{ctx.payload.hits.hits.[i]._source.memory_max_g1_old_gen}}, def mmss = {{ctx.payload.hits.hits.[i]._source.memory_max_g1_survivor_space}}, i < ctx.payload.hits.hits._source.length; i++) {
if ((((mues+muog+muss) / (mmes+mmog+mmss)) * 100) > 90) {
def value = ((mues+muog+muss) / (mmes+mmog+mmss)) * 100;
def hostName = {{ctx.payload.hits.hits.[i]._source.host.name}};
def serviceName = {{ctx.payload.hits.hits.[i]._source.service.name}};
heap_pressure_values.add(value, hostName, serviceName);
} else {
def no_value = 0;
}
}
return heap_pressure_values;
"""
}
},
"actions": {
"my-logging-action": {
"condition": {
"script": {
"lang": "painless",
"source": "ctx.metadata.heap_pressure_values.size() > 0"
}
},
"logging": {
"text": "ctx.metadata.heap_pressure_values.values",
"level": "warn"
}
}
}
}
The complete error output it gives me:
And the data returned by a query of our dataset - This is what I'm attempting to alert on, and retrieves properly - Some fields redacted that are not used / relevant:
{
"watch_id": "_inlined_",
"node": "XXXXXXXXXXXXXXXXXXX",
"state": "executed",
"user": "XXXXXXXXXXXXXXXXXXX",
"status": {
"state": {
"active": true,
"timestamp": "2022-06-01T19:06:48.934Z"
},
"last_checked": "2022-06-01T19:06:48.934Z",
"last_met_condition": "2022-06-01T19:06:48.934Z",
"actions": {},
"execution_state": "executed",
"version": -1
},
"trigger_event": {
"type": "manual",
"triggered_time": "2022-06-01T19:06:48.934Z",
"manual": {
"schedule": {
"scheduled_time": "2022-06-01T19:06:48.934Z"
}
}
},
"input": {
"search": {
"request": {
"search_type": "query_then_fetch",
"indices": [
"metrics-prod-*"
],
"rest_total_hits_as_int": true,
"body": {
"aggs": {
"by_service": {
"terms": {
"field": "host.name.keyword",
"size": 100
}
}
},
"query": {
"bool": {
"must": [
{
"range": {
"@timestamp": {
"gte": "now-10m",
"lte": "now"
}
}
}
],
"filter": [
{
"match_phrase": {
"name": "jvm_memory"
}
}
]
}
}
}
}
}
},
"condition": {
"always": {}
},
"metadata": {
"name": "Prod JVM Heap Pressure Warning",
"xpack": {
"type": "json"
}
},
"result": {
"execution_time": "2022-06-01T19:06:48.934Z",
"execution_duration": 968,
"input": {
"type": "search",
"status": "success",
"payload": {
"_shards": {
"total": 64,
"failed": 0,
"successful": 64,
"skipped": 0
},
"hits": {
"hits": [
{
"_index": "metrics-prod-2022-06",
"_type": "_doc",
"_source": {
"memory_committed_metaspace": 245018624,
"memory_max_g1_survivor_space": -1,
"type": "gauge",
"sequenceId": 10945,
"memory_committed_g1_eden_space": 121399934976,
"memory_max_metaspace": -1,
"service.type": "fpa-backend",
"memory_max_codeheap_non_profiled_nmethods": 121442304,
"memory_used_codeheap_profiled_nmethods": 87202944,
"memory_committed_g1_survivor_space": 1409286144,
"memory_max_codeheap_non_nmethods": 8773632,
"memory_used_metaspace": 235112280,
"memory_max_g1_old_gen": 418759311360,
"host.name": "api5-prod",
"memory_max_g1_eden_space": -1,
"memory_max_codeheap_profiled_nmethods": 121442304,
"memory_used_g1_survivor_space": 1409286144,
"memory_used_g1_eden_space": 7985954816,
"service.name": "api5-fpa-backend",
"memory_committed_codeheap_non_profiled_nmethods": 93454336,
"memory_used_codeheap_non_profiled_nmethods": 93428096,
"memory_committed_g1_old_gen": 295950090240,
"memory_used_codeheap_non_nmethods": 1789568,
"@timestamp": "2022-06-01T19:06:25.924Z",
"memory_used_g1_old_gen": 187521906064,
"name": "jvm_memory",
"memory_committed_codeheap_non_nmethods": 5701632,
"memory_committed_codeheap_profiled_nmethods": 88866816
},
"_id": "XXXXXXXXXXXXXXXXXXX",
"_score": 1
},
{
"_index": "metrics-prod-2022-06",
"_type": "_doc",
"_source": {
"memory_committed_metaspace": 133201920,
"memory_max_g1_survivor_space": -1,
"type": "gauge",
"sequenceId": 10936,
"memory_committed_g1_eden_space": 3372220416,
"memory_max_metaspace": -1,
"service.type": "fpa-shared",
"memory_used_codeheap_profiled_nmethods": 49291520,
"memory_max_codeheap_non_profiled_nmethods": 122912768,
"memory_used_compressed_class_space": 14317552,
"memory_max_codeheap_non_nmethods": 5836800,
"memory_committed_g1_survivor_space": 12582912,
"memory_used_metaspace": 128057776,
"memory_max_g1_old_gen": 21474836480,
"host.name": "api0-prod",
"memory_max_g1_eden_space": -1,
"memory_used_g1_survivor_space": 12582912,
"memory_max_codeheap_profiled_nmethods": 122908672,
"memory_used_g1_eden_space": 2847932416,
"service.name": "fpa-shared",
"memory_committed_codeheap_non_profiled_nmethods": 52822016,
"memory_used_codeheap_non_profiled_nmethods": 51804160,
"memory_committed_g1_old_gen": 1992294400,
"memory_used_codeheap_non_nmethods": 1549440,
"@timestamp": "2022-06-01T19:06:26.719Z",
"memory_used_g1_old_gen": 210182944,
"memory_max_compressed_class_space": 1073741824,
"name": "jvm_memory",
"memory_committed_compressed_class_space": 16199680,
"memory_committed_codeheap_non_nmethods": 2555904,
"memory_committed_codeheap_profiled_nmethods": 54460416
},
"_id": "XXXXXXXXXXXXXXXXXXX",
"_score": 1
},
{
"_index": "metrics-prod-2022-06",
"_type": "_doc",
"_source": {
"memory_committed_metaspace": 243269632,
"memory_max_g1_survivor_space": -1,
"type": "gauge",
"sequenceId": 10947,
"memory_committed_g1_eden_space": 52848230400,
"memory_max_metaspace": -1,
"service.type": "fpa-backend",
"memory_max_codeheap_non_profiled_nmethods": 121442304,
"memory_used_codeheap_profiled_nmethods": 85598464,
"memory_max_codeheap_non_nmethods": 8773632,
"memory_committed_g1_survivor_space": 1207959552,
"memory_used_metaspace": 233772080,
"memory_max_g1_old_gen": 418759311360,
"host.name": "api4-prod",
"memory_max_g1_eden_space": -1,
"memory_max_codeheap_profiled_nmethods": 121442304,
"memory_used_g1_survivor_space": 1207959552,
"memory_used_g1_eden_space": 20703084544,
"service.name": "api4-fpa-backend",
"memory_committed_codeheap_non_profiled_nmethods": 95158272,
"memory_used_codeheap_non_profiled_nmethods": 95012352,
"memory_committed_g1_old_gen": 364703121408,
"memory_used_codeheap_non_nmethods": 1787264,
"@timestamp": "2022-06-01T19:06:27.236Z",
"memory_used_g1_old_gen": 187802866888,
"name": "jvm_memory",
"memory_committed_codeheap_non_nmethods": 4849664,
"memory_committed_codeheap_profiled_nmethods": 87949312
},
"_id": "XXXXXXXXXXXXXXXXXXX",
"_score": 1
},
{
"_index": "metrics-prod-2022-06",
"_type": "_doc",
"_source": {
"memory_committed_metaspace": 256114688,
"memory_max_g1_survivor_space": -1,
"type": "gauge",
"sequenceId": 10935,
"memory_committed_g1_eden_space": 187535720448,
"memory_max_metaspace": -1,
"service.type": "fpa-backend",
"memory_used_codeheap_profiled_nmethods": 86692992,
"memory_max_codeheap_non_profiled_nmethods": 121442304,
"memory_committed_g1_survivor_space": 1543503872,
"memory_max_codeheap_non_nmethods": 8773632,
"memory_used_metaspace": 245723584,
"memory_max_g1_old_gen": 418759311360,
"host.name": "api6-prod",
"memory_max_g1_eden_space": -1,
"memory_used_g1_survivor_space": 1543503872,
"memory_max_codeheap_profiled_nmethods": 121442304,
"memory_used_g1_eden_space": 63820529664,
"service.name": "api6-fpa-backend",
"memory_used_codeheap_non_profiled_nmethods": 107003392,
"memory_committed_codeheap_non_profiled_nmethods": 107020288,
"memory_committed_g1_old_gen": 229680087040,
"memory_used_codeheap_non_nmethods": 1844352,
"@timestamp": "2022-06-01T19:04:03.306Z",
"memory_used_g1_old_gen": 193096379968,
"name": "jvm_memory",
"memory_committed_codeheap_non_nmethods": 4980736,
"memory_committed_codeheap_profiled_nmethods": 88342528
},
"_id": "XXXXXXXXXXXXXXXXXXX",
"_score": 1
},
{
"_index": "metrics-prod-2022-06",
"_type": "_doc",
"_source": {
"memory_committed_metaspace": 245628928,
"memory_max_g1_survivor_space": -1,
"type": "gauge",
"sequenceId": 13888,
"memory_committed_g1_eden_space": 67343745024,
"memory_max_metaspace": -1,
"service.type": "fpa-backend",
"memory_used_codeheap_profiled_nmethods": 87726208,
"memory_max_codeheap_non_profiled_nmethods": 121442304,
"memory_committed_g1_survivor_space": 704643072,
"memory_max_codeheap_non_nmethods": 8773632,
"memory_used_metaspace": 236316808,
"memory_max_g1_old_gen": 429496729600,
"host.name": "api8-prod",
"memory_max_g1_eden_space": -1,
"memory_used_g1_survivor_space": 704643072,
"memory_max_codeheap_profiled_nmethods": 121442304,
"memory_used_g1_eden_space": 34191966208,
"service.name": "api8-fpa-backend",
"memory_used_codeheap_non_profiled_nmethods": 101222400,
"memory_committed_codeheap_non_profiled_nmethods": 101711872,
"memory_committed_g1_old_gen": 361448341504,
"memory_used_codeheap_non_nmethods": 1786880,
"@timestamp": "2022-06-01T19:04:04.210Z",
"memory_used_g1_old_gen": 195107831232,
"name": "jvm_memory",
"memory_committed_codeheap_non_nmethods": 5177344,
"memory_committed_codeheap_profiled_nmethods": 91881472
},
"_id": "XXXXXXXXXXXXXXXXXXX",
"_score": 1
},
{
"_index": "metrics-prod-2022-06",
"_type": "_doc",
"_source": {
"memory_committed_metaspace": 246591488,
"memory_max_g1_survivor_space": -1,
"type": "gauge",
"sequenceId": 10919,
"memory_committed_g1_eden_space": 20937965568,
"memory_max_metaspace": -1,
"service.type": "fpa-backend",
"memory_used_codeheap_profiled_nmethods": 89098624,
"memory_max_codeheap_non_profiled_nmethods": 121442304,
"memory_committed_g1_survivor_space": 1073741824,
"memory_max_codeheap_non_nmethods": 8773632,
"memory_used_metaspace": 237059432,
"memory_max_g1_old_gen": 418759311360,
"host.name": "api2-prod",
"memory_max_g1_eden_space": -1,
"memory_used_g1_survivor_space": 1073741824,
"memory_max_codeheap_profiled_nmethods": 121442304,
"memory_used_g1_eden_space": 12884901888,
"service.name": "api2-fpa-backend",
"memory_used_codeheap_non_profiled_nmethods": 97661312,
"memory_committed_codeheap_non_profiled_nmethods": 97845248,
"memory_committed_g1_old_gen": 396747603968,
"memory_used_codeheap_non_nmethods": 1804416,
"@timestamp": "2022-06-01T19:04:19.920Z",
"memory_used_g1_old_gen": 183998486016,
"name": "jvm_memory",
"memory_committed_codeheap_non_nmethods": 4784128,
"memory_committed_codeheap_profiled_nmethods": 91947008
},
"_id": "XXXXXXXXXXXXXXXXXXX",
"_score": 1
},
{
"_index": "metrics-prod-2022-06",
"_type": "_doc",
"_source": {
"memory_committed_metaspace": 247201792,
"memory_max_g1_survivor_space": -1,
"type": "gauge",
"sequenceId": 10911,
"memory_committed_g1_eden_space": 196326981632,
"memory_max_metaspace": -1,
"service.type": "fpa-backend",
"memory_max_codeheap_non_profiled_nmethods": 121442304,
"memory_used_codeheap_profiled_nmethods": 89875712,
"memory_max_codeheap_non_nmethods": 8773632,
"memory_committed_g1_survivor_space": 167772160,
"memory_used_metaspace": 236621976,
"memory_max_g1_old_gen": 418759311360,
"host.name": "api1-prod",
"memory_max_g1_eden_space": -1,
"memory_max_codeheap_profiled_nmethods": 121442304,
"memory_used_g1_survivor_space": 167772160,
"memory_used_g1_eden_space": 77108084736,
"service.name": "api1-fpa-backend",
"memory_committed_codeheap_non_profiled_nmethods": 97583104,
"memory_used_codeheap_non_profiled_nmethods": 97579264,
"memory_committed_g1_old_gen": 222264557568,
"memory_used_codeheap_non_nmethods": 1807488,
"@timestamp": "2022-06-01T19:04:16.731Z",
"memory_used_g1_old_gen": 188840619040,
"name": "jvm_memory",
"memory_committed_codeheap_non_nmethods": 5177344,
"memory_committed_codeheap_profiled_nmethods": 92078080
},
"_id": "XXXXXXXXXXXXXXXXXXX",
"_score": 1
},
{
"_index": "metrics-prod-2022-06",
"_type": "_doc",
"_source": {
"memory_committed_metaspace": 246329344,
"memory_max_g1_survivor_space": -1,
"type": "gauge",
"sequenceId": 10947,
"memory_committed_g1_eden_space": 114923929600,
"memory_max_metaspace": -1,
"service.type": "fpa-backend",
"memory_used_codeheap_profiled_nmethods": 86866816,
"memory_max_codeheap_non_profiled_nmethods": 121442304,
"memory_max_codeheap_non_nmethods": 8773632,
"memory_committed_g1_survivor_space": 2717908992,
"memory_used_metaspace": 236733944,
"memory_max_g1_old_gen": 418759311360,
"host.name": "api3-prod",
"memory_max_g1_eden_space": -1,
"memory_used_g1_survivor_space": 2717908992,
"memory_max_codeheap_profiled_nmethods": 121442304,
"memory_used_g1_eden_space": 63216549888,
"service.name": "api3-fpa-backend",
"memory_committed_codeheap_non_profiled_nmethods": 96403456,
"memory_used_codeheap_non_profiled_nmethods": 96372096,
"memory_committed_g1_old_gen": 301117472768,
"memory_used_codeheap_non_nmethods": 1789824,
"@timestamp": "2022-06-01T19:04:15.370Z",
"memory_used_g1_old_gen": 197162180080,
"name": "jvm_memory",
"memory_committed_codeheap_non_nmethods": 5177344,
"memory_committed_codeheap_profiled_nmethods": 88145920
},
"_id": "XXXXXXXXXXXXXXXXXXX",
"_score": 1
},
{
"_index": "metrics-prod-2022-06",
"_type": "_doc",
"_source": {
"memory_committed_metaspace": 252706816,
"memory_max_g1_survivor_space": -1,
"type": "gauge",
"sequenceId": 13895,
"memory_committed_g1_eden_space": 191998459904,
"memory_max_metaspace": -1,
"service.type": "fpa-backend",
"memory_max_codeheap_non_profiled_nmethods": 121442304,
"memory_used_codeheap_profiled_nmethods": 88067072,
"memory_max_codeheap_non_nmethods": 8773632,
"memory_committed_g1_survivor_space": 268435456,
"memory_used_metaspace": 242709496,
"memory_max_g1_old_gen": 418759311360,
"host.name": "api7-prod",
"memory_max_g1_eden_space": -1,
"memory_max_codeheap_profiled_nmethods": 121442304,
"memory_used_g1_survivor_space": 268435456,
"memory_used_g1_eden_space": 128849018880,
"service.name": "api7-fpa-backend",
"memory_committed_codeheap_non_profiled_nmethods": 105381888,
"memory_used_codeheap_non_profiled_nmethods": 105210496,
"memory_committed_g1_old_gen": 226492416000,
"memory_used_codeheap_non_nmethods": 1809024,
"@timestamp": "2022-06-01T19:05:02.265Z",
"memory_used_g1_old_gen": 190759635976,
"name": "jvm_memory",
"memory_committed_codeheap_non_nmethods": 5767168,
"memory_committed_codeheap_profiled_nmethods": 91095040
},
"_id": "XXXXXXXXXXXXXXXXXXX",
"_score": 1
},
{
"_index": "metrics-prod-2022-06",
"_type": "_doc",
"_source": {
"memory_committed_metaspace": 256114688,
"memory_max_g1_survivor_space": -1,
"type": "gauge",
"sequenceId": 10936,
"memory_committed_g1_eden_space": 187535720448,
"memory_max_metaspace": -1,
"service.type": "fpa-backend",
"memory_used_codeheap_profiled_nmethods": 86692992,
"memory_max_codeheap_non_profiled_nmethods": 121442304,
"memory_committed_g1_survivor_space": 1543503872,
"memory_max_codeheap_non_nmethods": 8773632,
"memory_used_metaspace": 245723584,
"memory_max_g1_old_gen": 418759311360,
"host.name": "api6-prod",
"memory_max_g1_eden_space": -1,
"memory_used_g1_survivor_space": 1543503872,
"memory_max_codeheap_profiled_nmethods": 121442304,
"memory_used_g1_eden_space": 64458063872,
"service.name": "api6-fpa-backend",
"memory_used_codeheap_non_profiled_nmethods": 107003392,
"memory_committed_codeheap_non_profiled_nmethods": 107020288,
"memory_committed_g1_old_gen": 229680087040,
"memory_used_codeheap_non_nmethods": 1844352,
"@timestamp": "2022-06-01T19:05:03.287Z",
"memory_used_g1_old_gen": 193096379968,
"name": "jvm_memory",
"memory_committed_codeheap_non_nmethods": 4980736,
"memory_committed_codeheap_profiled_nmethods": 88342528
},
"_id": "XXXXXXXXXXXXXX",
"_score": 1
}
],
"total": 90,
"max_score": 1
},
"took": 967,
"timed_out": false,
"aggregations": {
"by_service": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"doc_count": 10,
"key": "api0-prod"
},
{
"doc_count": 10,
"key": "api1-prod"
},
{
"doc_count": 10,
"key": "api2-prod"
},
{
"doc_count": 10,
"key": "api3-prod"
},
{
"doc_count": 10,
"key": "api4-prod"
},
{
"doc_count": 10,
"key": "api5-prod"
},
{
"doc_count": 10,
"key": "api6-prod"
},
{
"doc_count": 10,
"key": "api7-prod"
},
{
"doc_count": 10,
"key": "api8-prod"
}
]
}
}
},
"search": {
"request": {
"search_type": "query_then_fetch",
"indices": [
"metrics-prod-*"
],
"rest_total_hits_as_int": true,
"body": {
"aggs": {
"by_service": {
"terms": {
"field": "host.name.keyword",
"size": 100
}
}
},
"query": {
"bool": {
"must": [
{
"range": {
"@timestamp": {
"gte": "now-10m",
"lte": "now"
}
}
}
],
"filter": [
{
"match_phrase": {
"name": "jvm_memory"
}
}
]
}
}
}
}
}
},
"condition": {
"type": "always",
"status": "success",
"met": true
},
"actions": []
},
"messages": []
}
Tldr;
Painless can be a bit difficult to work with. But here are some suggestions to better understand how to work with it.
Kibana
use the Painless lab tabs, available in the dev tools to write and test the core part of your algorithmDebug.explain(variable);
to debug painlessFix