.maintain/monitoring: Add alert when continuous task ends (#7250)

* .maintain/monitoring: Add alert when continuous task ends

Through the `polkadot_tasks_ended_total` Prometheus metric one can tell
when a task ended. Use this metric to alert when specific
known-to-be-continuous tasks end on a node.

* .maintain/monitoring: Don't hard-code task names
This commit is contained in:
Max Inden
2020-10-05 10:40:24 +02:00
committed by GitHub
parent 400c9f7e5d
commit 0ff724c939
@@ -126,6 +126,16 @@ groups:
# Others # Others
############################################################################## ##############################################################################
# Warn when a task that is expected to keep running on a node has ended.
- alert: ContinuousTaskEnded
# Left side: tasks whose spawn counter equals 1; right side: tasks whose
# ended counter equals 1. The two sides are matched on `instance` and
# `task_name` only, so a result exists just for tasks present on both sides.
# Presumably "spawned exactly once" is the heuristic for a continuous task,
# which avoids listing task names here - TODO confirm.
# NOTE(review): the subtraction yields 0 for every matched series; Prometheus
# alerting rules fire on any returned series regardless of its value, so the
# 0 result is still expected to raise the alert - confirm against the docs.
expr: '(polkadot_tasks_spawned_total == 1) - on(instance, task_name)
(polkadot_tasks_ended_total == 1)'
# The expression has to keep returning the series for 5 minutes before the
# alert moves from pending to firing.
for: 5m
labels:
severity: warning
annotations:
# `$labels.task_name` and `$labels.instance` are the two labels kept by the
# `on(instance, task_name)` matching in `expr`.
message: 'Continuous task {{ $labels.task_name }} on node
{{ $labels.instance }} ended unexpectedly.'
- alert: AuthorityDiscoveryDiscoveryFailureHigh - alert: AuthorityDiscoveryDiscoveryFailureHigh
expr: 'polkadot_authority_discovery_handle_value_found_event_failure / expr: 'polkadot_authority_discovery_handle_value_found_event_failure /
ignoring(name) ignoring(name)