mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-05-30 03:31:05 +00:00
.maintain/monitoring: Add alert when continuous task ends (#7250)
* .maintain/monitoring: Add alert when continuous task ends

  The `polkadot_tasks_ended_total` Prometheus metric shows when a task has ended. Use this metric to alert when specific known-to-be-continuous tasks end on a node.

* .maintain/monitoring: Don't hard-code task names
This commit is contained in:
@@ -126,6 +126,16 @@ groups:
|
|||||||
# Others
|
# Others
|
||||||
##############################################################################
|
##############################################################################
|
||||||
|
|
||||||
|
- alert: ContinuousTaskEnded
  expr: '(polkadot_tasks_spawned_total == 1) - on(instance, task_name)
    (polkadot_tasks_ended_total == 1)'
  for: 5m
  labels:
    severity: warning
  annotations:
    message: 'Continuous task {{ $labels.task_name }} on node
      {{ $labels.instance }} ended unexpectedly.'
|
||||||
|
|
||||||
- alert: AuthorityDiscoveryDiscoveryFailureHigh
|
- alert: AuthorityDiscoveryDiscoveryFailureHigh
|
||||||
expr: 'polkadot_authority_discovery_handle_value_found_event_failure /
|
expr: 'polkadot_authority_discovery_handle_value_found_event_failure /
|
||||||
ignoring(name)
|
ignoring(name)
|
||||||
|
|||||||
Reference in New Issue
Block a user