mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-05-30 01:11:04 +00:00
.maintain/monitoring/alerting-rules: Remove HighCPUUsage alert (#6648)
The `HighCPUUsage` alert is based on the `cpu_usage_percentage` metric. Instead of exposing the overall CPU usage in percent, the metric exposes the per-core usage summed over all cores. This commit removes the alert for two reasons: 1. Substrate itself does not expose the core count, and thus one cannot alert based on the `cpu_usage_percentage` metric. 2. Alerting based on CPU usage is generic and not specific to Substrate or blockchains. Thus any CPU usage alert suffices.
This commit is contained in:
@@ -42,34 +42,8 @@ tests:
|
||||
}'
|
||||
values: '1+1x3 4+0x13' # 1 2 3 4 4 4 4 4 4 4 4 4 ...
|
||||
|
||||
- series: 'polkadot_cpu_usage_percentage{
|
||||
job="polkadot",
|
||||
pod="polkadot-abcdef01234-abcdef",
|
||||
instance="polkadot-abcdef01234-abcdef",
|
||||
}'
|
||||
values: '0+20x5 100+0x5' # 0 20 40 60 80 100 100 100 100 100 100
|
||||
|
||||
alert_rule_test:
|
||||
|
||||
######################################################################
|
||||
# Resource usage
|
||||
######################################################################
|
||||
|
||||
- eval_time: 9m
|
||||
alertname: HighCPUUsage
|
||||
exp_alerts:
|
||||
- eval_time: 10m
|
||||
alertname: HighCPUUsage
|
||||
exp_alerts:
|
||||
- exp_labels:
|
||||
severity: warning
|
||||
pod: polkadot-abcdef01234-abcdef
|
||||
instance: polkadot-abcdef01234-abcdef
|
||||
job: polkadot
|
||||
exp_annotations:
|
||||
message: "The node polkadot-abcdef01234-abcdef has a CPU
|
||||
usage higher than 100% for more than 5 minutes"
|
||||
|
||||
######################################################################
|
||||
# Block production
|
||||
######################################################################
|
||||
|
||||
@@ -2,19 +2,6 @@ groups:
|
||||
- name: polkadot.rules
|
||||
rules:
|
||||
|
||||
##############################################################################
|
||||
# Resource usage
|
||||
##############################################################################
|
||||
|
||||
- alert: HighCPUUsage
|
||||
expr: polkadot_cpu_usage_percentage >= 100
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
message: 'The node {{ $labels.instance }} has a CPU usage higher than 100%
|
||||
for more than 5 minutes'
|
||||
|
||||
##############################################################################
|
||||
# Block production
|
||||
##############################################################################
|
||||
|
||||
Reference in New Issue
Block a user