From fe9c01fc683cdf685fda4220b8832be938492b0c Mon Sep 17 00:00:00 2001 From: Max Inden Date: Fri, 17 Jul 2020 09:43:57 +0200 Subject: [PATCH] .maintain/monitoring/alerting-rules: Remove HighCPUUsage alert (#6648) The `HighCPUUsage` alert is based on the `cpu_usage_percentage` metric. Instead of exposing the overall CPU usage in percent, the metric exposes the per-core usage summed over all cores. This commit removes the alert for two reasons: 1. Substrate itself does not expose the core count and thus one cannot alert based on the `cpu_usage_percentage` metric. 2. Alerting based on CPU usage is generic and not specific to Substrate or blockchains. Thus any CPU usage alert suffices. --- .../alerting-rules/alerting-rule-tests.yaml | 26 ------------------- .../alerting-rules/alerting-rules.yaml | 13 ---------- 2 files changed, 39 deletions(-) diff --git a/substrate/.maintain/monitoring/alerting-rules/alerting-rule-tests.yaml b/substrate/.maintain/monitoring/alerting-rules/alerting-rule-tests.yaml index 288750be3c..5b0daba3d8 100644 --- a/substrate/.maintain/monitoring/alerting-rules/alerting-rule-tests.yaml +++ b/substrate/.maintain/monitoring/alerting-rules/alerting-rule-tests.yaml @@ -42,34 +42,8 @@ tests: }' values: '1+1x3 4+0x13' # 1 2 3 4 4 4 4 4 4 4 4 4 ... 
- - series: 'polkadot_cpu_usage_percentage{ - job="polkadot", - pod="polkadot-abcdef01234-abcdef", - instance="polkadot-abcdef01234-abcdef", - }' - values: '0+20x5 100+0x5' # 0 20 40 60 80 100 100 100 100 100 100 - alert_rule_test: - ###################################################################### - # Resource usage - ###################################################################### - - - eval_time: 9m - alertname: HighCPUUsage - exp_alerts: - - eval_time: 10m - alertname: HighCPUUsage - exp_alerts: - - exp_labels: - severity: warning - pod: polkadot-abcdef01234-abcdef - instance: polkadot-abcdef01234-abcdef - job: polkadot - exp_annotations: - message: "The node polkadot-abcdef01234-abcdef has a CPU - usage higher than 100% for more than 5 minutes" - ###################################################################### # Block production ###################################################################### diff --git a/substrate/.maintain/monitoring/alerting-rules/alerting-rules.yaml b/substrate/.maintain/monitoring/alerting-rules/alerting-rules.yaml index 2ed3889a2c..12f46e17ad 100644 --- a/substrate/.maintain/monitoring/alerting-rules/alerting-rules.yaml +++ b/substrate/.maintain/monitoring/alerting-rules/alerting-rules.yaml @@ -2,19 +2,6 @@ groups: - name: polkadot.rules rules: - ############################################################################## - # Resource usage - ############################################################################## - - - alert: HighCPUUsage - expr: polkadot_cpu_usage_percentage >= 100 - for: 5m - labels: - severity: warning - annotations: - message: 'The node {{ $labels.instance }} has a CPU usage higher than 100% - for more than 5 minutes' - ############################################################################## # Block production ##############################################################################