mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-04-26 02:57:57 +00:00
46891e849f
Move scripts used in CI to the new location - **./scripts/ci/** * Move github scripts * Move more files * Move ci scripts and fix dependencies * Update docs/node-template-release.md Co-authored-by: João Paulo Silva de Souza <77391175+joao-paulo-parity@users.noreply.github.com> * Remove Cargo.lock * Apply suggestions from code review Co-authored-by: Denis Pisarev <denis.pisarev@parity.io> * Make more paths uniform Co-authored-by: João Paulo Silva de Souza <77391175+joao-paulo-parity@users.noreply.github.com> Co-authored-by: Denis Pisarev <denis.pisarev@parity.io>
172 lines
6.4 KiB
YAML
172 lines
6.4 KiB
YAML
groups:
|
|
- name: substrate.rules
|
|
rules:
|
|
|
|
##############################################################################
|
|
# Block production
|
|
##############################################################################
|
|
|
|
# Warning tier: fires once the 1-minute increase of the best-block height
# has stayed below 1 for 3 minutes.
- alert: BlockProductionSlow
  expr: 'increase(substrate_block_height{status="best"}[1m]) < 1'
  for: 3m
  labels:
    severity: warning
  annotations:
    message: >-
      Best block on instance {{ $labels.instance }} increases by less than
      1 per minute for more than 3 minutes.
|
|
# Critical tier: same expression as the warning rule, but it must hold for
# 10 minutes before firing.
- alert: BlockProductionSlow
  expr: 'increase(substrate_block_height{status="best"}[1m]) < 1'
  for: 10m
  labels:
    severity: critical
  annotations:
    message: >-
      Best block on instance {{ $labels.instance }} increases by less than
      1 per minute for more than 10 minutes.
|
|
|
|
##############################################################################
|
|
# Block finalization
|
|
##############################################################################
|
|
|
|
# Warning tier: fires once the 1-minute increase of the finalized-block
# height has stayed below 1 for 3 minutes.
- alert: BlockFinalizationSlow
  expr: 'increase(substrate_block_height{status="finalized"}[1m]) < 1'
  for: 3m
  labels:
    severity: warning
  annotations:
    message: >-
      Finalized block on instance {{ $labels.instance }} increases by less
      than 1 per minute for more than 3 minutes.
|
|
# Critical tier: same expression as the warning rule, but it must hold for
# 10 minutes before firing.
- alert: BlockFinalizationSlow
  expr: 'increase(substrate_block_height{status="finalized"}[1m]) < 1'
  for: 10m
  labels:
    severity: critical
  annotations:
    message: >-
      Finalized block on instance {{ $labels.instance }} increases by less
      than 1 per minute for more than 10 minutes.
|
|
# Fires when the best block is more than 10 blocks ahead of the finalized
# block for 8 minutes. Assuming an average block time of 6 seconds, a gap
# above 10 blocks means finalization trails production by more than
# 1 minute.
- alert: BlockFinalizationLaggingBehind
  expr: >-
    (substrate_block_height{status="best"} - ignoring(status)
    substrate_block_height{status="finalized"}) > 10
  for: 8m
  labels:
    severity: critical
  annotations:
    message: >-
      Block finalization on instance {{ $labels.instance }} is behind block
      production by {{ $value }} for more than 8 minutes.
|
|
|
|
##############################################################################
|
|
# Transaction queue
|
|
##############################################################################
|
|
|
|
# Warning tier: over a 5-minute window more validations were scheduled than
# finished, and that has been true for 10 minutes.
- alert: TransactionQueueSizeIncreasing
  expr: >-
    increase(substrate_sub_txpool_validations_scheduled[5m]) -
    increase(substrate_sub_txpool_validations_finished[5m]) > 0
  for: 10m
  labels:
    severity: warning
  annotations:
    message: >-
      The transaction pool size on node {{ $labels.instance }} has been
      monotonically increasing for more than 10 minutes.
|
|
# Escalation tier: same expression as the 10-minute rule, but it must hold
# for 30 minutes before firing.
#
# Severity is `critical` rather than `warning`: with the same alert name and
# the same severity, this rule and the 10-minute rule would emit alerts
# with an identical label set (differing only in annotations), so the longer
# tier would add no escalation. The other tiered rules in this file
# (BlockProductionSlow, BlockFinalizationSlow, NumberOfPeersLow) escalate
# from warning to critical in the same way.
# NOTE(review): this changes routing for the 30-minute tier - confirm that
# paging on a growing transaction queue is intended.
- alert: TransactionQueueSizeIncreasing
  expr: >-
    increase(substrate_sub_txpool_validations_scheduled[5m]) -
    increase(substrate_sub_txpool_validations_finished[5m]) > 0
  for: 30m
  labels:
    severity: critical
  annotations:
    message: >-
      The transaction pool size on node {{ $labels.instance }} has been
      monotonically increasing for more than 30 minutes.
|
|
# Fires when scheduled validations exceed finished validations by more than
# 10000 for 5 minutes.
- alert: TransactionQueueSizeHigh
  expr: >-
    substrate_sub_txpool_validations_scheduled -
    substrate_sub_txpool_validations_finished > 10000
  for: 5m
  labels:
    severity: warning
  annotations:
    message: >-
      The transaction pool size on node {{ $labels.instance }} has been
      above 10_000 for more than 5 minutes.
|
|
|
|
##############################################################################
|
|
# Networking
|
|
##############################################################################
|
|
|
|
# Warning tier: the libp2p peer count has been below 3 for 3 minutes.
- alert: NumberOfPeersLow
  expr: 'substrate_sub_libp2p_peers_count < 3'
  for: 3m
  labels:
    severity: warning
  annotations:
    message: >-
      The node {{ $labels.instance }} has less than 3 peers for more than
      3 minutes
|
|
# Critical tier: the libp2p peer count has been below 3 for 15 minutes.
- alert: NumberOfPeersLow
  expr: 'substrate_sub_libp2p_peers_count < 3'
  for: 15m
  labels:
    severity: critical
  annotations:
    message: >-
      The node {{ $labels.instance }} has less than 3 peers for more than
      15 minutes
|
|
# Fires as soon as the incoming-connection counter shows no increase over
# the last 20 minutes. There is no `for:` clause; the 20-minute range in the
# expression is the only time window.
- alert: NoIncomingConnection
  expr: 'increase(substrate_sub_libp2p_incoming_connections_total[20m]) == 0'
  labels:
    severity: warning
  annotations:
    message: >-
      The node {{ $labels.instance }} has not received any new incoming TCP
      connection in the past 20 minutes. Is it connected to the Internet?
|
|
|
|
##############################################################################
|
|
# System
|
|
##############################################################################
|
|
|
|
# Fires when more than 10000 file descriptors have been allocated for
# 3 minutes. Only series with a non-empty `chain` label are considered.
- alert: NumberOfFileDescriptorsHigh
  expr: >-
    node_filefd_allocated{chain!=""} > 10000
  for: 3m
  labels:
    severity: warning
  annotations:
    message: >-
      The node {{ $labels.instance }} has more than 10_000 file descriptors
      allocated for more than 3 minutes
|
|
|
|
##############################################################################
|
|
# Others
|
|
##############################################################################
|
|
|
|
# Fires when the ratio of failed "value found" handlings to received
# "value_found" DHT events has been above 0.5 for 2 hours. The `name` label
# is ignored when matching the two series.
- alert: AuthorityDiscoveryDiscoveryFailureHigh
  expr: >-
    substrate_authority_discovery_handle_value_found_event_failure /
    ignoring(name)
    substrate_authority_discovery_dht_event_received{name="value_found"} > 0.5
  for: 2h
  labels:
    severity: warning
  annotations:
    message: >-
      Authority discovery on node {{ $labels.instance }} fails to process
      more than 50 % of the values found on the DHT for more than 2 hours.
|
|
|
|
# Fires when a channel's backlog has been at least 200 items for 5 minutes.
# The backlog is "send" minus "received" (matched while ignoring the
# `action` label); `or on(instance)` adds the raw "send" series for
# instances that have no send-minus-received result.
- alert: UnboundedChannelPersistentlyLarge
  expr: >-
    (
    (substrate_unbounded_channel_len{action = "send"} -
    ignoring(action) substrate_unbounded_channel_len{action = "received"})
    or on(instance) substrate_unbounded_channel_len{action = "send"}
    ) >= 200
  for: 5m
  labels:
    severity: warning
  annotations:
    message: >-
      Channel {{ $labels.entity }} on node {{ $labels.instance }} contains
      more than 200 items for more than 5 minutes. Node might be frozen.
|
|
|
|
# Fires as soon as a channel's backlog exceeds 15000 items (no `for:`
# clause). The backlog is "send" minus "received" (matched while ignoring
# the `action` label); `or on(instance)` adds the raw "send" series for
# instances that have no send-minus-received result.
- alert: UnboundedChannelVeryLarge
  expr: >-
    (
    (substrate_unbounded_channel_len{action = "send"} -
    ignoring(action) substrate_unbounded_channel_len{action = "received"})
    or on(instance) substrate_unbounded_channel_len{action = "send"}
    ) > 15000
  labels:
    severity: warning
  annotations:
    message: >-
      Channel {{ $labels.entity }} on node {{ $labels.instance }} contains
      more than 15000 items.
|