From bd79b3debc4f2fde1c2f42b4729d54c790a05da0 Mon Sep 17 00:00:00 2001 From: Max Inden Date: Tue, 23 Jun 2020 17:25:42 +0200 Subject: [PATCH] client/network/service: Add primary dimension to connection metrics (#6472) * client/network/service: Add primary dimension to connection metrics Two nodes can be interconnected via one or more connections. The first of those connections is called the primary connection. This commit adds another dimension to the `sub_libp2p_connections_{closed,opened}_total` metrics to differentiate primary and non-primary connections being opened / closed. By intuition more than one connection between two nodes is rare. Tracking the fact whether a connection is primary or not will help prove or disprove this intuition. * .maintain/monitoring: Ensure to sum over all connections_closed variants * client/network/service: Rename is_primary to is_first * client/network/service: Split by metric name with two additional metrics * Revert ".maintain/monitoring: Ensure to sum over all connections_closed variants" This reverts commit 2d2f93e414440b9fc9e8f7fae6fe48bd95af6b8f. * client/network/service: Remove labels from distinct metrics --- substrate/client/network/src/service.rs | 66 +++++++++++++++---------- 1 file changed, 40 insertions(+), 26 deletions(-) diff --git a/substrate/client/network/src/service.rs b/substrate/client/network/src/service.rs index 93560a6c0b..8cce3367f7 100644 --- a/substrate/client/network/src/service.rs +++ b/substrate/client/network/src/service.rs @@ -854,6 +854,8 @@ struct Metrics { // This list is ordered alphabetically connections_closed_total: CounterVec, connections_opened_total: CounterVec, + distinct_peers_connections_closed_total: Counter, + distinct_peers_connections_opened_total: Counter, import_queue_blocks_submitted: Counter, import_queue_finality_proofs_submitted: Counter, import_queue_justifications_submitted: Counter, @@ -889,17 +891,25 @@ impl Metrics { connections_closed_total: register(CounterVec::new( Opts::new( "sub_libp2p_connections_closed_total", - "Total number of connections closed, by reason and direction" + "Total number of connections closed, by direction and reason" ), &["direction", "reason"] )?, registry)?, connections_opened_total: register(CounterVec::new( Opts::new( "sub_libp2p_connections_opened_total", - "Total number of connections opened" + "Total number of connections opened by direction" ), &["direction"] )?, registry)?, + distinct_peers_connections_closed_total: register(Counter::new( + "sub_libp2p_distinct_peers_connections_closed_total", + "Total number of connections closed with distinct peers" + )?, registry)?, + distinct_peers_connections_opened_total: register(Counter::new( + "sub_libp2p_distinct_peers_connections_opened_total", + "Total number of connections opened with distinct peers" + )?, registry)?, import_queue_blocks_submitted: register(Counter::new( "import_queue_blocks_submitted", "Number of blocks submitted to the import queue.", @@ -1214,40 +1224,44 @@ impl Future for NetworkWorker { } this.event_streams.send(ev); }, - Poll::Ready(SwarmEvent::ConnectionEstablished { peer_id, endpoint, .. }) => { + Poll::Ready(SwarmEvent::ConnectionEstablished { peer_id, endpoint, num_established }) => { trace!(target: "sub-libp2p", "Libp2p => Connected({:?})", peer_id); + if let Some(metrics) = this.metrics.as_ref() { - match endpoint { - ConnectedPoint::Dialer { .. } => - metrics.connections_opened_total.with_label_values(&["out"]).inc(), - ConnectedPoint::Listener { .. } => - metrics.connections_opened_total.with_label_values(&["in"]).inc(), - } - } - }, - Poll::Ready(SwarmEvent::ConnectionClosed { peer_id, cause, endpoint, .. }) => { - trace!(target: "sub-libp2p", "Libp2p => Disconnected({:?}, {:?})", peer_id, cause); - if let Some(metrics) = this.metrics.as_ref() { - let dir = match endpoint { + let direction = match endpoint { ConnectedPoint::Dialer { .. } => "out", ConnectedPoint::Listener { .. } => "in", }; + metrics.connections_opened_total.with_label_values(&[direction]).inc(); - match cause { - ConnectionError::IO(_) => - metrics.connections_closed_total.with_label_values(&[dir, "transport-error"]).inc(), + if num_established.get() == 1 { + metrics.distinct_peers_connections_opened_total.inc(); + } + } + }, + Poll::Ready(SwarmEvent::ConnectionClosed { peer_id, cause, endpoint, num_established }) => { + trace!(target: "sub-libp2p", "Libp2p => Disconnected({:?}, {:?})", peer_id, cause); + if let Some(metrics) = this.metrics.as_ref() { + let direction = match endpoint { + ConnectedPoint::Dialer { .. } => "out", + ConnectedPoint::Listener { .. } => "in", + }; + let reason = match cause { + ConnectionError::IO(_) => "transport-error", ConnectionError::Handler(NodeHandlerWrapperError::Handler(EitherError::A(EitherError::A( EitherError::A(EitherError::A(EitherError::B( - EitherError::A(PingFailure::Timeout)))))))) => - metrics.connections_closed_total.with_label_values(&[dir, "ping-timeout"]).inc(), + EitherError::A(PingFailure::Timeout)))))))) => "ping-timeout", ConnectionError::Handler(NodeHandlerWrapperError::Handler(EitherError::A(EitherError::A( EitherError::A(EitherError::A(EitherError::A( - EitherError::B(LegacyConnectionKillError)))))))) => - metrics.connections_closed_total.with_label_values(&[dir, "force-closed"]).inc(), - ConnectionError::Handler(NodeHandlerWrapperError::Handler(_)) => - metrics.connections_closed_total.with_label_values(&[dir, "protocol-error"]).inc(), - ConnectionError::Handler(NodeHandlerWrapperError::KeepAliveTimeout) => - metrics.connections_closed_total.with_label_values(&[dir, "keep-alive-timeout"]).inc(), + EitherError::B(LegacyConnectionKillError)))))))) => "force-closed", + ConnectionError::Handler(NodeHandlerWrapperError::Handler(_)) => "protocol-error", + ConnectionError::Handler(NodeHandlerWrapperError::KeepAliveTimeout) => "keep-alive-timeout", + }; + metrics.connections_closed_total.with_label_values(&[direction, reason]).inc(); + + // `num_established` represents the number of *remaining* connections. + if num_established == 0 { + metrics.distinct_peers_connections_closed_total.inc(); } } },