mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-12 10:01:17 +00:00
Refactor & detach network metrics. (#6986)
* Refactor sc-network/service metrics. 1. Aggregate sc-network metrics into a submodule, introducing two more sourced metrics to avoid duplicate atomics. 2. Decouple periodic sc-service network metrics from other metrics, so that they can be updated independently. * Update client/service/src/metrics.rs * Update client/service/src/metrics.rs
This commit is contained in:
@@ -23,7 +23,7 @@ use futures::prelude::*;
|
||||
use log::{info, trace, warn};
|
||||
use parity_util_mem::MallocSizeOf;
|
||||
use sc_client_api::{BlockchainEvents, UsageProvider};
|
||||
use sc_network::{network_state::NetworkState, NetworkStatus};
|
||||
use sc_network::NetworkStatus;
|
||||
use sp_blockchain::HeaderMetadata;
|
||||
use sp_runtime::traits::{Block as BlockT, Header};
|
||||
use sp_transaction_pool::TransactionPool;
|
||||
@@ -81,7 +81,7 @@ impl<T: TransactionPool + MallocSizeOf> TransactionPoolAndMaybeMallogSizeOf for
|
||||
/// Builds the informant and returns a `Future` that drives the informant.
|
||||
pub fn build<B: BlockT, C>(
|
||||
client: Arc<C>,
|
||||
network_status_sinks: Arc<status_sinks::StatusSinks<(NetworkStatus<B>, NetworkState)>>,
|
||||
network_status_sinks: Arc<status_sinks::StatusSinks<NetworkStatus<B>>>,
|
||||
pool: Arc<impl TransactionPoolAndMaybeMallogSizeOf>,
|
||||
format: OutputFormat,
|
||||
) -> impl futures::Future<Output = ()>
|
||||
@@ -96,7 +96,7 @@ where
|
||||
network_status_sinks.push(Duration::from_millis(5000), network_status_sink);
|
||||
|
||||
let display_notifications = network_status_stream
|
||||
.for_each(move |(net_status, _)| {
|
||||
.for_each(move |net_status| {
|
||||
let info = client_1.usage_info();
|
||||
if let Some(ref usage) = info.usage {
|
||||
trace!(target: "usage", "Usage statistics: {}", usage);
|
||||
|
||||
@@ -43,10 +43,6 @@ pub struct NetworkState {
|
||||
pub connected_peers: HashMap<String, Peer>,
|
||||
/// List of node that we know of but that we're not connected to.
|
||||
pub not_connected_peers: HashMap<String, NotConnectedPeer>,
|
||||
/// The total number of bytes received.
|
||||
pub total_bytes_inbound: u64,
|
||||
/// The total number of bytes sent.
|
||||
pub total_bytes_outbound: u64,
|
||||
/// State of the peerset manager.
|
||||
pub peerset: serde_json::Value,
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
//! which is then processed by [`NetworkWorker::poll`].
|
||||
|
||||
use crate::{
|
||||
ExHashT, NetworkStateInfo,
|
||||
ExHashT, NetworkStateInfo, NetworkStatus,
|
||||
behaviour::{self, Behaviour, BehaviourOut},
|
||||
config::{parse_str_addr, NonReservedPeerMode, Params, Role, TransportConfig},
|
||||
DhtEvent,
|
||||
@@ -49,12 +49,8 @@ use libp2p::kad::record;
|
||||
use libp2p::ping::handler::PingFailure;
|
||||
use libp2p::swarm::{NetworkBehaviour, SwarmBuilder, SwarmEvent, protocols_handler::NodeHandlerWrapperError};
|
||||
use log::{error, info, trace, warn};
|
||||
use metrics::{Metrics, MetricSources, Histogram, HistogramVec};
|
||||
use parking_lot::Mutex;
|
||||
use prometheus_endpoint::{
|
||||
register, Counter, CounterVec, Gauge, GaugeVec, Histogram, HistogramOpts, HistogramVec, Opts,
|
||||
PrometheusError, Registry, U64,
|
||||
SourcedCounter, MetricSource
|
||||
};
|
||||
use sc_peerset::PeersetHandle;
|
||||
use sp_consensus::import_queue::{BlockImportError, BlockImportResult, ImportQueue, Link};
|
||||
use sp_runtime::{
|
||||
@@ -80,6 +76,7 @@ use wasm_timer::Instant;
|
||||
|
||||
pub use behaviour::{ResponseFailure, InboundFailure, RequestFailure, OutboundFailure};
|
||||
|
||||
mod metrics;
|
||||
mod out_events;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
@@ -365,10 +362,11 @@ impl<B: BlockT + 'static, H: ExHashT> NetworkWorker<B, H> {
|
||||
// Initialize the metrics.
|
||||
let metrics = match ¶ms.metrics_registry {
|
||||
Some(registry) => {
|
||||
// Sourced metrics.
|
||||
BandwidthCounters::register(registry, bandwidth.clone())?;
|
||||
// Other (i.e. new) metrics.
|
||||
Some(Metrics::register(registry)?)
|
||||
Some(metrics::register(registry, MetricSources {
|
||||
bandwidth: bandwidth.clone(),
|
||||
major_syncing: is_major_syncing.clone(),
|
||||
connected_peers: num_connected.clone(),
|
||||
})?)
|
||||
}
|
||||
None => None
|
||||
};
|
||||
@@ -423,6 +421,19 @@ impl<B: BlockT + 'static, H: ExHashT> NetworkWorker<B, H> {
|
||||
})
|
||||
}
|
||||
|
||||
/// High-level network status information.
|
||||
pub fn status(&self) -> NetworkStatus<B> {
|
||||
NetworkStatus {
|
||||
sync_state: self.sync_state(),
|
||||
best_seen_block: self.best_seen_block(),
|
||||
num_sync_peers: self.num_sync_peers(),
|
||||
num_connected_peers: self.num_connected_peers(),
|
||||
num_active_peers: self.num_active_peers(),
|
||||
total_bytes_inbound: self.total_bytes_inbound(),
|
||||
total_bytes_outbound: self.total_bytes_outbound(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the total number of bytes received so far.
|
||||
pub fn total_bytes_inbound(&self) -> u64 {
|
||||
self.service.bandwidth.total_inbound()
|
||||
@@ -562,8 +573,6 @@ impl<B: BlockT + 'static, H: ExHashT> NetworkWorker<B, H> {
|
||||
peer_id: Swarm::<B, H>::local_peer_id(&swarm).to_base58(),
|
||||
listened_addresses: Swarm::<B, H>::listeners(&swarm).cloned().collect(),
|
||||
external_addresses: Swarm::<B, H>::external_addresses(&swarm).cloned().collect(),
|
||||
total_bytes_inbound: self.service.bandwidth.total_inbound(),
|
||||
total_bytes_outbound: self.service.bandwidth.total_outbound(),
|
||||
connected_peers,
|
||||
not_connected_peers,
|
||||
peerset: swarm.user_protocol_mut().peerset_debug_info(),
|
||||
@@ -1175,265 +1184,6 @@ pub struct NetworkWorker<B: BlockT + 'static, H: ExHashT> {
|
||||
peers_notifications_sinks: Arc<Mutex<HashMap<(PeerId, ConsensusEngineId), NotificationsSink>>>,
|
||||
}
|
||||
|
||||
struct Metrics {
|
||||
// This list is ordered alphabetically
|
||||
connections_closed_total: CounterVec<U64>,
|
||||
connections_opened_total: CounterVec<U64>,
|
||||
distinct_peers_connections_closed_total: Counter<U64>,
|
||||
distinct_peers_connections_opened_total: Counter<U64>,
|
||||
import_queue_blocks_submitted: Counter<U64>,
|
||||
import_queue_finality_proofs_submitted: Counter<U64>,
|
||||
import_queue_justifications_submitted: Counter<U64>,
|
||||
incoming_connections_errors_total: CounterVec<U64>,
|
||||
incoming_connections_total: Counter<U64>,
|
||||
is_major_syncing: Gauge<U64>,
|
||||
issued_light_requests: Counter<U64>,
|
||||
kademlia_query_duration: HistogramVec,
|
||||
kademlia_random_queries_total: CounterVec<U64>,
|
||||
kademlia_records_count: GaugeVec<U64>,
|
||||
kademlia_records_sizes_total: GaugeVec<U64>,
|
||||
kbuckets_num_nodes: GaugeVec<U64>,
|
||||
listeners_local_addresses: Gauge<U64>,
|
||||
listeners_errors_total: Counter<U64>,
|
||||
notifications_sizes: HistogramVec,
|
||||
notifications_streams_closed_total: CounterVec<U64>,
|
||||
notifications_streams_opened_total: CounterVec<U64>,
|
||||
peers_count: Gauge<U64>,
|
||||
peerset_num_discovered: Gauge<U64>,
|
||||
peerset_num_requested: Gauge<U64>,
|
||||
pending_connections: Gauge<U64>,
|
||||
pending_connections_errors_total: CounterVec<U64>,
|
||||
requests_in_failure_total: CounterVec<U64>,
|
||||
requests_in_success_total: HistogramVec,
|
||||
requests_out_failure_total: CounterVec<U64>,
|
||||
requests_out_success_total: HistogramVec,
|
||||
requests_out_started_total: CounterVec<U64>,
|
||||
}
|
||||
|
||||
/// The source for bandwidth metrics.
|
||||
#[derive(Clone)]
|
||||
struct BandwidthCounters(Arc<transport::BandwidthSinks>);
|
||||
|
||||
impl BandwidthCounters {
|
||||
fn register(registry: &Registry, sinks: Arc<transport::BandwidthSinks>)
|
||||
-> Result<(), PrometheusError>
|
||||
{
|
||||
register(SourcedCounter::new(
|
||||
&Opts::new(
|
||||
"sub_libp2p_network_bytes_total",
|
||||
"Total bandwidth usage"
|
||||
).variable_label("direction"),
|
||||
BandwidthCounters(sinks),
|
||||
)?, registry)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl MetricSource for BandwidthCounters {
|
||||
type N = u64;
|
||||
|
||||
fn collect(&self, mut set: impl FnMut(&[&str], Self::N)) {
|
||||
set(&[&"in"], self.0.total_inbound());
|
||||
set(&[&"out"], self.0.total_outbound());
|
||||
}
|
||||
}
|
||||
|
||||
impl Metrics {
|
||||
fn register(registry: &Registry) -> Result<Self, PrometheusError> {
|
||||
Ok(Self {
|
||||
// This list is ordered alphabetically
|
||||
connections_closed_total: register(CounterVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_connections_closed_total",
|
||||
"Total number of connections closed, by direction and reason"
|
||||
),
|
||||
&["direction", "reason"]
|
||||
)?, registry)?,
|
||||
connections_opened_total: register(CounterVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_connections_opened_total",
|
||||
"Total number of connections opened by direction"
|
||||
),
|
||||
&["direction"]
|
||||
)?, registry)?,
|
||||
distinct_peers_connections_closed_total: register(Counter::new(
|
||||
"sub_libp2p_distinct_peers_connections_closed_total",
|
||||
"Total number of connections closed with distinct peers"
|
||||
)?, registry)?,
|
||||
distinct_peers_connections_opened_total: register(Counter::new(
|
||||
"sub_libp2p_distinct_peers_connections_opened_total",
|
||||
"Total number of connections opened with distinct peers"
|
||||
)?, registry)?,
|
||||
import_queue_blocks_submitted: register(Counter::new(
|
||||
"import_queue_blocks_submitted",
|
||||
"Number of blocks submitted to the import queue.",
|
||||
)?, registry)?,
|
||||
import_queue_finality_proofs_submitted: register(Counter::new(
|
||||
"import_queue_finality_proofs_submitted",
|
||||
"Number of finality proofs submitted to the import queue.",
|
||||
)?, registry)?,
|
||||
import_queue_justifications_submitted: register(Counter::new(
|
||||
"import_queue_justifications_submitted",
|
||||
"Number of justifications submitted to the import queue.",
|
||||
)?, registry)?,
|
||||
incoming_connections_errors_total: register(CounterVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_incoming_connections_handshake_errors_total",
|
||||
"Total number of incoming connections that have failed during the \
|
||||
initial handshake"
|
||||
),
|
||||
&["reason"]
|
||||
)?, registry)?,
|
||||
incoming_connections_total: register(Counter::new(
|
||||
"sub_libp2p_incoming_connections_total",
|
||||
"Total number of incoming connections on the listening sockets"
|
||||
)?, registry)?,
|
||||
is_major_syncing: register(Gauge::new(
|
||||
"sub_libp2p_is_major_syncing", "Whether the node is performing a major sync or not.",
|
||||
)?, registry)?,
|
||||
issued_light_requests: register(Counter::new(
|
||||
"issued_light_requests",
|
||||
"Number of light client requests that our node has issued.",
|
||||
)?, registry)?,
|
||||
kademlia_query_duration: register(HistogramVec::new(
|
||||
HistogramOpts {
|
||||
common_opts: Opts::new(
|
||||
"sub_libp2p_kademlia_query_duration",
|
||||
"Duration of Kademlia queries per query type"
|
||||
),
|
||||
buckets: prometheus_endpoint::exponential_buckets(0.5, 2.0, 10)
|
||||
.expect("parameters are always valid values; qed"),
|
||||
},
|
||||
&["type"]
|
||||
)?, registry)?,
|
||||
kademlia_random_queries_total: register(CounterVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_kademlia_random_queries_total",
|
||||
"Number of random Kademlia queries started"
|
||||
),
|
||||
&["protocol"]
|
||||
)?, registry)?,
|
||||
kademlia_records_count: register(GaugeVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_kademlia_records_count",
|
||||
"Number of records in the Kademlia records store"
|
||||
),
|
||||
&["protocol"]
|
||||
)?, registry)?,
|
||||
kademlia_records_sizes_total: register(GaugeVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_kademlia_records_sizes_total",
|
||||
"Total size of all the records in the Kademlia records store"
|
||||
),
|
||||
&["protocol"]
|
||||
)?, registry)?,
|
||||
kbuckets_num_nodes: register(GaugeVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_kbuckets_num_nodes",
|
||||
"Number of nodes in the Kademlia k-buckets"
|
||||
),
|
||||
&["protocol"]
|
||||
)?, registry)?,
|
||||
listeners_local_addresses: register(Gauge::new(
|
||||
"sub_libp2p_listeners_local_addresses", "Number of local addresses we're listening on"
|
||||
)?, registry)?,
|
||||
listeners_errors_total: register(Counter::new(
|
||||
"sub_libp2p_listeners_errors_total",
|
||||
"Total number of non-fatal errors reported by a listener"
|
||||
)?, registry)?,
|
||||
notifications_sizes: register(HistogramVec::new(
|
||||
HistogramOpts {
|
||||
common_opts: Opts::new(
|
||||
"sub_libp2p_notifications_sizes",
|
||||
"Sizes of the notifications send to and received from all nodes"
|
||||
),
|
||||
buckets: prometheus_endpoint::exponential_buckets(64.0, 4.0, 8)
|
||||
.expect("parameters are always valid values; qed"),
|
||||
},
|
||||
&["direction", "protocol"]
|
||||
)?, registry)?,
|
||||
notifications_streams_closed_total: register(CounterVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_notifications_streams_closed_total",
|
||||
"Total number of notification substreams that have been closed"
|
||||
),
|
||||
&["protocol"]
|
||||
)?, registry)?,
|
||||
notifications_streams_opened_total: register(CounterVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_notifications_streams_opened_total",
|
||||
"Total number of notification substreams that have been opened"
|
||||
),
|
||||
&["protocol"]
|
||||
)?, registry)?,
|
||||
peers_count: register(Gauge::new(
|
||||
"sub_libp2p_peers_count", "Number of network gossip peers",
|
||||
)?, registry)?,
|
||||
peerset_num_discovered: register(Gauge::new(
|
||||
"sub_libp2p_peerset_num_discovered", "Number of nodes stored in the peerset manager",
|
||||
)?, registry)?,
|
||||
peerset_num_requested: register(Gauge::new(
|
||||
"sub_libp2p_peerset_num_requested", "Number of nodes that the peerset manager wants us to be connected to",
|
||||
)?, registry)?,
|
||||
pending_connections: register(Gauge::new(
|
||||
"sub_libp2p_pending_connections",
|
||||
"Number of connections in the process of being established",
|
||||
)?, registry)?,
|
||||
pending_connections_errors_total: register(CounterVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_pending_connections_errors_total",
|
||||
"Total number of pending connection errors"
|
||||
),
|
||||
&["reason"]
|
||||
)?, registry)?,
|
||||
requests_in_failure_total: register(CounterVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_requests_in_failure_total",
|
||||
"Total number of incoming requests that the node has failed to answer"
|
||||
),
|
||||
&["protocol", "reason"]
|
||||
)?, registry)?,
|
||||
requests_in_success_total: register(HistogramVec::new(
|
||||
HistogramOpts {
|
||||
common_opts: Opts::new(
|
||||
"sub_libp2p_requests_in_success_total",
|
||||
"Total number of requests received and answered"
|
||||
),
|
||||
buckets: prometheus_endpoint::exponential_buckets(0.001, 2.0, 16)
|
||||
.expect("parameters are always valid values; qed"),
|
||||
},
|
||||
&["protocol"]
|
||||
)?, registry)?,
|
||||
requests_out_failure_total: register(CounterVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_requests_out_failure_total",
|
||||
"Total number of requests that have failed"
|
||||
),
|
||||
&["protocol", "reason"]
|
||||
)?, registry)?,
|
||||
requests_out_success_total: register(HistogramVec::new(
|
||||
HistogramOpts {
|
||||
common_opts: Opts::new(
|
||||
"sub_libp2p_requests_out_success_total",
|
||||
"For successful requests, time between a request's start and finish"
|
||||
),
|
||||
buckets: prometheus_endpoint::exponential_buckets(0.001, 2.0, 16)
|
||||
.expect("parameters are always valid values; qed"),
|
||||
},
|
||||
&["protocol"]
|
||||
)?, registry)?,
|
||||
requests_out_started_total: register(CounterVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_requests_out_started_total",
|
||||
"Total number of requests emitted"
|
||||
),
|
||||
&["protocol"]
|
||||
)?, registry)?,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<B: BlockT + 'static, H: ExHashT> Future for NetworkWorker<B, H> {
|
||||
type Output = ();
|
||||
|
||||
@@ -1902,7 +1652,6 @@ impl<B: BlockT + 'static, H: ExHashT> Future for NetworkWorker<B, H> {
|
||||
this.is_major_syncing.store(is_major_syncing, Ordering::Relaxed);
|
||||
|
||||
if let Some(metrics) = this.metrics.as_ref() {
|
||||
metrics.is_major_syncing.set(is_major_syncing as u64);
|
||||
for (proto, num_entries) in this.network_service.num_kbuckets_entries() {
|
||||
metrics.kbuckets_num_nodes.with_label_values(&[&proto.as_ref()]).set(num_entries as u64);
|
||||
}
|
||||
@@ -1912,7 +1661,6 @@ impl<B: BlockT + 'static, H: ExHashT> Future for NetworkWorker<B, H> {
|
||||
for (proto, num_entries) in this.network_service.kademlia_records_total_size() {
|
||||
metrics.kademlia_records_sizes_total.with_label_values(&[&proto.as_ref()]).set(num_entries as u64);
|
||||
}
|
||||
metrics.peers_count.set(num_connected_peers as u64);
|
||||
metrics.peerset_num_discovered.set(this.network_service.user_protocol().num_discovered_peers() as u64);
|
||||
metrics.peerset_num_requested.set(this.network_service.user_protocol().requested_peers().count() as u64);
|
||||
metrics.pending_connections.set(Swarm::network_info(&this.network_service).num_connections_pending as u64);
|
||||
|
||||
@@ -0,0 +1,358 @@
|
||||
// This file is part of Substrate.
|
||||
|
||||
// Copyright (C) 2017-2020 Parity Technologies (UK) Ltd.
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later WITH Classpath-exception-2.0
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
use crate::transport::BandwidthSinks;
|
||||
use prometheus_endpoint::{
|
||||
self as prometheus,
|
||||
Counter, CounterVec, Gauge, GaugeVec, HistogramOpts,
|
||||
PrometheusError, Registry, U64, Opts,
|
||||
SourcedCounter, SourcedGauge, MetricSource,
|
||||
};
|
||||
use std::{
|
||||
str,
|
||||
sync::{
|
||||
atomic::{AtomicBool, AtomicUsize, Ordering},
|
||||
Arc,
|
||||
},
|
||||
};
|
||||
|
||||
pub use prometheus_endpoint::{Histogram, HistogramVec};
|
||||
|
||||
/// Registers all networking metrics with the given registry.
|
||||
pub fn register(registry: &Registry, sources: MetricSources) -> Result<Metrics, PrometheusError> {
|
||||
BandwidthCounters::register(registry, sources.bandwidth)?;
|
||||
MajorSyncingGauge::register(registry, sources.major_syncing)?;
|
||||
NumConnectedGauge::register(registry, sources.connected_peers)?;
|
||||
Metrics::register(registry)
|
||||
}
|
||||
|
||||
/// Predefined metric sources that are fed directly into prometheus.
|
||||
pub struct MetricSources {
|
||||
pub bandwidth: Arc<BandwidthSinks>,
|
||||
pub major_syncing: Arc<AtomicBool>,
|
||||
pub connected_peers: Arc<AtomicUsize>,
|
||||
}
|
||||
|
||||
/// Dedicated metrics.
|
||||
pub struct Metrics {
|
||||
// This list is ordered alphabetically
|
||||
pub connections_closed_total: CounterVec<U64>,
|
||||
pub connections_opened_total: CounterVec<U64>,
|
||||
pub distinct_peers_connections_closed_total: Counter<U64>,
|
||||
pub distinct_peers_connections_opened_total: Counter<U64>,
|
||||
pub import_queue_blocks_submitted: Counter<U64>,
|
||||
pub import_queue_finality_proofs_submitted: Counter<U64>,
|
||||
pub import_queue_justifications_submitted: Counter<U64>,
|
||||
pub incoming_connections_errors_total: CounterVec<U64>,
|
||||
pub incoming_connections_total: Counter<U64>,
|
||||
pub issued_light_requests: Counter<U64>,
|
||||
pub kademlia_query_duration: HistogramVec,
|
||||
pub kademlia_random_queries_total: CounterVec<U64>,
|
||||
pub kademlia_records_count: GaugeVec<U64>,
|
||||
pub kademlia_records_sizes_total: GaugeVec<U64>,
|
||||
pub kbuckets_num_nodes: GaugeVec<U64>,
|
||||
pub listeners_local_addresses: Gauge<U64>,
|
||||
pub listeners_errors_total: Counter<U64>,
|
||||
pub notifications_sizes: HistogramVec,
|
||||
pub notifications_streams_closed_total: CounterVec<U64>,
|
||||
pub notifications_streams_opened_total: CounterVec<U64>,
|
||||
pub peerset_num_discovered: Gauge<U64>,
|
||||
pub peerset_num_requested: Gauge<U64>,
|
||||
pub pending_connections: Gauge<U64>,
|
||||
pub pending_connections_errors_total: CounterVec<U64>,
|
||||
pub requests_in_failure_total: CounterVec<U64>,
|
||||
pub requests_in_success_total: HistogramVec,
|
||||
pub requests_out_failure_total: CounterVec<U64>,
|
||||
pub requests_out_success_total: HistogramVec,
|
||||
pub requests_out_started_total: CounterVec<U64>,
|
||||
}
|
||||
|
||||
impl Metrics {
|
||||
fn register(registry: &Registry) -> Result<Self, PrometheusError> {
|
||||
Ok(Self {
|
||||
// This list is ordered alphabetically
|
||||
connections_closed_total: prometheus::register(CounterVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_connections_closed_total",
|
||||
"Total number of connections closed, by direction and reason"
|
||||
),
|
||||
&["direction", "reason"]
|
||||
)?, registry)?,
|
||||
connections_opened_total: prometheus::register(CounterVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_connections_opened_total",
|
||||
"Total number of connections opened by direction"
|
||||
),
|
||||
&["direction"]
|
||||
)?, registry)?,
|
||||
distinct_peers_connections_closed_total: prometheus::register(Counter::new(
|
||||
"sub_libp2p_distinct_peers_connections_closed_total",
|
||||
"Total number of connections closed with distinct peers"
|
||||
)?, registry)?,
|
||||
distinct_peers_connections_opened_total: prometheus::register(Counter::new(
|
||||
"sub_libp2p_distinct_peers_connections_opened_total",
|
||||
"Total number of connections opened with distinct peers"
|
||||
)?, registry)?,
|
||||
import_queue_blocks_submitted: prometheus::register(Counter::new(
|
||||
"import_queue_blocks_submitted",
|
||||
"Number of blocks submitted to the import queue.",
|
||||
)?, registry)?,
|
||||
import_queue_finality_proofs_submitted: prometheus::register(Counter::new(
|
||||
"import_queue_finality_proofs_submitted",
|
||||
"Number of finality proofs submitted to the import queue.",
|
||||
)?, registry)?,
|
||||
import_queue_justifications_submitted: prometheus::register(Counter::new(
|
||||
"import_queue_justifications_submitted",
|
||||
"Number of justifications submitted to the import queue.",
|
||||
)?, registry)?,
|
||||
incoming_connections_errors_total: prometheus::register(CounterVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_incoming_connections_handshake_errors_total",
|
||||
"Total number of incoming connections that have failed during the \
|
||||
initial handshake"
|
||||
),
|
||||
&["reason"]
|
||||
)?, registry)?,
|
||||
incoming_connections_total: prometheus::register(Counter::new(
|
||||
"sub_libp2p_incoming_connections_total",
|
||||
"Total number of incoming connections on the listening sockets"
|
||||
)?, registry)?,
|
||||
issued_light_requests: prometheus::register(Counter::new(
|
||||
"issued_light_requests",
|
||||
"Number of light client requests that our node has issued.",
|
||||
)?, registry)?,
|
||||
kademlia_query_duration: prometheus::register(HistogramVec::new(
|
||||
HistogramOpts {
|
||||
common_opts: Opts::new(
|
||||
"sub_libp2p_kademlia_query_duration",
|
||||
"Duration of Kademlia queries per query type"
|
||||
),
|
||||
buckets: prometheus::exponential_buckets(0.5, 2.0, 10)
|
||||
.expect("parameters are always valid values; qed"),
|
||||
},
|
||||
&["type"]
|
||||
)?, registry)?,
|
||||
kademlia_random_queries_total: prometheus::register(CounterVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_kademlia_random_queries_total",
|
||||
"Number of random Kademlia queries started"
|
||||
),
|
||||
&["protocol"]
|
||||
)?, registry)?,
|
||||
kademlia_records_count: prometheus::register(GaugeVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_kademlia_records_count",
|
||||
"Number of records in the Kademlia records store"
|
||||
),
|
||||
&["protocol"]
|
||||
)?, registry)?,
|
||||
kademlia_records_sizes_total: prometheus::register(GaugeVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_kademlia_records_sizes_total",
|
||||
"Total size of all the records in the Kademlia records store"
|
||||
),
|
||||
&["protocol"]
|
||||
)?, registry)?,
|
||||
kbuckets_num_nodes: prometheus::register(GaugeVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_kbuckets_num_nodes",
|
||||
"Number of nodes in the Kademlia k-buckets"
|
||||
),
|
||||
&["protocol"]
|
||||
)?, registry)?,
|
||||
listeners_local_addresses: prometheus::register(Gauge::new(
|
||||
"sub_libp2p_listeners_local_addresses", "Number of local addresses we're listening on"
|
||||
)?, registry)?,
|
||||
listeners_errors_total: prometheus::register(Counter::new(
|
||||
"sub_libp2p_listeners_errors_total",
|
||||
"Total number of non-fatal errors reported by a listener"
|
||||
)?, registry)?,
|
||||
notifications_sizes: prometheus::register(HistogramVec::new(
|
||||
HistogramOpts {
|
||||
common_opts: Opts::new(
|
||||
"sub_libp2p_notifications_sizes",
|
||||
"Sizes of the notifications send to and received from all nodes"
|
||||
),
|
||||
buckets: prometheus::exponential_buckets(64.0, 4.0, 8)
|
||||
.expect("parameters are always valid values; qed"),
|
||||
},
|
||||
&["direction", "protocol"]
|
||||
)?, registry)?,
|
||||
notifications_streams_closed_total: prometheus::register(CounterVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_notifications_streams_closed_total",
|
||||
"Total number of notification substreams that have been closed"
|
||||
),
|
||||
&["protocol"]
|
||||
)?, registry)?,
|
||||
notifications_streams_opened_total: prometheus::register(CounterVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_notifications_streams_opened_total",
|
||||
"Total number of notification substreams that have been opened"
|
||||
),
|
||||
&["protocol"]
|
||||
)?, registry)?,
|
||||
peerset_num_discovered: prometheus::register(Gauge::new(
|
||||
"sub_libp2p_peerset_num_discovered", "Number of nodes stored in the peerset manager",
|
||||
)?, registry)?,
|
||||
peerset_num_requested: prometheus::register(Gauge::new(
|
||||
"sub_libp2p_peerset_num_requested", "Number of nodes that the peerset manager wants us to be connected to",
|
||||
)?, registry)?,
|
||||
pending_connections: prometheus::register(Gauge::new(
|
||||
"sub_libp2p_pending_connections",
|
||||
"Number of connections in the process of being established",
|
||||
)?, registry)?,
|
||||
pending_connections_errors_total: prometheus::register(CounterVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_pending_connections_errors_total",
|
||||
"Total number of pending connection errors"
|
||||
),
|
||||
&["reason"]
|
||||
)?, registry)?,
|
||||
requests_in_failure_total: prometheus::register(CounterVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_requests_in_failure_total",
|
||||
"Total number of incoming requests that the node has failed to answer"
|
||||
),
|
||||
&["protocol", "reason"]
|
||||
)?, registry)?,
|
||||
requests_in_success_total: prometheus::register(HistogramVec::new(
|
||||
HistogramOpts {
|
||||
common_opts: Opts::new(
|
||||
"sub_libp2p_requests_in_success_total",
|
||||
"Total number of requests received and answered"
|
||||
),
|
||||
buckets: prometheus::exponential_buckets(0.001, 2.0, 16)
|
||||
.expect("parameters are always valid values; qed"),
|
||||
},
|
||||
&["protocol"]
|
||||
)?, registry)?,
|
||||
requests_out_failure_total: prometheus::register(CounterVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_requests_out_failure_total",
|
||||
"Total number of requests that have failed"
|
||||
),
|
||||
&["protocol", "reason"]
|
||||
)?, registry)?,
|
||||
requests_out_success_total: prometheus::register(HistogramVec::new(
|
||||
HistogramOpts {
|
||||
common_opts: Opts::new(
|
||||
"sub_libp2p_requests_out_success_total",
|
||||
"For successful requests, time between a request's start and finish"
|
||||
),
|
||||
buckets: prometheus::exponential_buckets(0.001, 2.0, 16)
|
||||
.expect("parameters are always valid values; qed"),
|
||||
},
|
||||
&["protocol"]
|
||||
)?, registry)?,
|
||||
requests_out_started_total: prometheus::register(CounterVec::new(
|
||||
Opts::new(
|
||||
"sub_libp2p_requests_out_started_total",
|
||||
"Total number of requests emitted"
|
||||
),
|
||||
&["protocol"]
|
||||
)?, registry)?,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// The bandwidth counter metric.
|
||||
#[derive(Clone)]
|
||||
pub struct BandwidthCounters(Arc<BandwidthSinks>);
|
||||
|
||||
impl BandwidthCounters {
|
||||
/// Registers the `BandwidthCounters` metric whose values are
|
||||
/// obtained from the given sinks.
|
||||
fn register(registry: &Registry, sinks: Arc<BandwidthSinks>) -> Result<(), PrometheusError> {
|
||||
prometheus::register(SourcedCounter::new(
|
||||
&Opts::new(
|
||||
"sub_libp2p_network_bytes_total",
|
||||
"Total bandwidth usage"
|
||||
).variable_label("direction"),
|
||||
BandwidthCounters(sinks),
|
||||
)?, registry)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl MetricSource for BandwidthCounters {
|
||||
type N = u64;
|
||||
|
||||
fn collect(&self, mut set: impl FnMut(&[&str], Self::N)) {
|
||||
set(&[&"in"], self.0.total_inbound());
|
||||
set(&[&"out"], self.0.total_outbound());
|
||||
}
|
||||
}
|
||||
|
||||
/// The "major syncing" metric.
|
||||
#[derive(Clone)]
|
||||
pub struct MajorSyncingGauge(Arc<AtomicBool>);
|
||||
|
||||
impl MajorSyncingGauge {
|
||||
/// Registers the `MajorSyncGauge` metric whose value is
|
||||
/// obtained from the given `AtomicBool`.
|
||||
fn register(registry: &Registry, value: Arc<AtomicBool>) -> Result<(), PrometheusError> {
|
||||
prometheus::register(SourcedGauge::new(
|
||||
&Opts::new(
|
||||
"sub_libp2p_is_major_syncing",
|
||||
"Whether the node is performing a major sync or not.",
|
||||
),
|
||||
MajorSyncingGauge(value),
|
||||
)?, registry)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl MetricSource for MajorSyncingGauge {
|
||||
type N = u64;
|
||||
|
||||
fn collect(&self, mut set: impl FnMut(&[&str], Self::N)) {
|
||||
set(&[], self.0.load(Ordering::Relaxed) as u64);
|
||||
}
|
||||
}
|
||||
|
||||
/// The connected peers metric.
|
||||
#[derive(Clone)]
|
||||
pub struct NumConnectedGauge(Arc<AtomicUsize>);
|
||||
|
||||
impl NumConnectedGauge {
|
||||
/// Registers the `MajorSyncingGauge` metric whose value is
|
||||
/// obtained from the given `AtomicUsize`.
|
||||
fn register(registry: &Registry, value: Arc<AtomicUsize>) -> Result<(), PrometheusError> {
|
||||
prometheus::register(SourcedGauge::new(
|
||||
&Opts::new(
|
||||
"sub_libp2p_peers_count",
|
||||
"Number of connected peers",
|
||||
),
|
||||
NumConnectedGauge(value),
|
||||
)?, registry)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl MetricSource for NumConnectedGauge {
|
||||
type N = u64;
|
||||
|
||||
fn collect(&self, mut set: impl FnMut(&[&str], Self::N)) {
|
||||
set(&[], self.0.load(Ordering::Relaxed) as u64);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -87,8 +87,6 @@ fn api<T: Into<Option<Status>>>(sync: T) -> System<Block> {
|
||||
external_addresses: Default::default(),
|
||||
connected_peers: Default::default(),
|
||||
not_connected_peers: Default::default(),
|
||||
total_bytes_inbound: 0,
|
||||
total_bytes_outbound: 0,
|
||||
peerset: serde_json::Value::Null,
|
||||
}).unwrap());
|
||||
},
|
||||
@@ -282,8 +280,6 @@ fn system_network_state() {
|
||||
external_addresses: Default::default(),
|
||||
connected_peers: Default::default(),
|
||||
not_connected_peers: Default::default(),
|
||||
total_bytes_inbound: 0,
|
||||
total_bytes_outbound: 0,
|
||||
peerset: serde_json::Value::Null,
|
||||
}
|
||||
);
|
||||
|
||||
@@ -17,10 +17,10 @@
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
use crate::{
|
||||
NetworkStatus, NetworkState, error::Error, DEFAULT_PROTOCOL_ID, MallocSizeOfWasm,
|
||||
error::Error, DEFAULT_PROTOCOL_ID, MallocSizeOfWasm,
|
||||
TelemetryConnectionSinks, RpcHandlers, NetworkStatusSinks,
|
||||
start_rpc_servers, build_network_future, TransactionPoolAdapter, TaskManager, SpawnTaskHandle,
|
||||
status_sinks, metrics::MetricsService,
|
||||
metrics::MetricsService,
|
||||
client::{light, Client, ClientConfig},
|
||||
config::{Configuration, KeystoreConfig, PrometheusConfig},
|
||||
};
|
||||
@@ -472,7 +472,9 @@ pub fn spawn_tasks<TBl, TBackend, TExPool, TRpc, TCl>(
|
||||
transaction_pool,
|
||||
rpc_extensions_builder,
|
||||
remote_blockchain,
|
||||
network, network_status_sinks, system_rpc_tx,
|
||||
network,
|
||||
network_status_sinks,
|
||||
system_rpc_tx,
|
||||
telemetry_connection_sinks,
|
||||
} = params;
|
||||
|
||||
@@ -521,15 +523,13 @@ pub fn spawn_tasks<TBl, TBackend, TExPool, TRpc, TCl>(
|
||||
MetricsService::new()
|
||||
};
|
||||
|
||||
// Periodically notify the telemetry.
|
||||
spawn_handle.spawn("telemetry-periodic-send", telemetry_periodic_send(
|
||||
client.clone(), transaction_pool.clone(), metrics_service, network_status_sinks.clone()
|
||||
));
|
||||
|
||||
// Periodically send the network state to the telemetry.
|
||||
spawn_handle.spawn(
|
||||
"telemetry-periodic-network-state",
|
||||
telemetry_periodic_network_state(network_status_sinks.clone()),
|
||||
// Periodically updated metrics and telemetry updates.
|
||||
spawn_handle.spawn("telemetry-periodic-send",
|
||||
metrics_service.run(
|
||||
client.clone(),
|
||||
transaction_pool.clone(),
|
||||
network_status_sinks.clone()
|
||||
)
|
||||
);
|
||||
|
||||
// RPC
|
||||
@@ -574,7 +574,7 @@ pub fn spawn_tasks<TBl, TBackend, TExPool, TRpc, TCl>(
|
||||
// Spawn informant task
|
||||
spawn_handle.spawn("informant", sc_informant::build(
|
||||
client.clone(),
|
||||
network_status_sinks.clone().0,
|
||||
network_status_sinks.status.clone(),
|
||||
transaction_pool.clone(),
|
||||
config.informant_output_format,
|
||||
));
|
||||
@@ -606,47 +606,6 @@ async fn transaction_notifications<TBl, TExPool>(
|
||||
.await;
|
||||
}
|
||||
|
||||
// Periodically notify the telemetry.
|
||||
async fn telemetry_periodic_send<TBl, TExPool, TCl>(
|
||||
client: Arc<TCl>,
|
||||
transaction_pool: Arc<TExPool>,
|
||||
mut metrics_service: MetricsService,
|
||||
network_status_sinks: NetworkStatusSinks<TBl>,
|
||||
)
|
||||
where
|
||||
TBl: BlockT,
|
||||
TCl: ProvideRuntimeApi<TBl> + UsageProvider<TBl>,
|
||||
TExPool: MaintainedTransactionPool<Block=TBl, Hash = <TBl as BlockT>::Hash>,
|
||||
{
|
||||
let (state_tx, state_rx) = tracing_unbounded::<(NetworkStatus<_>, NetworkState)>("mpsc_netstat1");
|
||||
network_status_sinks.0.push(std::time::Duration::from_millis(5000), state_tx);
|
||||
state_rx.for_each(move |(net_status, _)| {
|
||||
let info = client.usage_info();
|
||||
metrics_service.tick(
|
||||
&info,
|
||||
&transaction_pool.status(),
|
||||
&net_status,
|
||||
);
|
||||
ready(())
|
||||
}).await;
|
||||
}
|
||||
|
||||
async fn telemetry_periodic_network_state<TBl: BlockT>(
|
||||
network_status_sinks: NetworkStatusSinks<TBl>,
|
||||
) {
|
||||
// Periodically send the network state to the telemetry.
|
||||
let (netstat_tx, netstat_rx) = tracing_unbounded::<(NetworkStatus<_>, NetworkState)>("mpsc_netstat2");
|
||||
network_status_sinks.0.push(std::time::Duration::from_secs(30), netstat_tx);
|
||||
netstat_rx.for_each(move |(_, network_state)| {
|
||||
telemetry!(
|
||||
SUBSTRATE_INFO;
|
||||
"system.network_state";
|
||||
"state" => network_state,
|
||||
);
|
||||
ready(())
|
||||
}).await;
|
||||
}
|
||||
|
||||
fn build_telemetry<TBl: BlockT>(
|
||||
config: &mut Configuration,
|
||||
endpoints: sc_telemetry::TelemetryEndpoints,
|
||||
@@ -887,7 +846,7 @@ pub fn build_network<TBl, TExPool, TImpQu, TCl>(
|
||||
let has_bootnodes = !network_params.network_config.boot_nodes.is_empty();
|
||||
let network_mut = sc_network::NetworkWorker::new(network_params)?;
|
||||
let network = network_mut.service().clone();
|
||||
let network_status_sinks = NetworkStatusSinks::new(Arc::new(status_sinks::StatusSinks::new()));
|
||||
let network_status_sinks = NetworkStatusSinks::new();
|
||||
|
||||
let (system_rpc_tx, system_rpc_rx) = tracing_unbounded("mpsc_system_rpc");
|
||||
|
||||
|
||||
@@ -126,24 +126,37 @@ impl RpcHandlers {
|
||||
/// Sinks to propagate network status updates.
|
||||
/// For each element, every time the `Interval` fires we push an element on the sender.
|
||||
#[derive(Clone)]
|
||||
pub struct NetworkStatusSinks<Block: BlockT>(
|
||||
Arc<status_sinks::StatusSinks<(NetworkStatus<Block>, NetworkState)>>,
|
||||
);
|
||||
pub struct NetworkStatusSinks<Block: BlockT> {
|
||||
status: Arc<status_sinks::StatusSinks<NetworkStatus<Block>>>,
|
||||
state: Arc<status_sinks::StatusSinks<NetworkState>>,
|
||||
}
|
||||
|
||||
impl<Block: BlockT> NetworkStatusSinks<Block> {
|
||||
fn new(
|
||||
sinks: Arc<status_sinks::StatusSinks<(NetworkStatus<Block>, NetworkState)>>
|
||||
) -> Self {
|
||||
Self(sinks)
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
status: Arc::new(status_sinks::StatusSinks::new()),
|
||||
state: Arc::new(status_sinks::StatusSinks::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a receiver that periodically receives a status of the network.
|
||||
pub fn network_status(&self, interval: Duration)
|
||||
-> TracingUnboundedReceiver<(NetworkStatus<Block>, NetworkState)> {
|
||||
/// Returns a receiver that periodically yields a [`NetworkStatus`].
|
||||
pub fn status_stream(&self, interval: Duration)
|
||||
-> TracingUnboundedReceiver<NetworkStatus<Block>>
|
||||
{
|
||||
let (sink, stream) = tracing_unbounded("mpsc_network_status");
|
||||
self.0.push(interval, sink);
|
||||
self.status.push(interval, sink);
|
||||
stream
|
||||
}
|
||||
|
||||
/// Returns a receiver that periodically yields a [`NetworkState`].
|
||||
pub fn state_stream(&self, interval: Duration)
|
||||
-> TracingUnboundedReceiver<NetworkState>
|
||||
{
|
||||
let (sink, stream) = tracing_unbounded("mpsc_network_state");
|
||||
self.state.push(interval, sink);
|
||||
stream
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// Sinks to propagate telemetry connection established events.
|
||||
@@ -319,20 +332,16 @@ async fn build_network_future<
|
||||
// the network.
|
||||
_ = (&mut network).fuse() => {}
|
||||
|
||||
// At a regular interval, we send the state of the network on what is called
|
||||
// the "status sinks".
|
||||
ready_sink = status_sinks.0.next().fuse() => {
|
||||
let status = NetworkStatus {
|
||||
sync_state: network.sync_state(),
|
||||
best_seen_block: network.best_seen_block(),
|
||||
num_sync_peers: network.num_sync_peers(),
|
||||
num_connected_peers: network.num_connected_peers(),
|
||||
num_active_peers: network.num_active_peers(),
|
||||
total_bytes_inbound: network.total_bytes_inbound(),
|
||||
total_bytes_outbound: network.total_bytes_outbound(),
|
||||
};
|
||||
let state = network.network_state();
|
||||
ready_sink.send((status, state));
|
||||
// At a regular interval, we send high-level status as well as
|
||||
// detailed state information of the network on what are called
|
||||
// "status sinks".
|
||||
|
||||
status_sink = status_sinks.status.next().fuse() => {
|
||||
status_sink.send(network.status());
|
||||
}
|
||||
|
||||
state_sink = status_sinks.state.next().fuse() => {
|
||||
state_sink.send(network.network_state());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,14 +18,19 @@
|
||||
|
||||
use std::{convert::TryFrom, time::SystemTime};
|
||||
|
||||
use crate::{NetworkStatus, config::Configuration};
|
||||
use crate::{NetworkStatus, NetworkState, NetworkStatusSinks, config::Configuration};
|
||||
use futures_timer::Delay;
|
||||
use prometheus_endpoint::{register, Gauge, U64, Registry, PrometheusError, Opts, GaugeVec};
|
||||
use sc_telemetry::{telemetry, SUBSTRATE_INFO};
|
||||
use sp_api::ProvideRuntimeApi;
|
||||
use sp_runtime::traits::{NumberFor, Block, SaturatedConversion, UniqueSaturatedInto};
|
||||
use sp_transaction_pool::PoolStatus;
|
||||
use sp_transaction_pool::{PoolStatus, MaintainedTransactionPool};
|
||||
use sp_utils::metrics::register_globals;
|
||||
use sc_client_api::ClientInfo;
|
||||
use sp_utils::mpsc::TracingUnboundedReceiver;
|
||||
use sc_client_api::{ClientInfo, UsageProvider};
|
||||
use sc_network::config::Role;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use wasm_timer::Instant;
|
||||
|
||||
struct PrometheusMetrics {
|
||||
@@ -99,6 +104,9 @@ impl PrometheusMetrics {
|
||||
}
|
||||
}
|
||||
|
||||
/// A `MetricsService` periodically sends general client and
|
||||
/// network state to the telemetry as well as (optionally)
|
||||
/// a Prometheus endpoint.
|
||||
pub struct MetricsService {
|
||||
metrics: Option<PrometheusMetrics>,
|
||||
last_update: Instant,
|
||||
@@ -107,6 +115,8 @@ pub struct MetricsService {
|
||||
}
|
||||
|
||||
impl MetricsService {
|
||||
/// Creates a `MetricsService` that only sends information
|
||||
/// to the telemetry.
|
||||
pub fn new() -> Self {
|
||||
MetricsService {
|
||||
metrics: None,
|
||||
@@ -116,6 +126,8 @@ impl MetricsService {
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a `MetricsService` that sends metrics
|
||||
/// to prometheus alongside the telemetry.
|
||||
pub fn with_prometheus(
|
||||
registry: &Registry,
|
||||
config: &Configuration,
|
||||
@@ -141,60 +153,109 @@ impl MetricsService {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn tick<T: Block>(
|
||||
/// Returns a never-ending `Future` that performs the
|
||||
/// metric and telemetry updates with information from
|
||||
/// the given sources.
|
||||
pub async fn run<TBl, TExPool, TCl>(
|
||||
mut self,
|
||||
client: Arc<TCl>,
|
||||
transactions: Arc<TExPool>,
|
||||
network: NetworkStatusSinks<TBl>,
|
||||
) where
|
||||
TBl: Block,
|
||||
TCl: ProvideRuntimeApi<TBl> + UsageProvider<TBl>,
|
||||
TExPool: MaintainedTransactionPool<Block = TBl, Hash = <TBl as Block>::Hash>,
|
||||
{
|
||||
let mut timer = Delay::new(Duration::from_secs(0));
|
||||
let timer_interval = Duration::from_secs(5);
|
||||
|
||||
// Metric and telemetry update interval.
|
||||
let net_status_interval = timer_interval;
|
||||
let net_state_interval = Duration::from_secs(30);
|
||||
|
||||
// Source of network information.
|
||||
let mut net_status_rx = Some(network.status_stream(net_status_interval));
|
||||
let mut net_state_rx = Some(network.state_stream(net_state_interval));
|
||||
|
||||
loop {
|
||||
// Wait for the next tick of the timer.
|
||||
(&mut timer).await;
|
||||
|
||||
// Try to get the latest network information.
|
||||
let mut net_status = None;
|
||||
let mut net_state = None;
|
||||
if let Some(rx) = net_status_rx.as_mut() {
|
||||
match Self::latest(rx) {
|
||||
Ok(status) => { net_status = status; }
|
||||
Err(()) => { net_status_rx = None; }
|
||||
}
|
||||
}
|
||||
if let Some(rx) = net_state_rx.as_mut() {
|
||||
match Self::latest(rx) {
|
||||
Ok(state) => { net_state = state; }
|
||||
Err(()) => { net_state_rx = None; }
|
||||
}
|
||||
}
|
||||
|
||||
// Update / Send the metrics.
|
||||
self.update(
|
||||
&client.usage_info(),
|
||||
&transactions.status(),
|
||||
net_status,
|
||||
net_state,
|
||||
);
|
||||
|
||||
// Schedule next tick.
|
||||
timer.reset(timer_interval);
|
||||
}
|
||||
}
|
||||
|
||||
// Try to get the latest value from a receiver, dropping intermediate values.
|
||||
fn latest<T>(rx: &mut TracingUnboundedReceiver<T>) -> Result<Option<T>, ()> {
|
||||
let mut value = None;
|
||||
|
||||
while let Ok(next) = rx.try_next() {
|
||||
match next {
|
||||
Some(v) => {
|
||||
value = Some(v)
|
||||
}
|
||||
None => {
|
||||
log::error!("Receiver closed unexpectedly.");
|
||||
return Err(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(value)
|
||||
}
|
||||
|
||||
fn update<T: Block>(
|
||||
&mut self,
|
||||
info: &ClientInfo<T>,
|
||||
txpool_status: &PoolStatus,
|
||||
net_status: &NetworkStatus<T>,
|
||||
net_status: Option<NetworkStatus<T>>,
|
||||
net_state: Option<NetworkState>,
|
||||
) {
|
||||
let now = Instant::now();
|
||||
let elapsed = (now - self.last_update).as_secs();
|
||||
self.last_update = now;
|
||||
|
||||
let best_number = info.chain.best_number.saturated_into::<u64>();
|
||||
let best_hash = info.chain.best_hash;
|
||||
let num_peers = net_status.num_connected_peers;
|
||||
let finalized_number: u64 = info.chain.finalized_number.saturated_into::<u64>();
|
||||
let total_bytes_inbound = net_status.total_bytes_inbound;
|
||||
let total_bytes_outbound = net_status.total_bytes_outbound;
|
||||
let best_seen_block = net_status
|
||||
.best_seen_block
|
||||
.map(|num: NumberFor<T>| num.unique_saturated_into() as u64);
|
||||
|
||||
let diff_bytes_inbound = total_bytes_inbound - self.last_total_bytes_inbound;
|
||||
let diff_bytes_outbound = total_bytes_outbound - self.last_total_bytes_outbound;
|
||||
let (avg_bytes_per_sec_inbound, avg_bytes_per_sec_outbound) =
|
||||
if elapsed > 0 {
|
||||
self.last_total_bytes_inbound = total_bytes_inbound;
|
||||
self.last_total_bytes_outbound = total_bytes_outbound;
|
||||
(diff_bytes_inbound / elapsed, diff_bytes_outbound / elapsed)
|
||||
} else {
|
||||
(diff_bytes_inbound, diff_bytes_outbound)
|
||||
};
|
||||
self.last_update = now;
|
||||
|
||||
// Update/send metrics that are always available.
|
||||
telemetry!(
|
||||
SUBSTRATE_INFO;
|
||||
"system.interval";
|
||||
"peers" => num_peers,
|
||||
"height" => best_number,
|
||||
"best" => ?best_hash,
|
||||
"txcount" => txpool_status.ready,
|
||||
"finalized_height" => finalized_number,
|
||||
"finalized_hash" => ?info.chain.finalized_hash,
|
||||
"bandwidth_download" => avg_bytes_per_sec_inbound,
|
||||
"bandwidth_upload" => avg_bytes_per_sec_outbound,
|
||||
"used_state_cache_size" => info.usage.as_ref()
|
||||
.map(|usage| usage.memory.state_cache.as_bytes())
|
||||
.unwrap_or(0),
|
||||
"used_db_cache_size" => info.usage.as_ref()
|
||||
.map(|usage| usage.memory.database_cache.as_bytes())
|
||||
.unwrap_or(0),
|
||||
"disk_read_per_sec" => info.usage.as_ref()
|
||||
.map(|usage| usage.io.bytes_read)
|
||||
.unwrap_or(0),
|
||||
"disk_write_per_sec" => info.usage.as_ref()
|
||||
.map(|usage| usage.io.bytes_written)
|
||||
.unwrap_or(0),
|
||||
);
|
||||
|
||||
if let Some(metrics) = self.metrics.as_ref() {
|
||||
@@ -213,10 +274,6 @@ impl MetricsService {
|
||||
|
||||
metrics.ready_transactions_number.set(txpool_status.ready as u64);
|
||||
|
||||
if let Some(best_seen_block) = best_seen_block {
|
||||
metrics.block_height.with_label_values(&["sync_target"]).set(best_seen_block);
|
||||
}
|
||||
|
||||
if let Some(info) = info.usage.as_ref() {
|
||||
metrics.database_cache.set(info.memory.database_cache.as_bytes() as u64);
|
||||
metrics.state_cache.set(info.memory.state_cache.as_bytes() as u64);
|
||||
@@ -232,5 +289,50 @@ impl MetricsService {
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Update/send network status information, if any.
|
||||
if let Some(net_status) = net_status {
|
||||
let num_peers = net_status.num_connected_peers;
|
||||
let total_bytes_inbound = net_status.total_bytes_inbound;
|
||||
let total_bytes_outbound = net_status.total_bytes_outbound;
|
||||
|
||||
let diff_bytes_inbound = total_bytes_inbound - self.last_total_bytes_inbound;
|
||||
let diff_bytes_outbound = total_bytes_outbound - self.last_total_bytes_outbound;
|
||||
let (avg_bytes_per_sec_inbound, avg_bytes_per_sec_outbound) =
|
||||
if elapsed > 0 {
|
||||
self.last_total_bytes_inbound = total_bytes_inbound;
|
||||
self.last_total_bytes_outbound = total_bytes_outbound;
|
||||
(diff_bytes_inbound / elapsed, diff_bytes_outbound / elapsed)
|
||||
} else {
|
||||
(diff_bytes_inbound, diff_bytes_outbound)
|
||||
};
|
||||
|
||||
telemetry!(
|
||||
SUBSTRATE_INFO;
|
||||
"system.interval";
|
||||
"peers" => num_peers,
|
||||
"bandwidth_download" => avg_bytes_per_sec_inbound,
|
||||
"bandwidth_upload" => avg_bytes_per_sec_outbound,
|
||||
);
|
||||
|
||||
if let Some(metrics) = self.metrics.as_ref() {
|
||||
let best_seen_block = net_status
|
||||
.best_seen_block
|
||||
.map(|num: NumberFor<T>| num.unique_saturated_into() as u64);
|
||||
|
||||
if let Some(best_seen_block) = best_seen_block {
|
||||
metrics.block_height.with_label_values(&["sync_target"]).set(best_seen_block);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Send network state information, if any.
|
||||
if let Some(net_state) = net_state {
|
||||
telemetry!(
|
||||
SUBSTRATE_INFO;
|
||||
"system.network_state";
|
||||
"state" => net_state,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user