Additional metrics collected and exposed via Prometheus (#5414)

This PR moves the part of sc-service that measures metrics and exposes them to Prometheus into its own submodule, and extends the set of exposed metrics with:

- system load average (over 1, 5 and 15 minutes)
- the TCP connection state of the process (lsof), refs #5304
- number of tokio threads
- number of known forks
- counter for items in each unbounded queue (with internal unbounded channels)
- number of file descriptors opened by this process (*nix only at this point)
- number of system threads (*nix only at this point)

refs #4679

Co-authored-by: Max Inden <mail@max-inden.de>
Co-authored-by: Ashley <ashley.ruglys@gmail.com>
This commit is contained in:
Benjamin Kampmann
2020-04-04 15:13:35 +02:00
committed by GitHub
parent 6847f8452e
commit 247822bb33
60 changed files with 1344 additions and 526 deletions
@@ -18,12 +18,13 @@
use crate::protocol::light_client_handler;
use futures::{channel::mpsc, channel::oneshot, prelude::*};
use futures::{channel::oneshot, prelude::*};
use parking_lot::Mutex;
use sc_client_api::{
FetchChecker, Fetcher, RemoteBodyRequest, RemoteCallRequest, RemoteChangesRequest,
RemoteHeaderRequest, RemoteReadChildRequest, RemoteReadRequest, StorageProof, ChangesProof,
};
use sp_utils::mpsc::{tracing_unbounded, TracingUnboundedReceiver, TracingUnboundedSender};
use sp_blockchain::Error as ClientError;
use sp_runtime::traits::{Block as BlockT, Header as HeaderT, NumberFor};
use std::{collections::HashMap, pin::Pin, sync::Arc, task::Context, task::Poll};
@@ -42,10 +43,10 @@ pub struct OnDemand<B: BlockT> {
/// Note that a better alternative would be to use an MPMC queue here, and add a `poll` method
/// from the `OnDemand`. However there exists no popular implementation of MPMC channels in
/// asynchronous Rust at the moment.
requests_queue: Mutex<Option<mpsc::UnboundedReceiver<light_client_handler::Request<B>>>>,
requests_queue: Mutex<Option<TracingUnboundedReceiver<light_client_handler::Request<B>>>>,
/// Sending side of `requests_queue`.
requests_send: mpsc::UnboundedSender<light_client_handler::Request<B>>,
requests_send: TracingUnboundedSender<light_client_handler::Request<B>>,
}
/// Dummy implementation of `FetchChecker` that always assumes that responses are bad.
@@ -112,7 +113,7 @@ where
{
/// Creates new on-demand service.
pub fn new(checker: Arc<dyn FetchChecker<B>>) -> Self {
let (requests_send, requests_queue) = mpsc::unbounded();
let (requests_send, requests_queue) = tracing_unbounded("mpsc_ondemand");
let requests_queue = Mutex::new(Some(requests_queue));
OnDemand {
@@ -134,9 +135,9 @@ where
///
/// If this function returns `None`, that means that the receiver has already been extracted in
/// the past, and therefore that something already handles the requests.
pub(crate) fn extract_receiver(
&self,
) -> Option<mpsc::UnboundedReceiver<light_client_handler::Request<B>>> {
pub(crate) fn extract_receiver(&self)
-> Option<TracingUnboundedReceiver<light_client_handler::Request<B>>>
{
self.requests_queue.lock().take()
}
}
+9 -8
View File
@@ -36,7 +36,8 @@ use crate::{
protocol::{self, event::Event, light_client_handler, sync::SyncState, PeerInfo, Protocol},
transport, ReputationChange,
};
use futures::{prelude::*, channel::mpsc};
use futures::prelude::*;
use sp_utils::mpsc::{tracing_unbounded, TracingUnboundedSender, TracingUnboundedReceiver};
use libp2p::swarm::{NetworkBehaviour, SwarmBuilder, SwarmEvent};
use libp2p::{kad::record, Multiaddr, PeerId};
use log::{error, info, trace, warn};
@@ -159,7 +160,7 @@ pub struct NetworkService<B: BlockT + 'static, H: ExHashT> {
/// nodes it should be connected to or not.
peerset: PeersetHandle,
/// Channel that sends messages to the actual worker.
to_worker: mpsc::UnboundedSender<ServiceToWorkerMsg<B, H>>,
to_worker: TracingUnboundedSender<ServiceToWorkerMsg<B, H>>,
/// Marker to pin the `H` generic. Serves no purpose except to not break backwards
/// compatibility.
_marker: PhantomData<H>,
@@ -172,7 +173,7 @@ impl<B: BlockT + 'static, H: ExHashT> NetworkWorker<B, H> {
/// for the network processing to advance. From it, you can extract a `NetworkService` using
/// `worker.service()`. The `NetworkService` can be shared through the codebase.
pub fn new(params: Params<B, H>) -> Result<NetworkWorker<B, H>, Error> {
let (to_worker, from_worker) = mpsc::unbounded();
let (to_worker, from_worker) = tracing_unbounded("mpsc_network_worker");
if let Some(ref path) = params.network_config.net_config_path {
fs::create_dir_all(Path::new(path))?;
@@ -550,7 +551,7 @@ impl<B: BlockT + 'static, H: ExHashT> NetworkService<B, H> {
/// The stream never ends (unless the `NetworkWorker` gets shut down).
pub fn event_stream(&self) -> impl Stream<Item = Event> {
// Note: when transitioning to stable futures, remove the `Error` entirely
let (tx, rx) = mpsc::unbounded();
let (tx, rx) = tracing_unbounded("mpsc_network_event_stream");
let _ = self.to_worker.unbounded_send(ServiceToWorkerMsg::EventStream(tx));
rx
}
@@ -770,7 +771,7 @@ enum ServiceToWorkerMsg<B: BlockT, H: ExHashT> {
PutValue(record::Key, Vec<u8>),
AddKnownAddress(PeerId, Multiaddr),
SyncFork(Vec<PeerId>, B::Hash, NumberFor<B>),
EventStream(mpsc::UnboundedSender<Event>),
EventStream(TracingUnboundedSender<Event>),
WriteNotification {
message: Vec<u8>,
engine_id: ConsensusEngineId,
@@ -801,11 +802,11 @@ pub struct NetworkWorker<B: BlockT + 'static, H: ExHashT> {
/// The import queue that was passed as initialization.
import_queue: Box<dyn ImportQueue<B>>,
/// Messages from the `NetworkService` that must be processed.
from_worker: mpsc::UnboundedReceiver<ServiceToWorkerMsg<B, H>>,
from_worker: TracingUnboundedReceiver<ServiceToWorkerMsg<B, H>>,
/// Receiver for queries from the light client that must be processed.
light_client_rqs: Option<mpsc::UnboundedReceiver<light_client_handler::Request<B>>>,
light_client_rqs: Option<TracingUnboundedReceiver<light_client_handler::Request<B>>>,
/// Senders for events that happen on the network.
event_streams: Vec<mpsc::UnboundedSender<Event>>,
event_streams: Vec<TracingUnboundedSender<Event>>,
/// Prometheus network metrics.
metrics: Option<Metrics>,
/// The `PeerId`s of all boot nodes.