Additional Metrics collected and exposed via prometheus (#5414)

This PR moves the code in sc-service that collects metrics and exposes them via Prometheus into its own submodule, and extends the set of exposed metrics with:

- system load average (over 1, 5 and 15 minutes)
- the TCP connection state of the process (lsof), refs #5304
- number of tokio threads
- number of known forks
- counter for items in each unbounded queue (with internal unbounded channels)
- number of file descriptors opened by this process (*nix only at this point)
- number of system threads (*nix only at this point)

refs #4679

Co-authored-by: Max Inden <mail@max-inden.de>
Co-authored-by: Ashley <ashley.ruglys@gmail.com>
This commit is contained in:
Benjamin Kampmann
2020-04-04 15:13:35 +02:00
committed by GitHub
parent 6847f8452e
commit 247822bb33
60 changed files with 1344 additions and 526 deletions
+20
View File
@@ -0,0 +1,20 @@
// Copyright 2020 Parity Technologies (UK) Ltd.
// This file is part of Substrate.
// Substrate is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Substrate is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Substrate. If not, see <http://www.gnu.org/licenses/>.
//! Utility primitives for Substrate.

// Prometheus metric statics and the helper that registers them.
pub mod metrics;
// Unbounded mpsc channel constructors and sender/receiver types.
pub mod mpsc;
+58
View File
@@ -0,0 +1,58 @@
// Copyright 2020 Parity Technologies (UK) Ltd.
// This file is part of Substrate.
// Substrate is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Substrate is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Substrate. If not, see <http://www.gnu.org/licenses/>.
//! Metering primitives and globals
use lazy_static::lazy_static;
use prometheus::{
    Registry, Error as PrometheusError,
    core::{ AtomicU64, GenericGauge, GenericCounter },
};
#[cfg(feature = "metered")]
use prometheus::{core::GenericGaugeVec, Opts};
lazy_static! {
    /// Gauge exported as `tokio_threads_alive`: number of threads alive right now.
    pub static ref TOKIO_THREADS_ALIVE: GenericGauge<AtomicU64> =
        GenericGauge::new("tokio_threads_alive", "Number of threads alive right now")
            .expect("Creating of statics doesn't fail. qed");

    /// Counter exported as `tokio_threads_total`: total number of threads created.
    pub static ref TOKIO_THREADS_TOTAL: GenericCounter<AtomicU64> =
        GenericCounter::new("tokio_threads_total", "Total number of threads created")
            .expect("Creating of statics doesn't fail. qed");
}
// NOTE: the cfg key Cargo sets for crate features is `feature` (singular).
// `features = "..."` is never set, so code gated on it is never compiled.
#[cfg(feature = "metered")]
lazy_static! {
    /// Gauge vector exported as `unbounded_channel_len`.
    ///
    /// Labels:
    /// - `entity`: name of the channel
    /// - `action`: one of `send`, `received` or `dropped`
    pub static ref UNBOUNDED_CHANNELS_COUNTER: GenericGaugeVec<AtomicU64> = GenericGaugeVec::new(
        Opts::new("unbounded_channel_len", "Items in each mpsc::unbounded instance"),
        &["entity", "action"], // name of channel, send|received|dropped
    ).expect("Creating of statics doesn't fail. qed");
}

/// Register the global metric statics with `registry`.
///
/// Always registers `TOKIO_THREADS_ALIVE` and `TOKIO_THREADS_TOTAL`; when the
/// `metered` feature is enabled, `UNBOUNDED_CHANNELS_COUNTER` is registered too.
///
/// # Errors
///
/// Returns the first `PrometheusError` reported by `Registry::register`;
/// later registrations are not attempted.
pub fn register_globals(registry: &Registry) -> Result<(), PrometheusError> {
    registry.register(Box::new(TOKIO_THREADS_ALIVE.clone()))?;
    registry.register(Box::new(TOKIO_THREADS_TOTAL.clone()))?;

    // Must use the same gate as the static's definition above.
    #[cfg(feature = "metered")]
    registry.register(Box::new(UNBOUNDED_CHANNELS_COUNTER.clone()))?;

    Ok(())
}
+232
View File
@@ -0,0 +1,232 @@
// Copyright 2020 Parity Technologies (UK) Ltd.
// This file is part of Substrate.
// Substrate is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Substrate is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Substrate. If not, see <http://www.gnu.org/licenses/>.
//! Features to meter unbounded channels
// NOTE: the cfg key Cargo sets for crate features is `feature` (singular).
// The two `inner` modules below must stay gated on exactly complementary
// predicates so that precisely one of them is compiled.
#[cfg(not(feature = "metered"))]
mod inner {
    // Plain aliases of the `futures` types: no performance implications.
    use futures::channel::mpsc::{self, UnboundedReceiver, UnboundedSender};

    /// Alias of `mpsc::UnboundedSender`
    pub type TracingUnboundedSender<T> = UnboundedSender<T>;
    /// Alias of `mpsc::UnboundedReceiver`
    pub type TracingUnboundedReceiver<T> = UnboundedReceiver<T>;

    /// Alias of `mpsc::unbounded`; `_key` is ignored in this configuration.
    pub fn tracing_unbounded<T>(_key: &'static str) -> (TracingUnboundedSender<T>, TracingUnboundedReceiver<T>) {
        mpsc::unbounded()
    }
}

// Requires `crate::metrics::UNBOUNDED_CHANNELS_COUNTER` to be compiled under
// the same `feature = "metered"` gate.
#[cfg(feature = "metered")]
mod inner {
    // Metered implementation: every channel reports to
    // `UNBOUNDED_CHANNELS_COUNTER` under the labels `[key, action]`, where
    // `action` is one of "send", "received" or "dropped".
    use futures::channel::mpsc::{self,
        UnboundedReceiver, UnboundedSender,
        TryRecvError, TrySendError, SendError
    };
    use futures::{sink::Sink, task::{Poll, Context}, stream::{Stream, FusedStream}};
    use std::pin::Pin;
    use crate::metrics::UNBOUNDED_CHANNELS_COUNTER;

    /// Wrapper Type around `UnboundedSender` that increases the global
    /// measure when a message is added
    #[derive(Debug)]
    pub struct TracingUnboundedSender<T>(&'static str, UnboundedSender<T>);

    // Implemented by hand: `#[derive(Clone)]` would add a `T: Clone` bound,
    // which `UnboundedSender<T>` (the non-metered alias) does not require.
    impl<T> Clone for TracingUnboundedSender<T> {
        fn clone(&self) -> Self {
            Self(self.0, self.1.clone())
        }
    }

    /// Wrapper Type around `UnboundedReceiver` that decreases the global
    /// measure when a message is polled
    #[derive(Debug)]
    pub struct TracingUnboundedReceiver<T>(&'static str, UnboundedReceiver<T>);

    /// Wrapper around `mpsc::unbounded` that tracks the in- and outflow via
    /// `UNBOUNDED_CHANNELS_COUNTER`, using `key` as the `entity` label.
    pub fn tracing_unbounded<T>(key: &'static str) -> (TracingUnboundedSender<T>, TracingUnboundedReceiver<T>) {
        let (s, r) = mpsc::unbounded();
        // `&'static str` is `Copy`, no clone needed.
        (TracingUnboundedSender(key, s), TracingUnboundedReceiver(key, r))
    }

    impl<T> TracingUnboundedSender<T> {
        /// Proxy function to mpsc::UnboundedSender
        pub fn poll_ready(&self, ctx: &mut Context) -> Poll<Result<(), SendError>> {
            self.1.poll_ready(ctx)
        }

        /// Proxy function to mpsc::UnboundedSender
        pub fn is_closed(&self) -> bool {
            self.1.is_closed()
        }

        /// Proxy function to mpsc::UnboundedSender
        pub fn close_channel(&self) {
            self.1.close_channel()
        }

        /// Proxy function to mpsc::UnboundedSender
        pub fn disconnect(&mut self) {
            self.1.disconnect()
        }

        /// Proxy function to mpsc::UnboundedSender
        /// that counts the message as "send" when it was accepted
        pub fn start_send(&mut self, msg: T) -> Result<(), SendError> {
            self.1.start_send(msg)?;
            // Count here as well as in `unbounded_send`, so that messages
            // pushed through this path are not only counted on receipt.
            UNBOUNDED_CHANNELS_COUNTER.with_label_values(&[self.0, "send"]).inc();
            Ok(())
        }

        /// Proxy function to mpsc::UnboundedSender
        /// that counts the message as "send" when it was accepted
        pub fn unbounded_send(&self, msg: T) -> Result<(), TrySendError<T>> {
            self.1.unbounded_send(msg)?;
            UNBOUNDED_CHANNELS_COUNTER.with_label_values(&[self.0, "send"]).inc();
            Ok(())
        }

        /// Proxy function to mpsc::UnboundedSender
        // NOTE(review): takes the raw `UnboundedSender`, not the wrapper, so
        // call sites written against the non-metered alias may need adapting.
        pub fn same_receiver(&self, other: &UnboundedSender<T>) -> bool {
            self.1.same_receiver(other)
        }
    }

    impl<T> TracingUnboundedReceiver<T> {
        /// Drain all messages that are immediately available and report them
        /// as "dropped". Each drained message is also counted as "received",
        /// because draining goes through `Self::try_next`.
        fn consume(&mut self) {
            let mut count = 0u64;
            // `consume` can run more than once (`close` and then `Drop`).
            // Do not poll a receiver that already terminated: presumably
            // older `futures` 0.3 releases panic in that case - TODO confirm
            // against the pinned version.
            while !self.1.is_terminated() {
                match self.try_next() {
                    Ok(Some(..)) => count += 1,
                    _ => break,
                }
            }
            // and discount the messages
            if count > 0 {
                UNBOUNDED_CHANNELS_COUNTER.with_label_values(&[self.0, "dropped"]).add(count);
            }
        }

        /// Proxy function to mpsc::UnboundedReceiver
        /// that consumes all messages first and updates the counter
        pub fn close(&mut self) {
            self.consume();
            self.1.close()
        }

        /// Proxy function to mpsc::UnboundedReceiver
        /// that discounts the messages taken out
        pub fn try_next(&mut self) -> Result<Option<T>, TryRecvError> {
            let msg = self.1.try_next()?;
            if msg.is_some() {
                UNBOUNDED_CHANNELS_COUNTER.with_label_values(&[self.0, "received"]).inc();
            }
            Ok(msg)
        }
    }

    impl<T> Drop for TracingUnboundedReceiver<T> {
        // Account for messages still queued when the receiver goes away.
        fn drop(&mut self) {
            self.consume();
        }
    }

    impl<T> Unpin for TracingUnboundedReceiver<T> {}

    impl<T> Stream for TracingUnboundedReceiver<T> {
        type Item = T;

        fn poll_next(
            self: Pin<&mut Self>,
            cx: &mut Context<'_>,
        ) -> Poll<Option<T>> {
            // `get_mut` consumes the pinned `self`; use `this` from here on.
            let this = self.get_mut();
            let polled = Pin::new(&mut this.1).poll_next(cx);
            if let Poll::Ready(Some(_)) = &polled {
                UNBOUNDED_CHANNELS_COUNTER.with_label_values(&[this.0, "received"]).inc();
            }
            polled
        }
    }

    // Forwarded so the wrapper offers the same `FusedStream` capability as
    // the aliased `UnboundedReceiver`.
    impl<T> FusedStream for TracingUnboundedReceiver<T> {
        fn is_terminated(&self) -> bool {
            self.1.is_terminated()
        }
    }

    impl<T> Sink<T> for TracingUnboundedSender<T> {
        type Error = SendError;

        fn poll_ready(
            self: Pin<&mut Self>,
            cx: &mut Context<'_>,
        ) -> Poll<Result<(), Self::Error>> {
            TracingUnboundedSender::poll_ready(&*self, cx)
        }

        fn start_send(
            mut self: Pin<&mut Self>,
            msg: T,
        ) -> Result<(), Self::Error> {
            TracingUnboundedSender::start_send(&mut *self, msg)
        }

        fn poll_flush(
            self: Pin<&mut Self>,
            _: &mut Context<'_>,
        ) -> Poll<Result<(), Self::Error>> {
            Poll::Ready(Ok(()))
        }

        fn poll_close(
            mut self: Pin<&mut Self>,
            _: &mut Context<'_>,
        ) -> Poll<Result<(), Self::Error>> {
            self.disconnect();
            Poll::Ready(Ok(()))
        }
    }

    impl<T> Sink<T> for &TracingUnboundedSender<T> {
        type Error = SendError;

        fn poll_ready(
            self: Pin<&mut Self>,
            cx: &mut Context<'_>,
        ) -> Poll<Result<(), Self::Error>> {
            TracingUnboundedSender::poll_ready(*self, cx)
        }

        fn start_send(self: Pin<&mut Self>, msg: T) -> Result<(), Self::Error> {
            self.unbounded_send(msg)
                .map_err(TrySendError::into_send_error)
        }

        fn poll_flush(
            self: Pin<&mut Self>,
            _: &mut Context<'_>,
        ) -> Poll<Result<(), Self::Error>> {
            Poll::Ready(Ok(()))
        }

        fn poll_close(
            self: Pin<&mut Self>,
            _: &mut Context<'_>,
        ) -> Poll<Result<(), Self::Error>> {
            self.close_channel();
            Poll::Ready(Ok(()))
        }
    }
}
pub use inner::{tracing_unbounded, TracingUnboundedSender, TracingUnboundedReceiver};