Introduce Prometheus metric endpoint replacing Grafana endpoint (#4981)

* Refactor rebase master prometheus_v0.3

* Milestone1: Final Version of v0.3

* no-std or warm compatibility issues, grapana-data -source code reference and correction,applicable

* Cargo.lock paritytech/master rebase

* prometheus networking.rs del, grafana-data-source networking.rs pub edit and note

* chore: reflect various feedback

* Spaces to tabs.

* Replace grafana and tidy

* Add generics

* Add photo back

* Re-fix spaces in primitives/consensus/babe/src/inherents.rs

* Refactor rebase master prometheus_v0.3

* Milestone1: Final Version of v0.3

* no-std or warm compatibility issues, grapana-data -source code reference and correction,applicable

* prometheus networking.rs del, grafana-data-source networking.rs pub edit and note

* chore: reflect various feedback

* Replace grafana and tidy

* Add generics

* Add photo back

* Re-fix spaces in primitives/consensus/babe/src/inherents.rs

* chore: revert this file back to paritytech/master inherents.rs.

* Add newline at EOF

* Tidy

* Use local registry

* fix typo

Co-Authored-By: Max Inden <mail@max-inden.de>

* chore:  Apply review feedback

* endpoint -> exporter

* fix readme

* Remove lazy_static, use ServiceMetrics struct instead

* Switch to using GaugeVecs

* chore: without nightly , edit README

* block_height -> block_height_number

* Switch to a ready_transactions_number gauge

* Update utils/prometheus/src/lib.rs

Co-Authored-By: Max Inden <mail@max-inden.de>

* no-prometheus flag add

* /metrics url Input check

* remove prometheus in Tracing

* remove prometheus in Tracing

* chore: master code rebase edit

* gitlab-check-web-wasm edit code

* From:from and cargo.lock update

* with_prometheus_registry add background_tasks

* utils/prometheus/src/lib.rs: Restructure #[cfg] for wasm without hyper

Given that Hyper is not compatible with WASM targets it needs to be
excluded from WASM builds. Instead of introducing #[cfg] lines
throughout the crate, this patch splits the crate into two: known_os and
unknown_os (WASM).

* utils/prometheus/src/lib.rs: Feature gate known_os module

* client/cli/src/lib.rs: Re-add newline at end of file

Co-authored-by: JeseonLEE <zeroday26@gmail.com>
Co-authored-by: Gavin Wood <github@gavwood.com>
Co-authored-by: Ashley <ashley.ruglys@gmail.com>
Co-authored-by: Hyungsuk Kang <hskang9@gmail.com>
This commit is contained in:
Max Inden
2020-02-19 15:36:24 +01:00
committed by GitHub
parent e417f986be
commit d8230ecf4b
22 changed files with 409 additions and 648 deletions
@@ -1,154 +0,0 @@
// Copyright 2019-2020 Parity Technologies (UK) Ltd.
// This file is part of Substrate.
// Substrate is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Substrate is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Substrate. If not, see <http://www.gnu.org/licenses/>.
use std::collections::HashMap;
use std::convert::TryFrom;
use crate::Error;
/// In-memory store of metric time series, keyed by metric name.
///
/// Every datapoint keeps its time as a `u32` millisecond offset from `base_timestamp`.
pub struct Database {
// Unix timestamp in milliseconds that all stored `delta_timestamp` values are relative to.
base_timestamp: i64,
// Datapoints per metric key; new datapoints are appended at the end.
storage: HashMap<String, Vec<Datapoint>>
}
impl Database {
	/// Create a new, empty `Database` whose base timestamp is the current time.
	pub fn new() -> Self {
		Self {
			base_timestamp: now_millis(),
			storage: HashMap::new()
		}
	}

	/// Produce an iterator over (clones of) the keys that start with `base`.
	pub fn keys_starting_with<'a>(&'a self, base: &'a str) -> impl Iterator<Item = String> + 'a {
		self.storage.keys()
			.filter(move |key| key.starts_with(base))
			.cloned()
	}

	/// Select at most `max_datapoints` datapoints of `key` that were added between `from`
	/// and `to` (absolute unix timestamps in milliseconds).
	///
	/// Returns `None` if `key` is unknown. The result is a list of `(value, timestamp)` pairs.
	/// An inverted range (`from > to`) yields an empty list. When more datapoints are in range
	/// than requested, evenly spaced samples are returned and the last datapoint is always kept.
	pub fn datapoints_between(&self, key: &str, from: i64, to: i64, max_datapoints: usize) -> Option<Vec<(f32, i64)>> {
		self.storage.get(key)
			.map(|vec| {
				let start = find_index(vec, self.base_timestamp, from);
				// `from` and `to` come from the request. Clamp the end so that an inverted
				// range produces an empty slice instead of a slicing panic.
				let end = find_index(vec, self.base_timestamp, to).max(start);
				let slice = &vec[start .. end];

				if max_datapoints == 0 {
					Vec::new()
				} else if max_datapoints >= slice.len() {
					// Just convert the slice as-is
					slice.iter()
						.map(|dp| dp.make_absolute(self.base_timestamp))
						.collect()
				} else {
					// We have more datapoints than we need, so we need to skip some.
					// For `max_datapoints == 1` the range below is empty, so the division
					// by `max_datapoints - 1` is never evaluated.
					(0 .. max_datapoints - 1)
						.map(|i| &slice[i * slice.len() / (max_datapoints - 1)])
						.chain(slice.last())
						.map(|dp| dp.make_absolute(self.base_timestamp))
						.collect()
				}
			})
	}

	/// Push a new datapoint. Will error if the base timestamp hasn't been updated in `2^32`
	/// milliseconds (49 days).
	///
	/// On error nothing is stored; in particular no empty series is created for `key`.
	pub fn push(&mut self, key: &str, value: f32) -> Result<(), Error> {
		// Build the datapoint before touching the map, so a failure leaves no phantom key.
		let datapoint = Datapoint::new(self.base_timestamp, value)?;

		self.storage.entry(key.into())
			.or_insert_with(Vec::new)
			.push(datapoint);

		Ok(())
	}

	/// Set a new base timestamp, and remove metrics older than this new timestamp. Errors if the
	/// difference between timestamps is greater than `2^32` milliseconds (49 days).
	///
	/// Does nothing if `new_base_timestamp` is not newer than the current base timestamp.
	pub fn truncate(&mut self, new_base_timestamp: i64) -> Result<(), Error> {
		// Only ever move the base forwards in time.
		if self.base_timestamp >= new_base_timestamp {
			return Ok(());
		}

		// Offsets are stored as `u32`, so the shift applied to them has to fit into one too.
		let delta = u32::try_from(new_base_timestamp - self.base_timestamp)
			.map_err(Error::Timestamp)?;

		for metric in self.storage.values_mut() {
			// Find the index of the oldest allowed timestamp and cut out all those before it.
			let index = find_index(metric.as_slice(), self.base_timestamp, new_base_timestamp);
			metric.drain(.. index);

			// Re-express the surviving offsets relative to the new base. With datapoints in
			// timestamp order this cannot underflow; saturate anyway in case the wall clock
			// stepped backwards between pushes and broke that ordering.
			for dp in metric.iter_mut() {
				dp.delta_timestamp = dp.delta_timestamp.saturating_sub(delta);
			}
		}

		self.base_timestamp = new_base_timestamp;

		Ok(())
	}
}
// A single recorded metric value together with the time it was recorded at.
#[derive(Clone, Copy)]
struct Datapoint {
// Milliseconds elapsed between the database's base timestamp and the recording time.
delta_timestamp: u32,
// The recorded metric value.
value: f32
}
impl Datapoint {
fn new(base_timestamp: i64, value: f32) -> Result<Self, Error> {
Ok(Self {
delta_timestamp: u32::try_from(now_millis() - base_timestamp)
.map_err(Error::Timestamp)?,
value
})
}
fn make_absolute(self, base_timestamp: i64) -> (f32, i64) {
(self.value, base_timestamp + self.delta_timestamp as i64)
}
}
fn find_index(slice: &[Datapoint], base_timestamp: i64, timestamp: i64) -> usize {
slice.binary_search_by_key(&timestamp, |datapoint| {
base_timestamp + datapoint.delta_timestamp as i64
}).unwrap_or_else(|index| index)
}
/// Current wall-clock time (UTC) as a unix timestamp in milliseconds.
fn now_millis() -> i64 {
	let now = chrono::Utc::now();
	now.timestamp_millis()
}
// Checks that pushed keys can be listed back by prefix.
#[test]
fn test() {
	let mut db = Database::new();

	for &(key, value) in [("test", 1.0), ("test", 2.5), ("test", 2.0), ("test 2", 1.0)].iter() {
		db.push(key, value).unwrap();
	}

	let mut all: Vec<String> = db.keys_starting_with("test").collect();
	all.sort();
	assert_eq!(all, ["test", "test 2"]);

	let with_space: Vec<String> = db.keys_starting_with("test ").collect();
	assert_eq!(with_space, ["test 2"]);
}
@@ -1,100 +0,0 @@
// Copyright 2019-2020 Parity Technologies (UK) Ltd.
// This file is part of Substrate.
// Substrate is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Substrate is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Substrate. If not, see <http://www.gnu.org/licenses/>.
//! [Grafana] data source server
//!
//! To display node statistics with [Grafana], this module exposes a `run_server` function that
//! starts up a HTTP server that conforms to the [`grafana-json-data-source`] API. The
//! `record_metrics` macro can be used to pass metrics to this server.
//!
//! [Grafana]: https://grafana.com/
//! [`grafana-json-data-source`]: https://github.com/simPod/grafana-json-datasource
#![warn(missing_docs)]
use lazy_static::lazy_static;
use parking_lot::RwLock;
mod types;
mod server;
#[cfg(not(target_os = "unknown"))]
mod networking;
mod database;
use database::Database;
pub use server::run_server;
use std::num::TryFromIntError;
lazy_static! {
// The global metrics database, wrapped in a `RwLock`.
// `record_metrics_slice` and the periodic clean-up task take the write lock;
// the HTTP request handlers only take the read lock.
static ref DATABASE: RwLock<Database> = RwLock::new(Database::new());
}
/// Write metrics to the global metrics database.
///
/// Takes a comma-separated list of `key => value` pairs; a trailing comma is optional.
/// Every value is cast to `f32`. Evaluates to the `Result` of `record_metrics_slice`, or to
/// `Ok(())` without recording anything when compiled for `target_os = "unknown"`.
#[macro_export]
macro_rules! record_metrics(
	($($key:expr => $value:expr),* $(,)?) => {
		if cfg!(not(target_os = "unknown")) {
			$crate::record_metrics_slice(&[
				$( ($key, $value as f32), )*
			])
		} else {
			Ok(())
		}
	}
);
/// Push every `(key, value)` pair of `metrics` into the global metrics database.
///
/// Stops at, and returns, the first error; pairs before it have already been stored.
/// Intended to be only used via `record_metrics!`.
pub fn record_metrics_slice(metrics: &[(&str, f32)]) -> Result<(), Error> {
	let mut guard = crate::DATABASE.write();

	metrics.iter()
		.try_for_each(|&(key, value)| guard.push(key, value))
}
/// Error type that can be returned by either `record_metrics` or `run_server`.
///
/// `derive_more::From` generates a `From` conversion for each wrapped error type.
#[derive(Debug, derive_more::Display, derive_more::From)]
pub enum Error {
/// Hyper internal error, e.g. while reading a request body or serving connections.
#[cfg(not(target_os = "unknown"))]
Hyper(hyper::Error),
/// Http request error, raised when building a response fails.
#[cfg(not(target_os = "unknown"))]
Http(hyper::http::Error),
/// Serialization/deserialization error of a JSON request or response body.
Serde(serde_json::Error),
/// Timestamp error: a millisecond offset did not fit into a `u32`.
Timestamp(TryFromIntError),
/// i/o error, e.g. when binding the listening socket fails.
Io(std::io::Error)
}
impl std::error::Error for Error {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
#[cfg(not(target_os = "unknown"))]
Error::Hyper(error) => Some(error),
#[cfg(not(target_os = "unknown"))]
Error::Http(error) => Some(error),
Error::Serde(error) => Some(error),
Error::Timestamp(error) => Some(error),
Error::Io(error) => Some(error)
}
}
}
@@ -1,164 +0,0 @@
// Copyright 2019-2020 Parity Technologies (UK) Ltd.
// This file is part of Substrate.
// Substrate is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Substrate is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Substrate. If not, see <http://www.gnu.org/licenses/>.
use serde::{Serialize, de::DeserializeOwned};
use chrono::{Duration, Utc};
use futures_util::{FutureExt, TryStreamExt, future::{Future, select, Either}};
use futures_timer::Delay;
use crate::{DATABASE, Error, types::{Target, Query, TimeseriesData, Range}};
#[cfg(not(target_os = "unknown"))]
use hyper::{Body, Request, Response, header, service::{service_fn, make_service_fn}, Server};
/// Route a request by its path.
///
/// * `/search` answers with the list of metric keys starting with the requested target.
/// * `/query` answers with the timeseries of every requested target within the given range.
/// * Any other path gets an empty response.
#[cfg(not(target_os = "unknown"))]
async fn api_response(req: Request<Body>) -> Result<Response<Body>, Error> {
	match req.uri().path() {
		"/search" => {
			// Filter and return metrics relating to the target.
			let search = |target: Target| {
				let database = DATABASE.read();
				database.keys_starting_with(&target.target).collect::<Vec<_>>()
			};

			map_request_to_response(req, search).await
		},
		"/query" => {
			// Return timeseries data related to the specified metrics.
			let run_query = |query: Query| {
				let database = DATABASE.read();
				let (from, to) = (query.range.from, query.range.to);
				let limit = query.max_datapoints;

				query.targets.iter()
					.map(|target| TimeseriesData {
						target: target.target.clone(),
						datapoints: database.datapoints_between(&target.target, from, to, limit)
							.unwrap_or_else(Vec::new),
					})
					.collect::<Vec<_>>()
			};

			map_request_to_response(req, run_query).await
		},
		_ => Ok(Response::new(Body::empty())),
	}
}
#[cfg(not(target_os = "unknown"))]
async fn map_request_to_response<Req, Res, T>(req: Request<Body>, transformation: T) -> Result<Response<Body>, Error>
where
Req: DeserializeOwned,
Res: Serialize,
T: Fn(Req) -> Res + Send + Sync + 'static
{
let body = req.into_body()
.map_ok(|bytes| bytes.to_vec())
.try_concat()
.await
.map_err(Error::Hyper)?;
let req = serde_json::from_slice(body.as_ref()).map_err(Error::Serde)?;
let res = transformation(req);
let string = serde_json::to_string(&res).map_err(Error::Serde)?;
Response::builder()
.header(header::CONTENT_TYPE, "application/json")
.body(Body::from(string))
.map_err(Error::Http)
}
/// Executor handed to hyper that spawns its futures as `async_std` tasks.
///
/// Given that we're not using hyper's tokio feature, we need to define our own executor.
#[derive(Clone)]
pub struct Executor;

#[cfg(not(target_os = "unknown"))]
impl<T> hyper::rt::Executor<T> for Executor
	where
		T: Future + Send + 'static,
		T::Output: Send + 'static,
{
	fn execute(&self, future: T) {
		// The returned join handle is intentionally discarded.
		drop(async_std::task::spawn(future));
	}
}
/// Start the data source server.
///
/// Binds to `address`. If that fails because the address is in use or permission is denied,
/// and a specific port was requested, binding is retried once on port 0. The returned future
/// resolves as soon as either the HTTP server or the periodic clean-up task finishes.
#[cfg(not(target_os = "unknown"))]
pub async fn run_server(mut address: std::net::SocketAddr) -> Result<(), Error> {
	use async_std::{net, io};
	use crate::networking::Incoming;

	let listener = loop {
		let err = match net::TcpListener::bind(&address).await {
			Ok(bound) => {
				log::info!("Grafana data source server started at {}", address);
				break bound;
			},
			Err(err) => err,
		};

		// Only fall back to a random port when a specific one was asked for.
		let retry_on_random_port = match err.kind() {
			io::ErrorKind::AddrInUse | io::ErrorKind::PermissionDenied => address.port() != 0,
			_ => false,
		};

		if !retry_on_random_port {
			return Err(err.into());
		}

		log::warn!(
			"Unable to bind grafana data source server to {}. Trying random port.",
			address
		);
		address.set_port(0);
	};

	let service = make_service_fn(|_| {
		async {
			Ok::<_, Error>(service_fn(api_response))
		}
	});

	let http = Server::builder(Incoming(listener.incoming()))
		.executor(Executor)
		.serve(service)
		.boxed();

	// Once a day, drop everything older than a week.
	let interval = std::time::Duration::from_secs(24 * 3600);
	let janitor = clean_up(interval, Duration::weeks(1))
		.boxed();

	match select(http, janitor).await {
		Either::Left((outcome, _)) => outcome.map_err(Into::into),
		Either::Right((outcome, _)) => outcome,
	}
}
/// Stand-in for the data source server on `target_os = "unknown"`: ignores the address and
/// returns `Ok(())` without starting anything.
#[cfg(target_os = "unknown")]
pub async fn run_server(_address: std::net::SocketAddr) -> Result<(), Error> {
	Ok(())
}
/// Periodically remove old metrics.
///
/// Sleeps for `every`, then truncates the global database so that only datapoints newer than
/// `before` ago remain, and repeats. Only returns if truncating fails.
async fn clean_up(every: std::time::Duration, before: Duration) -> Result<(), Error> {
	loop {
		Delay::new(every).await;

		let cutoff = Utc::now() - before;
		DATABASE.write().truncate(cutoff.timestamp_millis())?;
	}
}
@@ -1,50 +0,0 @@
// Copyright 2019-2020 Parity Technologies (UK) Ltd.
// This file is part of Substrate.
// Substrate is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Substrate is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Substrate. If not, see <http://www.gnu.org/licenses/>.
use serde::{Serialize, Deserialize};
/// A metric reference as it appears in `/search` and `/query` request bodies.
#[derive(Serialize, Deserialize)]
pub struct Target {
/// Metric key; `/search` treats it as a key prefix, `/query` as the exact key.
pub target: String,
}
/// Body of a `/query` request.
#[derive(Serialize, Deserialize)]
pub struct Query {
/// Upper bound on the number of datapoints returned per target (`maxDataPoints` in JSON).
#[serde(rename = "maxDataPoints")]
pub max_datapoints: usize,
/// The metrics to return timeseries data for.
pub targets: Vec<Target>,
/// The time range the datapoints have to fall into.
pub range: Range,
}
/// Time range of a query. Both ends are deserialized from a date-time and stored as unix
/// timestamps in milliseconds.
#[derive(Serialize, Deserialize)]
pub struct Range {
/// Start of the range, in milliseconds.
#[serde(deserialize_with = "date_to_timestamp_ms")]
pub from: i64,
/// End of the range, in milliseconds.
#[serde(deserialize_with = "date_to_timestamp_ms")]
pub to: i64,
}
// Deserialize a `DateTime<Utc>` and convert it into a unix timestamp in milliseconds.
fn date_to_timestamp_ms<'de, D: serde::Deserializer<'de>>(timestamp: D) -> Result<i64, D::Error> {
	let date: chrono::DateTime<chrono::Utc> = Deserialize::deserialize(timestamp)?;
	Ok(date.timestamp_millis())
}
/// One entry of a `/query` response: the datapoints found for a single target.
#[derive(Serialize, Deserialize)]
pub struct TimeseriesData {
/// The metric key this data belongs to.
pub target: String,
/// `(value, unix timestamp in milliseconds)` pairs.
pub datapoints: Vec<(f32, i64)>
}
@@ -1,13 +0,0 @@
[package]
description = "Grafana data source server test"
name = "grafana-data-source-test"
version = "2.0.0"
license = "GPL-3.0"
authors = ["Parity Technologies <admin@parity.io>"]
edition = "2018"
[dependencies]
grafana-data-source = { version = "0.8", path = ".." }
futures = "0.3"
futures-timer = "3.0.1"
rand = "0.7"
@@ -1,44 +0,0 @@
// Copyright 2019-2020 Parity Technologies (UK) Ltd.
// This file is part of Substrate.
// Substrate is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Substrate is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Substrate. If not, see <http://www.gnu.org/licenses/>.
use grafana_data_source::{run_server, record_metrics};
use std::time::Duration;
use rand::Rng;
use futures::{future::join, executor};
/// Once per second, record a random number and its square as metrics. Recording errors are
/// printed to stderr; the loop never ends.
async fn randomness() {
	let period = Duration::from_secs(1);

	loop {
		futures_timer::Delay::new(period).await;

		let sample = rand::thread_rng().gen_range(0.0, 1000.0);

		let outcome = record_metrics!(
			"random data" => sample,
			"random^2" => sample * sample,
		);

		match outcome {
			Ok(()) => {},
			Err(error) => eprintln!("{}", error),
		}
	}
}
/// Run the data source server on `127.0.0.1:9955` together with the random metric generator,
/// panicking if the server returns an error.
fn main() {
	let address = "127.0.0.1:9955".parse().unwrap();

	let (server_outcome, _) = executor::block_on(join(
		run_server(address),
		randomness()
	));

	server_outcome.unwrap();
}
@@ -1,6 +1,6 @@
[package]
description = "Grafana data source server"
name = "grafana-data-source"
description = "Prometheus exporter server"
name = "prometheus-exporter"
version = "0.8.0"
license = "GPL-3.0"
authors = ["Parity Technologies <admin@parity.io>"]
@@ -8,13 +8,8 @@ edition = "2018"
[dependencies]
log = "0.4.8"
prometheus = "0.7"
futures-util = { version = "0.3.1", default-features = false, features = ["io"] }
serde_json = "1"
serde = { version = "1", features = ["derive"] }
chrono = { version = "0.4", features = ["serde"] }
lazy_static = "1.4"
parking_lot = "0.10.0"
futures-timer = "3.0.1"
derive_more = "0.99"
[target.'cfg(not(target_os = "unknown"))'.dependencies]
+16
View File
@@ -0,0 +1,16 @@
# Substrate Prometheus Exporter
## Introduction
[Prometheus](https://prometheus.io/) is one of the most widely used monitoring tools for managing highly available services, and is supported by the [Cloud Native Computing Foundation](https://www.cncf.io/). By providing Prometheus metrics in Substrate, node operators can easily adopt widely used display/alert tools such
as [Grafana](https://grafana.com/) and [Alertmanager](https://prometheus.io/docs/alerting/alertmanager/). Easy access to such monitoring tools will help parachain developers/operators and validators achieve much higher availability of their services.
Metrics will be served under `/metrics` on TCP port 9615 by default.
## Quick Start
1. From the root of the repository start Substrate `cargo run --release`.
2. In another terminal run `curl localhost:9615/metrics` to retrieve the metrics.
To learn how to configure Prometheus see the Prometheus [Getting Started](https://prometheus.io/docs/prometheus/latest/getting_started/) guide.
+144
View File
@@ -0,0 +1,144 @@
// Copyright 2019 Parity Technologies (UK) Ltd.
// This file is part of Substrate.
// Substrate is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Substrate is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Substrate. If not, see <http://www.gnu.org/licenses/>.
use futures_util::{FutureExt, future::Future};
pub use prometheus::{
Registry, Error as PrometheusError, Opts,
core::{
GenericGauge as Gauge, GenericCounter as Counter,
GenericGaugeVec as GaugeVec, GenericCounterVec as CounterVec,
AtomicF64 as F64, AtomicI64 as I64, AtomicU64 as U64,
}
};
use prometheus::{Encoder, TextEncoder, core::Collector};
use std::net::SocketAddr;
#[cfg(not(target_os = "unknown"))]
mod networking;
#[cfg(target_os = "unknown")]
pub use unknown_os::init_prometheus;
#[cfg(not(target_os = "unknown"))]
pub use known_os::init_prometheus;
/// Register `metric` with `registry` and hand the metric back to the caller.
///
/// The registry receives a boxed clone, so the returned value can be kept around to update
/// the metric. Returns the registry's error if registration fails.
pub fn register<T: Clone + Collector + 'static>(metric: T, registry: &Registry) -> Result<T, PrometheusError> {
	let collector = Box::new(metric.clone());
	registry.register(collector).map(|()| metric)
}
// On WASM `init_prometheus` becomes a no-op.
#[cfg(target_os = "unknown")]
mod unknown_os {
	use super::*;

	/// Error type of the no-op exporter. It has no variants, so it can never be constructed.
	///
	/// Derives `Debug` like its `known_os` counterpart, so that results carrying it can be
	/// unwrapped or logged by code that is shared between targets.
	#[derive(Debug)]
	pub enum Error {}

	/// No-op stand-in for the exporter: ignores both arguments and returns `Ok(())`.
	pub async fn init_prometheus(_: SocketAddr, _registry: Registry) -> Result<(), Error> {
		Ok(())
	}
}
#[cfg(not(target_os = "unknown"))]
mod known_os {
use super::*;
use hyper::http::StatusCode;
use hyper::{Server, Body, Request, Response, service::{service_fn, make_service_fn}};
#[derive(Debug, derive_more::Display, derive_more::From)]
pub enum Error {
/// Hyper internal error.
Hyper(hyper::Error),
/// Http request error.
Http(hyper::http::Error),
/// i/o error.
Io(std::io::Error),
#[display(fmt = "Prometheus exporter port {} already in use.", _0)]
PortInUse(SocketAddr)
}
impl std::error::Error for Error {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
Error::Hyper(error) => Some(error),
Error::Http(error) => Some(error),
Error::Io(error) => Some(error),
Error::PortInUse(_) => None
}
}
}
async fn request_metrics(req: Request<Body>, registry: Registry) -> Result<Response<Body>, Error> {
if req.uri().path() == "/metrics" {
let metric_families = registry.gather();
let mut buffer = vec![];
let encoder = TextEncoder::new();
encoder.encode(&metric_families, &mut buffer).unwrap();
Response::builder().status(StatusCode::OK)
.header("Content-Type", encoder.format_type())
.body(Body::from(buffer))
.map_err(Error::Http)
} else {
Response::builder().status(StatusCode::NOT_FOUND)
.body(Body::from("Not found."))
.map_err(Error::Http)
}
}
#[derive(Clone)]
pub struct Executor;
impl<T> hyper::rt::Executor<T> for Executor
where
T: Future + Send + 'static,
T::Output: Send + 'static,
{
fn execute(&self, future: T) {
async_std::task::spawn(future);
}
}
/// Initializes the metrics context, and starts an HTTP server
/// to serve metrics.
pub async fn init_prometheus(prometheus_addr: SocketAddr, registry: Registry) -> Result<(), Error>{
use networking::Incoming;
let listener = async_std::net::TcpListener::bind(&prometheus_addr)
.await
.map_err(|_| Error::PortInUse(prometheus_addr))?;
log::info!("Prometheus server started at {}", prometheus_addr);
let service = make_service_fn(move |_| {
let registry = registry.clone();
async move {
Ok::<_, hyper::Error>(service_fn(move |req: Request<Body>| {
request_metrics(req, registry.clone())
}))
}
});
let server = Server::builder(Incoming(listener.incoming()))
.executor(Executor)
.serve(service)
.boxed();
let result = server.await.map_err(Into::into);
result
}
}