initial prometheus metrics (#1536)

* service-new: cosmetic changes

* overseer: draft of prometheus metrics

* metrics: update active_leaves metrics

* metrics: extract into functions

* metrics: resolve XXX

* metrics: it's ugly, but it works

* Bump Substrate

* metrics: move a bunch of code around

* Bump substrate again

* metrics: fix a warning

* fix a warning in runtime

* metrics: statements signed

* metrics: statements impl RegisterMetrics

* metrics: refactor Metrics trait

* metrics: add Metrics assoc type to JobTrait

* metrics: move Metrics trait to util

* metrics: fix overseer

* metrics: fix backing

* metrics: fix candidate validation

* metrics: derive Default

* metrics: docs

* metrics: add stubs for other subsystems

* metrics: add more stubs and fix compilation

* metrics: fix doctest

* metrics: move to subsystem

* metrics: fix candidate validation

* metrics: bitfield signing

* metrics: av store

* metrics: chain API

* metrics: runtime API

* metrics: stub for avad

* metrics: candidates seconded

* metrics: ok I gave up

* metrics: provisioner

* metrics: remove a clone by requiring Metrics: Sync

* metrics: YAGNI

* metrics: remove another TODO

* metrics: for later

* metrics: add parachain_ prefix

* metrics: s/signed_statement/signed_statements

* utils: add a comment for job metrics

* metrics: address review comments

* metrics: oops

* metrics: make sure to save files before commit 😅

* use _total suffix for requests metrics

Co-authored-by: Max Inden <mail@max-inden.de>

* metrics: add tests for overseer

* update Cargo.lock

* overseer: add a test for CollationGeneration

* collation-generation: impl metrics

* collation-generation: use kebab-case for name

* collation-generation: add a constructor

Co-authored-by: Gav Wood <gavin@parity.io>
Co-authored-by: Ashley Ruglys <ashley.ruglys@gmail.com>
Co-authored-by: Max Inden <mail@max-inden.de>
This commit is contained in:
Andronik Ordian
2020-08-18 11:18:54 +02:00
committed by GitHub
parent ae37a00c17
commit e7ead40255
20 changed files with 742 additions and 106 deletions
+55 -11
View File
@@ -34,6 +34,7 @@ use polkadot_primitives::v1::{
};
use polkadot_subsystem::{
FromOverseer, SubsystemError, Subsystem, SubsystemContext, SpawnedSubsystem,
metrics::{self, prometheus},
};
use polkadot_subsystem::messages::AvailabilityStoreMessage;
@@ -59,6 +60,7 @@ enum Error {
/// An implementation of the Availability Store subsystem.
///
/// Bundles the key-value database handle with the metrics handle that is
/// passed along when messages are processed.
pub struct AvailabilityStoreSubsystem {
    /// Shared handle to the underlying key-value database.
    inner: Arc<dyn KeyValueDB>,
    /// Metrics handle forwarded to message processing.
    metrics: Metrics,
}
fn available_data_key(candidate_hash: &Hash) -> Vec<u8> {
@@ -85,7 +87,7 @@ pub struct Config {
impl AvailabilityStoreSubsystem {
/// Create a new `AvailabilityStoreSubsystem` with a given config on disk.
pub fn new_on_disk(config: Config) -> io::Result<Self> {
pub fn new_on_disk(config: Config, metrics: Metrics) -> io::Result<Self> {
let mut db_config = DatabaseConfig::with_columns(columns::NUM_COLUMNS);
if let Some(cache_size) = config.cache_size {
@@ -106,6 +108,7 @@ impl AvailabilityStoreSubsystem {
Ok(Self {
inner: Arc::new(db),
metrics,
})
}
@@ -113,6 +116,7 @@ impl AvailabilityStoreSubsystem {
/// Builds a subsystem around an already constructed key-value database.
///
/// The metrics handle is created empty (`Metrics(None)`).
fn new_in_memory(inner: Arc<dyn KeyValueDB>) -> Self {
    let metrics = Metrics(None);
    Self { inner, metrics }
}
}
@@ -130,7 +134,7 @@ where
Ok(FromOverseer::Signal(Conclude)) => break,
Ok(FromOverseer::Signal(_)) => (),
Ok(FromOverseer::Communication { msg }) => {
process_message(&subsystem.inner, msg)?;
process_message(&subsystem.inner, &subsystem.metrics, msg)?;
}
Err(_) => break,
}
@@ -142,7 +146,7 @@ where
Ok(())
}
fn process_message(db: &Arc<dyn KeyValueDB>, msg: AvailabilityStoreMessage) -> Result<(), Error> {
fn process_message(db: &Arc<dyn KeyValueDB>, metrics: &Metrics, msg: AvailabilityStoreMessage) -> Result<(), Error> {
use AvailabilityStoreMessage::*;
match msg {
QueryAvailableData(hash, tx) => {
@@ -152,10 +156,10 @@ fn process_message(db: &Arc<dyn KeyValueDB>, msg: AvailabilityStoreMessage) -> R
tx.send(available_data(db, &hash).is_some()).map_err(|_| oneshot::Canceled)?;
}
QueryChunk(hash, id, tx) => {
tx.send(get_chunk(db, &hash, id)?).map_err(|_| oneshot::Canceled)?;
tx.send(get_chunk(db, &hash, id, metrics)?).map_err(|_| oneshot::Canceled)?;
}
QueryChunkAvailability(hash, id, tx) => {
tx.send(get_chunk(db, &hash, id)?.is_some()).map_err(|_| oneshot::Canceled)?;
tx.send(get_chunk(db, &hash, id, metrics)?.is_some()).map_err(|_| oneshot::Canceled)?;
}
StoreChunk(hash, id, chunk, tx) => {
match store_chunk(db, &hash, id, chunk) {
@@ -169,7 +173,7 @@ fn process_message(db: &Arc<dyn KeyValueDB>, msg: AvailabilityStoreMessage) -> R
}
}
StoreAvailableData(hash, id, n_validators, av_data, tx) => {
match store_available_data(db, &hash, id, n_validators, av_data) {
match store_available_data(db, &hash, id, n_validators, av_data, metrics) {
Err(e) => {
tx.send(Err(())).map_err(|_| oneshot::Canceled)?;
return Err(e);
@@ -194,11 +198,12 @@ fn store_available_data(
id: Option<ValidatorIndex>,
n_validators: u32,
available_data: AvailableData,
metrics: &Metrics,
) -> Result<(), Error> {
let mut tx = DBTransaction::new();
if let Some(index) = id {
let chunks = get_chunks(&available_data, n_validators as usize)?;
let chunks = get_chunks(&available_data, n_validators as usize, metrics)?;
store_chunk(db, candidate_hash, n_validators, chunks[index as usize].clone())?;
}
@@ -231,7 +236,7 @@ fn store_chunk(db: &Arc<dyn KeyValueDB>, candidate_hash: &Hash, _n_validators: u
Ok(())
}
fn get_chunk(db: &Arc<dyn KeyValueDB>, candidate_hash: &Hash, index: u32)
fn get_chunk(db: &Arc<dyn KeyValueDB>, candidate_hash: &Hash, index: u32, metrics: &Metrics)
-> Result<Option<ErasureChunk>, Error>
{
if let Some(chunk) = query_inner(
@@ -242,7 +247,7 @@ fn get_chunk(db: &Arc<dyn KeyValueDB>, candidate_hash: &Hash, index: u32)
}
if let Some(data) = available_data(db, candidate_hash) {
let mut chunks = get_chunks(&data.data, data.n_validators as usize)?;
let mut chunks = get_chunks(&data.data, data.n_validators as usize, metrics)?;
let desired_chunk = chunks.get(index as usize).cloned();
for chunk in chunks.drain(..) {
store_chunk(db, candidate_hash, data.n_validators, chunk)?;
@@ -271,6 +276,8 @@ impl<Context> Subsystem<Context> for AvailabilityStoreSubsystem
where
Context: SubsystemContext<Message=AvailabilityStoreMessage>,
{
type Metrics = Metrics;
fn start(self, ctx: Context) -> SpawnedSubsystem {
let future = Box::pin(async move {
if let Err(e) = run(self, ctx).await {
@@ -285,8 +292,9 @@ impl<Context> Subsystem<Context> for AvailabilityStoreSubsystem
}
}
fn get_chunks(data: &AvailableData, n_validators: usize) -> Result<Vec<ErasureChunk>, Error> {
fn get_chunks(data: &AvailableData, n_validators: usize, metrics: &Metrics) -> Result<Vec<ErasureChunk>, Error> {
let chunks = erasure::obtain_chunks_v1(n_validators, data)?;
metrics.on_chunks_received(chunks.len());
let branches = erasure::branches(chunks.as_ref());
Ok(chunks
@@ -302,6 +310,41 @@ fn get_chunks(data: &AvailableData, n_validators: usize) -> Result<Vec<ErasureCh
)
}
/// The concrete prometheus collectors backing [`Metrics`].
#[derive(Clone)]
struct MetricsInner {
    /// Counter incremented by the number of availability chunks reported via
    /// `Metrics::on_chunks_received`.
    received_availability_chunks_total: prometheus::Counter<prometheus::U64>,
}
/// Availability metrics.
///
/// Wraps an optional set of collectors: with `None` inside, recording a
/// metric is a no-op; the derived `Default` yields that empty state.
#[derive(Clone, Default)]
pub struct Metrics(Option<MetricsInner>);
impl Metrics {
    /// Adds `count` to the received-availability-chunks counter.
    ///
    /// Does nothing when no collectors are present (the wrapped option is `None`).
    fn on_chunks_received(&self, count: usize) {
        use core::convert::TryFrom as _;

        let inner = match self.0.as_ref() {
            Some(inner) => inner,
            None => return,
        };

        // A `usize` is assumed to fit into a `u64`. Should the conversion ever
        // fail, the fallback is `u64::default()`, so the counter is bumped by zero.
        let increment = u64::try_from(count).unwrap_or_default();
        inner.received_availability_chunks_total.inc_by(increment);
    }
}
impl metrics::Metrics for Metrics {
    /// Creates the chunk counter, registers it with `registry`, and returns a
    /// `Metrics` value wrapping it.
    ///
    /// Any error from constructing or registering the counter is propagated
    /// to the caller.
    fn try_register(registry: &prometheus::Registry) -> Result<Self, prometheus::PrometheusError> {
        let counter = prometheus::Counter::new(
            "parachain_received_availability_chunks_total",
            "Number of availability chunks received.",
        )?;
        let received_availability_chunks_total = prometheus::register(counter, registry)?;

        Ok(Metrics(Some(MetricsInner {
            received_availability_chunks_total,
        })))
    }
}
#[cfg(test)]
mod tests {
use super::*;
@@ -501,7 +544,8 @@ mod tests {
omitted_validation,
};
let chunks_expected = get_chunks(&available_data, n_validators as usize).unwrap();
let no_metrics = Metrics(None);
let chunks_expected = get_chunks(&available_data, n_validators as usize, &no_metrics).unwrap();
let (tx, rx) = oneshot::channel();
let block_msg = AvailabilityStoreMessage::StoreAvailableData(