initial prometheus metrics (#1536)

* service-new: cosmetic changes

* overseer: draft of prometheus metrics

* metrics: update active_leaves metrics

* metrics: extract into functions

* metrics: resolve XXX

* metrics: it's ugly, but it works

* Bump Substrate

* metrics: move a bunch of code around

* Bumb substrate again

* metrics: fix a warning

* fix a warning in runtime

* metrics: statements signed

* metrics: statements impl RegisterMetrics

* metrics: refactor Metrics trait

* metrics: add Metrics assoc type to JobTrait

* metrics: move Metrics trait to util

* metrics: fix overseer

* metrics: fix backing

* metrics: fix candidate validation

* metrics: derive Default

* metrics: docs

* metrics: add stubs for other subsystems

* metrics: add more stubs and fix compilation

* metrics: fix doctest

* metrics: move to subsystem

* metrics: fix candidate validation

* metrics: bitfield signing

* metrics: av store

* metrics: chain API

* metrics: runtime API

* metrics: stub for avad

* metrics: candidates seconded

* metrics: ok I gave up

* metrics: provisioner

* metrics: remove a clone by requiring Metrics: Sync

* metrics: YAGNI

* metrics: remove another TODO

* metrics: for later

* metrics: add parachain_ prefix

* metrics: s/signed_statement/signed_statements

* utils: add a comment for job metrics

* metrics: address review comments

* metrics: oops

* metrics: make sure to save files before commit 😅

* use _total suffix for requests metrics

Co-authored-by: Max Inden <mail@max-inden.de>

* metrics: add tests for overseer

* update Cargo.lock

* overseer: add a test for CollationGeneration

* collation-generation: impl metrics

* collation-generation: use kebab-case for name

* collation-generation: add a constructor

Co-authored-by: Gav Wood <gavin@parity.io>
Co-authored-by: Ashley Ruglys <ashley.ruglys@gmail.com>
Co-authored-by: Max Inden <mail@max-inden.de>
This commit is contained in:
Andronik Ordian
2020-08-18 11:18:54 +02:00
committed by GitHub
parent ae37a00c17
commit e7ead40255
20 changed files with 742 additions and 106 deletions
+54 -13
View File
@@ -30,6 +30,7 @@ use polkadot_node_subsystem::{
AllMessages, ChainApiMessage, ProvisionableData, ProvisionerInherentData,
ProvisionerMessage, RuntimeApiMessage,
},
metrics::{self, prometheus},
};
use polkadot_node_subsystem_util::{
self as util,
@@ -50,6 +51,7 @@ struct ProvisioningJob {
provisionable_data_channels: Vec<mpsc::Sender<ProvisionableData>>,
backed_candidates: Vec<BackedCandidate>,
signed_bitfields: Vec<SignedAvailabilityBitfield>,
metrics: Metrics,
}
/// This enum defines the messages that the provisioner is prepared to receive.
@@ -134,6 +136,7 @@ impl JobTrait for ProvisioningJob {
type FromJob = FromJob;
type Error = Error;
type RunArgs = ();
type Metrics = Metrics;
const NAME: &'static str = "ProvisioningJob";
@@ -143,11 +146,12 @@ impl JobTrait for ProvisioningJob {
fn run(
relay_parent: Hash,
_run_args: Self::RunArgs,
metrics: Self::Metrics,
receiver: mpsc::Receiver<ToJob>,
sender: mpsc::Sender<FromJob>,
) -> Pin<Box<dyn Future<Output = Result<(), Self::Error>> + Send>> {
async move {
let job = ProvisioningJob::new(relay_parent, sender, receiver);
let job = ProvisioningJob::new(relay_parent, metrics, sender, receiver);
// it isn't necessary to break run_loop into its own function,
// but it's convenient to separate the concerns in this way
@@ -160,6 +164,7 @@ impl JobTrait for ProvisioningJob {
impl ProvisioningJob {
pub fn new(
relay_parent: Hash,
metrics: Metrics,
sender: mpsc::Sender<FromJob>,
receiver: mpsc::Receiver<ToJob>,
) -> Self {
@@ -170,6 +175,7 @@ impl ProvisioningJob {
provisionable_data_channels: Vec::new(),
backed_candidates: Vec::new(),
signed_bitfields: Vec::new(),
metrics,
}
}
@@ -190,7 +196,10 @@ impl ProvisioningJob {
)
.await
{
log::warn!(target: "provisioner", "failed to send inherent data: {:?}", err);
log::warn!(target: "provisioner", "failed to assemble or send inherent data: {:?}", err);
self.metrics.on_inherent_data_request(false);
} else {
self.metrics.on_inherent_data_request(true);
}
}
ToJob::Provisioner(RequestBlockAuthorshipData(_, sender)) => {
@@ -275,17 +284,9 @@ async fn send_inherent_data(
return_sender: oneshot::Sender<ProvisionerInherentData>,
mut from_job: mpsc::Sender<FromJob>,
) -> Result<(), Error> {
let availability_cores = match request_availability_cores(relay_parent, &mut from_job)
let availability_cores = request_availability_cores(relay_parent, &mut from_job)
.await?
.await?
{
Ok(cores) => cores,
Err(runtime_err) => {
// Don't take down the node on runtime API errors.
log::warn!(target: "provisioner", "Encountered a runtime API error: {:?}", runtime_err);
return Ok(());
}
};
.await??;
let bitfields = select_availability_bitfields(&availability_cores, bitfields);
let candidates = select_candidates(
@@ -467,7 +468,47 @@ fn bitfields_indicate_availability(
3 * availability.count_ones() >= 2 * availability.len()
}
delegated_subsystem!(ProvisioningJob(()) <- ToJob as ProvisioningSubsystem);
#[derive(Clone)]
struct MetricsInner {
inherent_data_requests: prometheus::CounterVec<prometheus::U64>,
}
/// Candidate backing metrics.
#[derive(Default, Clone)]
pub struct Metrics(Option<MetricsInner>);
impl Metrics {
fn on_inherent_data_request(&self, succeeded: bool) {
if let Some(metrics) = &self.0 {
if succeeded {
metrics.inherent_data_requests.with_label_values(&["succeded"]).inc();
} else {
metrics.inherent_data_requests.with_label_values(&["failed"]).inc();
}
}
}
}
impl metrics::Metrics for Metrics {
fn try_register(registry: &prometheus::Registry) -> Result<Self, prometheus::PrometheusError> {
let metrics = MetricsInner {
inherent_data_requests: prometheus::register(
prometheus::CounterVec::new(
prometheus::Opts::new(
"parachain_inherent_data_requests_total",
"Number of InherentData requests served by provisioner.",
),
&["succeeded", "failed"],
)?,
registry,
)?,
};
Ok(Metrics(Some(metrics)))
}
}
delegated_subsystem!(ProvisioningJob((), Metrics) <- ToJob as ProvisioningSubsystem);
#[cfg(test)]
mod tests {