mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-13 07:01:05 +00:00
initial prometheus metrics (#1536)
* service-new: cosmetic changes * overseer: draft of prometheus metrics * metrics: update active_leaves metrics * metrics: extract into functions * metrics: resolve XXX * metrics: it's ugly, but it works * Bump Substrate * metrics: move a bunch of code around * Bumb substrate again * metrics: fix a warning * fix a warning in runtime * metrics: statements signed * metrics: statements impl RegisterMetrics * metrics: refactor Metrics trait * metrics: add Metrics assoc type to JobTrait * metrics: move Metrics trait to util * metrics: fix overseer * metrics: fix backing * metrics: fix candidate validation * metrics: derive Default * metrics: docs * metrics: add stubs for other subsystems * metrics: add more stubs and fix compilation * metrics: fix doctest * metrics: move to subsystem * metrics: fix candidate validation * metrics: bitfield signing * metrics: av store * metrics: chain API * metrics: runtime API * metrics: stub for avad * metrics: candidates seconded * metrics: ok I gave up * metrics: provisioner * metrics: remove a clone by requiring Metrics: Sync * metrics: YAGNI * metrics: remove another TODO * metrics: for later * metrics: add parachain_ prefix * metrics: s/signed_statement/signed_statements * utils: add a comment for job metrics * metrics: address review comments * metrics: oops * metrics: make sure to save files before commit 😅 * use _total suffix for requests metrics Co-authored-by: Max Inden <mail@max-inden.de> * metrics: add tests for overseer * update Cargo.lock * overseer: add a test for CollationGeneration * collation-generation: impl metrics * collation-generation: use kebab-case for name * collation-generation: add a constructor Co-authored-by: Gav Wood <gavin@parity.io> Co-authored-by: Ashley Ruglys <ashley.ruglys@gmail.com> Co-authored-by: Max Inden <mail@max-inden.de>
This commit is contained in:
@@ -24,6 +24,7 @@ use polkadot_node_subsystem::{
|
||||
errors::{ChainApiError, RuntimeApiError},
|
||||
messages::{AllMessages, RuntimeApiMessage, RuntimeApiRequest, RuntimeApiSender},
|
||||
FromOverseer, SpawnedSubsystem, Subsystem, SubsystemContext, SubsystemError, SubsystemResult,
|
||||
metrics,
|
||||
};
|
||||
use futures::{
|
||||
channel::{mpsc, oneshot},
|
||||
@@ -63,11 +64,13 @@ pub mod reexports {
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
/// Duration a job will wait after sending a stop signal before hard-aborting.
|
||||
pub const JOB_GRACEFUL_STOP_DURATION: Duration = Duration::from_secs(1);
|
||||
/// Capacity of channels to and from individual jobs
|
||||
pub const JOB_CHANNEL_CAPACITY: usize = 64;
|
||||
|
||||
|
||||
/// Utility errors
|
||||
#[derive(Debug, derive_more::From)]
|
||||
pub enum Error {
|
||||
@@ -446,6 +449,12 @@ pub trait JobTrait: Unpin {
|
||||
///
|
||||
/// If no extra information is needed, it is perfectly acceptable to set it to `()`.
|
||||
type RunArgs: 'static + Send;
|
||||
/// Subsystem-specific Prometheus metrics.
|
||||
///
|
||||
/// Jobs spawned by one subsystem should share the same
|
||||
/// instance of metrics (use `.clone()`).
|
||||
/// The `delegated_subsystem!` macro should take care of this.
|
||||
type Metrics: 'static + metrics::Metrics + Send;
|
||||
|
||||
/// Name of the job, e.g. `CandidateBackingJob`
|
||||
const NAME: &'static str;
|
||||
@@ -454,6 +463,7 @@ pub trait JobTrait: Unpin {
|
||||
fn run(
|
||||
parent: Hash,
|
||||
run_args: Self::RunArgs,
|
||||
metrics: Self::Metrics,
|
||||
receiver: mpsc::Receiver<Self::ToJob>,
|
||||
sender: mpsc::Sender<Self::FromJob>,
|
||||
) -> Pin<Box<dyn Future<Output = Result<(), Self::Error>> + Send>>;
|
||||
@@ -532,7 +542,7 @@ impl<Spawner: SpawnNamed, Job: 'static + JobTrait> Jobs<Spawner, Job> {
|
||||
}
|
||||
|
||||
/// Spawn a new job for this `parent_hash`, with whatever args are appropriate.
|
||||
fn spawn_job(&mut self, parent_hash: Hash, run_args: Job::RunArgs) -> Result<(), Error> {
|
||||
fn spawn_job(&mut self, parent_hash: Hash, run_args: Job::RunArgs, metrics: Job::Metrics) -> Result<(), Error> {
|
||||
let (to_job_tx, to_job_rx) = mpsc::channel(JOB_CHANNEL_CAPACITY);
|
||||
let (from_job_tx, from_job_rx) = mpsc::channel(JOB_CHANNEL_CAPACITY);
|
||||
let (finished_tx, finished) = oneshot::channel();
|
||||
@@ -541,7 +551,7 @@ impl<Spawner: SpawnNamed, Job: 'static + JobTrait> Jobs<Spawner, Job> {
|
||||
let err_tx = self.errors.clone();
|
||||
|
||||
let (future, abort_handle) = future::abortable(async move {
|
||||
if let Err(e) = Job::run(parent_hash, run_args, to_job_rx, from_job_tx).await {
|
||||
if let Err(e) = Job::run(parent_hash, run_args, metrics, to_job_rx, from_job_tx).await {
|
||||
log::error!(
|
||||
"{}({}) finished with an error {:?}",
|
||||
Job::NAME,
|
||||
@@ -648,6 +658,7 @@ where
|
||||
pub struct JobManager<Spawner, Context, Job: JobTrait> {
|
||||
spawner: Spawner,
|
||||
run_args: Job::RunArgs,
|
||||
metrics: Job::Metrics,
|
||||
context: std::marker::PhantomData<Context>,
|
||||
job: std::marker::PhantomData<Job>,
|
||||
errors: Option<mpsc::Sender<(Option<Hash>, JobsError<Job::Error>)>>,
|
||||
@@ -662,10 +673,11 @@ where
|
||||
Job::ToJob: TryFrom<AllMessages> + TryFrom<<Context as SubsystemContext>::Message> + Sync,
|
||||
{
|
||||
/// Creates a new `Subsystem`.
|
||||
pub fn new(spawner: Spawner, run_args: Job::RunArgs) -> Self {
|
||||
pub fn new(spawner: Spawner, run_args: Job::RunArgs, metrics: Job::Metrics) -> Self {
|
||||
Self {
|
||||
spawner,
|
||||
run_args,
|
||||
metrics,
|
||||
context: std::marker::PhantomData,
|
||||
job: std::marker::PhantomData,
|
||||
errors: None,
|
||||
@@ -703,6 +715,7 @@ where
|
||||
pub async fn run(
|
||||
mut ctx: Context,
|
||||
run_args: Job::RunArgs,
|
||||
metrics: Job::Metrics,
|
||||
spawner: Spawner,
|
||||
mut err_tx: Option<mpsc::Sender<(Option<Hash>, JobsError<Job::Error>)>>,
|
||||
) {
|
||||
@@ -714,7 +727,7 @@ where
|
||||
|
||||
loop {
|
||||
select! {
|
||||
incoming = ctx.recv().fuse() => if Self::handle_incoming(incoming, &mut jobs, &run_args, &mut err_tx).await { break },
|
||||
incoming = ctx.recv().fuse() => if Self::handle_incoming(incoming, &mut jobs, &run_args, &metrics, &mut err_tx).await { break },
|
||||
outgoing = jobs.next().fuse() => if Self::handle_outgoing(outgoing, &mut ctx, &mut err_tx).await { break },
|
||||
complete => break,
|
||||
}
|
||||
@@ -741,6 +754,7 @@ where
|
||||
incoming: SubsystemResult<FromOverseer<Context::Message>>,
|
||||
jobs: &mut Jobs<Spawner, Job>,
|
||||
run_args: &Job::RunArgs,
|
||||
metrics: &Job::Metrics,
|
||||
err_tx: &mut Option<mpsc::Sender<(Option<Hash>, JobsError<Job::Error>)>>,
|
||||
) -> bool {
|
||||
use polkadot_node_subsystem::ActiveLeavesUpdate;
|
||||
@@ -753,7 +767,8 @@ where
|
||||
deactivated,
|
||||
}))) => {
|
||||
for hash in activated {
|
||||
if let Err(e) = jobs.spawn_job(hash, run_args.clone()) {
|
||||
let metrics = metrics.clone();
|
||||
if let Err(e) = jobs.spawn_job(hash, run_args.clone(), metrics) {
|
||||
log::error!("Failed to spawn a job: {:?}", e);
|
||||
Self::fwd_err(Some(hash), e.into(), err_tx).await;
|
||||
return true;
|
||||
@@ -849,14 +864,18 @@ where
|
||||
Job: 'static + JobTrait + Send,
|
||||
Job::RunArgs: Clone + Sync,
|
||||
Job::ToJob: TryFrom<AllMessages> + Sync,
|
||||
Job::Metrics: Sync,
|
||||
{
|
||||
type Metrics = Job::Metrics;
|
||||
|
||||
fn start(self, ctx: Context) -> SpawnedSubsystem {
|
||||
let spawner = self.spawner.clone();
|
||||
let run_args = self.run_args.clone();
|
||||
let metrics = self.metrics.clone();
|
||||
let errors = self.errors;
|
||||
|
||||
let future = Box::pin(async move {
|
||||
Self::run(ctx, run_args, spawner, errors).await;
|
||||
Self::run(ctx, run_args, metrics, spawner, errors).await;
|
||||
});
|
||||
|
||||
SpawnedSubsystem {
|
||||
@@ -901,11 +920,11 @@ where
|
||||
/// ```
|
||||
#[macro_export]
|
||||
macro_rules! delegated_subsystem {
|
||||
($job:ident($run_args:ty) <- $to_job:ty as $subsystem:ident) => {
|
||||
delegated_subsystem!($job($run_args) <- $to_job as $subsystem; stringify!($subsystem));
|
||||
($job:ident($run_args:ty, $metrics:ty) <- $to_job:ty as $subsystem:ident) => {
|
||||
delegated_subsystem!($job($run_args, $metrics) <- $to_job as $subsystem; stringify!($subsystem));
|
||||
};
|
||||
|
||||
($job:ident($run_args:ty) <- $to_job:ty as $subsystem:ident; $subsystem_name:expr) => {
|
||||
($job:ident($run_args:ty, $metrics:ty) <- $to_job:ty as $subsystem:ident; $subsystem_name:expr) => {
|
||||
#[doc = "Manager type for the "]
|
||||
#[doc = $subsystem_name]
|
||||
type Manager<Spawner, Context> = $crate::JobManager<Spawner, Context, $job>;
|
||||
@@ -924,15 +943,15 @@ macro_rules! delegated_subsystem {
|
||||
{
|
||||
#[doc = "Creates a new "]
|
||||
#[doc = $subsystem_name]
|
||||
pub fn new(spawner: Spawner, run_args: $run_args) -> Self {
|
||||
pub fn new(spawner: Spawner, run_args: $run_args, metrics: $metrics) -> Self {
|
||||
$subsystem {
|
||||
manager: $crate::JobManager::new(spawner, run_args)
|
||||
manager: $crate::JobManager::new(spawner, run_args, metrics)
|
||||
}
|
||||
}
|
||||
|
||||
/// Run this subsystem
|
||||
pub async fn run(ctx: Context, run_args: $run_args, spawner: Spawner) {
|
||||
<Manager<Spawner, Context>>::run(ctx, run_args, spawner, None).await
|
||||
pub async fn run(ctx: Context, run_args: $run_args, metrics: $metrics, spawner: Spawner) {
|
||||
<Manager<Spawner, Context>>::run(ctx, run_args, metrics, spawner, None).await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -942,6 +961,8 @@ macro_rules! delegated_subsystem {
|
||||
Context: $crate::reexports::SubsystemContext,
|
||||
<Context as $crate::reexports::SubsystemContext>::Message: Into<$to_job>,
|
||||
{
|
||||
type Metrics = $metrics;
|
||||
|
||||
fn start(self, ctx: Context) -> $crate::reexports::SpawnedSubsystem {
|
||||
self.manager.start(ctx)
|
||||
}
|
||||
@@ -1061,6 +1082,7 @@ mod tests {
|
||||
// RunArgs get cloned so that each job gets its own owned copy. If you need that, wrap it in
|
||||
// an Arc. Within a testing context, that efficiency is less important.
|
||||
type RunArgs = HashMap<Hash, Vec<FromJob>>;
|
||||
type Metrics = ();
|
||||
|
||||
const NAME: &'static str = "FakeCandidateSelectionJob";
|
||||
|
||||
@@ -1070,6 +1092,7 @@ mod tests {
|
||||
fn run(
|
||||
parent: Hash,
|
||||
mut run_args: Self::RunArgs,
|
||||
_metrics: Self::Metrics,
|
||||
receiver: mpsc::Receiver<ToJob>,
|
||||
mut sender: mpsc::Sender<FromJob>,
|
||||
) -> Pin<Box<dyn Future<Output = Result<(), Self::Error>> + Send>> {
|
||||
@@ -1121,7 +1144,7 @@ mod tests {
|
||||
let (context, overseer_handle) = make_subsystem_context(pool.clone());
|
||||
let (err_tx, err_rx) = mpsc::channel(16);
|
||||
|
||||
let subsystem = FakeCandidateSelectionSubsystem::run(context, run_args, pool, Some(err_tx));
|
||||
let subsystem = FakeCandidateSelectionSubsystem::run(context, run_args, (), pool, Some(err_tx));
|
||||
let test_future = test(overseer_handle, err_rx);
|
||||
let timeout = Delay::new(Duration::from_secs(2));
|
||||
|
||||
@@ -1196,7 +1219,7 @@ mod tests {
|
||||
let (context, _) = make_subsystem_context::<CandidateSelectionMessage, _>(pool.clone());
|
||||
|
||||
let SpawnedSubsystem { name, .. } =
|
||||
FakeCandidateSelectionSubsystem::new(pool, HashMap::new()).start(context);
|
||||
FakeCandidateSelectionSubsystem::new(pool, HashMap::new(), ()).start(context);
|
||||
assert_eq!(name, "FakeCandidateSelection");
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user