Add Prometheus timers to the subsystems (#1923)

* reexport prometheus-super for ease of use of other subsystems

* add some prometheus timers for collation generation subsystem

* add timing metrics to av-store

* add metrics to candidate backing

* add timing metric to bitfield signing

* add timing metrics to candidate selection

* add timing metrics to candidate-validation

* add timing metrics to chain-api

* add timing metrics to provisioner

* add timing metrics to runtime-api

* add timing metrics to availability-distribution

* add timing metrics to bitfield-distribution

* add timing metrics to collator protocol: collator side

* add timing metrics to collator protocol: validator side

* fix candidate validation test failures

* add timing metrics to pov distribution

* add timing metrics to statement-distribution

* use substrate_prometheus_endpoint prometheus reexport instead of prometheus_super

* don't include JOB_DELAY in bitfield-signing metrics

* give adder-collator ability to easily export its genesis-state and validation code

* wip: adder-collator pushbutton script

* don't attempt to register the adder-collator automatically

Instead, get these values with

```sh
target/release/adder-collator export-genesis-state
target/release/adder-collator export-genesis-wasm
```

And then register the parachain on https://polkadot.js.org/apps/?rpc=ws%3A%2F%2F127.0.0.1%3A9944#/explorer

To collect prometheus data, after running the script, create `prometheus.yml` per the instructions
at https://www.notion.so/paritytechnologies/Setting-up-Prometheus-locally-835cb3a9df7541a781c381006252b5ff
and then run:

```sh
docker run -v `pwd`/prometheus.yml:/etc/prometheus/prometheus.yml:z --network host prom/prometheus
```

Demonstrates that data makes it across to prometheus, though it is likely to be useful in the future
to tweak the buckets.

* Update parachain/test-parachains/adder/collator/src/cli.rs

Co-authored-by: Andronik Ordian <write@reusable.software>

* use the grandpa-pause parameter

* skip metrics in tracing instrumentation

* remove unnecessary grandpa_pause cli param

Co-authored-by: Andronik Ordian <write@reusable.software>
This commit is contained in:
Peter Goodspeed-Niklaus
2020-11-20 15:04:51 +01:00
committed by GitHub
parent e49989971d
commit 0a5bc82529
23 changed files with 1199 additions and 87 deletions
@@ -103,12 +103,15 @@ async fn run(
pov,
response_sender,
) => {
let _timer = metrics.time_validate_from_chain_state();
let res = spawn_validate_from_chain_state(
&mut ctx,
isolation_strategy.clone(),
descriptor,
pov,
spawn.clone(),
&metrics,
).await;
match res {
@@ -126,6 +129,8 @@ async fn run(
pov,
response_sender,
) => {
let _timer = metrics.time_validate_from_exhaustive();
let res = spawn_validate_exhaustive(
&mut ctx,
isolation_strategy.clone(),
@@ -134,6 +139,7 @@ async fn run(
descriptor,
pov,
spawn.clone(),
&metrics,
).await;
match res {
@@ -260,13 +266,14 @@ async fn find_assumed_validation_data(
Ok(AssumptionCheckOutcome::DoesNotMatch)
}
#[tracing::instrument(level = "trace", skip(ctx, pov, spawn), fields(subsystem = LOG_TARGET))]
#[tracing::instrument(level = "trace", skip(ctx, pov, spawn, metrics), fields(subsystem = LOG_TARGET))]
async fn spawn_validate_from_chain_state(
ctx: &mut impl SubsystemContext<Message = CandidateValidationMessage>,
isolation_strategy: IsolationStrategy,
descriptor: CandidateDescriptor,
pov: Arc<PoV>,
spawn: impl SpawnNamed + 'static,
metrics: &Metrics,
) -> SubsystemResult<Result<ValidationResult, ValidationFailed>> {
let (validation_data, validation_code) =
match find_assumed_validation_data(ctx, &descriptor).await? {
@@ -292,6 +299,7 @@ async fn spawn_validate_from_chain_state(
descriptor.clone(),
pov,
spawn,
metrics,
)
.await;
@@ -320,7 +328,7 @@ async fn spawn_validate_from_chain_state(
validation_result
}
#[tracing::instrument(level = "trace", skip(ctx, validation_code, pov, spawn), fields(subsystem = LOG_TARGET))]
#[tracing::instrument(level = "trace", skip(ctx, validation_code, pov, spawn, metrics), fields(subsystem = LOG_TARGET))]
async fn spawn_validate_exhaustive(
ctx: &mut impl SubsystemContext<Message = CandidateValidationMessage>,
isolation_strategy: IsolationStrategy,
@@ -329,8 +337,10 @@ async fn spawn_validate_exhaustive(
descriptor: CandidateDescriptor,
pov: Arc<PoV>,
spawn: impl SpawnNamed + 'static,
metrics: &Metrics,
) -> SubsystemResult<Result<ValidationResult, ValidationFailed>> {
let (tx, rx) = oneshot::channel();
let metrics = metrics.clone();
let fut = async move {
let res = validate_candidate_exhaustive::<RealValidationBackend, _>(
isolation_strategy,
@@ -339,6 +349,7 @@ async fn spawn_validate_exhaustive(
descriptor,
pov,
spawn,
&metrics,
);
let _ = tx.send(res);
@@ -408,7 +419,7 @@ impl ValidationBackend for RealValidationBackend {
/// Validates the candidate from exhaustive parameters.
///
/// Sends the result of validation on the channel once complete.
#[tracing::instrument(level = "trace", skip(backend_arg, validation_code, pov, spawn), fields(subsystem = LOG_TARGET))]
#[tracing::instrument(level = "trace", skip(backend_arg, validation_code, pov, spawn, metrics), fields(subsystem = LOG_TARGET))]
fn validate_candidate_exhaustive<B: ValidationBackend, S: SpawnNamed + 'static>(
backend_arg: B::Arg,
persisted_validation_data: PersistedValidationData,
@@ -416,7 +427,10 @@ fn validate_candidate_exhaustive<B: ValidationBackend, S: SpawnNamed + 'static>(
descriptor: CandidateDescriptor,
pov: Arc<PoV>,
spawn: S,
metrics: &Metrics,
) -> Result<ValidationResult, ValidationFailed> {
let _timer = metrics.time_validate_candidate_exhaustive();
if let Err(e) = perform_basic_checks(&descriptor, persisted_validation_data.max_pov_size, &*pov) {
return Ok(ValidationResult::Invalid(e))
}
@@ -460,6 +474,9 @@ fn validate_candidate_exhaustive<B: ValidationBackend, S: SpawnNamed + 'static>(
#[derive(Clone)]
struct MetricsInner {
validation_requests: prometheus::CounterVec<prometheus::U64>,
validate_from_chain_state: prometheus::Histogram,
validate_from_exhaustive: prometheus::Histogram,
validate_candidate_exhaustive: prometheus::Histogram,
}
/// Candidate validation metrics.
@@ -482,6 +499,21 @@ impl Metrics {
}
}
}
/// Provide a timer for `validate_from_chain_state` which observes on drop.
fn time_validate_from_chain_state(&self) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
self.0.as_ref().map(|metrics| metrics.validate_from_chain_state.start_timer())
}
/// Provide a timer for `validate_from_exhaustive` which observes on drop.
fn time_validate_from_exhaustive(&self) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
self.0.as_ref().map(|metrics| metrics.validate_from_exhaustive.start_timer())
}
/// Provide a timer for `validate_candidate_exhaustive` which observes on drop.
fn time_validate_candidate_exhaustive(&self) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
self.0.as_ref().map(|metrics| metrics.validate_candidate_exhaustive.start_timer())
}
}
impl metrics::Metrics for Metrics {
@@ -497,6 +529,33 @@ impl metrics::Metrics for Metrics {
)?,
registry,
)?,
validate_from_chain_state: prometheus::register(
prometheus::Histogram::with_opts(
prometheus::HistogramOpts::new(
"parachain_candidate_validation_validate_from_chain_state",
"Time spent within `candidate_validation::validate_from_chain_state`",
)
)?,
registry,
)?,
validate_from_exhaustive: prometheus::register(
prometheus::Histogram::with_opts(
prometheus::HistogramOpts::new(
"parachain_candidate_validation_validate_from_exhaustive",
"Time spent within `candidate_validation::validate_from_exhaustive`",
)
)?,
registry,
)?,
validate_candidate_exhaustive: prometheus::register(
prometheus::Histogram::with_opts(
prometheus::HistogramOpts::new(
"parachain_candidate_validation_validate_candidate_exhaustive",
"Time spent within `candidate_validation::validate_candidate_exhaustive`",
)
)?,
registry,
)?,
};
Ok(Metrics(Some(metrics)))
}
@@ -850,6 +909,7 @@ mod tests {
descriptor,
Arc::new(pov),
TaskExecutor::new(),
&Default::default(),
).unwrap();
assert_matches!(v, ValidationResult::Valid(outputs, used_validation_data) => {
@@ -885,6 +945,7 @@ mod tests {
descriptor,
Arc::new(pov),
TaskExecutor::new(),
&Default::default(),
).unwrap();
assert_matches!(v, ValidationResult::Invalid(InvalidCandidate::BadReturn));
@@ -913,6 +974,7 @@ mod tests {
descriptor,
Arc::new(pov),
TaskExecutor::new(),
&Default::default(),
);
assert_matches!(v, Ok(ValidationResult::Invalid(InvalidCandidate::Timeout)));