mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-11 15:21:08 +00:00
Jaeger spans for availability distribution (#2559)
* Logging functionality for spans. * Jaeger spans for availability distribution. * Fix instrumentation to use log target properly. * Add some tracing instrumentation macros. * Use int_tags instead of logs. * Add span per iteration. * Remove span::log functionality. * Fix instrumentation log target for real. * Add jaeger span to responding side as well. * Revert "Fix instrumentation log target for real." This reverts commit e1c2a2e6ff6f257e702f07d8a77c2668af92b0ef. * Revert "Fix instrumentation to use log target properly." This reverts commit 7caa0bd1acc6fe9727bb3a91851560d756c40ab8. * target -> subsystem in instrumentatio macro target is not correct either, and the correct way of using a top level target = LOG_TARGET does not work, as the macro expects a string literal and gets confused by the constant `LOG_TARGET`. * Use kebab-case for spa names. Co-authored-by: Andronik Ordian <write@reusable.software> Co-authored-by: Andronik Ordian <write@reusable.software>
This commit is contained in:
@@ -313,6 +313,14 @@ impl Span {
|
||||
}
|
||||
}
|
||||
|
||||
/// Add an additional int tag to the span.
|
||||
pub fn add_int_tag(&mut self, tag: &str, value: i64) {
|
||||
match self {
|
||||
Self::Enabled(ref mut inner) => inner.add_int_tag(tag, value),
|
||||
Self::Disabled => {},
|
||||
}
|
||||
}
|
||||
|
||||
/// Adds the `FollowsFrom` relationship to this span with respect to the given one.
|
||||
pub fn add_follows_from(&mut self, other: &Self) {
|
||||
match (self, other) {
|
||||
|
||||
@@ -33,7 +33,7 @@ use polkadot_primitives::v1::{
|
||||
use polkadot_subsystem::messages::{
|
||||
AllMessages, AvailabilityStoreMessage, NetworkBridgeMessage,
|
||||
};
|
||||
use polkadot_subsystem::SubsystemContext;
|
||||
use polkadot_subsystem::{SubsystemContext, jaeger};
|
||||
|
||||
use crate::{
|
||||
error::{Error, Result},
|
||||
@@ -119,6 +119,9 @@ struct RunningTask {
|
||||
|
||||
/// Prometheues metrics for reporting results.
|
||||
metrics: Metrics,
|
||||
|
||||
/// Span tracking the fetching of this chunk.
|
||||
span: jaeger::Span,
|
||||
}
|
||||
|
||||
impl FetchTaskConfig {
|
||||
@@ -142,6 +145,9 @@ impl FetchTaskConfig {
|
||||
};
|
||||
}
|
||||
|
||||
let mut span = jaeger::candidate_hash_span(&core.candidate_hash, "availability-distribution");
|
||||
span.add_stage(jaeger::Stage::AvailabilityDistribution);
|
||||
|
||||
let prepared_running = RunningTask {
|
||||
session_index: session_info.session_index,
|
||||
group_index: core.group_responsible,
|
||||
@@ -156,6 +162,7 @@ impl FetchTaskConfig {
|
||||
relay_parent: core.candidate_descriptor.relay_parent,
|
||||
metrics,
|
||||
sender,
|
||||
span,
|
||||
};
|
||||
FetchTaskConfig {
|
||||
live_in,
|
||||
@@ -168,6 +175,7 @@ impl FetchTask {
|
||||
/// Start fetching a chunk.
|
||||
///
|
||||
/// A task handling the fetching of the configured chunk will be spawned.
|
||||
#[tracing::instrument(level = "trace", skip(config, ctx), fields(subsystem = LOG_TARGET))]
|
||||
pub async fn start<Context>(config: FetchTaskConfig, ctx: &mut Context) -> Result<Self>
|
||||
where
|
||||
Context: SubsystemContext,
|
||||
@@ -240,6 +248,7 @@ enum TaskError {
|
||||
}
|
||||
|
||||
impl RunningTask {
|
||||
#[tracing::instrument(level = "trace", skip(self, kill), fields(subsystem = LOG_TARGET))]
|
||||
async fn run(self, kill: oneshot::Receiver<()>) {
|
||||
// Wait for completion/or cancel.
|
||||
let run_it = self.run_inner();
|
||||
@@ -254,8 +263,13 @@ impl RunningTask {
|
||||
let mut bad_validators = Vec::new();
|
||||
let mut label = FAILED;
|
||||
let mut count: u32 = 0;
|
||||
let mut _span = self.span.child_builder("fetch-task")
|
||||
.with_chunk_index(self.request.index.0)
|
||||
.with_relay_parent(&self.relay_parent)
|
||||
.build();
|
||||
// Try validators in reverse order:
|
||||
while let Some(validator) = self.group.pop() {
|
||||
let _try_span = _span.child("try");
|
||||
// Report retries:
|
||||
if count > 0 {
|
||||
self.metrics.on_retry();
|
||||
@@ -302,8 +316,10 @@ impl RunningTask {
|
||||
// Ok, let's store it and be happy:
|
||||
self.store_chunk(chunk).await;
|
||||
label = SUCCEEDED;
|
||||
_span.add_string_tag("success", "true");
|
||||
break;
|
||||
}
|
||||
_span.add_int_tag("tries", count as _);
|
||||
self.metrics.on_fetch(label);
|
||||
self.conclude(bad_validators).await;
|
||||
}
|
||||
|
||||
@@ -291,6 +291,7 @@ fn get_test_running_task() -> (RunningTask, mpsc::Receiver<FromFetchTask>) {
|
||||
relay_parent: Hash::repeat_byte(71),
|
||||
sender: tx,
|
||||
metrics: Metrics::new_dummy(),
|
||||
span: jaeger::Span::Disabled,
|
||||
},
|
||||
rx
|
||||
)
|
||||
|
||||
@@ -74,6 +74,7 @@ impl Requester {
|
||||
///
|
||||
/// You must feed it with `ActiveLeavesUpdate` via `update_fetching_heads` and make it progress
|
||||
/// by advancing the stream.
|
||||
#[tracing::instrument(level = "trace", skip(keystore, metrics), fields(subsystem = LOG_TARGET))]
|
||||
pub fn new(keystore: SyncCryptoStorePtr, metrics: Metrics) -> Self {
|
||||
// All we do is forwarding messages, no need to make this big.
|
||||
// Each sender will get one slot, see
|
||||
@@ -90,6 +91,7 @@ impl Requester {
|
||||
/// Update heads that need availability distribution.
|
||||
///
|
||||
/// For all active heads we will be fetching our chunks for availabilty distribution.
|
||||
#[tracing::instrument(level = "trace", skip(self, ctx, update), fields(subsystem = LOG_TARGET))]
|
||||
pub async fn update_fetching_heads<Context>(
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
|
||||
@@ -22,7 +22,7 @@ use polkadot_node_network_protocol::request_response::{request::IncomingRequest,
|
||||
use polkadot_primitives::v1::{CandidateHash, ErasureChunk, ValidatorIndex};
|
||||
use polkadot_subsystem::{
|
||||
messages::{AllMessages, AvailabilityStoreMessage},
|
||||
SubsystemContext,
|
||||
SubsystemContext, jaeger,
|
||||
};
|
||||
|
||||
use crate::error::{Error, Result};
|
||||
@@ -64,6 +64,12 @@ pub async fn answer_request<Context>(
|
||||
where
|
||||
Context: SubsystemContext,
|
||||
{
|
||||
let mut span = jaeger::candidate_hash_span(&req.payload.candidate_hash, "answer-request");
|
||||
span.add_stage(jaeger::Stage::AvailabilityDistribution);
|
||||
let _child_span = span.child_builder("answer-chunk-request")
|
||||
.with_chunk_index(req.payload.index.0)
|
||||
.build();
|
||||
|
||||
let chunk = query_chunk(ctx, req.payload.candidate_hash, req.payload.index).await?;
|
||||
|
||||
let result = chunk.is_some();
|
||||
|
||||
@@ -116,6 +116,7 @@ impl SessionCache {
|
||||
///
|
||||
/// Use this function over any `fetch_session_info` if all you need is a reference to
|
||||
/// `SessionInfo`, as it avoids an expensive clone.
|
||||
#[tracing::instrument(level = "trace", skip(self, ctx, with_info), fields(subsystem = LOG_TARGET))]
|
||||
pub async fn with_session_info<Context, F, R>(
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
|
||||
Reference in New Issue
Block a user