mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-05-10 01:47:58 +00:00
Fix telemetry span not entering properly attempt 3 (#8043)
* Fix tracing tests (#8022) * Fix tracing tests The tests were not working properly. 1. Some test was setting a global subscriber, this could lead to racy conditions with other tests. 2. A logging test called `process::exit` which is completly wrong. * Update client/tracing/src/lib.rs Co-authored-by: David <dvdplm@gmail.com> * Review comments Co-authored-by: David <dvdplm@gmail.com> * Fix tracing spans are not being forwarded to spawned task (#8009) * Fix tracing spans are not being forwarded to spawned task There is a bug that tracing spans are not forwarded to spawned task. The problem was that only the telemetry span was forwarded. The solution to this is to use the tracing provided `in_current_span` to capture the current active span and pass the telemetry span explictely. We will now always enter the span when the future is polled. This is essentially the same strategy as tracing is doing with its `Instrumented`, but now extended for our use case with having multiple spans active. * More tests * Proper test for telemetry and prefix span * WIP * Fix test (need to create & enter the span at the same time) * WIP * Remove telemtry_span from sc_service config * CLEANUP * Update comment * Incorrect indent * More meaningful name * Dedent * Naming XD * Attempt to make a more complete test * lint * Missing licenses * Remove user data * CLEANUP * Apply suggestions from code review Co-authored-by: Bastian Köcher <bkchr@users.noreply.github.com> * CLEANUP * Apply suggestion * Update bin/node/cli/tests/telemetry.rs Co-authored-by: David <dvdplm@gmail.com> * Wrapping lines Co-authored-by: Bastian Köcher <bkchr@users.noreply.github.com> Co-authored-by: David <dvdplm@gmail.com>
This commit is contained in:
@@ -24,7 +24,7 @@ use log::{debug, error};
|
||||
use futures::{
|
||||
Future, FutureExt, StreamExt,
|
||||
future::{select, Either, BoxFuture, join_all, try_join_all, pending},
|
||||
sink::SinkExt, task::{Context, Poll},
|
||||
sink::SinkExt,
|
||||
};
|
||||
use prometheus_endpoint::{
|
||||
exponential_buckets, register,
|
||||
@@ -34,43 +34,11 @@ use prometheus_endpoint::{
|
||||
use sp_utils::mpsc::{TracingUnboundedSender, TracingUnboundedReceiver, tracing_unbounded};
|
||||
use tracing_futures::Instrument;
|
||||
use crate::{config::{TaskExecutor, TaskType, JoinFuture}, Error};
|
||||
use sc_telemetry::TelemetrySpan;
|
||||
|
||||
mod prometheus_future;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
/// A wrapper around a `[Option<TelemetrySpan>]` and a [`Future`].
|
||||
///
|
||||
/// The telemetry in Substrate uses a span to identify the telemetry context. The span "infrastructure"
|
||||
/// is provided by the tracing-crate. Now it is possible to have your own spans as well. To support
|
||||
/// this with the [`TaskManager`] we have this wrapper. This wrapper enters the telemetry span every
|
||||
/// time the future is polled and polls the inner future. So, the inner future can still have its
|
||||
/// own span attached and we get our telemetry span ;)
|
||||
struct WithTelemetrySpan<T> {
|
||||
span: Option<TelemetrySpan>,
|
||||
inner: T,
|
||||
}
|
||||
|
||||
impl<T> WithTelemetrySpan<T> {
|
||||
fn new(span: Option<TelemetrySpan>, inner: T) -> Self {
|
||||
Self {
|
||||
span,
|
||||
inner,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Future<Output = ()> + Unpin> Future for WithTelemetrySpan<T> {
|
||||
type Output = ();
|
||||
|
||||
fn poll(mut self: Pin<&mut Self>, ctx: &mut Context) -> Poll<Self::Output> {
|
||||
let span = self.span.clone();
|
||||
let _enter = span.as_ref().map(|s| s.enter());
|
||||
Pin::new(&mut self.inner).poll(ctx)
|
||||
}
|
||||
}
|
||||
|
||||
/// An handle for spawning tasks in the service.
|
||||
#[derive(Clone)]
|
||||
pub struct SpawnTaskHandle {
|
||||
@@ -78,7 +46,6 @@ pub struct SpawnTaskHandle {
|
||||
executor: TaskExecutor,
|
||||
metrics: Option<Metrics>,
|
||||
task_notifier: TracingUnboundedSender<JoinFuture>,
|
||||
telemetry_span: Option<TelemetrySpan>,
|
||||
}
|
||||
|
||||
impl SpawnTaskHandle {
|
||||
@@ -155,11 +122,7 @@ impl SpawnTaskHandle {
|
||||
}
|
||||
};
|
||||
|
||||
let future = future.in_current_span().boxed();
|
||||
let join_handle = self.executor.spawn(
|
||||
WithTelemetrySpan::new(self.telemetry_span.clone(), future).boxed(),
|
||||
task_type,
|
||||
);
|
||||
let join_handle = self.executor.spawn(future.in_current_span().boxed(), task_type);
|
||||
|
||||
let mut task_notifier = self.task_notifier.clone();
|
||||
self.executor.spawn(
|
||||
@@ -266,8 +229,6 @@ pub struct TaskManager {
|
||||
/// terminates and gracefully shutdown. Also ends the parent `future()` if a child's essential
|
||||
/// task fails.
|
||||
children: Vec<TaskManager>,
|
||||
/// A `TelemetrySpan` used to enter the telemetry span when a task is spawned.
|
||||
telemetry_span: Option<TelemetrySpan>,
|
||||
}
|
||||
|
||||
impl TaskManager {
|
||||
@@ -276,7 +237,6 @@ impl TaskManager {
|
||||
pub(super) fn new(
|
||||
executor: TaskExecutor,
|
||||
prometheus_registry: Option<&Registry>,
|
||||
telemetry_span: Option<TelemetrySpan>,
|
||||
) -> Result<Self, PrometheusError> {
|
||||
let (signal, on_exit) = exit_future::signal();
|
||||
|
||||
@@ -305,7 +265,6 @@ impl TaskManager {
|
||||
task_notifier,
|
||||
completion_future,
|
||||
children: Vec::new(),
|
||||
telemetry_span,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -316,7 +275,6 @@ impl TaskManager {
|
||||
executor: self.executor.clone(),
|
||||
metrics: self.metrics.clone(),
|
||||
task_notifier: self.task_notifier.clone(),
|
||||
telemetry_span: self.telemetry_span.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -20,10 +20,14 @@ use crate::config::TaskExecutor;
|
||||
use crate::task_manager::TaskManager;
|
||||
use futures::{future::FutureExt, pin_mut, select};
|
||||
use parking_lot::Mutex;
|
||||
use std::{any::Any, sync::Arc, time::Duration};
|
||||
use tracing_subscriber::{layer::{SubscriberExt, Context}, Layer};
|
||||
use tracing::{subscriber::Subscriber, span::{Attributes, Id, Record, Span}, event::Event};
|
||||
use sc_telemetry::TelemetrySpan;
|
||||
use std::{any::Any, env, sync::Arc, time::Duration};
|
||||
use tracing::{event::Event, span::Id, subscriber::Subscriber};
|
||||
use tracing_subscriber::{
|
||||
layer::{Context, SubscriberExt},
|
||||
registry::LookupSpan,
|
||||
Layer,
|
||||
};
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct DropTester(Arc<Mutex<usize>>);
|
||||
@@ -83,7 +87,7 @@ async fn run_background_task_blocking(duration: Duration, _keep_alive: impl Any)
|
||||
}
|
||||
|
||||
fn new_task_manager(task_executor: TaskExecutor) -> TaskManager {
|
||||
TaskManager::new(task_executor, None, None).unwrap()
|
||||
TaskManager::new(task_executor, None).unwrap()
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -315,92 +319,92 @@ fn ensure_task_manager_future_continues_when_childs_not_essential_task_fails() {
|
||||
}
|
||||
|
||||
struct TestLayer {
|
||||
spans_entered: Arc<Mutex<Vec<String>>>,
|
||||
spans: Arc<Mutex<std::collections::HashMap<Id, String>>>,
|
||||
spans_found: Arc<Mutex<Option<Vec<Id>>>>,
|
||||
}
|
||||
|
||||
impl<S: Subscriber> Layer<S> for TestLayer {
|
||||
fn new_span(&self, attrs: &Attributes<'_>, id: &Id, _ctx: Context<S>) {
|
||||
self.spans.lock().insert(id.clone(), attrs.metadata().name().to_string());
|
||||
impl<S> Layer<S> for TestLayer
|
||||
where
|
||||
S: Subscriber + for<'a> LookupSpan<'a>,
|
||||
{
|
||||
fn on_event(&self, _: &Event<'_>, ctx: Context<S>) {
|
||||
let mut spans_found = self.spans_found.lock();
|
||||
|
||||
if spans_found.is_some() {
|
||||
panic!("on_event called multiple times");
|
||||
}
|
||||
|
||||
*spans_found = Some(ctx.scope().map(|x| x.id()).collect());
|
||||
}
|
||||
|
||||
fn on_record(&self, _: &Id, _: &Record<'_>, _: Context<S>) {}
|
||||
|
||||
fn on_event(&self, _: &Event<'_>, _: Context<S>) {}
|
||||
|
||||
fn on_enter(&self, span: &Id, _: Context<S>) {
|
||||
let name = self.spans.lock().get(span).unwrap().clone();
|
||||
self.spans_entered.lock().push(name);
|
||||
}
|
||||
|
||||
fn on_exit(&self, _: &Id, _: Context<S>) {}
|
||||
|
||||
fn on_close(&self, _: Id, _: Context<S>) {}
|
||||
}
|
||||
|
||||
type TestSubscriber = tracing_subscriber::layer::Layered<
|
||||
TestLayer,
|
||||
tracing_subscriber::fmt::Subscriber
|
||||
>;
|
||||
|
||||
fn setup_subscriber() -> (
|
||||
TestSubscriber,
|
||||
Arc<Mutex<Vec<String>>>,
|
||||
impl Subscriber + for<'a> LookupSpan<'a>,
|
||||
Arc<Mutex<Option<Vec<Id>>>>,
|
||||
) {
|
||||
let spans_entered = Arc::new(Mutex::new(Default::default()));
|
||||
let spans_found = Arc::new(Mutex::new(Default::default()));
|
||||
let layer = TestLayer {
|
||||
spans: Arc::new(Mutex::new(Default::default())),
|
||||
spans_entered: spans_entered.clone(),
|
||||
spans_found: spans_found.clone(),
|
||||
};
|
||||
let subscriber = tracing_subscriber::fmt().finish().with(layer);
|
||||
(subscriber, spans_entered)
|
||||
(subscriber, spans_found)
|
||||
}
|
||||
|
||||
/// This is not an actual test, it is used by the `telemetry_span_is_forwarded_to_task` test.
|
||||
/// The given test will call the test executable and only execute this one test that
|
||||
/// test that the telemetry span and the prefix span are forwarded correctly. This needs to be done
|
||||
/// in a separate process to avoid interfering with the other tests.
|
||||
#[test]
|
||||
fn subprocess_telemetry_span_is_forwarded_to_task() {
|
||||
if env::var("SUBPROCESS_TEST").is_err() {
|
||||
return;
|
||||
}
|
||||
|
||||
let (subscriber, spans_found) = setup_subscriber();
|
||||
tracing_log::LogTracer::init().unwrap();
|
||||
let _sub_guard = tracing::subscriber::set_global_default(subscriber);
|
||||
|
||||
let mut runtime = tokio::runtime::Runtime::new().unwrap();
|
||||
|
||||
let prefix_span = tracing::info_span!("prefix");
|
||||
let _enter_prefix_span = prefix_span.enter();
|
||||
|
||||
let telemetry_span = TelemetrySpan::new();
|
||||
let _enter_telemetry_span = telemetry_span.enter();
|
||||
|
||||
let handle = runtime.handle().clone();
|
||||
let task_executor = TaskExecutor::from(move |fut, _| handle.spawn(fut).map(|_| ()));
|
||||
let task_manager = new_task_manager(task_executor);
|
||||
|
||||
let (sender, receiver) = futures::channel::oneshot::channel();
|
||||
|
||||
task_manager.spawn_handle().spawn(
|
||||
"log-something",
|
||||
async move {
|
||||
log::info!("boo!");
|
||||
sender.send(()).unwrap();
|
||||
}
|
||||
.boxed(),
|
||||
);
|
||||
|
||||
runtime.block_on(receiver).unwrap();
|
||||
runtime.block_on(task_manager.clean_shutdown());
|
||||
|
||||
let spans = spans_found.lock().take().unwrap();
|
||||
assert_eq!(2, spans.len());
|
||||
|
||||
assert_eq!(spans[0], prefix_span.id().unwrap());
|
||||
assert_eq!(spans[1], telemetry_span.span().id().unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn telemetry_span_is_forwarded_to_task() {
|
||||
let (subscriber, spans_entered) = setup_subscriber();
|
||||
let _sub_guard = tracing::subscriber::set_global_default(subscriber);
|
||||
|
||||
let telemetry_span = TelemetrySpan::new();
|
||||
|
||||
let span = tracing::info_span!("test");
|
||||
let _enter = span.enter();
|
||||
|
||||
let mut runtime = tokio::runtime::Runtime::new().unwrap();
|
||||
let handle = runtime.handle().clone();
|
||||
let task_executor = TaskExecutor::from(move |fut, _| handle.spawn(fut).map(|_| ()));
|
||||
let task_manager = TaskManager::new(task_executor, None, Some(telemetry_span.clone())).unwrap();
|
||||
|
||||
let (sender, receiver) = futures::channel::oneshot::channel();
|
||||
let spawn_handle = task_manager.spawn_handle();
|
||||
|
||||
let span = span.clone();
|
||||
task_manager.spawn_handle().spawn(
|
||||
"test",
|
||||
async move {
|
||||
assert_eq!(span, Span::current());
|
||||
spawn_handle.spawn("test-nested", async move {
|
||||
assert_eq!(span, Span::current());
|
||||
sender.send(()).unwrap();
|
||||
}.boxed());
|
||||
}.boxed(),
|
||||
);
|
||||
|
||||
// We need to leave exit the span here. If tokio is not running with multithreading, this
|
||||
// would lead to duplicate spans being "active" and forwarding the wrong one.
|
||||
drop(_enter);
|
||||
runtime.block_on(receiver).unwrap();
|
||||
runtime.block_on(task_manager.clean_shutdown());
|
||||
drop(runtime);
|
||||
|
||||
let spans = spans_entered.lock();
|
||||
// We entered the telemetry span and the "test" in the future, the nested future and
|
||||
// the "test" span outside of the future. So, we should have recorded 3 spans.
|
||||
assert_eq!(5, spans.len());
|
||||
|
||||
assert_eq!(spans[0], "test");
|
||||
assert_eq!(spans[1], telemetry_span.span().metadata().unwrap().name());
|
||||
assert_eq!(spans[2], "test");
|
||||
assert_eq!(spans[3], telemetry_span.span().metadata().unwrap().name());
|
||||
assert_eq!(spans[4], "test");
|
||||
let executable = env::current_exe().unwrap();
|
||||
let output = std::process::Command::new(executable)
|
||||
.env("SUBPROCESS_TEST", "1")
|
||||
.args(&["--nocapture", "subprocess_telemetry_span_is_forwarded_to_task"])
|
||||
.output()
|
||||
.unwrap();
|
||||
println!("{}", String::from_utf8(output.stdout).unwrap());
|
||||
eprintln!("{}", String::from_utf8(output.stderr).unwrap());
|
||||
assert!(output.status.success());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user