Add diagnostics to tasks (#4752)

This commit is contained in:
Pierre Krieger
2020-01-29 11:46:39 +01:00
committed by GitHub
parent 34c1c4b954
commit b452867eb7
10 changed files with 117 additions and 52 deletions
+20 -1
View File
@@ -1513,6 +1513,21 @@ dependencies = [
"num_cpus 1.11.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "futures-diagnose"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"futures 0.1.29 (registry+https://github.com/rust-lang/crates.io-index)",
"futures 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)",
"parking_lot 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
"pin-project 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.104 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.44 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "futures-executor"
version = "0.3.1"
@@ -5835,6 +5850,7 @@ dependencies = [
"exit-future 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"futures 0.1.29 (registry+https://github.com/rust-lang/crates.io-index)",
"futures 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"futures-diagnose 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"futures-timer 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"grafana-data-source 0.8.0",
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -5969,6 +5985,7 @@ version = "2.0.0"
dependencies = [
"derive_more 0.99.2 (registry+https://github.com/rust-lang/crates.io-index)",
"futures 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"futures-diagnose 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)",
"parity-scale-codec 1.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"parking_lot 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -6415,6 +6432,7 @@ version = "0.8.0"
dependencies = [
"derive_more 0.99.2 (registry+https://github.com/rust-lang/crates.io-index)",
"futures 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"futures-diagnose 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"futures-timer 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"libp2p 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -7700,7 +7718,7 @@ name = "twox-hash"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@@ -8528,6 +8546,7 @@ dependencies = [
"checksum futures-core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "79564c427afefab1dfb3298535b21eda083ef7935b4f0ecbfcb121f0aec10866"
"checksum futures-core-preview 0.3.0-alpha.19 (registry+https://github.com/rust-lang/crates.io-index)" = "b35b6263fb1ef523c3056565fa67b1d16f0a8604ff12b11b08c25f28a734c60a"
"checksum futures-cpupool 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "ab90cde24b3319636588d0c35fe03b1333857621051837ed769faefb4c2162e4"
"checksum futures-diagnose 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ebbb8371dd6ee87aa2aeaa8458a372fd82fe216032387b766255754c92dd7271"
"checksum futures-executor 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1e274736563f686a837a0568b478bdabfeaec2dca794b5649b04e2fe1627c231"
"checksum futures-io 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e676577d229e70952ab25f3945795ba5b16d63ca794ca9d2c860e5595d20b5ff"
"checksum futures-macro 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "52e7c56c15537adb4f76d0b7a76ad131cb4d2f4f32d3b0bcabcbe1c7c5e87764"
+3 -3
View File
@@ -131,7 +131,7 @@ pub fn new_full<C: Send + Default + 'static>(config: Configuration<C, GenesisCon
// the AURA authoring task is considered essential, i.e. if it
// fails we take down the service with it.
service.spawn_essential_task(aura);
service.spawn_essential_task("aura", aura);
}
// if the node isn't actively participating in consensus then it doesn't
@@ -155,7 +155,7 @@ pub fn new_full<C: Send + Default + 'static>(config: Configuration<C, GenesisCon
match (is_authority, disable_grandpa) {
(false, false) => {
// start the lightweight GRANDPA observer
service.spawn_task(grandpa::run_grandpa_observer(
service.spawn_task("grandpa-observer", grandpa::run_grandpa_observer(
grandpa_config,
grandpa_link,
service.network(),
@@ -178,7 +178,7 @@ pub fn new_full<C: Send + Default + 'static>(config: Configuration<C, GenesisCon
// the GRANDPA voter task is considered infallible, i.e.
// if it fails we take down the service with it.
service.spawn_essential_task(grandpa::run_grandpa_voter(voter_config)?);
service.spawn_essential_task("grandpa", grandpa::run_grandpa_voter(voter_config)?);
},
(_, true) => {
grandpa::setup_disabled_grandpa(
+4 -3
View File
@@ -172,7 +172,7 @@ macro_rules! new_full {
};
let babe = sc_consensus_babe::start_babe(babe_config)?;
service.spawn_essential_task(babe);
service.spawn_essential_task("babe-proposer", babe);
let network = service.network();
let dht_event_stream = network.event_stream().filter_map(|e| async move { match e {
@@ -187,7 +187,7 @@ macro_rules! new_full {
dht_event_stream,
);
service.spawn_task(authority_discovery);
service.spawn_task("authority-discovery", authority_discovery);
}
// if the node isn't actively participating in consensus then it doesn't
@@ -211,7 +211,7 @@ macro_rules! new_full {
match (is_authority, disable_grandpa) {
(false, false) => {
// start the lightweight GRANDPA observer
service.spawn_task(grandpa::run_grandpa_observer(
service.spawn_task("grandpa-observer", grandpa::run_grandpa_observer(
config,
grandpa_link,
service.network(),
@@ -234,6 +234,7 @@ macro_rules! new_full {
// the GRANDPA voter task is considered infallible, i.e.
// if it fails we take down the service with it.
service.spawn_essential_task(
"grandpa-voter",
grandpa::run_grandpa_voter(grandpa_config)?
);
},
+1
View File
@@ -17,6 +17,7 @@ wasmtime = [
derive_more = "0.99.2"
futures01 = { package = "futures", version = "0.1.29" }
futures = "0.3.1"
futures-diagnose = "1.0"
parking_lot = "0.9.0"
lazy_static = "1.4.0"
log = "0.4.8"
+42 -20
View File
@@ -44,6 +44,7 @@ use sp_runtime::traits::{
use sp_api::ProvideRuntimeApi;
use sc_executor::{NativeExecutor, NativeExecutionDispatch};
use std::{
borrow::Cow,
io::{Read, Write, Seek},
marker::PhantomData, sync::Arc, time::SystemTime, pin::Pin
};
@@ -791,7 +792,7 @@ ServiceBuilder<
// List of asynchronous tasks to spawn. We collect them, then spawn them all at once.
let (to_spawn_tx, to_spawn_rx) =
mpsc::unbounded::<Pin<Box<dyn Future<Output = ()> + Send>>>();
mpsc::unbounded::<(Pin<Box<dyn Future<Output = ()> + Send>>, Cow<'static, str>)>();
// A side-channel for essential tasks to communicate shutdown.
let (essential_failed_tx, essential_failed_rx) = mpsc::unbounded();
@@ -816,7 +817,7 @@ ServiceBuilder<
imports_external_transactions: !config.roles.is_light(),
pool: transaction_pool.clone(),
client: client.clone(),
executor: Arc::new(SpawnTaskHandle { sender: to_spawn_tx.clone(), on_exit: exit.clone() }),
executor: SpawnTaskHandle { sender: to_spawn_tx.clone(), on_exit: exit.clone() },
});
let protocol_id = {
@@ -840,7 +841,7 @@ ServiceBuilder<
executor: {
let to_spawn_tx = to_spawn_tx.clone();
Some(Box::new(move |fut| {
if let Err(e) = to_spawn_tx.unbounded_send(fut) {
if let Err(e) = to_spawn_tx.unbounded_send((fut, From::from("libp2p-node"))) {
error!("Failed to spawn libp2p background task: {:?}", e);
}
}))
@@ -891,7 +892,10 @@ ServiceBuilder<
&BlockId::hash(notification.hash),
&notification.retracted,
);
let _ = to_spawn_tx_.unbounded_send(Box::pin(future));
let _ = to_spawn_tx_.unbounded_send((
Box::pin(future),
From::from("txpool-maintain")
));
}
let offchain = offchain.as_ref().and_then(|o| o.upgrade());
@@ -901,12 +905,18 @@ ServiceBuilder<
network_state_info.clone(),
is_validator
);
let _ = to_spawn_tx_.unbounded_send(Box::pin(future));
let _ = to_spawn_tx_.unbounded_send((
Box::pin(future),
From::from("offchain-on-block")
));
}
ready(())
});
let _ = to_spawn_tx.unbounded_send(Box::pin(select(events, exit.clone()).map(drop)));
let _ = to_spawn_tx.unbounded_send((
Box::pin(select(events, exit.clone()).map(drop)),
From::from("txpool-and-offchain-notif")
));
}
{
@@ -926,7 +936,10 @@ ServiceBuilder<
ready(())
});
let _ = to_spawn_tx.unbounded_send(Box::pin(select(events, exit.clone()).map(drop)));
let _ = to_spawn_tx.unbounded_send((
Box::pin(select(events, exit.clone()).map(drop)),
From::from("telemetry-on-block")
));
}
// Periodically notify the telemetry.
@@ -990,7 +1003,10 @@ ServiceBuilder<
ready(())
});
let _ = to_spawn_tx.unbounded_send(Box::pin(select(tel_task, exit.clone()).map(drop)));
let _ = to_spawn_tx.unbounded_send((
Box::pin(select(tel_task, exit.clone()).map(drop)),
From::from("telemetry-periodic-send")
));
// Periodically send the network state to the telemetry.
let (netstat_tx, netstat_rx) = mpsc::unbounded::<(NetworkStatus<_>, NetworkState)>();
@@ -1003,7 +1019,10 @@ ServiceBuilder<
);
ready(())
});
let _ = to_spawn_tx.unbounded_send(Box::pin(select(tel_task_2, exit.clone()).map(drop)));
let _ = to_spawn_tx.unbounded_send((
Box::pin(select(tel_task_2, exit.clone()).map(drop)),
From::from("telemetry-periodic-network-state")
));
// RPC
let (system_rpc_tx, system_rpc_rx) = mpsc::unbounded();
@@ -1079,14 +1098,17 @@ ServiceBuilder<
let rpc = start_rpc_servers(&config, gen_handler)?;
let _ = to_spawn_tx.unbounded_send(Box::pin(select(build_network_future(
config.roles,
network_mut,
client.clone(),
network_status_sinks.clone(),
system_rpc_rx,
has_bootnodes,
), exit.clone()).map(drop)));
let _ = to_spawn_tx.unbounded_send((
Box::pin(select(build_network_future(
config.roles,
network_mut,
client.clone(),
network_status_sinks.clone(),
system_rpc_rx,
has_bootnodes,
), exit.clone()).map(drop)),
From::from("network-worker")
));
let telemetry_connection_sinks: Arc<Mutex<Vec<futures::channel::mpsc::UnboundedSender<()>>>> = Default::default();
@@ -1127,9 +1149,9 @@ ServiceBuilder<
});
ready(())
});
let _ = to_spawn_tx.unbounded_send(Box::pin(select(
let _ = to_spawn_tx.unbounded_send((Box::pin(select(
future, exit.clone()
).map(drop)));
).map(drop)), From::from("telemetry-worker")));
telemetry
});
@@ -1140,7 +1162,7 @@ ServiceBuilder<
exit.clone()
).map(drop);
let _ = to_spawn_tx.unbounded_send(Box::pin(future));
let _ = to_spawn_tx.unbounded_send((Box::pin(future), From::from("grafana-server")));
}
// Instrumentation
+42 -22
View File
@@ -27,7 +27,7 @@ pub mod error;
mod builder;
mod status_sinks;
use std::{io, pin::Pin};
use std::{borrow::Cow, io, pin::Pin};
use std::marker::PhantomData;
use std::net::SocketAddr;
use std::collections::HashMap;
@@ -42,7 +42,7 @@ use futures::{
future::select, channel::mpsc,
compat::*,
sink::SinkExt,
task::{Spawn, SpawnExt, FutureObj, SpawnError},
task::{Spawn, FutureObj, SpawnError},
};
use sc_network::{
NetworkService, NetworkState, specialization::NetworkSpecialization,
@@ -92,9 +92,9 @@ pub struct Service<TBl, TCl, TSc, TNetStatus, TNet, TTxPool, TOc> {
/// A receiver for spawned essential-tasks concluding.
essential_failed_rx: mpsc::UnboundedReceiver<()>,
/// Sender for futures that must be spawned as background tasks.
to_spawn_tx: mpsc::UnboundedSender<Pin<Box<dyn Future<Output = ()> + Send>>>,
to_spawn_tx: mpsc::UnboundedSender<(Pin<Box<dyn Future<Output = ()> + Send>>, Cow<'static, str>)>,
/// Receiver for futures that must be spawned as background tasks.
to_spawn_rx: mpsc::UnboundedReceiver<Pin<Box<dyn Future<Output = ()> + Send>>>,
to_spawn_rx: mpsc::UnboundedReceiver<(Pin<Box<dyn Future<Output = ()> + Send>>, Cow<'static, str>)>,
/// How to spawn background tasks.
tasks_executor: Box<dyn Fn(Pin<Box<dyn Future<Output = ()> + Send>>) + Send>,
rpc_handlers: sc_rpc_server::RpcHandler<sc_rpc::Metadata>,
@@ -112,15 +112,29 @@ pub type TaskExecutor = Arc<dyn Spawn + Send + Sync>;
/// A handle for spawning tasks in the service.
#[derive(Clone)]
pub struct SpawnTaskHandle {
sender: mpsc::UnboundedSender<Pin<Box<dyn Future<Output = ()> + Send>>>,
sender: mpsc::UnboundedSender<(Pin<Box<dyn Future<Output = ()> + Send>>, Cow<'static, str>)>,
on_exit: exit_future::Exit,
}
impl SpawnTaskHandle {
/// Spawns the given task with the given name.
pub fn spawn(&self, name: impl Into<Cow<'static, str>>, task: impl Future<Output = ()> + Send + 'static) {
let on_exit = self.on_exit.clone();
let future = async move {
futures::pin_mut!(task);
let _ = select(on_exit, task).await;
};
if self.sender.unbounded_send((Box::pin(future), name.into())).is_err() {
error!("Failed to send task to spawn over channel");
}
}
}
impl Spawn for SpawnTaskHandle {
fn spawn_obj(&self, future: FutureObj<'static, ()>)
-> Result<(), SpawnError> {
let future = select(self.on_exit.clone(), future).map(drop);
self.sender.unbounded_send(Box::pin(future))
self.sender.unbounded_send((Box::pin(future), From::from("unnamed")))
.map_err(|_| SpawnError::shutdown())
}
}
@@ -129,7 +143,7 @@ type Boxed01Future01 = Box<dyn futures01::Future<Item = (), Error = ()> + Send +
impl futures01::future::Executor<Boxed01Future01> for SpawnTaskHandle {
fn execute(&self, future: Boxed01Future01) -> Result<(), futures01::future::ExecuteError<Boxed01Future01>>{
self.spawn(future.compat().map(drop));
self.spawn("unnamed", future.compat().map(drop));
Ok(())
}
}
@@ -159,12 +173,12 @@ pub trait AbstractService: 'static + Future<Output = Result<(), Error>> +
fn telemetry(&self) -> Option<sc_telemetry::Telemetry>;
/// Spawns a task in the background that runs the future passed as parameter.
fn spawn_task(&self, task: impl Future<Output = ()> + Send + Unpin + 'static);
fn spawn_task(&self, name: impl Into<Cow<'static, str>>, task: impl Future<Output = ()> + Send + 'static);
/// Spawns a task in the background that runs the future passed as
/// parameter. The given task is considered essential, i.e. if it errors we
/// trigger a service exit.
fn spawn_essential_task(&self, task: impl Future<Output = ()> + Send + Unpin + 'static);
fn spawn_essential_task(&self, name: impl Into<Cow<'static, str>>, task: impl Future<Output = ()> + Send + 'static);
/// Returns a handle for spawning tasks.
fn spawn_task_handle(&self) -> SpawnTaskHandle;
@@ -238,12 +252,16 @@ where
self.keystore.clone()
}
fn spawn_task(&self, task: impl Future<Output = ()> + Send + Unpin + 'static) {
let task = select(self.on_exit(), task).map(drop);
let _ = self.to_spawn_tx.unbounded_send(Box::pin(task));
fn spawn_task(&self, name: impl Into<Cow<'static, str>>, task: impl Future<Output = ()> + Send + 'static) {
let on_exit = self.on_exit();
let task = async move {
futures::pin_mut!(task);
let _ = select(on_exit, task).await;
};
let _ = self.to_spawn_tx.unbounded_send((Box::pin(task), name.into()));
}
fn spawn_essential_task(&self, task: impl Future<Output = ()> + Send + Unpin + 'static) {
fn spawn_essential_task(&self, name: impl Into<Cow<'static, str>>, task: impl Future<Output = ()> + Send + 'static) {
let mut essential_failed = self.essential_failed_tx.clone();
let essential_task = std::panic::AssertUnwindSafe(task)
.catch_unwind()
@@ -251,9 +269,13 @@ where
error!("Essential task failed. Shutting down service.");
let _ = essential_failed.send(());
});
let task = select(self.on_exit(), essential_task).map(drop);
let on_exit = self.on_exit();
let task = async move {
futures::pin_mut!(essential_task);
let _ = select(on_exit, essential_task).await;
};
let _ = self.to_spawn_tx.unbounded_send(Box::pin(task));
let _ = self.to_spawn_tx.unbounded_send((Box::pin(task), name.into()));
}
fn spawn_task_handle(&self) -> SpawnTaskHandle {
@@ -317,8 +339,8 @@ impl<TBl: Unpin, TCl, TSc: Unpin, TNetStatus, TNet, TTxPool, TOc> Future for
}
}
while let Poll::Ready(Some(task_to_spawn)) = Pin::new(&mut this.to_spawn_rx).poll_next(cx) {
(this.tasks_executor)(task_to_spawn);
while let Poll::Ready(Some((task_to_spawn, name))) = Pin::new(&mut this.to_spawn_rx).poll_next(cx) {
(this.tasks_executor)(Box::pin(futures_diagnose::diagnose(name, task_to_spawn)));
}
// The service future never ends.
@@ -333,7 +355,7 @@ impl<TBl, TCl, TSc, TNetStatus, TNet, TTxPool, TOc> Spawn for
&self,
future: FutureObj<'static, ()>
) -> Result<(), SpawnError> {
self.to_spawn_tx.unbounded_send(Box::pin(future))
self.to_spawn_tx.unbounded_send((Box::pin(future), From::from("unnamed")))
.map_err(|_| SpawnError::shutdown())
}
}
@@ -575,7 +597,7 @@ pub struct TransactionPoolAdapter<C, P> {
imports_external_transactions: bool,
pool: Arc<P>,
client: Arc<C>,
executor: TaskExecutor,
executor: SpawnTaskHandle,
}
/// Get transactions for propagation.
@@ -649,9 +671,7 @@ where
}
});
if let Err(e) = self.executor.spawn(Box::new(import_future)) {
warn!("Error scheduling extrinsic import: {:?}", e);
}
self.executor.spawn("extrinsic-import", import_future);
}
Err(e) => debug!("Error decoding transaction {}", e),
}
@@ -8,6 +8,7 @@ edition = "2018"
codec = { package = "parity-scale-codec", version = "1.0.0" }
derive_more = "0.99.2"
futures = { version = "0.3.1", features = ["compat"] }
futures-diagnose = "1.0"
log = "0.4.8"
parking_lot = "0.9.0"
sp-core = { path = "../../primitives/core" }
+2 -2
View File
@@ -87,13 +87,13 @@ impl<Client, Block> sc_transaction_graph::ChainApi for FullChainApi<Client, Bloc
let client = self.client.clone();
let at = at.clone();
self.pool.spawn_ok(async move {
self.pool.spawn_ok(futures_diagnose::diagnose("validate-transaction", async move {
let res = client.runtime_api().validate_transaction(&at, uxt)
.map_err(|e| Error::RuntimeApi(format!("{:?}", e)));
if let Err(e) = tx.send(res) {
log::warn!("Unable to send a validate transaction result: {:?}", e);
}
});
}));
Box::pin(async move {
match rx.await {
@@ -14,6 +14,7 @@ sp-inherents = { version = "2.0.0", path = "../../inherents" }
sp-state-machine = { version = "0.8.0", path = "../../../primitives/state-machine" }
futures = { version = "0.3.1", features = ["thread-pool"] }
futures-timer = "0.4.0"
futures-diagnose = "1.0"
sp-std = { version = "2.0.0", path = "../../std" }
sp-version = { version = "2.0.0", path = "../../version" }
sp-runtime = { version = "2.0.0", path = "../../runtime" }
@@ -71,7 +71,7 @@ impl<B: BlockT, Transaction: Send + 'static> BasicQueue<B, Transaction> {
let manual_poll;
if let Some(pool) = &mut pool {
pool.spawn_ok(future);
pool.spawn_ok(futures_diagnose::diagnose("import-queue", future));
manual_poll = None;
} else {
manual_poll = Some(Box::pin(future) as Pin<Box<_>>);