mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-13 05:51:02 +00:00
Send statements to own backing group first (#2927)
* Factor out runtime module into utils. * First fatal error design. * Better error handling infra. * Error handling cleanup. * Send to peers of our group first. * Finish backing group prioritization. * Little cleanup. * More cleanup. * Forgot to checkin error.rs. * Notes. * Runtime -> RuntimeInfo * qed in debug assert. * PolkaErr -> Fault.
This commit is contained in:
Generated
+1
@@ -6568,6 +6568,7 @@ dependencies = [
|
||||
"sp-keystore",
|
||||
"sp-staking",
|
||||
"sp-tracing",
|
||||
"thiserror",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
|
||||
@@ -23,29 +23,66 @@ use thiserror::Error;
|
||||
|
||||
use futures::channel::oneshot;
|
||||
|
||||
use polkadot_node_subsystem_util::{
|
||||
runtime,
|
||||
Error as UtilError,
|
||||
};
|
||||
use polkadot_node_subsystem_util::{Fault, Error as UtilError, runtime, unwrap_non_fatal};
|
||||
use polkadot_subsystem::{errors::RuntimeApiError, SubsystemError};
|
||||
|
||||
use crate::LOG_TARGET;
|
||||
|
||||
/// Errors of this subsystem.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum Error {
|
||||
#[error("Response channel to obtain chunk failed")]
|
||||
QueryChunkResponseChannel(#[source] oneshot::Canceled),
|
||||
#[error(transparent)]
|
||||
pub struct Error(pub Fault<NonFatal, Fatal>);
|
||||
|
||||
#[error("Response channel to obtain available data failed")]
|
||||
QueryAvailableDataResponseChannel(#[source] oneshot::Canceled),
|
||||
impl From<NonFatal> for Error {
|
||||
fn from(e: NonFatal) -> Self {
|
||||
Self(Fault::from_non_fatal(e))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Fatal> for Error {
|
||||
fn from(f: Fatal) -> Self {
|
||||
Self(Fault::from_fatal(f))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<runtime::Error> for Error {
|
||||
fn from(o: runtime::Error) -> Self {
|
||||
Self(Fault::from_other(o))
|
||||
}
|
||||
}
|
||||
|
||||
/// Fatal errors of this subsystem.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum Fatal {
|
||||
/// Spawning a running task failed.
|
||||
#[error("Spawning subsystem task failed")]
|
||||
SpawnTask(#[source] SubsystemError),
|
||||
|
||||
/// Runtime API subsystem is down, which means we're shutting down.
|
||||
#[error("Runtime request canceled")]
|
||||
RuntimeRequestCanceled(oneshot::Canceled),
|
||||
|
||||
/// Requester stream exhausted.
|
||||
#[error("Erasure chunk requester stream exhausted")]
|
||||
RequesterExhausted,
|
||||
|
||||
#[error("Receive channel closed")]
|
||||
IncomingMessageChannel(#[source] SubsystemError),
|
||||
|
||||
/// Spawning a running task failed.
|
||||
#[error("Spawning subsystem task failed")]
|
||||
SpawnTask(#[source] SubsystemError),
|
||||
/// Errors coming from runtime::Runtime.
|
||||
#[error("Error while accessing runtime information")]
|
||||
Runtime(#[from] #[source] runtime::Fatal),
|
||||
}
|
||||
|
||||
/// Non fatal errors of this subsystem.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum NonFatal {
|
||||
/// av-store will drop the sender on any error that happens.
|
||||
#[error("Response channel to obtain chunk failed")]
|
||||
QueryChunkResponseChannel(#[source] oneshot::Canceled),
|
||||
|
||||
/// av-store will drop the sender on any error that happens.
|
||||
#[error("Response channel to obtain available data failed")]
|
||||
QueryAvailableDataResponseChannel(#[source] oneshot::Canceled),
|
||||
|
||||
/// We tried accessing a session that was not cached.
|
||||
#[error("Session is not cached.")]
|
||||
@@ -55,11 +92,7 @@ pub enum Error {
|
||||
#[error("Not a validator.")]
|
||||
NotAValidator,
|
||||
|
||||
/// Requester stream exhausted.
|
||||
#[error("Erasure chunk requester stream exhausted")]
|
||||
RequesterExhausted,
|
||||
|
||||
/// Sending response failed.
|
||||
/// Sending request response failed (Can happen on timeouts for example).
|
||||
#[error("Sending a request's response failed.")]
|
||||
SendResponse,
|
||||
|
||||
@@ -68,10 +101,6 @@ pub enum Error {
|
||||
#[error("Utility request failed")]
|
||||
UtilRequest(UtilError),
|
||||
|
||||
/// Runtime API subsystem is down, which means we're shutting down.
|
||||
#[error("Runtime request canceled")]
|
||||
RuntimeRequestCanceled(oneshot::Canceled),
|
||||
|
||||
/// Some request to the runtime failed.
|
||||
/// For example if we prune a block we're requesting info about.
|
||||
#[error("Runtime API error")]
|
||||
@@ -98,39 +127,30 @@ pub enum Error {
|
||||
|
||||
/// Errors coming from runtime::Runtime.
|
||||
#[error("Error while accessing runtime information")]
|
||||
Runtime(#[source] runtime::Error),
|
||||
Runtime(#[from] #[source] runtime::NonFatal),
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
impl From<runtime::Error> for Error {
|
||||
fn from(err: runtime::Error) -> Self {
|
||||
Self::Runtime(err)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SubsystemError> for Error {
|
||||
fn from(err: SubsystemError) -> Self {
|
||||
Self::IncomingMessageChannel(err)
|
||||
}
|
||||
}
|
||||
|
||||
/// Receive a response from a runtime request and convert errors.
|
||||
pub(crate) async fn recv_runtime<V>(
|
||||
r: oneshot::Receiver<std::result::Result<V, RuntimeApiError>>,
|
||||
) -> std::result::Result<V, Error> {
|
||||
r.await
|
||||
.map_err(Error::RuntimeRequestCanceled)?
|
||||
.map_err(Error::RuntimeRequest)
|
||||
}
|
||||
|
||||
|
||||
/// Utility for eating top-level errors and logging them.
|
||||
///
|
||||
/// We basically always want to try and continue on error. This utility function is meant to
|
||||
/// consume top-level errors by simply logging them
|
||||
pub fn log_error(result: Result<()>, ctx: &'static str) {
|
||||
if let Err(error) = result {
|
||||
pub fn log_error(result: Result<()>, ctx: &'static str)
|
||||
-> std::result::Result<(), Fatal>
|
||||
{
|
||||
if let Some(error) = unwrap_non_fatal(result.map_err(|e| e.0))? {
|
||||
tracing::warn!(target: LOG_TARGET, error = ?error, ctx);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Receive a response from a runtime request and convert errors.
|
||||
pub(crate) async fn recv_runtime<V>(
|
||||
r: oneshot::Receiver<std::result::Result<V, RuntimeApiError>>,
|
||||
) -> Result<V> {
|
||||
let result = r.await
|
||||
.map_err(Fatal::RuntimeRequestCanceled)?
|
||||
.map_err(NonFatal::RuntimeRequest)?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
@@ -25,10 +25,10 @@ use polkadot_subsystem::{
|
||||
|
||||
/// Error and [`Result`] type for this subsystem.
|
||||
mod error;
|
||||
pub use error::Error;
|
||||
pub use error::{Fatal, NonFatal};
|
||||
use error::{Result, log_error};
|
||||
|
||||
use polkadot_node_subsystem_util::runtime::Runtime;
|
||||
use polkadot_node_subsystem_util::runtime::RuntimeInfo;
|
||||
|
||||
/// `Requester` taking care of requesting chunks for candidates pending availability.
|
||||
mod requester;
|
||||
@@ -59,7 +59,7 @@ pub struct AvailabilityDistributionSubsystem {
|
||||
/// Pointer to a keystore, which is required for determining this node's validator index.
|
||||
keystore: SyncCryptoStorePtr,
|
||||
/// Easy and efficient runtime access for this subsystem.
|
||||
runtime: Runtime,
|
||||
runtime: RuntimeInfo,
|
||||
/// Prometheus metrics.
|
||||
metrics: Metrics,
|
||||
}
|
||||
@@ -85,12 +85,12 @@ impl AvailabilityDistributionSubsystem {
|
||||
|
||||
/// Create a new instance of the availability distribution.
|
||||
pub fn new(keystore: SyncCryptoStorePtr, metrics: Metrics) -> Self {
|
||||
let runtime = Runtime::new(keystore.clone());
|
||||
let runtime = RuntimeInfo::new(keystore.clone());
|
||||
Self { keystore, runtime, metrics }
|
||||
}
|
||||
|
||||
/// Start processing work as passed on from the Overseer.
|
||||
async fn run<Context>(mut self, mut ctx: Context) -> Result<()>
|
||||
async fn run<Context>(mut self, mut ctx: Context) -> std::result::Result<(), Fatal>
|
||||
where
|
||||
Context: SubsystemContext<Message = AvailabilityDistributionMessage> + Sync + Send,
|
||||
{
|
||||
@@ -108,10 +108,10 @@ impl AvailabilityDistributionSubsystem {
|
||||
// Handle task messages sending:
|
||||
let message = match action {
|
||||
Either::Left(subsystem_msg) => {
|
||||
subsystem_msg.map_err(|e| Error::IncomingMessageChannel(e))?
|
||||
subsystem_msg.map_err(|e| Fatal::IncomingMessageChannel(e))?
|
||||
}
|
||||
Either::Right(from_task) => {
|
||||
let from_task = from_task.ok_or(Error::RequesterExhausted)?;
|
||||
let from_task = from_task.ok_or(Fatal::RequesterExhausted)?;
|
||||
ctx.send_message(from_task).await;
|
||||
continue;
|
||||
}
|
||||
@@ -133,7 +133,7 @@ impl AvailabilityDistributionSubsystem {
|
||||
log_error(
|
||||
requester.get_mut().update_fetching_heads(&mut ctx, update).await,
|
||||
"Error in Requester::update_fetching_heads"
|
||||
);
|
||||
)?;
|
||||
}
|
||||
FromOverseer::Signal(OverseerSignal::BlockFinalized(..)) => {}
|
||||
FromOverseer::Signal(OverseerSignal::Conclude) => {
|
||||
@@ -169,7 +169,7 @@ impl AvailabilityDistributionSubsystem {
|
||||
tx,
|
||||
).await,
|
||||
"PoVRequester::fetch_pov"
|
||||
);
|
||||
)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -33,9 +33,10 @@ use polkadot_subsystem::{
|
||||
ActiveLeavesUpdate, SubsystemContext, ActivatedLeaf,
|
||||
messages::{AllMessages, NetworkBridgeMessage, IfDisconnected}
|
||||
};
|
||||
use polkadot_node_subsystem_util::runtime::{Runtime, ValidatorInfo};
|
||||
use polkadot_node_subsystem_util::runtime::{RuntimeInfo, ValidatorInfo};
|
||||
|
||||
use crate::error::{Error, log_error};
|
||||
use crate::error::{Fatal, NonFatal};
|
||||
use crate::LOG_TARGET;
|
||||
|
||||
/// Number of sessions we want to keep in the LRU.
|
||||
const NUM_SESSIONS: usize = 2;
|
||||
@@ -63,7 +64,7 @@ impl PoVRequester {
|
||||
pub async fn update_connected_validators<Context>(
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
runtime: &mut Runtime,
|
||||
runtime: &mut RuntimeInfo,
|
||||
update: &ActiveLeavesUpdate,
|
||||
) -> super::Result<()>
|
||||
where
|
||||
@@ -87,7 +88,7 @@ impl PoVRequester {
|
||||
pub async fn fetch_pov<Context>(
|
||||
&self,
|
||||
ctx: &mut Context,
|
||||
runtime: &mut Runtime,
|
||||
runtime: &mut RuntimeInfo,
|
||||
parent: Hash,
|
||||
from_validator: ValidatorIndex,
|
||||
candidate_hash: CandidateHash,
|
||||
@@ -99,7 +100,7 @@ impl PoVRequester {
|
||||
{
|
||||
let info = &runtime.get_session_info(ctx, parent).await?.session_info;
|
||||
let authority_id = info.discovery_keys.get(from_validator.0 as usize)
|
||||
.ok_or(Error::InvalidValidatorIndex)?
|
||||
.ok_or(NonFatal::InvalidValidatorIndex)?
|
||||
.clone();
|
||||
let (req, pending_response) = OutgoingRequest::new(
|
||||
Recipient::Authority(authority_id),
|
||||
@@ -125,7 +126,8 @@ impl PoVRequester {
|
||||
.with_relay_parent(parent);
|
||||
ctx.spawn("pov-fetcher", fetch_pov_job(pov_hash, pending_response.boxed(), span, tx).boxed())
|
||||
.await
|
||||
.map_err(|e| Error::SpawnTask(e))
|
||||
.map_err(|e| Fatal::SpawnTask(e))?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -136,10 +138,13 @@ async fn fetch_pov_job(
|
||||
span: jaeger::Span,
|
||||
tx: oneshot::Sender<PoV>,
|
||||
) {
|
||||
log_error(
|
||||
do_fetch_pov(pov_hash, pending_response, span, tx).await,
|
||||
"fetch_pov_job",
|
||||
)
|
||||
if let Err(err) = do_fetch_pov(pov_hash, pending_response, span, tx).await {
|
||||
tracing::warn!(
|
||||
target: LOG_TARGET,
|
||||
?err,
|
||||
"fetch_pov_job"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Do the actual work of waiting for the response.
|
||||
@@ -149,24 +154,24 @@ async fn do_fetch_pov(
|
||||
_span: jaeger::Span,
|
||||
tx: oneshot::Sender<PoV>,
|
||||
)
|
||||
-> super::Result<()>
|
||||
-> std::result::Result<(), NonFatal>
|
||||
{
|
||||
let response = pending_response.await.map_err(Error::FetchPoV)?;
|
||||
let response = pending_response.await.map_err(NonFatal::FetchPoV)?;
|
||||
let pov = match response {
|
||||
PoVFetchingResponse::PoV(pov) => pov,
|
||||
PoVFetchingResponse::NoSuchPoV => {
|
||||
return Err(Error::NoSuchPoV)
|
||||
return Err(NonFatal::NoSuchPoV)
|
||||
}
|
||||
};
|
||||
if pov.hash() == pov_hash {
|
||||
tx.send(pov).map_err(|_| Error::SendResponse)
|
||||
tx.send(pov).map_err(|_| NonFatal::SendResponse)
|
||||
} else {
|
||||
Err(Error::UnexpectedPoV)
|
||||
Err(NonFatal::UnexpectedPoV)
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the session indices for the given relay chain parents.
|
||||
async fn get_activated_sessions<Context>(ctx: &mut Context, runtime: &mut Runtime, new_heads: impl Iterator<Item = &Hash>)
|
||||
async fn get_activated_sessions<Context>(ctx: &mut Context, runtime: &mut RuntimeInfo, new_heads: impl Iterator<Item = &Hash>)
|
||||
-> super::Result<impl Iterator<Item = (Hash, SessionIndex)>>
|
||||
where
|
||||
Context: SubsystemContext,
|
||||
@@ -181,7 +186,7 @@ where
|
||||
/// Connect to validators of our validator group.
|
||||
async fn connect_to_relevant_validators<Context>(
|
||||
ctx: &mut Context,
|
||||
runtime: &mut Runtime,
|
||||
runtime: &mut RuntimeInfo,
|
||||
parent: Hash,
|
||||
session: SessionIndex
|
||||
)
|
||||
@@ -206,7 +211,7 @@ where
|
||||
/// Return: `None` if not a validator.
|
||||
async fn determine_relevant_validators<Context>(
|
||||
ctx: &mut Context,
|
||||
runtime: &mut Runtime,
|
||||
runtime: &mut RuntimeInfo,
|
||||
parent: Hash,
|
||||
session: SessionIndex,
|
||||
)
|
||||
@@ -275,7 +280,7 @@ mod tests {
|
||||
let (mut context, mut virtual_overseer) =
|
||||
test_helpers::make_subsystem_context::<AvailabilityDistributionMessage, TaskExecutor>(pool.clone());
|
||||
let keystore = make_ferdie_keystore();
|
||||
let mut runtime = polkadot_node_subsystem_util::runtime::Runtime::new(keystore);
|
||||
let mut runtime = polkadot_node_subsystem_util::runtime::RuntimeInfo::new(keystore);
|
||||
|
||||
let (tx, rx) = oneshot::channel();
|
||||
let testee = async {
|
||||
|
||||
@@ -34,7 +34,7 @@ use polkadot_subsystem::messages::{
|
||||
use polkadot_subsystem::{SubsystemContext, jaeger};
|
||||
|
||||
use crate::{
|
||||
error::{Error, Result},
|
||||
error::{Fatal, Result},
|
||||
session_cache::{BadValidators, SessionInfo},
|
||||
LOG_TARGET,
|
||||
metrics::{Metrics, SUCCEEDED, FAILED},
|
||||
@@ -191,7 +191,7 @@ impl FetchTask {
|
||||
|
||||
ctx.spawn("chunk-fetcher", running.run(kill).boxed())
|
||||
.await
|
||||
.map_err(|e| Error::SpawnTask(e))?;
|
||||
.map_err(|e| Fatal::SpawnTask(e))?;
|
||||
|
||||
Ok(FetchTask {
|
||||
live_in,
|
||||
|
||||
@@ -28,7 +28,7 @@ use polkadot_subsystem::{
|
||||
SubsystemContext, jaeger,
|
||||
};
|
||||
|
||||
use crate::error::{Error, Result};
|
||||
use crate::error::{NonFatal, Result};
|
||||
use crate::{LOG_TARGET, metrics::{Metrics, SUCCEEDED, FAILED, NOT_FOUND}};
|
||||
|
||||
/// Variant of `answer_pov_request` that does Prometheus metric and logging on errors.
|
||||
@@ -107,7 +107,7 @@ where
|
||||
}
|
||||
};
|
||||
|
||||
req.send_response(response).map_err(|_| Error::SendResponse)?;
|
||||
req.send_response(response).map_err(|_| NonFatal::SendResponse)?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
@@ -144,7 +144,7 @@ where
|
||||
Some(chunk) => v1::ChunkFetchingResponse::Chunk(chunk.into()),
|
||||
};
|
||||
|
||||
req.send_response(response).map_err(|_| Error::SendResponse)?;
|
||||
req.send_response(response).map_err(|_| NonFatal::SendResponse)?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
@@ -164,7 +164,7 @@ where
|
||||
))
|
||||
.await;
|
||||
|
||||
rx.await.map_err(|e| {
|
||||
let result = rx.await.map_err(|e| {
|
||||
tracing::trace!(
|
||||
target: LOG_TARGET,
|
||||
?validator_index,
|
||||
@@ -172,8 +172,9 @@ where
|
||||
error = ?e,
|
||||
"Error retrieving chunk",
|
||||
);
|
||||
Error::QueryChunkResponseChannel(e)
|
||||
})
|
||||
NonFatal::QueryChunkResponseChannel(e)
|
||||
})?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Query PoV from the availability store.
|
||||
@@ -191,5 +192,6 @@ where
|
||||
))
|
||||
.await;
|
||||
|
||||
rx.await.map_err(|e| Error::QueryAvailableDataResponseChannel(e))
|
||||
let result = rx.await.map_err(|e| NonFatal::QueryAvailableDataResponseChannel(e))?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
@@ -33,7 +33,7 @@ use polkadot_primitives::v1::{
|
||||
use polkadot_subsystem::SubsystemContext;
|
||||
|
||||
use super::{
|
||||
error::{recv_runtime, Error},
|
||||
error::{recv_runtime, Error, NonFatal},
|
||||
LOG_TARGET,
|
||||
};
|
||||
|
||||
@@ -189,9 +189,9 @@ impl SessionCache {
|
||||
let session = self
|
||||
.session_info_cache
|
||||
.get_mut(&report.session_index)
|
||||
.ok_or(Error::NoSuchCachedSession)?
|
||||
.ok_or(NonFatal::NoSuchCachedSession)?
|
||||
.as_mut()
|
||||
.ok_or(Error::NotAValidator)?;
|
||||
.ok_or(NonFatal::NotAValidator)?;
|
||||
let group = session
|
||||
.validator_groups
|
||||
.get_mut(report.group_index.0 as usize)
|
||||
@@ -231,7 +231,7 @@ impl SessionCache {
|
||||
..
|
||||
} = recv_runtime(request_session_info(parent, session_index, ctx.sender()).await)
|
||||
.await?
|
||||
.ok_or(Error::NoSuchSession(session_index))?;
|
||||
.ok_or(NonFatal::NoSuchSession(session_index))?;
|
||||
|
||||
if let Some(our_index) = self.get_our_index(validators).await {
|
||||
// Get our group index:
|
||||
|
||||
@@ -10,6 +10,7 @@ futures = "0.3.12"
|
||||
tracing = "0.1.25"
|
||||
polkadot-primitives = { path = "../../../primitives" }
|
||||
sp-staking = { git = "https://github.com/paritytech/substrate", branch = "master", default-features = false }
|
||||
sp-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" }
|
||||
sc-network = { git = "https://github.com/paritytech/substrate", branch = "master" }
|
||||
polkadot-subsystem = { package = "polkadot-node-subsystem", path = "../../subsystem" }
|
||||
polkadot-node-primitives = { path = "../../primitives" }
|
||||
@@ -18,6 +19,7 @@ polkadot-node-network-protocol = { path = "../../network/protocol" }
|
||||
arrayvec = "0.5.2"
|
||||
indexmap = "1.6.1"
|
||||
parity-scale-codec = { version = "2.0.0", default-features = false, features = ["derive"] }
|
||||
thiserror = "1.0.23"
|
||||
|
||||
[dev-dependencies]
|
||||
polkadot-node-subsystem-test-helpers = { path = "../../subsystem-test-helpers" }
|
||||
|
||||
@@ -0,0 +1,122 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
|
||||
//! Error handling related code and Error/Result definitions.
|
||||
|
||||
use polkadot_node_network_protocol::PeerId;
|
||||
use polkadot_primitives::v1::{CandidateHash, Hash};
|
||||
use polkadot_subsystem::SubsystemError;
|
||||
use thiserror::Error;
|
||||
|
||||
use polkadot_node_subsystem_util::{Fault, runtime, unwrap_non_fatal};
|
||||
|
||||
use crate::LOG_TARGET;
|
||||
|
||||
/// General result.
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
/// Result for non fatal only failures.
|
||||
pub type NonFatalResult<T> = std::result::Result<T, NonFatal>;
|
||||
/// Result for fatal only failures.
|
||||
pub type FatalResult<T> = std::result::Result<T, Fatal>;
|
||||
|
||||
/// Errors for statement distribution.
|
||||
#[derive(Debug, Error)]
|
||||
#[error(transparent)]
|
||||
pub struct Error(pub Fault<NonFatal, Fatal>);
|
||||
|
||||
impl From<NonFatal> for Error {
|
||||
fn from(e: NonFatal) -> Self {
|
||||
Self(Fault::from_non_fatal(e))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Fatal> for Error {
|
||||
fn from(f: Fatal) -> Self {
|
||||
Self(Fault::from_fatal(f))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<runtime::Error> for Error {
|
||||
fn from(o: runtime::Error) -> Self {
|
||||
Self(Fault::from_other(o))
|
||||
}
|
||||
}
|
||||
|
||||
/// Fatal errors of this subsystem.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum Fatal {
|
||||
/// Requester channel is never closed.
|
||||
#[error("Requester receiver stream finished.")]
|
||||
RequesterReceiverFinished,
|
||||
|
||||
/// Responder channel is never closed.
|
||||
#[error("Responder receiver stream finished.")]
|
||||
ResponderReceiverFinished,
|
||||
|
||||
/// Spawning a running task failed.
|
||||
#[error("Spawning subsystem task failed")]
|
||||
SpawnTask(#[source] SubsystemError),
|
||||
|
||||
/// Receiving subsystem message from overseer failed.
|
||||
#[error("Receiving message from overseer failed")]
|
||||
SubsystemReceive(#[source] SubsystemError),
|
||||
|
||||
/// Errors coming from runtime::Runtime.
|
||||
#[error("Error while accessing runtime information")]
|
||||
Runtime(#[from] #[source] runtime::Fatal),
|
||||
}
|
||||
|
||||
/// Non fatal errors of this subsystem.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum NonFatal {
|
||||
/// Errors coming from runtime::Runtime.
|
||||
#[error("Error while accessing runtime information")]
|
||||
Runtime(#[from] #[source] runtime::NonFatal),
|
||||
|
||||
/// Relay parent was not present in active heads.
|
||||
#[error("Relay parent could not be found in active heads")]
|
||||
NoSuchHead(Hash),
|
||||
|
||||
/// Peer requested statement data for candidate that was never announced to it.
|
||||
#[error("Peer requested data for candidate it never received a notification for")]
|
||||
RequestedUnannouncedCandidate(PeerId, CandidateHash),
|
||||
|
||||
/// A large statement status was requested, which could not be found.
|
||||
#[error("Statement status does not exist")]
|
||||
NoSuchLargeStatementStatus(Hash, CandidateHash),
|
||||
|
||||
/// A fetched large statement was requested, but could not be found.
|
||||
#[error("Fetched large statement does not exist")]
|
||||
NoSuchFetchedLargeStatement(Hash, CandidateHash),
|
||||
|
||||
/// Responder no longer waits for our data. (Should not happen right now.)
|
||||
#[error("Oneshot `GetData` channel closed")]
|
||||
ResponderGetDataCanceled,
|
||||
}
|
||||
|
||||
/// Utility for eating top-level errors and logging them.
|
||||
///
|
||||
/// We basically always want to try and continue on error. This utility function is meant to
|
||||
/// consume top-level errors by simply logging them.
|
||||
pub fn log_error(result: Result<()>, ctx: &'static str)
|
||||
-> FatalResult<()>
|
||||
{
|
||||
if let Some(error) = unwrap_non_fatal(result.map_err(|e| e.0))? {
|
||||
tracing::debug!(target: LOG_TARGET, error = ?error, ctx)
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -20,7 +20,7 @@ use futures::{SinkExt, channel::{mpsc, oneshot}};
|
||||
|
||||
use polkadot_node_network_protocol::{
|
||||
PeerId, UnifiedReputationChange,
|
||||
request_response::{
|
||||
request_response::{
|
||||
OutgoingRequest, Recipient, Requests,
|
||||
v1::{
|
||||
StatementFetchingRequest, StatementFetchingResponse
|
||||
|
||||
@@ -530,6 +530,7 @@ where
|
||||
spawner.clone(),
|
||||
),
|
||||
statement_distribution: StatementDistributionSubsystem::new(
|
||||
keystore.clone(),
|
||||
Metrics::register(registry)?,
|
||||
),
|
||||
approval_distribution: ApprovalDistributionSubsystem::new(
|
||||
|
||||
@@ -0,0 +1,201 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Utilities for general error handling in Polkadot.
|
||||
//!
|
||||
//! Goals:
|
||||
//!
|
||||
//! - Ergonomic API with little repetition.
|
||||
//! - Still explicitness where it matters - fatal errors should be visible and justified.
|
||||
//! - Easy recovering from non fatal errors.
|
||||
//! - Errors start as non fatal and can be made fatal at the level where it is really clear they
|
||||
//! are fatal. E.g. cancellation of a oneshot might be fatal in one case, but absolutely expected
|
||||
//! in another.
|
||||
//! - Good error messages. Fatal errors don't need to be properly structured (as we won't handle
|
||||
//! them), but should provide good error messages of what is going on.
|
||||
//! - Encourage many error types. One per module or even per function is totally fine - it makes
|
||||
//! error handling robust, if you only need to handle errors that can actually happen, also error
|
||||
//! messages will get better.
|
||||
|
||||
use thiserror::Error;
|
||||
|
||||
/// Error abstraction.
|
||||
///
|
||||
/// Errors might either be fatal and should bring the subsystem down or are at least at the point
|
||||
/// of occurrence deemed potentially recoverable.
|
||||
///
|
||||
/// Upper layers might have a better view and might make a non fatal error of a called function a
|
||||
/// fatal one. The opposite should not happen, therefore don't make an error fatal if you don't
|
||||
/// know it is in all cases.
|
||||
///
|
||||
/// Usage pattern:
|
||||
///
|
||||
/// ```
|
||||
/// use thiserror::Error;
|
||||
/// use polkadot_node_subsystem::errors::RuntimeApiError;
|
||||
/// use polkadot_primitives::v1::SessionIndex;
|
||||
/// use futures::channel::oneshot;
|
||||
/// use polkadot_node_subsystem_util::{Fault, runtime};
|
||||
///
|
||||
/// #[derive(Debug, Error)]
|
||||
/// #[error(transparent)]
|
||||
/// pub struct Error(pub Fault<NonFatal, Fatal>);
|
||||
///
|
||||
/// pub type Result<T> = std::result::Result<T, Error>;
|
||||
/// pub type NonFatalResult<T> = std::result::Result<T, NonFatal>;
|
||||
/// pub type FatalResult<T> = std::result::Result<T, Fatal>;
|
||||
///
|
||||
/// // Make an error from a `NonFatal` one.
|
||||
/// impl From<NonFatal> for Error {
|
||||
/// fn from(e: NonFatal) -> Self {
|
||||
/// Self(Fault::from_non_fatal(e))
|
||||
/// }
|
||||
/// }
|
||||
///
|
||||
/// // Make an Error from a `Fatal` one.
|
||||
/// impl From<Fatal> for Error {
|
||||
/// fn from(f: Fatal) -> Self {
|
||||
/// Self(Fault::from_fatal(f))
|
||||
/// }
|
||||
/// }
|
||||
///
|
||||
/// // Easy conversion from sub error types from other modules:
|
||||
/// impl From<runtime::Error> for Error {
|
||||
/// fn from(o: runtime::Error) -> Self {
|
||||
/// Self(Fault::from_other(o))
|
||||
/// }
|
||||
/// }
|
||||
///
|
||||
/// #[derive(Debug, Error)]
|
||||
/// pub enum Fatal {
|
||||
/// /// Really fatal stuff.
|
||||
/// #[error("Something fatal happened.")]
|
||||
/// SomeFatalError,
|
||||
/// /// Errors coming from runtime::Runtime.
|
||||
/// #[error("Error while accessing runtime information")]
|
||||
/// Runtime(#[from] #[source] runtime::Fatal),
|
||||
/// }
|
||||
///
|
||||
/// #[derive(Debug, Error)]
|
||||
/// pub enum NonFatal {
|
||||
/// /// Some non fatal error.
|
||||
/// /// For example if we prune a block we're requesting info about.
|
||||
/// #[error("Non fatal error happened.")]
|
||||
/// SomeNonFatalError,
|
||||
///
|
||||
/// /// Errors coming from runtime::Runtime.
|
||||
/// #[error("Error while accessing runtime information")]
|
||||
/// Runtime(#[from] #[source] runtime::NonFatal),
|
||||
/// }
|
||||
/// ```
|
||||
/// Then mostly use `Error` in functions, you may also use `NonFatal` and `Fatal` directly in
|
||||
/// functions that strictly only fail non fatal or fatal respectively, as `Fatal` and `NonFatal`
|
||||
/// can automatically be converted into the above defined `Error`.
|
||||
///
|
||||
#[derive(Debug, Error)]
|
||||
pub enum Fault<E, F>
|
||||
where
|
||||
E: std::fmt::Debug + std::error::Error + 'static,
|
||||
F: std::fmt::Debug + std::error::Error + 'static, {
|
||||
/// Error is fatal and should be escalated up.
|
||||
///
|
||||
/// While we usually won't want to pattern match on those, a concrete descriptive enum might
|
||||
/// still be a good idea for easy auditing of what can go wrong in a module and also makes for
|
||||
/// good error messages thanks to `thiserror`.
|
||||
#[error("Fatal error occurred.")]
|
||||
Fatal(#[source] F),
|
||||
/// Error that is not fatal, at least not yet at this level of execution.
|
||||
#[error("Non fatal error occurred.")]
|
||||
Err(#[source] E),
|
||||
}
|
||||
|
||||
/// Due to typesystem constraints we cannot implement the following methods as standard
|
||||
/// `From::from` implementations. So no auto conversions by default, a simple `Result::map_err` is
|
||||
/// not too bad though.
|
||||
impl<E, F> Fault<E, F>
|
||||
where
|
||||
E: std::fmt::Debug + std::error::Error + 'static,
|
||||
F: std::fmt::Debug + std::error::Error + 'static,
|
||||
{
|
||||
/// Build a `Fault` from compatible fatal error.
|
||||
pub fn from_fatal<F1: Into<F>>(f: F1) -> Self {
|
||||
Self::Fatal(f.into())
|
||||
}
|
||||
|
||||
/// Build a `Fault` from compatible non fatal error.
|
||||
pub fn from_non_fatal<E1: Into<E>>(e: E1) -> Self {
|
||||
Self::Err(e.into())
|
||||
}
|
||||
|
||||
/// Build a `Fault` from a compatible other `Fault`.
|
||||
pub fn from_other<E1, F1>(e: Fault<E1, F1>) -> Self
|
||||
where
|
||||
E1: Into<E> + std::fmt::Debug + std::error::Error + 'static,
|
||||
F1: Into<F> + std::fmt::Debug + std::error::Error + 'static,
|
||||
{
|
||||
match e {
|
||||
Fault::Fatal(f) => Self::from_fatal(f),
|
||||
Fault::Err(e) => Self::from_non_fatal(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Unwrap non fatal error and report fatal one.
|
||||
///
|
||||
/// This function is useful for top level error handling. Fatal errors will be extracted,
|
||||
/// non fatal error will be returned for handling.
|
||||
///
|
||||
/// Usage:
|
||||
///
|
||||
/// ```no_run
|
||||
/// # use thiserror::Error;
|
||||
/// # use polkadot_node_subsystem_util::{Fault, unwrap_non_fatal};
|
||||
/// # use polkadot_node_subsystem::SubsystemError;
|
||||
/// # #[derive(Error, Debug)]
|
||||
/// # enum Fatal {
|
||||
/// # }
|
||||
/// # #[derive(Error, Debug)]
|
||||
/// # enum NonFatal {
|
||||
/// # }
|
||||
/// # fn computation() -> Result<(), Fault<NonFatal, Fatal>> {
|
||||
/// # panic!();
|
||||
/// # }
|
||||
/// #
|
||||
/// // Use run like so:
|
||||
/// // run(ctx)
|
||||
/// // .map_err(|e| SubsystemError::with_origin("subsystem-name", e))
|
||||
/// fn run() -> std::result::Result<(), Fatal> {
|
||||
/// loop {
|
||||
/// // ....
|
||||
/// if let Some(err) = unwrap_non_fatal(computation())? {
|
||||
/// println!("Something bad happened: {}", err);
|
||||
/// continue
|
||||
/// }
|
||||
/// }
|
||||
/// }
|
||||
///
|
||||
/// ```
|
||||
pub fn unwrap_non_fatal<E,F>(result: Result<(), Fault<E,F>>) -> Result<Option<E>, F>
|
||||
where
|
||||
E: std::fmt::Debug + std::error::Error + 'static,
|
||||
F: std::fmt::Debug + std::error::Error + Send + Sync + 'static
|
||||
{
|
||||
match result {
|
||||
Ok(()) => Ok(None),
|
||||
Err(Fault::Fatal(f)) => Err(f),
|
||||
Err(Fault::Err(e)) => Ok(Some(e)),
|
||||
}
|
||||
}
|
||||
@@ -55,6 +55,11 @@ pub mod validator_discovery;
|
||||
pub use metered_channel as metered;
|
||||
pub use polkadot_node_network_protocol::MIN_GOSSIP_PEERS;
|
||||
|
||||
mod error_handling;
|
||||
|
||||
/// Error classification.
|
||||
pub use error_handling::{Fault, unwrap_non_fatal};
|
||||
|
||||
/// These reexports are required so that external crates can use the `delegated_subsystem` macro properly.
|
||||
pub mod reexports {
|
||||
pub use sp_core::traits::SpawnNamed;
|
||||
|
||||
@@ -23,15 +23,36 @@ use futures::channel::oneshot;
|
||||
use polkadot_node_subsystem::errors::RuntimeApiError;
|
||||
use polkadot_primitives::v1::SessionIndex;
|
||||
|
||||
use crate::Fault;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
/// Errors for `Runtime` cache.
|
||||
pub type Error = Fault<NonFatal, Fatal>;
|
||||
|
||||
impl From<NonFatal> for Error {
|
||||
fn from(e: NonFatal) -> Self {
|
||||
Self::from_non_fatal(e)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Fatal> for Error {
|
||||
fn from(f: Fatal) -> Self {
|
||||
Self::from_fatal(f)
|
||||
}
|
||||
}
|
||||
|
||||
/// Fatal runtime errors.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum Fatal {
|
||||
/// Runtime API subsystem is down, which means we're shutting down.
|
||||
#[error("Runtime request got canceled")]
|
||||
RuntimeRequestCanceled(oneshot::Canceled),
|
||||
}
|
||||
|
||||
/// Errors for fetching of runtime information.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum Error {
|
||||
/// Runtime API subsystem is down, which means we're shutting down.
|
||||
#[error("Runtime request canceled")]
|
||||
RuntimeRequestCanceled(oneshot::Canceled),
|
||||
|
||||
pub enum NonFatal {
|
||||
/// Some request to the runtime failed.
|
||||
/// For example if we prune a block we're requesting info about.
|
||||
#[error("Runtime API error")]
|
||||
@@ -45,8 +66,9 @@ pub enum Error {
|
||||
/// Receive a response from a runtime request and convert errors.
|
||||
pub(crate) async fn recv_runtime<V>(
|
||||
r: oneshot::Receiver<std::result::Result<V, RuntimeApiError>>,
|
||||
) -> std::result::Result<V, Error> {
|
||||
r.await
|
||||
.map_err(Error::RuntimeRequestCanceled)?
|
||||
.map_err(Error::RuntimeRequest)
|
||||
) -> Result<V> {
|
||||
let result = r.await
|
||||
.map_err(Fatal::RuntimeRequestCanceled)?
|
||||
.map_err(NonFatal::RuntimeRequest)?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
@@ -33,12 +33,12 @@ use crate::{
|
||||
mod error;
|
||||
|
||||
use error::{recv_runtime, Result};
|
||||
pub use error::Error;
|
||||
pub use error::{Error, NonFatal, Fatal};
|
||||
|
||||
/// Caching of session info.
|
||||
///
|
||||
/// It should be ensured that a cached session stays live in the cache as long as we might need it.
|
||||
pub struct Runtime {
|
||||
pub struct RuntimeInfo {
|
||||
/// Get the session index for a given relay parent.
|
||||
///
|
||||
/// We query this up to 100 times per block, so caching it here without roundtrips over the
|
||||
@@ -70,8 +70,8 @@ pub struct ValidatorInfo {
|
||||
pub our_group: Option<GroupIndex>,
|
||||
}
|
||||
|
||||
impl Runtime {
|
||||
/// Create a new `Runtime` for convenient runtime fetches.
|
||||
impl RuntimeInfo {
|
||||
/// Create a new `RuntimeInfo` for convenient runtime fetches.
|
||||
pub fn new(keystore: SyncCryptoStorePtr) -> Self {
|
||||
Self {
|
||||
// Adjust, depending on how many forks we want to support.
|
||||
@@ -134,7 +134,7 @@ impl Runtime {
|
||||
let session_info =
|
||||
recv_runtime(request_session_info(parent, session_index, ctx.sender()).await)
|
||||
.await?
|
||||
.ok_or(Error::NoSuchSession(session_index))?;
|
||||
.ok_or(NonFatal::NoSuchSession(session_index))?;
|
||||
let validator_info = self.get_validator_info(&session_info).await?;
|
||||
|
||||
let full_info = ExtendedSessionInfo {
|
||||
|
||||
@@ -224,9 +224,13 @@ pub enum NetworkBridgeMessage {
|
||||
SendCollationMessage(Vec<PeerId>, protocol_v1::CollationProtocol),
|
||||
|
||||
/// Send a batch of validation messages.
|
||||
///
|
||||
/// NOTE: Messages will be processed in order (at least statement distribution relies on this).
|
||||
SendValidationMessages(Vec<(Vec<PeerId>, protocol_v1::ValidationProtocol)>),
|
||||
|
||||
/// Send a batch of collation messages.
|
||||
///
|
||||
/// NOTE: Messages will be processed in order.
|
||||
SendCollationMessages(Vec<(Vec<PeerId>, protocol_v1::CollationProtocol)>),
|
||||
|
||||
/// Send requests via substrate request/response.
|
||||
|
||||
Reference in New Issue
Block a user