fatality based errors (#4448)

* seed commit for fatality based errors

* fatality

* first draft of fatality

* cleanup

* different approach

* simplify

* first working version for enums, with documentation

* add split

* fix simple split test case

* extend README.md

* update fatality impl

* make tests pass

* apply fatality to first subsystem

* fatality fixes

* use fatality in a subsystem

* fix subsystem

* fixup proc macro

* fix/test: log::*! do not execute when log handler is missing

* fix spelling

* rename Runtime2 to something sane

* allow nested split with `forward` annotations

* add free license

* enable and fixup all tests

* use external fatality

Makes this more reviewable.

* bump fatality dep

Avoid duplicate expander compilations.

* migrate availability distribution

* more fatality usage

* chore: bump fatality to 0.0.6

* fixup remaining subsystems

* chore: fmt

* make cargo spellcheck happy

* remove single instance of `#[fatal(false)]`

* last quality sweep

* fixup
This commit is contained in:
Bernhard Schuster
2022-02-25 18:25:26 +01:00
committed by GitHub
parent 85fa087405
commit d946582707
48 changed files with 425 additions and 659 deletions
@@ -17,76 +17,54 @@
//! Error handling related code and Error/Result definitions.
use thiserror::Error;
use polkadot_node_subsystem_util::runtime;
use polkadot_subsystem::SubsystemError;
use crate::{sender, LOG_TARGET};
#[derive(Debug, Error, derive_more::From)]
#[error(transparent)]
use fatality::Nested;
#[allow(missing_docs)]
#[fatality::fatality(splitable)]
pub enum Error {
/// Fatal errors of dispute distribution.
Fatal(Fatal),
/// Non-fatal errors of dispute distribution.
NonFatal(NonFatal),
}
impl From<sender::Error> for Error {
fn from(o: sender::Error) -> Self {
match o {
sender::Error::Fatal(f) => Self::Fatal(Fatal::Sender(f)),
sender::Error::NonFatal(f) => Self::NonFatal(NonFatal::Sender(f)),
}
}
}
/// Fatal errors of this subsystem.
#[derive(Debug, Error)]
pub enum Fatal {
/// Receiving subsystem message from overseer failed.
#[fatal]
#[error("Receiving message from overseer failed")]
SubsystemReceive(#[source] SubsystemError),
/// Spawning a running task failed.
#[fatal]
#[error("Spawning subsystem task failed")]
SpawnTask(#[source] SubsystemError),
/// `DisputeSender` mpsc receiver exhausted.
#[fatal]
#[error("Erasure chunk requester stream exhausted")]
SenderExhausted,
/// Errors coming from `runtime::Runtime`.
#[fatal(forward)]
#[error("Error while accessing runtime information")]
Runtime(#[from] runtime::Fatal),
Runtime(#[from] runtime::Error),
/// Errors coming from `DisputeSender`
#[fatal(forward)]
#[error("Error while accessing runtime information")]
Sender(#[from] sender::Fatal),
}
/// Non-fatal errors of this subsystem.
#[derive(Debug, Error)]
pub enum NonFatal {
/// Errors coming from `DisputeSender`
#[error("Error while accessing runtime information")]
Sender(#[from] sender::NonFatal),
Sender(#[from] sender::Error),
}
pub type Result<T> = std::result::Result<T, Error>;
pub type FatalResult<T> = std::result::Result<T, Fatal>;
pub type FatalResult<T> = std::result::Result<T, FatalError>;
/// Utility for eating top-level errors and logging them.
///
/// We basically always want to try and continue on error. This utility function is meant to
/// consume top-level errors by simply logging them.
pub fn log_error(result: Result<()>, ctx: &'static str) -> std::result::Result<(), Fatal> {
match result {
Err(Error::Fatal(f)) => Err(f),
Err(Error::NonFatal(error)) => {
tracing::warn!(target: LOG_TARGET, error = ?error, ctx);
pub fn log_error(result: Result<()>, ctx: &'static str) -> std::result::Result<(), FatalError> {
match result.into_nested()? {
Err(jfyi) => {
tracing::warn!(target: LOG_TARGET, error = ?jfyi, ctx);
Ok(())
},
Ok(()) => Ok(()),
@@ -82,7 +82,7 @@ use self::receiver::DisputesReceiver;
/// Error and [`Result`] type for this subsystem.
mod error;
use error::{log_error, Fatal, FatalResult, Result};
use error::{log_error, FatalError, FatalResult, Result};
#[cfg(test)]
mod tests;
@@ -160,7 +160,7 @@ where
}
/// Start processing work as passed on from the Overseer.
async fn run<Context>(mut self, mut ctx: Context) -> std::result::Result<(), Fatal>
async fn run<Context>(mut self, mut ctx: Context) -> std::result::Result<(), FatalError>
where
Context: SubsystemContext<Message = DisputeDistributionMessage>
+ overseer::SubsystemContext<Message = DisputeDistributionMessage>
@@ -176,7 +176,7 @@ where
self.metrics.clone(),
);
ctx.spawn("disputes-receiver", receiver.run().boxed())
.map_err(Fatal::SpawnTask)?;
.map_err(FatalError::SpawnTask)?;
loop {
let message = MuxedMessage::receive(&mut ctx, &mut self.sender_rx).await;
@@ -197,7 +197,7 @@ where
},
MuxedMessage::Sender(result) => {
self.disputes_sender
.on_task_message(result.ok_or(Fatal::SenderExhausted)?)
.on_task_message(result.ok_or(FatalError::SenderExhausted)?)
.await;
},
}
@@ -254,7 +254,7 @@ impl MuxedMessage {
let from_overseer = ctx.recv().fuse();
futures::pin_mut!(from_overseer, from_sender);
futures::select!(
msg = from_overseer => MuxedMessage::Subsystem(msg.map_err(Fatal::SubsystemReceive)),
msg = from_overseer => MuxedMessage::Subsystem(msg.map_err(FatalError::SubsystemReceive)),
msg = from_sender.next() => MuxedMessage::Sender(msg),
)
}
@@ -17,100 +17,55 @@
//! Error handling related code and Error/Result definitions.
use thiserror::Error;
use fatality::Nested;
use polkadot_node_network_protocol::{request_response::incoming, PeerId};
use polkadot_node_subsystem_util::runtime;
use crate::LOG_TARGET;
#[derive(Debug, Error, derive_more::From)]
#[error(transparent)]
#[allow(missing_docs)]
#[fatality::fatality(splitable)]
pub enum Error {
/// All fatal errors.
Fatal(Fatal),
/// All nonfatal/potentially recoverable errors.
NonFatal(NonFatal),
}
impl From<runtime::Error> for Error {
fn from(o: runtime::Error) -> Self {
match o {
runtime::Error::Fatal(f) => Self::Fatal(Fatal::Runtime(f)),
runtime::Error::NonFatal(f) => Self::NonFatal(NonFatal::Runtime(f)),
}
}
}
impl From<incoming::Error> for Error {
fn from(o: incoming::Error) -> Self {
match o {
incoming::Error::Fatal(f) => Self::Fatal(Fatal::IncomingRequest(f)),
incoming::Error::NonFatal(f) => Self::NonFatal(NonFatal::IncomingRequest(f)),
}
}
}
/// Fatal errors of this subsystem.
#[derive(Debug, Error)]
pub enum Fatal {
/// Errors coming from runtime::Runtime.
#[fatal(forward)]
#[error("Error while accessing runtime information")]
Runtime(#[from] runtime::Fatal),
Runtime(#[from] runtime::Error),
/// Errors coming from receiving incoming requests.
#[fatal(forward)]
#[error("Retrieving next incoming request failed.")]
IncomingRequest(#[from] incoming::Fatal),
}
IncomingRequest(#[from] incoming::Error),
/// Non-fatal errors of this subsystem.
#[derive(Debug, Error)]
pub enum NonFatal {
/// Answering request failed.
#[error("Sending back response to peer {0} failed.")]
SendResponse(PeerId),
/// Setting reputation for peer failed.
#[error("Changing peer's ({0}) reputation failed.")]
SetPeerReputation(PeerId),
/// Peer sent us request with invalid signature.
#[error("Dispute request with invalid signatures, from peer {0}.")]
InvalidSignature(PeerId),
/// Import oneshot got canceled.
#[error("Import of dispute got canceled for peer {0} - import failed for some reason.")]
ImportCanceled(PeerId),
/// Non validator tried to participate in dispute.
#[error("Peer {0} is not a validator.")]
#[error("Peer {0} attempted to participate in dispute and is not a validator.")]
NotAValidator(PeerId),
/// Errors coming from runtime::Runtime.
#[error("Error while accessing runtime information")]
Runtime(#[from] runtime::NonFatal),
/// Errors coming from receiving incoming requests.
#[error("Retrieving next incoming request failed.")]
IncomingRequest(#[from] incoming::NonFatal),
}
pub type Result<T> = std::result::Result<T, Error>;
pub type NonFatalResult<T> = std::result::Result<T, NonFatal>;
pub type JfyiErrorResult<T> = std::result::Result<T, JfyiError>;
/// Utility for eating top-level errors and logging them.
///
/// We basically always want to try and continue on error. This utility function is meant to
/// consume top-level errors by simply logging them.
pub fn log_error(result: Result<()>) -> std::result::Result<(), Fatal> {
match result {
Err(Error::Fatal(f)) => Err(f),
Err(Error::NonFatal(error @ NonFatal::ImportCanceled(_))) => {
pub fn log_error(result: Result<()>) -> std::result::Result<(), FatalError> {
match result.into_nested()? {
Err(error @ JfyiError::ImportCanceled(_)) => {
tracing::debug!(target: LOG_TARGET, error = ?error);
Ok(())
},
Err(Error::NonFatal(error)) => {
Err(error) => {
tracing::warn!(target: LOG_TARGET, error = ?error);
Ok(())
},
@@ -32,7 +32,7 @@ use lru::LruCache;
use polkadot_node_network_protocol::{
authority_discovery::AuthorityDiscovery,
request_response::{
incoming::{OutgoingResponse, OutgoingResponseSender},
incoming::{self, OutgoingResponse, OutgoingResponseSender},
v1::{DisputeRequest, DisputeResponse},
IncomingRequest, IncomingRequestReceiver,
},
@@ -51,7 +51,7 @@ use crate::{
};
mod error;
use self::error::{log_error, NonFatal, NonFatalResult, Result};
use self::error::{log_error, JfyiError, JfyiErrorResult, Result};
const COST_INVALID_REQUEST: Rep = Rep::CostMajor("Received message could not be decoded.");
const COST_INVALID_SIGNATURE: Rep = Rep::Malicious("Signatures were invalid.");
@@ -101,7 +101,7 @@ enum MuxedMessage {
/// - We need to make sure responses are actually sent (therefore we need to await futures
/// promptly).
/// - We need to update `banned_peers` accordingly to the result.
ConfirmedImport(NonFatalResult<(PeerId, ImportStatementsResult)>),
ConfirmedImport(JfyiErrorResult<(PeerId, ImportStatementsResult)>),
/// A new request has arrived and should be handled.
NewRequest(IncomingRequest<DisputeRequest>),
@@ -117,7 +117,7 @@ impl MuxedMessage {
pin_mut!(next_req);
if let Poll::Ready(r) = next_req.poll(ctx) {
return match r {
Err(e) => Poll::Ready(Err(e.into())),
Err(e) => Poll::Ready(Err(incoming::Error::from(e).into())),
Ok(v) => Poll::Ready(Ok(Self::NewRequest(v))),
}
}
@@ -204,9 +204,9 @@ where
reputation_changes: vec![COST_NOT_A_VALIDATOR],
sent_feedback: None,
})
.map_err(|_| NonFatal::SendResponse(peer))?;
.map_err(|_| JfyiError::SendResponse(peer))?;
return Err(NonFatal::NotAValidator(peer).into())
return Err(JfyiError::NotAValidator(peer).into())
}
// Immediately drop requests from peers that already have requests in flight or have
@@ -255,9 +255,9 @@ where
reputation_changes: vec![COST_INVALID_SIGNATURE],
sent_feedback: None,
})
.map_err(|_| NonFatal::SetPeerReputation(peer))?;
.map_err(|_| JfyiError::SetPeerReputation(peer))?;
return Err(From::from(NonFatal::InvalidSignature(peer)))
return Err(From::from(JfyiError::InvalidSignature(peer)))
},
Ok(votes) => votes,
};
@@ -285,8 +285,8 @@ where
/// In addition we report import metrics.
fn ban_bad_peer(
&mut self,
result: NonFatalResult<(PeerId, ImportStatementsResult)>,
) -> NonFatalResult<()> {
result: JfyiErrorResult<(PeerId, ImportStatementsResult)>,
) -> JfyiErrorResult<()> {
match result? {
(_, ImportStatementsResult::ValidImport) => {
self.metrics.on_imported(SUCCEEDED);
@@ -303,7 +303,8 @@ where
/// Manage pending imports in a way that preserves invariants.
struct PendingImports {
/// Futures in flight.
futures: FuturesUnordered<BoxFuture<'static, (PeerId, NonFatalResult<ImportStatementsResult>)>>,
futures:
FuturesUnordered<BoxFuture<'static, (PeerId, JfyiErrorResult<ImportStatementsResult>)>>,
/// Peers whose requests are currently in flight.
peers: HashSet<PeerId>,
}
@@ -341,7 +342,7 @@ impl PendingImports {
}
impl Stream for PendingImports {
type Item = NonFatalResult<(PeerId, ImportStatementsResult)>;
type Item = JfyiErrorResult<(PeerId, ImportStatementsResult)>;
fn poll_next(mut self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
match Pin::new(&mut self.futures).poll_next(ctx) {
Poll::Pending => Poll::Pending,
@@ -368,8 +369,8 @@ async fn respond_to_request(
peer: PeerId,
handled: oneshot::Receiver<ImportStatementsResult>,
pending_response: OutgoingResponseSender<DisputeRequest>,
) -> NonFatalResult<ImportStatementsResult> {
let result = handled.await.map_err(|_| NonFatal::ImportCanceled(peer))?;
) -> JfyiErrorResult<ImportStatementsResult> {
let result = handled.await.map_err(|_| JfyiError::ImportCanceled(peer))?;
let response = match result {
ImportStatementsResult::ValidImport => OutgoingResponse {
@@ -386,7 +387,7 @@ async fn respond_to_request(
pending_response
.send_outgoing_response(response)
.map_err(|_| NonFatal::SendResponse(peer))?;
.map_err(|_| JfyiError::SendResponse(peer))?;
Ok(result)
}
@@ -17,46 +17,21 @@
//! Error handling related code and Error/Result definitions.
use thiserror::Error;
use polkadot_node_primitives::disputes::DisputeMessageCheckError;
use polkadot_node_subsystem_util::runtime;
use polkadot_subsystem::SubsystemError;
#[derive(Debug, Error, derive_more::From)]
#[error(transparent)]
#[allow(missing_docs)]
#[fatality::fatality(splitable)]
pub enum Error {
/// All fatal errors.
Fatal(Fatal),
/// All nonfatal/potentially recoverable errors.
NonFatal(NonFatal),
}
impl From<runtime::Error> for Error {
fn from(o: runtime::Error) -> Self {
match o {
runtime::Error::Fatal(f) => Self::Fatal(Fatal::Runtime(f)),
runtime::Error::NonFatal(f) => Self::NonFatal(NonFatal::Runtime(f)),
}
}
}
/// Fatal errors of this subsystem.
#[derive(Debug, Error)]
#[error(transparent)]
pub enum Fatal {
/// Spawning a running task failed.
#[fatal]
#[error("Spawning subsystem task failed")]
SpawnTask(#[source] SubsystemError),
/// Errors coming from runtime::Runtime.
#[fatal(forward)]
#[error("Error while accessing runtime information")]
Runtime(#[from] runtime::Fatal),
}
Runtime(#[from] runtime::Error),
/// Non-fatal errors of this subsystem.
#[derive(Debug, Error)]
pub enum NonFatal {
/// We need available active heads for finding relevant authorities.
#[error("No active heads available - needed for finding relevant authorities.")]
NoActiveHeads,
@@ -92,11 +67,7 @@ pub enum NonFatal {
/// A statement's `ValidatorIndex` could not be looked up.
#[error("ValidatorIndex of statement could not be found")]
InvalidValidatorIndexFromCoordinator,
/// Errors coming from runtime::Runtime.
#[error("Error while accessing runtime information")]
Runtime(#[from] runtime::NonFatal),
}
pub type Result<T> = std::result::Result<T, Error>;
pub type NonFatalResult<T> = std::result::Result<T, NonFatal>;
pub type JfyiErrorResult<T> = std::result::Result<T, JfyiError>;
@@ -37,9 +37,9 @@ pub use send_task::TaskFinish;
/// Error and [`Result`] type for sender
mod error;
pub use error::{Error, Fatal, NonFatal, Result};
pub use error::{Error, FatalError, JfyiError, Result};
use self::error::NonFatalResult;
use self::error::JfyiErrorResult;
use crate::{Metrics, LOG_TARGET};
/// The `DisputeSender` keeps track of all ongoing disputes we need to send statements out.
@@ -208,7 +208,7 @@ impl DisputeSender {
None
}
})
.ok_or(NonFatal::NoActiveHeads)?;
.ok_or(JfyiError::NoActiveHeads)?;
let info = runtime
.get_session_info_by_index(ctx.sender(), *ref_head, session_index)
@@ -243,11 +243,12 @@ impl DisputeSender {
let (valid_vote, invalid_vote) = if let Some(our_valid_vote) = our_valid_vote {
// Get some invalid vote as well:
let invalid_vote = votes.invalid.get(0).ok_or(NonFatal::MissingVotesFromCoordinator)?;
let invalid_vote =
votes.invalid.get(0).ok_or(JfyiError::MissingVotesFromCoordinator)?;
(our_valid_vote, invalid_vote)
} else if let Some(our_invalid_vote) = our_invalid_vote {
// Get some valid vote as well:
let valid_vote = votes.valid.get(0).ok_or(NonFatal::MissingVotesFromCoordinator)?;
let valid_vote = votes.valid.get(0).ok_or(JfyiError::MissingVotesFromCoordinator)?;
(valid_vote, our_invalid_vote)
} else {
// There is no vote from us yet - nothing to do.
@@ -258,7 +259,7 @@ impl DisputeSender {
.session_info
.validators
.get(valid_index.0 as usize)
.ok_or(NonFatal::InvalidStatementFromCoordinator)?;
.ok_or(JfyiError::InvalidStatementFromCoordinator)?;
let valid_signed = SignedDisputeStatement::new_checked(
DisputeStatement::Valid(kind.clone()),
candidate_hash,
@@ -266,14 +267,14 @@ impl DisputeSender {
valid_public.clone(),
signature.clone(),
)
.map_err(|()| NonFatal::InvalidStatementFromCoordinator)?;
.map_err(|()| JfyiError::InvalidStatementFromCoordinator)?;
let (kind, invalid_index, signature) = invalid_vote;
let invalid_public = info
.session_info
.validators
.get(invalid_index.0 as usize)
.ok_or(NonFatal::InvalidValidatorIndexFromCoordinator)?;
.ok_or(JfyiError::InvalidValidatorIndexFromCoordinator)?;
let invalid_signed = SignedDisputeStatement::new_checked(
DisputeStatement::Invalid(kind.clone()),
candidate_hash,
@@ -281,7 +282,7 @@ impl DisputeSender {
invalid_public.clone(),
signature.clone(),
)
.map_err(|()| NonFatal::InvalidValidatorIndexFromCoordinator)?;
.map_err(|()| JfyiError::InvalidValidatorIndexFromCoordinator)?;
// Reconstructing the checked signed dispute statements is hardly useful here and wasteful,
// but I don't want to enable a bypass for the below smart constructor and this code path
@@ -297,7 +298,7 @@ impl DisputeSender {
votes.candidate_receipt,
&info.session_info,
)
.map_err(NonFatal::InvalidDisputeFromCoordinator)?;
.map_err(JfyiError::InvalidDisputeFromCoordinator)?;
// Finally, get the party started:
self.start_sender(ctx, runtime, message).await
@@ -341,13 +342,13 @@ async fn get_active_session_indices<Context: SubsystemContext>(
/// Retrieve Set of active disputes from the dispute coordinator.
async fn get_active_disputes<Context: SubsystemContext>(
ctx: &mut Context,
) -> NonFatalResult<Vec<(SessionIndex, CandidateHash)>> {
) -> JfyiErrorResult<Vec<(SessionIndex, CandidateHash)>> {
let (tx, rx) = oneshot::channel();
ctx.send_message(AllMessages::DisputeCoordinator(DisputeCoordinatorMessage::ActiveDisputes(
tx,
)))
.await;
rx.await.map_err(|_| NonFatal::AskActiveDisputesCanceled)
rx.await.map_err(|_| JfyiError::AskActiveDisputesCanceled)
}
/// Get all locally available dispute votes for a given dispute.
@@ -355,7 +356,7 @@ async fn get_candidate_votes<Context: SubsystemContext>(
ctx: &mut Context,
session_index: SessionIndex,
candidate_hash: CandidateHash,
) -> NonFatalResult<Option<CandidateVotes>> {
) -> JfyiErrorResult<Option<CandidateVotes>> {
let (tx, rx) = oneshot::channel();
ctx.send_message(AllMessages::DisputeCoordinator(
DisputeCoordinatorMessage::QueryCandidateVotes(vec![(session_index, candidate_hash)], tx),
@@ -363,5 +364,5 @@ async fn get_candidate_votes<Context: SubsystemContext>(
.await;
rx.await
.map(|v| v.get(0).map(|inner| inner.to_owned().2))
.map_err(|_| NonFatal::AskCandidateVotesCanceled)
.map_err(|_| JfyiError::AskCandidateVotesCanceled)
}
@@ -35,7 +35,7 @@ use polkadot_subsystem::{
SubsystemContext,
};
use super::error::{Fatal, Result};
use super::error::{FatalError, Result};
use crate::{
metrics::{FAILED, SUCCEEDED},
@@ -266,7 +266,7 @@ async fn send_requests<Context: SubsystemContext>(
);
let (remote, remote_handle) = fut.remote_handle();
ctx.spawn("dispute-sender", remote.boxed()).map_err(Fatal::SpawnTask)?;
ctx.spawn("dispute-sender", remote.boxed()).map_err(FatalError::SpawnTask)?;
statuses.insert(receiver, DeliveryStatus::Pending(remote_handle));
}