fatality based errors (#4448)

* seed commit for fatality based errors

* fatality

* first draft of fatality

* cleanup

* different approach

* simplify

* first working version for enums, with documentation

* add split

* fix simple split test case

* extend README.md

* update fatality impl

* make tests pass

* apply fatality to first subsystem

* fatality fixes

* use fatality in a subsystem

* fix subsystem

* fixup proc macro

* fix/test: log::*! do not execute when log handler is missing

* fix spelling

* rename Runtime2 to something sane

* allow nested split with `forward` annotations

* add free license

* enable and fixup all tests

* use external fatality

Makes this more reviewable.

* bump fatality dep

Avoid duplicate expander compilations.

* migrate availability distribution

* more fatality usage

* chore: bump fatality to 0.0.6

* fixup remaining subsystems

* chore: fmt

* make cargo spellcheck happy

* remove single instance of `#[fatal(false)]`

* last quality sweep

* fixup
This commit is contained in:
Bernhard Schuster
2022-02-25 18:25:26 +01:00
committed by GitHub
parent 85fa087405
commit d946582707
48 changed files with 425 additions and 659 deletions
@@ -22,88 +22,58 @@ use polkadot_node_subsystem_util::runtime;
use polkadot_primitives::v1::{CandidateHash, Hash};
use polkadot_subsystem::SubsystemError;
use thiserror::Error;
use crate::LOG_TARGET;
/// General result.
pub type Result<T> = std::result::Result<T, Error>;
/// Result for non-fatal only failures.
pub type NonFatalResult<T> = std::result::Result<T, NonFatal>;
pub type JfyiErrorResult<T> = std::result::Result<T, JfyiError>;
/// Result for fatal only failures.
pub type FatalResult<T> = std::result::Result<T, Fatal>;
pub type FatalResult<T> = std::result::Result<T, FatalError>;
/// Errors for statement distribution.
#[derive(Debug, Error, derive_more::From)]
#[error(transparent)]
use fatality::Nested;
#[allow(missing_docs)]
#[fatality::fatality(splitable)]
pub enum Error {
/// Fatal errors of statement distribution.
Fatal(Fatal),
/// Non-fatal errors of statement distribution.
NonFatal(NonFatal),
}
/// Converts a runtime error into the matching variant of this error type.
///
/// `runtime::Error::Fatal` is wrapped as `Error::Fatal(Fatal::Runtime(..))` and
/// `runtime::Error::NonFatal` as `Error::NonFatal(NonFatal::Runtime(..))`, so the
/// fatal/non-fatal classification of the source error is carried over unchanged.
impl From<runtime::Error> for Error {
fn from(o: runtime::Error) -> Self {
match o {
runtime::Error::Fatal(f) => Self::Fatal(Fatal::Runtime(f)),
runtime::Error::NonFatal(f) => Self::NonFatal(NonFatal::Runtime(f)),
}
}
}
/// Fatal errors.
#[derive(Debug, Error)]
pub enum Fatal {
/// Requester channel is never closed.
#[fatal]
#[error("Requester receiver stream finished")]
RequesterReceiverFinished,
/// Responder channel is never closed.
#[fatal]
#[error("Responder receiver stream finished")]
ResponderReceiverFinished,
/// Spawning a running task failed.
#[fatal]
#[error("Spawning subsystem task failed")]
SpawnTask(#[source] SubsystemError),
/// Receiving subsystem message from overseer failed.
#[fatal]
#[error("Receiving message from overseer failed")]
SubsystemReceive(#[source] SubsystemError),
/// Errors coming from runtime::Runtime.
#[fatal(forward)]
#[error("Error while accessing runtime information")]
Runtime(#[from] runtime::Fatal),
}
Runtime(#[from] runtime::Error),
/// Errors for fetching of runtime information.
#[derive(Debug, Error)]
pub enum NonFatal {
/// Errors coming from runtime::Runtime.
#[error("Error while accessing runtime information")]
Runtime(#[from] runtime::NonFatal),
/// Relay parent was not present in active heads.
#[error("Relay parent could not be found in active heads")]
NoSuchHead(Hash),
/// Received message from actually disconnected peer.
#[error("Message from not connected peer")]
NoSuchPeer(PeerId),
/// Peer requested statement data for candidate that was never announced to it.
#[error("Peer requested data for candidate it never received a notification for (malicious?)")]
RequestedUnannouncedCandidate(PeerId, CandidateHash),
/// A large statement status was requested, which could not be found.
// A large statement status was requested, which could not be found.
#[error("Statement status does not exist")]
NoSuchLargeStatementStatus(Hash, CandidateHash),
/// A fetched large statement was requested, but could not be found.
// A fetched large statement was requested, but could not be found.
#[error("Fetched large statement does not exist")]
NoSuchFetchedLargeStatement(Hash, CandidateHash),
/// Responder no longer waits for our data. (Should not happen right now.)
// Responder no longer waits for our data. (Should not happen right now.)
#[error("Oneshot `GetData` channel closed")]
ResponderGetDataCanceled,
}
@@ -112,14 +82,13 @@ pub enum NonFatal {
///
/// We basically always want to try and continue on error. This utility function is meant to
/// consume top-level errors by simply logging them.
pub fn log_error(result: Result<()>, ctx: &'static str) -> std::result::Result<(), Fatal> {
match result {
Err(Error::Fatal(f)) => Err(f),
Err(Error::NonFatal(error)) => {
match error {
NonFatal::RequestedUnannouncedCandidate(_, _) =>
tracing::warn!(target: LOG_TARGET, error = %error, ctx),
_ => tracing::debug!(target: LOG_TARGET, error = %error, ctx),
pub fn log_error(result: Result<()>, ctx: &'static str) -> std::result::Result<(), FatalError> {
match result.into_nested()? {
Err(jfyi) => {
match jfyi {
JfyiError::RequestedUnannouncedCandidate(_, _) =>
tracing::warn!(target: LOG_TARGET, error = %jfyi, ctx),
_ => tracing::debug!(target: LOG_TARGET, error = %jfyi, ctx),
}
Ok(())
},
@@ -22,7 +22,7 @@
#![deny(unused_crate_dependencies)]
#![warn(missing_docs)]
use error::{log_error, FatalResult, NonFatalResult};
use error::{log_error, FatalResult, JfyiErrorResult};
use parity_scale_codec::Encode;
use polkadot_node_network_protocol::{
@@ -62,8 +62,10 @@ use util::runtime::RuntimeInfo;
use std::collections::{hash_map::Entry, HashMap, HashSet};
use fatality::Nested;
mod error;
pub use error::{Error, Fatal, NonFatal, Result};
pub use error::{Error, FatalError, JfyiError, Result};
/// Background task logic for requesting of large statements.
mod requester;
@@ -608,7 +610,7 @@ impl MuxedMessage {
let from_responder = from_responder.next();
futures::pin_mut!(from_overseer, from_requester, from_responder);
futures::select! {
msg = from_overseer => MuxedMessage::Subsystem(msg.map_err(Fatal::SubsystemReceive)),
msg = from_overseer => MuxedMessage::Subsystem(msg.map_err(FatalError::SubsystemReceive)),
msg = from_requester => MuxedMessage::Requester(msg),
msg = from_responder => MuxedMessage::Responder(msg),
}
@@ -1548,7 +1550,7 @@ impl StatementDistributionSubsystem {
mut self,
mut ctx: (impl SubsystemContext<Message = StatementDistributionMessage>
+ overseer::SubsystemContext<Message = StatementDistributionMessage>),
) -> std::result::Result<(), Fatal> {
) -> std::result::Result<(), FatalError> {
let mut peers: HashMap<PeerId, PeerData> = HashMap::new();
let mut gossip_peers: HashSet<PeerId> = HashSet::new();
let mut authorities: HashMap<AuthorityDiscoveryId, PeerId> = HashMap::new();
@@ -1569,7 +1571,7 @@ impl StatementDistributionSubsystem {
)
.boxed(),
)
.map_err(Fatal::SpawnTask)?;
.map_err(FatalError::SpawnTask)?;
loop {
let message =
@@ -1588,11 +1590,10 @@ impl StatementDistributionSubsystem {
result?,
)
.await;
match result {
match result.into_nested()? {
Ok(true) => break,
Ok(false) => {},
Err(Error::Fatal(f)) => return Err(f),
Err(Error::NonFatal(error)) => tracing::debug!(target: LOG_TARGET, ?error),
Err(jfyi) => tracing::debug!(target: LOG_TARGET, error = ?jfyi),
}
},
MuxedMessage::Requester(result) => {
@@ -1603,7 +1604,7 @@ impl StatementDistributionSubsystem {
&mut peers,
&mut active_heads,
&req_sender,
result.ok_or(Fatal::RequesterReceiverFinished)?,
result.ok_or(FatalError::RequesterReceiverFinished)?,
)
.await;
log_error(result.map_err(From::from), "handle_requester_message")?;
@@ -1613,7 +1614,7 @@ impl StatementDistributionSubsystem {
.handle_responder_message(
&peers,
&mut active_heads,
result.ok_or(Fatal::ResponderReceiverFinished)?,
result.ok_or(FatalError::ResponderReceiverFinished)?,
)
.await;
log_error(result.map_err(From::from), "handle_responder_message")?;
@@ -1629,7 +1630,7 @@ impl StatementDistributionSubsystem {
peers: &HashMap<PeerId, PeerData>,
active_heads: &mut HashMap<Hash, ActiveHeadData>,
message: ResponderMessage,
) -> NonFatalResult<()> {
) -> JfyiErrorResult<()> {
match message {
ResponderMessage::GetData { requesting_peer, relay_parent, candidate_hash, tx } => {
if !requesting_peer_knows_about_candidate(
@@ -1638,25 +1639,25 @@ impl StatementDistributionSubsystem {
&relay_parent,
&candidate_hash,
)? {
return Err(NonFatal::RequestedUnannouncedCandidate(
return Err(JfyiError::RequestedUnannouncedCandidate(
requesting_peer,
candidate_hash,
))
}
let active_head =
active_heads.get(&relay_parent).ok_or(NonFatal::NoSuchHead(relay_parent))?;
active_heads.get(&relay_parent).ok_or(JfyiError::NoSuchHead(relay_parent))?;
let committed = match active_head.waiting_large_statements.get(&candidate_hash) {
Some(LargeStatementStatus::FetchedOrShared(committed)) => committed.clone(),
_ =>
return Err(NonFatal::NoSuchFetchedLargeStatement(
return Err(JfyiError::NoSuchFetchedLargeStatement(
relay_parent,
candidate_hash,
)),
};
tx.send(committed).map_err(|_| NonFatal::ResponderGetDataCanceled)?;
tx.send(committed).map_err(|_| JfyiError::ResponderGetDataCanceled)?;
},
}
Ok(())
@@ -1670,7 +1671,7 @@ impl StatementDistributionSubsystem {
active_heads: &mut HashMap<Hash, ActiveHeadData>,
req_sender: &mpsc::Sender<RequesterMessage>,
message: RequesterMessage,
) -> NonFatalResult<()> {
) -> JfyiErrorResult<()> {
match message {
RequesterMessage::Finished {
relay_parent,
@@ -1686,7 +1687,7 @@ impl StatementDistributionSubsystem {
let active_head = active_heads
.get_mut(&relay_parent)
.ok_or(NonFatal::NoSuchHead(relay_parent))?;
.ok_or(JfyiError::NoSuchHead(relay_parent))?;
let status = active_head.waiting_large_statements.remove(&candidate_hash);
@@ -1697,7 +1698,7 @@ impl StatementDistributionSubsystem {
return Ok(())
},
None =>
return Err(NonFatal::NoSuchLargeStatementStatus(
return Err(JfyiError::NoSuchLargeStatementStatus(
relay_parent,
candidate_hash,
)),
@@ -1734,7 +1735,7 @@ impl StatementDistributionSubsystem {
RequesterMessage::GetMorePeers { relay_parent, candidate_hash, tx } => {
let active_head = active_heads
.get_mut(&relay_parent)
.ok_or(NonFatal::NoSuchHead(relay_parent))?;
.ok_or(JfyiError::NoSuchHead(relay_parent))?;
let status = active_head.waiting_large_statements.get_mut(&candidate_hash);
@@ -1746,7 +1747,7 @@ impl StatementDistributionSubsystem {
return Ok(())
},
None =>
return Err(NonFatal::NoSuchLargeStatementStatus(
return Err(JfyiError::NoSuchLargeStatementStatus(
relay_parent,
candidate_hash,
)),
@@ -1836,7 +1837,7 @@ impl StatementDistributionSubsystem {
.get_mut(&relay_parent)
// This should never be out-of-sync with our view if the view
// updates correspond to actual `StartWork` messages.
.ok_or(NonFatal::NoSuchHead(relay_parent))?;
.ok_or(JfyiError::NoSuchHead(relay_parent))?;
active_head.waiting_large_statements.insert(
statement.payload().candidate_hash(),
LargeStatementStatus::FetchedOrShared(committed.clone()),
@@ -1909,14 +1910,14 @@ fn requesting_peer_knows_about_candidate(
requesting_peer: &PeerId,
relay_parent: &Hash,
candidate_hash: &CandidateHash,
) -> NonFatalResult<bool> {
) -> JfyiErrorResult<bool> {
let peer_data = peers
.get(requesting_peer)
.ok_or_else(|| NonFatal::NoSuchPeer(*requesting_peer))?;
.ok_or_else(|| JfyiError::NoSuchPeer(*requesting_peer))?;
let knowledge = peer_data
.view_knowledge
.get(relay_parent)
.ok_or_else(|| NonFatal::NoSuchHead(*relay_parent))?;
.ok_or_else(|| JfyiError::NoSuchHead(*relay_parent))?;
Ok(knowledge.sent_candidates.get(&candidate_hash).is_some())
}
@@ -20,9 +20,10 @@ use futures::{
SinkExt, StreamExt,
};
use fatality::Nested;
use polkadot_node_network_protocol::{
request_response::{
incoming::{self, OutgoingResponse},
incoming::OutgoingResponse,
v1::{StatementFetchingRequest, StatementFetchingResponse},
IncomingRequestReceiver, MAX_PARALLEL_STATEMENT_REQUESTS,
},
@@ -74,16 +75,16 @@ pub async fn respond(
pending_out.next().await;
}
let req = match receiver.recv(|| vec![COST_INVALID_REQUEST]).await {
Err(incoming::Error::Fatal(f)) => {
tracing::debug!(target: LOG_TARGET, error = ?f, "Shutting down request responder");
let req = match receiver.recv(|| vec![COST_INVALID_REQUEST]).await.into_nested() {
Ok(Ok(v)) => v,
Err(fatal) => {
tracing::debug!(target: LOG_TARGET, error = ?fatal, "Shutting down request responder");
return
},
Err(incoming::Error::NonFatal(err)) => {
tracing::debug!(target: LOG_TARGET, ?err, "Decoding request failed");
Ok(Err(jfyi)) => {
tracing::debug!(target: LOG_TARGET, error = ?jfyi, "Decoding request failed");
continue
},
Ok(v) => v,
};
let (tx, rx) = oneshot::channel();