Remove request multiplexer (#3624)

* WIP: Get rid of request multiplexer.

* WIP

* Receiver for handling of incoming requests.

* Get rid of useless `Fault` abstraction.

The things the type system lets us do are not worth abstracting into
its own type. Instead, error handling is going to be merely a pattern.

* Make most things compile again.

* Port availability distribution away from request multiplexer.

* Formatting.

* Port dispute distribution over.

* Fixup statement distribution.

* Handle request directly in collator protocol.

+ Only allow fatal errors at top level.

* Use direct request channel for availability recovery.

* Finally get rid of request multiplexer

Fixes #2842 and paves the way for more back pressure possibilities.

* Fix overseer and statement distribution tests.

* Fix collator protocol and network bridge tests.

* Fix tests in availability recovery.

* Fix availability distribution tests.

* Fix dispute distribution tests.

* Add missing dependency

* Typos.

* Review remarks.

* More remarks.
This commit is contained in:
Robert Klotzner
2021-08-12 13:11:36 +02:00
committed by GitHub
parent ecf71233c3
commit 55154a8d37
51 changed files with 1509 additions and 1746 deletions
@@ -18,11 +18,11 @@
//! Error handling related code and Error/Result definitions.
use polkadot_node_network_protocol::PeerId;
use polkadot_node_subsystem_util::runtime;
use polkadot_primitives::v1::{CandidateHash, Hash};
use polkadot_subsystem::SubsystemError;
use thiserror::Error;
use polkadot_node_subsystem_util::{runtime, unwrap_non_fatal, Fault};
use thiserror::Error;
use crate::LOG_TARGET;
@@ -34,29 +34,25 @@ pub type NonFatalResult<T> = std::result::Result<T, NonFatal>;
pub type FatalResult<T> = std::result::Result<T, Fatal>;
/// Errors for statement distribution.
#[derive(Debug, Error)]
#[derive(Debug, Error, derive_more::From)]
#[error(transparent)]
pub struct Error(pub Fault<NonFatal, Fatal>);
impl From<NonFatal> for Error {
fn from(e: NonFatal) -> Self {
Self(Fault::from_non_fatal(e))
}
}
impl From<Fatal> for Error {
fn from(f: Fatal) -> Self {
Self(Fault::from_fatal(f))
}
pub enum Error {
/// Fatal errors of dispute distribution.
Fatal(Fatal),
/// Non fatal errors of dispute distribution.
NonFatal(NonFatal),
}
impl From<runtime::Error> for Error {
fn from(o: runtime::Error) -> Self {
Self(Fault::from_other(o))
match o {
runtime::Error::Fatal(f) => Self::Fatal(Fatal::Runtime(f)),
runtime::Error::NonFatal(f) => Self::NonFatal(NonFatal::Runtime(f)),
}
}
}
/// Fatal runtime errors.
/// Fatal errors.
#[derive(Debug, Error)]
pub enum Fatal {
/// Requester channel is never closed.
@@ -112,9 +108,13 @@ pub enum NonFatal {
///
/// We basically always want to try and continue on error. This utility function is meant to
/// consume top-level errors by simply logging them.
pub fn log_error(result: Result<()>, ctx: &'static str) -> FatalResult<()> {
if let Some(error) = unwrap_non_fatal(result.map_err(|e| e.0))? {
tracing::debug!(target: LOG_TARGET, error = ?error, ctx)
pub fn log_error(result: Result<()>, ctx: &'static str) -> std::result::Result<(), Fatal> {
match result {
Err(Error::Fatal(f)) => Err(f),
Err(Error::NonFatal(error)) => {
tracing::warn!(target: LOG_TARGET, error = ?error, ctx);
Ok(())
},
Ok(()) => Ok(()),
}
Ok(())
}
@@ -27,6 +27,7 @@ use parity_scale_codec::Encode;
use polkadot_node_network_protocol::{
peer_set::{IsAuthority, PeerSet},
request_response::{v1 as request_v1, IncomingRequestReceiver},
v1::{self as protocol_v1, StatementMetadata},
IfDisconnected, PeerId, UnifiedReputationChange as Rep, View,
};
@@ -57,7 +58,7 @@ use futures::{
};
use indexmap::{map::Entry as IEntry, IndexMap};
use sp_keystore::SyncCryptoStorePtr;
use util::{runtime::RuntimeInfo, Fault};
use util::runtime::RuntimeInfo;
use std::collections::{hash_map::Entry, HashMap, HashSet};
@@ -106,6 +107,8 @@ const MAX_LARGE_STATEMENTS_PER_SENDER: usize = 20;
pub struct StatementDistribution {
/// Pointer to a keystore, which is required for determining this node's validator index.
keystore: SyncCryptoStorePtr,
/// Receiver for incoming large statement requests.
req_receiver: Option<IncomingRequestReceiver<request_v1::StatementFetchingRequest>>,
// Prometheus metrics
metrics: Metrics,
}
@@ -130,8 +133,12 @@ where
impl StatementDistribution {
/// Create a new Statement Distribution Subsystem
pub fn new(keystore: SyncCryptoStorePtr, metrics: Metrics) -> StatementDistribution {
StatementDistribution { keystore, metrics }
pub fn new(
keystore: SyncCryptoStorePtr,
req_receiver: IncomingRequestReceiver<request_v1::StatementFetchingRequest>,
metrics: Metrics,
) -> StatementDistribution {
StatementDistribution { keystore, req_receiver: Some(req_receiver), metrics }
}
}
@@ -1526,7 +1533,7 @@ async fn handle_network_update(
impl StatementDistribution {
async fn run(
self,
mut self,
mut ctx: (impl SubsystemContext<Message = StatementDistributionMessage>
+ overseer::SubsystemContext<Message = StatementDistributionMessage>),
) -> std::result::Result<(), Fatal> {
@@ -1542,6 +1549,16 @@ impl StatementDistribution {
// Sender/Receiver for getting news from our responder task.
let (res_sender, mut res_receiver) = mpsc::channel(1);
ctx.spawn(
"large-statement-responder",
respond(
self.req_receiver.take().expect("Mandatory argument to new. qed"),
res_sender.clone(),
)
.boxed(),
)
.map_err(Fatal::SpawnTask)?;
loop {
let message =
MuxedMessage::receive(&mut ctx, &mut req_receiver, &mut res_receiver).await;
@@ -1556,16 +1573,14 @@ impl StatementDistribution {
&mut authorities,
&mut active_heads,
&req_sender,
&res_sender,
result?,
)
.await;
match result {
Ok(true) => break,
Ok(false) => {},
Err(Error(Fault::Fatal(f))) => return Err(f),
Err(Error(Fault::Err(error))) =>
tracing::debug!(target: LOG_TARGET, ?error),
Err(Error::Fatal(f)) => return Err(f),
Err(Error::NonFatal(error)) => tracing::debug!(target: LOG_TARGET, ?error),
}
},
MuxedMessage::Requester(result) => {
@@ -1749,7 +1764,6 @@ impl StatementDistribution {
authorities: &mut HashMap<AuthorityDiscoveryId, PeerId>,
active_heads: &mut HashMap<Hash, ActiveHeadData>,
req_sender: &mpsc::Sender<RequesterMessage>,
res_sender: &mpsc::Sender<ResponderMessage>,
message: FromOverseer<StatementDistributionMessage>,
) -> Result<bool> {
let metrics = &self.metrics;
@@ -1868,13 +1882,6 @@ impl StatementDistribution {
)
.await;
},
StatementDistributionMessage::StatementFetchingReceiver(receiver) => {
ctx.spawn(
"large-statement-responder",
respond(receiver, res_sender.clone()).boxed(),
)
.map_err(Fatal::SpawnTask)?;
},
},
}
Ok(false)
@@ -22,9 +22,9 @@ use futures::{
use polkadot_node_network_protocol::{
request_response::{
request::OutgoingResponse,
incoming::{self, OutgoingResponse},
v1::{StatementFetchingRequest, StatementFetchingResponse},
IncomingRequest, MAX_PARALLEL_STATEMENT_REQUESTS,
IncomingRequestReceiver, MAX_PARALLEL_STATEMENT_REQUESTS,
},
PeerId, UnifiedReputationChange as Rep,
};
@@ -51,7 +51,7 @@ pub enum ResponderMessage {
/// `CommittedCandidateReceipt` from peers, whether this can be used to re-assemble one or
/// many `SignedFullStatement`s needs to be verified by the caller.
pub async fn respond(
mut receiver: mpsc::Receiver<sc_network::config::IncomingRequest>,
mut receiver: IncomingRequestReceiver<StatementFetchingRequest>,
mut sender: mpsc::Sender<ResponderMessage>,
) {
let mut pending_out = FuturesUnordered::new();
@@ -74,23 +74,16 @@ pub async fn respond(
pending_out.next().await;
}
let raw = match receiver.next().await {
None => {
tracing::debug!(target: LOG_TARGET, "Shutting down request responder");
let req = match receiver.recv(|| vec![COST_INVALID_REQUEST]).await {
Err(incoming::Error::Fatal(f)) => {
tracing::debug!(target: LOG_TARGET, error = ?f, "Shutting down request responder");
return
},
Some(v) => v,
};
let req = match IncomingRequest::<StatementFetchingRequest>::try_from_raw(
raw,
vec![COST_INVALID_REQUEST],
) {
Err(err) => {
Err(incoming::Error::NonFatal(err)) => {
tracing::debug!(target: LOG_TARGET, ?err, "Decoding request failed");
continue
},
Ok(payload) => payload,
Ok(v) => v,
};
let (tx, rx) = oneshot::channel();
@@ -22,7 +22,7 @@ use parity_scale_codec::{Decode, Encode};
use polkadot_node_network_protocol::{
request_response::{
v1::{StatementFetchingRequest, StatementFetchingResponse},
Recipient, Requests,
IncomingRequest, Recipient, Requests,
},
view, ObservedRole,
};
@@ -699,11 +699,14 @@ fn receiving_from_one_sends_to_another_and_to_candidate_backing() {
let pool = sp_core::testing::TaskExecutor::new();
let (ctx, mut handle) = polkadot_node_subsystem_test_helpers::make_subsystem_context(pool);
let (statement_req_receiver, _) = IncomingRequest::get_config_receiver();
let bg = async move {
let s = StatementDistribution {
metrics: Default::default(),
keystore: Arc::new(LocalKeystore::in_memory()),
};
let s = StatementDistribution::new(
Arc::new(LocalKeystore::in_memory()),
statement_req_receiver,
Default::default(),
);
s.run(ctx).await.unwrap();
};
@@ -888,21 +891,18 @@ fn receiving_large_statement_from_one_sends_to_another_and_to_candidate_backing(
let pool = sp_core::testing::TaskExecutor::new();
let (ctx, mut handle) = polkadot_node_subsystem_test_helpers::make_subsystem_context(pool);
let (statement_req_receiver, mut req_cfg) = IncomingRequest::get_config_receiver();
let bg = async move {
let s =
StatementDistribution { metrics: Default::default(), keystore: make_ferdie_keystore() };
let s = StatementDistribution::new(
make_ferdie_keystore(),
statement_req_receiver,
Default::default(),
);
s.run(ctx).await.unwrap();
};
let (mut tx_reqs, rx_reqs) = mpsc::channel(1);
let test_fut = async move {
handle
.send(FromOverseer::Communication {
msg: StatementDistributionMessage::StatementFetchingReceiver(rx_reqs),
})
.await;
// register our active heads.
handle
.send(FromOverseer::Signal(OverseerSignal::ActiveLeaves(
@@ -1290,7 +1290,7 @@ fn receiving_large_statement_from_one_sends_to_another_and_to_candidate_backing(
payload: inner_req.encode(),
pending_response,
};
tx_reqs.send(req).await.unwrap();
req_cfg.inbound_queue.as_mut().unwrap().send(req).await.unwrap();
assert_matches!(
response_rx.await.unwrap().result,
Err(()) => {}
@@ -1308,7 +1308,7 @@ fn receiving_large_statement_from_one_sends_to_another_and_to_candidate_backing(
payload: inner_req.encode(),
pending_response,
};
tx_reqs.send(req).await.unwrap();
req_cfg.inbound_queue.as_mut().unwrap().send(req).await.unwrap();
assert_matches!(
response_rx.await.unwrap().result,
Err(()) => {}
@@ -1325,7 +1325,7 @@ fn receiving_large_statement_from_one_sends_to_another_and_to_candidate_backing(
payload: inner_req.encode(),
pending_response,
};
tx_reqs.send(req).await.unwrap();
req_cfg.inbound_queue.as_mut().unwrap().send(req).await.unwrap();
let StatementFetchingResponse::Statement(committed) =
Decode::decode(&mut response_rx.await.unwrap().result.unwrap().as_ref()).unwrap();
assert_eq!(committed, candidate);
@@ -1390,21 +1390,18 @@ fn share_prioritizes_backing_group() {
let pool = sp_core::testing::TaskExecutor::new();
let (ctx, mut handle) = polkadot_node_subsystem_test_helpers::make_subsystem_context(pool);
let (statement_req_receiver, mut req_cfg) = IncomingRequest::get_config_receiver();
let bg = async move {
let s =
StatementDistribution { metrics: Default::default(), keystore: make_ferdie_keystore() };
let s = StatementDistribution::new(
make_ferdie_keystore(),
statement_req_receiver,
Default::default(),
);
s.run(ctx).await.unwrap();
};
let (mut tx_reqs, rx_reqs) = mpsc::channel(1);
let test_fut = async move {
handle
.send(FromOverseer::Communication {
msg: StatementDistributionMessage::StatementFetchingReceiver(rx_reqs),
})
.await;
// register our active heads.
handle
.send(FromOverseer::Signal(OverseerSignal::ActiveLeaves(
@@ -1632,7 +1629,7 @@ fn share_prioritizes_backing_group() {
payload: inner_req.encode(),
pending_response,
};
tx_reqs.send(req).await.unwrap();
req_cfg.inbound_queue.as_mut().unwrap().send(req).await.unwrap();
let StatementFetchingResponse::Statement(committed) =
Decode::decode(&mut response_rx.await.unwrap().result.unwrap().as_ref()).unwrap();
assert_eq!(committed, candidate);
@@ -1679,21 +1676,17 @@ fn peer_cant_flood_with_large_statements() {
let pool = sp_core::testing::TaskExecutor::new();
let (ctx, mut handle) = polkadot_node_subsystem_test_helpers::make_subsystem_context(pool);
let (statement_req_receiver, _) = IncomingRequest::get_config_receiver();
let bg = async move {
let s =
StatementDistribution { metrics: Default::default(), keystore: make_ferdie_keystore() };
let s = StatementDistribution::new(
make_ferdie_keystore(),
statement_req_receiver,
Default::default(),
);
s.run(ctx).await.unwrap();
};
let (_, rx_reqs) = mpsc::channel(1);
let test_fut = async move {
handle
.send(FromOverseer::Communication {
msg: StatementDistributionMessage::StatementFetchingReceiver(rx_reqs),
})
.await;
// register our active heads.
handle
.send(FromOverseer::Signal(OverseerSignal::ActiveLeaves(