Remove request multiplexer (#3624)

* WIP: Get rid of request multiplexer. * WIP * Receiver for handling of incoming requests. * Get rid of useless `Fault` abstraction. The things the type system lets us do are not worth getting abstracted in its own type. Instead, error handling is going to be merely a pattern. * Make most things compile again. * Port availability distribution away from request multiplexer. * Formatting. * Port dispute distribution over. * Fixup statement distribution. * Handle request directly in collator protocol. + Only allow fatal errors at top level. * Use direct request channel for availability recovery. * Finally get rid of request multiplexer. Fixes #2842 and paves the way for more back pressure possibilities. * Fix overseer and statement distribution tests. * Fix collator protocol and network bridge tests. * Fix tests in availability recovery. * Fix availability distribution tests. * Fix dispute distribution tests. * Add missing dependency * Typos. * Review remarks. * More remarks.
2026-06-22 13:51:05 +00:00 · 2021-08-12 13:11:36 +02:00
parent ecf71233c3
commit 55154a8d37
51 changed files with 1509 additions and 1746 deletions
@@ -19,43 +19,48 @@

 use thiserror::Error;

-use polkadot_node_network_protocol::{request_response::request::ReceiveError, PeerId};
-use polkadot_node_subsystem_util::{runtime, unwrap_non_fatal, Fault};
+use polkadot_node_network_protocol::{request_response::incoming, PeerId};
+use polkadot_node_subsystem_util::runtime;

 use crate::LOG_TARGET;

-#[derive(Debug, Error)]
+#[derive(Debug, Error, derive_more::From)]
 #[error(transparent)]
-pub struct Error(pub Fault<NonFatal, Fatal>);
-
-impl From<NonFatal> for Error {
-	fn from(e: NonFatal) -> Self {
-		Self(Fault::from_non_fatal(e))
-	}
-}
-
-impl From<Fatal> for Error {
-	fn from(f: Fatal) -> Self {
-		Self(Fault::from_fatal(f))
-	}
+pub enum Error {
+	/// All fatal errors.
+	Fatal(Fatal),
+	/// All nonfatal/potentially recoverable errors.
+	NonFatal(NonFatal),
 }

 impl From<runtime::Error> for Error {
 	fn from(o: runtime::Error) -> Self {
-		Self(Fault::from_other(o))
+		match o {
+			runtime::Error::Fatal(f) => Self::Fatal(Fatal::Runtime(f)),
+			runtime::Error::NonFatal(f) => Self::NonFatal(NonFatal::Runtime(f)),
+		}
+	}
+}
+
+impl From<incoming::Error> for Error {
+	fn from(o: incoming::Error) -> Self {
+		match o {
+			incoming::Error::Fatal(f) => Self::Fatal(Fatal::IncomingRequest(f)),
+			incoming::Error::NonFatal(f) => Self::NonFatal(NonFatal::IncomingRequest(f)),
+		}
 	}
 }

 /// Fatal errors of this subsystem.
 #[derive(Debug, Error)]
 pub enum Fatal {
-	/// Request channel returned `None`. Likely a system shutdown.
-	#[error("Request channel stream finished.")]
-	RequestChannelFinished,
-
 	/// Errors coming from runtime::Runtime.
 	#[error("Error while accessing runtime information")]
 	Runtime(#[from] runtime::Fatal),
+
+	/// Errors coming from receiving incoming requests.
+	#[error("Retrieving next incoming request failed.")]
+	IncomingRequest(#[from] incoming::Fatal),
 }

 /// Non-fatal errors of this subsystem.
@@ -65,10 +70,6 @@ pub enum NonFatal {
 	#[error("Sending back response to peer {0} failed.")]
 	SendResponse(PeerId),

-	/// Getting request from raw request failed.
-	#[error("Decoding request failed.")]
-	FromRawRequest(#[source] ReceiveError),
-
 	/// Setting reputation for peer failed.
 	#[error("Changing peer's ({0}) reputation failed.")]
 	SetPeerReputation(PeerId),
@@ -88,20 +89,27 @@ pub enum NonFatal {
 	/// Errors coming from runtime::Runtime.
 	#[error("Error while accessing runtime information")]
 	Runtime(#[from] runtime::NonFatal),
+
+	/// Errors coming from receiving incoming requests.
+	#[error("Retrieving next incoming request failed.")]
+	IncomingRequest(#[from] incoming::NonFatal),
 }

 pub type Result<T> = std::result::Result<T, Error>;

-pub type FatalResult<T> = std::result::Result<T, Fatal>;
 pub type NonFatalResult<T> = std::result::Result<T, NonFatal>;

 /// Utility for eating top-level errors and logging them.
 ///
 /// We basically always want to try and continue on error. This utility function is meant to
-/// consume top-level errors by simply logging them
+/// consume top-level errors by simply logging them.
 pub fn log_error(result: Result<()>) -> std::result::Result<(), Fatal> {
-	if let Some(error) = unwrap_non_fatal(result.map_err(|e| e.0))? {
-		tracing::warn!(target: LOG_TARGET, error = ?error);
+	match result {
+		Err(Error::Fatal(f)) => Err(f),
+		Err(Error::NonFatal(error)) => {
+			tracing::warn!(target: LOG_TARGET, error = ?error);
+			Ok(())
+		},
+		Ok(()) => Ok(()),
 	}
-	Ok(())
 }
@@ -21,19 +21,20 @@ use std::{
 };

 use futures::{
-	channel::{mpsc, oneshot},
+	channel::oneshot,
 	future::{poll_fn, BoxFuture},
+	pin_mut,
 	stream::{FusedStream, FuturesUnordered, StreamExt},
-	FutureExt, Stream,
+	Future, FutureExt, Stream,
 };
 use lru::LruCache;

 use polkadot_node_network_protocol::{
 	authority_discovery::AuthorityDiscovery,
 	request_response::{
-		request::{OutgoingResponse, OutgoingResponseSender},
+		incoming::{OutgoingResponse, OutgoingResponseSender},
 		v1::{DisputeRequest, DisputeResponse},
-		IncomingRequest,
+		IncomingRequest, IncomingRequestReceiver,
 	},
 	PeerId, UnifiedReputationChange as Rep,
 };
@@ -50,7 +51,7 @@ use crate::{
 };

 mod error;
-use self::error::{log_error, Fatal, FatalResult, NonFatal, NonFatalResult, Result};
+use self::error::{log_error, NonFatal, NonFatalResult, Result};

 const COST_INVALID_REQUEST: Rep = Rep::CostMajor("Received message could not be decoded.");
 const COST_INVALID_SIGNATURE: Rep = Rep::Malicious("Signatures were invalid.");
@@ -72,7 +73,7 @@ pub struct DisputesReceiver<Sender, AD> {
 	sender: Sender,

 	/// Channel to retrieve incoming requests from.
-	receiver: mpsc::Receiver<sc_network::config::IncomingRequest>,
+	receiver: IncomingRequestReceiver<DisputeRequest>,

 	/// Authority discovery service:
 	authority_discovery: AD,
@@ -103,26 +104,27 @@ enum MuxedMessage {
 	ConfirmedImport(NonFatalResult<(PeerId, ImportStatementsResult)>),

 	/// A new request has arrived and should be handled.
-	NewRequest(sc_network::config::IncomingRequest),
+	NewRequest(IncomingRequest<DisputeRequest>),
 }

 impl MuxedMessage {
 	async fn receive(
 		pending_imports: &mut PendingImports,
-		pending_requests: &mut mpsc::Receiver<sc_network::config::IncomingRequest>,
-	) -> FatalResult<MuxedMessage> {
+		pending_requests: &mut IncomingRequestReceiver<DisputeRequest>,
+	) -> Result<MuxedMessage> {
 		poll_fn(|ctx| {
-			if let Poll::Ready(v) = pending_requests.poll_next_unpin(ctx) {
-				let r = match v {
-					None => Err(Fatal::RequestChannelFinished),
-					Some(msg) => Ok(MuxedMessage::NewRequest(msg)),
-				};
-				return Poll::Ready(r)
+			let next_req = pending_requests.recv(|| vec![COST_INVALID_REQUEST]);
+			pin_mut!(next_req);
+			if let Poll::Ready(r) = next_req.poll(ctx) {
+				return match r {
+					Err(e) => Poll::Ready(Err(e.into())),
+					Ok(v) => Poll::Ready(Ok(Self::NewRequest(v))),
+				}
 			}
 			// If `pending_imports` yields `Ready(None)` we fall through and return `Pending` below -
 			// we want to wait for the next request in that case.
 			if let Poll::Ready(Some(v)) = pending_imports.poll_next_unpin(ctx) {
-				return Poll::Ready(Ok(MuxedMessage::ConfirmedImport(v)))
+				return Poll::Ready(Ok(Self::ConfirmedImport(v)))
 			}
 			Poll::Pending
 		})
@@ -137,7 +139,7 @@ where
 	/// Create a new receiver which can be `run`.
 	pub fn new(
 		sender: Sender,
-		receiver: mpsc::Receiver<sc_network::config::IncomingRequest>,
+		receiver: IncomingRequestReceiver<DisputeRequest>,
 		authority_discovery: AD,
 		metrics: Metrics,
 	) -> Self {
@@ -165,17 +167,14 @@ where
 		loop {
 			match log_error(self.run_inner().await) {
 				Ok(()) => {},
-				Err(Fatal::RequestChannelFinished) => {
+				Err(fatal) => {
 					tracing::debug!(
 						target: LOG_TARGET,
-						"Incoming request stream exhausted - shutting down?"
+						error = ?fatal,
+						"Shutting down"
 					);
 					return
 				},
-				Err(err) => {
-					tracing::warn!(target: LOG_TARGET, ?err, "Dispute receiver died.");
-					return
-				},
 			}
 		}
 	}
@@ -184,7 +183,7 @@ where
 	async fn run_inner(&mut self) -> Result<()> {
 		let msg = MuxedMessage::receive(&mut self.pending_imports, &mut self.receiver).await?;

-		let raw = match msg {
+		let incoming = match msg {
 			// We need to clean up futures, to make sure responses are sent:
 			MuxedMessage::ConfirmedImport(m_bad) => {
 				self.ban_bad_peer(m_bad)?;
@@ -195,14 +194,14 @@ where

 		self.metrics.on_received_request();

-		let peer = raw.peer;
+		let peer = incoming.peer;

 		// Only accept messages from validators:
-		if self.authority_discovery.get_authority_id_by_peer_id(raw.peer).await.is_none() {
-			raw.pending_response
-				.send(sc_network::config::OutgoingResponse {
+		if self.authority_discovery.get_authority_id_by_peer_id(peer).await.is_none() {
+			incoming
+				.send_outgoing_response(OutgoingResponse {
 					result: Err(()),
-					reputation_changes: vec![COST_NOT_A_VALIDATOR.into_base_rep()],
+					reputation_changes: vec![COST_NOT_A_VALIDATOR],
 					sent_feedback: None,
 				})
 				.map_err(|_| NonFatal::SendResponse(peer))?;
@@ -210,10 +209,6 @@ where
 			return Err(NonFatal::NotAValidator(peer).into())
 		}

-		let incoming =
-			IncomingRequest::<DisputeRequest>::try_from_raw(raw, vec![COST_INVALID_REQUEST])
-				.map_err(NonFatal::FromRawRequest)?;
-
 		// Immediately drop requests from peers that already have requests in flight or have
 		// been banned recently (flood protection):
 		if self.pending_imports.peer_is_pending(&peer) || self.banned_peers.contains(&peer) {