Request based collation fetching (#2621)

* Introduce collation fetching protocol
  also move to mod.rs
* Allow `PeerId`s in requests to network bridge.
* Fix availability distribution tests.
* Move CompressedPoV to primitives.
* Request based collator protocol: validator side
  - Missing: tests
  - Collator side
  - don't connect, if not connected
* Fixes.
* Basic request based collator side.
* Minor fix on collator side.
* Don't connect in requests in collation protocol. Also some cleanup.
* Fix PoV distribution
* Bump substrate
* Add back metrics + whitespace fixes.
* Add back missing spans.
* More cleanup.
* Guide update.
* Fix tests
* Handle results in tests.
* Fix weird compilation issue.
* Add missing )
* Get rid of dead code.
* Get rid of redundant import.
* Fix runtime build.
* Cleanup.
* Fix wasm build.
* Format fixes. Thanks @andronik !
2026-04-26 13:27:57 +00:00 · 2021-03-18 09:06:36 +01:00
parent f33f6badac
commit 503e2b74f9
24 changed files with 576 additions and 737 deletions
@@ -288,10 +288,7 @@ impl View {

 /// v1 protocol types.
 pub mod v1 {
-	use polkadot_primitives::v1::{
-		Hash, CollatorId, Id as ParaId, ErasureChunk, CandidateReceipt,
-		SignedAvailabilityBitfield, PoV, CandidateHash, ValidatorIndex, CandidateIndex, AvailableData,
-	};
+	use polkadot_primitives::v1::{AvailableData, CandidateHash, CandidateIndex, CollatorId, CompressedPoV, ErasureChunk, Hash, Id as ParaId, SignedAvailabilityBitfield, ValidatorIndex};
 	use polkadot_node_primitives::{
 		SignedFullStatement,
 		approval::{IndirectAssignmentCert, IndirectSignedApprovalVote},
@@ -357,73 +354,6 @@ pub mod v1 {
 		Approvals(Vec<IndirectSignedApprovalVote>),
 	}

-	#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)]
-	#[allow(missing_docs)]
-	pub enum CompressedPoVError {
-		#[error("Failed to compress a PoV")]
-		Compress,
-		#[error("Failed to decompress a PoV")]
-		Decompress,
-		#[error("Failed to decode the uncompressed PoV")]
-		Decode,
-		#[error("Architecture is not supported")]
-		NotSupported,
-	}
-
-	/// SCALE and Zstd encoded [`PoV`].
-	#[derive(Clone, Encode, Decode, PartialEq, Eq)]
-	pub struct CompressedPoV(Vec<u8>);
-
-	impl CompressedPoV {
-		/// Compress the given [`PoV`] and returns a [`CompressedPoV`].
-		#[cfg(not(target_os = "unknown"))]
-		pub fn compress(pov: &PoV) -> Result<Self, CompressedPoVError> {
-			zstd::encode_all(pov.encode().as_slice(), 3).map_err(|_| CompressedPoVError::Compress).map(Self)
-		}
-
-		/// Compress the given [`PoV`] and returns a [`CompressedPoV`].
-		#[cfg(target_os = "unknown")]
-		pub fn compress(_: &PoV) -> Result<Self, CompressedPoVError> {
-			Err(CompressedPoVError::NotSupported)
-		}
-
-		/// Decompress `self` and returns the [`PoV`] on success.
-		#[cfg(not(target_os = "unknown"))]
-		pub fn decompress(&self) -> Result<PoV, CompressedPoVError> {
-			use std::io::Read;
-			const MAX_POV_BLOCK_SIZE: usize = 32 * 1024 * 1024;
-
-			struct InputDecoder<'a, T: std::io::BufRead>(&'a mut zstd::Decoder<T>, usize);
-			impl<'a, T: std::io::BufRead> parity_scale_codec::Input for InputDecoder<'a, T> {
-				fn read(&mut self, into: &mut [u8]) -> Result<(), parity_scale_codec::Error> {
-					self.1 = self.1.saturating_add(into.len());
-					if self.1 > MAX_POV_BLOCK_SIZE {
-						return Err("pov block too big".into())
-					}
-					self.0.read_exact(into).map_err(Into::into)
-				}
-				fn remaining_len(&mut self) -> Result<Option<usize>, parity_scale_codec::Error> {
-					Ok(None)
-				}
-			}
-
-			let mut decoder = zstd::Decoder::new(self.0.as_slice()).map_err(|_| CompressedPoVError::Decompress)?;
-			PoV::decode(&mut InputDecoder(&mut decoder, 0)).map_err(|_| CompressedPoVError::Decode)
-		}
-
-		/// Decompress `self` and returns the [`PoV`] on success.
-		#[cfg(target_os = "unknown")]
-		pub fn decompress(&self) -> Result<PoV, CompressedPoVError> {
-			Err(CompressedPoVError::NotSupported)
-		}
-	}
-
-	impl std::fmt::Debug for CompressedPoV {
-		fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-			write!(f, "CompressedPoV({} bytes)", self.0.len())
-		}
-	}
-
 	/// Network messages used by the collator protocol subsystem
 	#[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)]
 	pub enum CollatorProtocolMessage {
@@ -434,12 +364,6 @@ pub mod v1 {
 		/// that they are a collator with given ID.
 		#[codec(index = 1)]
 		AdvertiseCollation(Hash, ParaId),
-		/// Request the advertised collation at that relay-parent.
-		#[codec(index = 2)]
-		RequestCollation(RequestId, Hash, ParaId),
-		/// A requested collation.
-		#[codec(index = 3)]
-		Collation(RequestId, CandidateReceipt, CompressedPoV),
 		/// A collation sent to a validator was seconded.
 		#[codec(index = 4)]
 		CollationSeconded(SignedFullStatement),
@@ -481,17 +405,3 @@ pub mod v1 {

 	impl_try_from!(CollationProtocol, CollatorProtocol, CollatorProtocolMessage);
 }
-
-#[cfg(test)]
-mod tests {
-	use polkadot_primitives::v1::PoV;
-	use super::v1::{CompressedPoV, CompressedPoVError};
-
-	#[test]
-	fn decompress_huge_pov_block_fails() {
-		let pov = PoV { block_data: vec![0; 63 * 1024 * 1024].into() };
-
-		let compressed = CompressedPoV::compress(&pov).unwrap();
-		assert_eq!(CompressedPoVError::Decode, compressed.decompress().unwrap_err());
-	}
-}
@@ -43,7 +43,7 @@ pub use sc_network::config::RequestResponseConfig;

 /// All requests that can be sent to the network bridge.
 pub mod request;
-pub use request::{IncomingRequest, OutgoingRequest, Requests};
+pub use request::{IncomingRequest, OutgoingRequest, Requests, Recipient, OutgoingResult};

 ///// Multiplexer for incoming requests.
 // pub mod multiplexer;
@@ -57,6 +57,8 @@ pub mod v1;
 pub enum Protocol {
 	/// Protocol for availability fetching, used by availability distribution.
 	AvailabilityFetching,
+	/// Protocol for fetching collations from collators.
+	CollationFetching,
 }

 /// Default request timeout in seconds.
@@ -66,6 +68,10 @@ pub enum Protocol {
 /// sets.
 const DEFAULT_REQUEST_TIMEOUT: Duration = Duration::from_secs(3); 

+/// Request timeout where we can assume the connection is already open (e.g. we have peers in a
+/// peer set as well).
+const DEFAULT_REQUEST_TIMEOUT_CONNECTED: Duration = Duration::from_secs(1);
+
 impl Protocol {
 	/// Get a configuration for a given Request response protocol.
 	///
@@ -85,14 +91,22 @@ impl Protocol {
 		let cfg = match self {
 			Protocol::AvailabilityFetching => RequestResponseConfig {
 				name: p_name,
-				// Arbitrary very conservative numbers:
-				// TODO: Get better numbers, see https://github.com/paritytech/polkadot/issues/2370
-				max_request_size: 10_000,
-				max_response_size: 1_000_000,
-				// Also just some relative conservative guess:
+				max_request_size: 1_000,
+				max_response_size: 100_000,
 				request_timeout: DEFAULT_REQUEST_TIMEOUT,
 				inbound_queue: Some(tx),
 			},
+			Protocol::CollationFetching => RequestResponseConfig {
+				name: p_name,
+				max_request_size: 1_000,
+				// Collations are expected to be around 10Meg, probably much smaller with
+				// compression. So 10Meg should be sufficient; we might be able to reduce this
+				// further.
+				max_response_size: 10_000_000,
+				// Taken from initial implementation in collator protocol:
+				request_timeout: DEFAULT_REQUEST_TIMEOUT_CONNECTED,
+				inbound_queue: Some(tx),
+			},
 		};
 		(rx, cfg)
 	}
@@ -106,6 +120,8 @@ impl Protocol {
 			// assuming we can service requests relatively quickly, which would need to be measured
 			// as well.
 			Protocol::AvailabilityFetching => 100,
+			// 10 seems reasonable, considering group sizes of max 10 validators.
+			Protocol::CollationFetching => 10,
 		}
 	}

@@ -118,6 +134,7 @@ impl Protocol {
 	pub const fn get_protocol_name_static(self) -> &'static str {
 		match self {
 			Protocol::AvailabilityFetching => "/polkadot/req_availability/1",
+			Protocol::CollationFetching => "/polkadot/req_collation/1",
 		}
 	}
 }
@@ -40,6 +40,8 @@ pub trait IsRequest {
 pub enum Requests {
 	/// Request an availability chunk from a node.
 	AvailabilityFetching(OutgoingRequest<v1::AvailabilityFetchingRequest>),
+	/// Fetch a collation from a collator which previously announced it.
+	CollationFetching(OutgoingRequest<v1::CollationFetchingRequest>),
 }

 impl Requests {
@@ -47,6 +49,7 @@ impl Requests {
 	pub fn get_protocol(&self) -> Protocol {
 		match self {
 			Self::AvailabilityFetching(_) => Protocol::AvailabilityFetching,
+			Self::CollationFetching(_) => Protocol::CollationFetching,
 		}
 	}

@@ -60,10 +63,20 @@ impl Requests {
 	pub fn encode_request(self) -> (Protocol, OutgoingRequest<Vec<u8>>) {
 		match self {
 			Self::AvailabilityFetching(r) => r.encode_request(),
+			Self::CollationFetching(r) => r.encode_request(),
 		}
 	}
 }

+/// Potential recipients of an outgoing request.
+#[derive(Debug, Eq, Hash, PartialEq)]
+pub enum Recipient {
+	/// Recipient is a regular peer and we know its peer id.
+	Peer(PeerId),
+	/// Recipient is a validator, we address it via this `AuthorityDiscoveryId`.
+	Authority(AuthorityDiscoveryId),
+}
+
 /// A request to be sent to the network bridge, including a sender for sending responses/failures.
 ///
 /// The network implementation will make use of that sender for informing the requesting subsystem
@@ -71,7 +84,7 @@ impl Requests {
 #[derive(Debug)]
 pub struct OutgoingRequest<Req> {
 	/// Intended recipient of this request.
-	pub peer: AuthorityDiscoveryId,
+	pub peer: Recipient,
 	/// The actual request to send over the wire.
 	pub payload: Req,
 	/// Sender which is used by networking to get us back a response.
@@ -90,6 +103,9 @@ pub enum RequestError {
 	Canceled(oneshot::Canceled),
 }

+/// Responses received for an `OutgoingRequest`.
+pub type OutgoingResult<Res> = Result<Res, RequestError>;
+
 impl<Req> OutgoingRequest<Req>
 where
 	Req: IsRequest + Encode,
@@ -100,11 +116,11 @@ where
 	/// It will contain a sender that is used by the networking for sending back responses. The
 	/// connected receiver is returned as the second element in the returned tuple.
 	pub fn new(
-		peer: AuthorityDiscoveryId,
+		peer: Recipient,
 		payload: Req,
 	) -> (
 		Self,
-		impl Future<Output = Result<Req::Response, RequestError>>,
+		impl Future<Output = OutgoingResult<Req::Response>>,
 	) {
 		let (tx, rx) = oneshot::channel();
 		let r = Self {
@@ -201,7 +217,7 @@ where
 /// Future for actually receiving a typed response for an OutgoingRequest.
 async fn receive_response<Req>(
 	rec: oneshot::Receiver<Result<Vec<u8>, network::RequestFailure>>,
-) -> Result<Req::Response, RequestError>
+) -> OutgoingResult<Req::Response>
 where
 	Req: IsRequest,
 	Req::Response: Decode,
@@ -18,7 +18,8 @@

 use parity_scale_codec::{Decode, Encode};

-use polkadot_primitives::v1::{CandidateHash, ErasureChunk, ValidatorIndex};
+use polkadot_primitives::v1::{CandidateHash, CandidateReceipt, ErasureChunk, ValidatorIndex, CompressedPoV, Hash};
+use polkadot_primitives::v1::Id as ParaId;

 use super::request::IsRequest;
 use super::Protocol;
@@ -78,3 +79,25 @@ impl IsRequest for AvailabilityFetchingRequest {
 	type Response = AvailabilityFetchingResponse;
 	const PROTOCOL: Protocol = Protocol::AvailabilityFetching;
 }
+
+/// Request the advertised collation at that relay-parent.
+#[derive(Debug, Clone, Encode, Decode)]
+pub struct CollationFetchingRequest {
+	/// Relay parent we want a collation for.
+	pub relay_parent: Hash,
+	/// The `ParaId` of the collation.
+	pub para_id: ParaId,
+}
+
+/// Responses as sent by collators.
+#[derive(Debug, Clone, Encode, Decode)]
+pub enum CollationFetchingResponse {
+	/// Deliver requested collation.
+	#[codec(index = 0)]
+	Collation(CandidateReceipt, CompressedPoV),
+}
+
+impl IsRequest for CollationFetchingRequest {
+	type Response = CollationFetchingResponse;
+	const PROTOCOL: Protocol = Protocol::CollationFetching;
+}