Request based PoV distribution (#2640)

* Indentation fix.
* Prepare request-response for PoV fetching.
* Drop old PoV distribution.
* WIP: Fetch PoV directly from backing.
* Backing compiles.
* Runtime access and connection management for PoV distribution.
* Get rid of seemingly dead code.
* Implement PoV fetching. Backing does not yet use it.
* Don't send `ConnectToValidators` for empty list.
* Even better - no need to check over and over again.
* PoV fetching implemented.
  + Typechecks
  + Should work
  Missing:
  - Guide
  - Tests
  - Do fallback fetching in case fetching from seconding validator fails.
* Check PoV hash upon reception.
* Implement retry of PoV fetching in backing.
* Avoid pointless validation spawning.
* Add jaeger span to pov requesting.
* Add back tracing.
* Review remarks.
* Whitespace.
* Whitespace again.
* Cleanup + fix tests.
* Log to log target in overseer.
* Fix more tests.
* Don't fail if group cannot be found.
* Simple test for PoV fetcher.
* Handle missing group membership better.
* Add test for retry functionality.
* Fix flaky test.
* Spaces again.
* Guide updates.
* Spaces.
2026-04-26 15:47:58 +00:00 · 2021-03-28 17:11:38 +02:00
parent 27b6d83974
commit c6f07d8f31
35 changed files with 1382 additions and 3184 deletions
@@ -289,7 +289,7 @@ pub mod v1 {
 	use std::convert::TryFrom;

 	use polkadot_primitives::v1::{
-		CandidateIndex, CollatorId, CompressedPoV, Hash, Id as ParaId, SignedAvailabilityBitfield,
+		CandidateIndex, CollatorId, Hash, Id as ParaId, SignedAvailabilityBitfield,
 		CollatorSignature,
 	};
 	use polkadot_node_primitives::{
@@ -305,19 +305,6 @@ pub mod v1 {
 		Bitfield(Hash, SignedAvailabilityBitfield),
 	}

-	/// Network messages used by the PoV distribution subsystem.
-	#[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)]
-	pub enum PoVDistributionMessage {
-		/// Notification that we are awaiting the given PoVs (by hash) against a
-		/// specific relay-parent hash.
-		#[codec(index = 0)]
-		Awaiting(Hash, Vec<Hash>),
-		/// Notification of an awaited PoV, in a given relay-parent context.
-		/// (relay_parent, pov_hash, compressed_pov)
-		#[codec(index = 1)]
-		SendPoV(Hash, Hash, CompressedPoV),
-	}
-
 	/// Network messages used by the statement distribution subsystem.
 	#[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)]
 	pub enum StatementDistributionMessage {
@@ -361,9 +348,6 @@ pub mod v1 {
 		/// Bitfield distribution messages
 		#[codec(index = 1)]
 		BitfieldDistribution(BitfieldDistributionMessage),
-		/// PoV Distribution messages
-		#[codec(index = 2)]
-		PoVDistribution(PoVDistributionMessage),
 		/// Statement distribution messages
 		#[codec(index = 3)]
 		StatementDistribution(StatementDistributionMessage),
@@ -373,7 +357,6 @@ pub mod v1 {
 	}

 	impl_try_from!(ValidationProtocol, BitfieldDistribution, BitfieldDistributionMessage);
-	impl_try_from!(ValidationProtocol, PoVDistribution, PoVDistributionMessage);
 	impl_try_from!(ValidationProtocol, StatementDistribution, StatementDistributionMessage);
 	impl_try_from!(ValidationProtocol, ApprovalDistribution, ApprovalDistributionMessage);

@@ -60,6 +60,8 @@ pub enum Protocol {
 	ChunkFetching,
 	/// Protocol for fetching collations from collators.
 	CollationFetching,
+	/// Protocol for fetching seconded PoVs from validators of the same group.
+	PoVFetching,
 	/// Protocol for fetching available data.
 	AvailableDataFetching,
 }
@@ -107,11 +109,18 @@ impl Protocol {
 				request_timeout: DEFAULT_REQUEST_TIMEOUT_CONNECTED,
 				inbound_queue: Some(tx),
 			},
+			Protocol::PoVFetching => RequestResponseConfig {
+				name: p_name,
+				max_request_size: 1_000,
+				max_response_size: MAX_COMPRESSED_POV_SIZE as u64,
+				request_timeout: DEFAULT_REQUEST_TIMEOUT_CONNECTED,
+				inbound_queue: Some(tx),
+			},
 			Protocol::AvailableDataFetching => RequestResponseConfig {
 				name: p_name,
 				max_request_size: 1_000,
 				// Available data size is dominated by the PoV size.
-				max_response_size: 30_000_000,
+				max_response_size: MAX_COMPRESSED_POV_SIZE as u64,
 				request_timeout: DEFAULT_REQUEST_TIMEOUT,
 				inbound_queue: Some(tx),
 			},
@@ -130,6 +139,8 @@ impl Protocol {
 			Protocol::ChunkFetching => 100,
 			// 10 seems reasonable, considering group sizes of max 10 validators.
 			Protocol::CollationFetching => 10,
+			// 10 seems reasonable, considering group sizes of max 10 validators.
+			Protocol::PoVFetching => 10,
 			// Validators are constantly self-selecting to request available data which may lead
 			// to constant load and occasional burstiness.
 			Protocol::AvailableDataFetching => 100,
@@ -146,6 +157,7 @@ impl Protocol {
 		match self {
 			Protocol::ChunkFetching => "/polkadot/req_chunk/1",
 			Protocol::CollationFetching => "/polkadot/req_collation/1",
+			Protocol::PoVFetching => "/polkadot/req_pov/1",
 			Protocol::AvailableDataFetching => "/polkadot/req_available_data/1",
 		}
 	}
@@ -17,6 +17,7 @@
 use futures::channel::oneshot;
 use futures::prelude::Future;

+use thiserror::Error;
 use parity_scale_codec::{Decode, Encode, Error as DecodingError};
 use sc_network as network;
 use sc_network::config as netconfig;
@@ -42,6 +43,8 @@ pub enum Requests {
 	ChunkFetching(OutgoingRequest<v1::ChunkFetchingRequest>),
 	/// Fetch a collation from a collator which previously announced it.
 	CollationFetching(OutgoingRequest<v1::CollationFetchingRequest>),
+	/// Fetch a PoV from a validator which previously sent out a seconded statement.
+	PoVFetching(OutgoingRequest<v1::PoVFetchingRequest>),
 	/// Request full available data from a node.
 	AvailableDataFetching(OutgoingRequest<v1::AvailableDataFetchingRequest>),
 }
@@ -52,6 +55,7 @@ impl Requests {
 		match self {
 			Self::ChunkFetching(_) => Protocol::ChunkFetching,
 			Self::CollationFetching(_) => Protocol::CollationFetching,
+			Self::PoVFetching(_) => Protocol::PoVFetching,
 			Self::AvailableDataFetching(_) => Protocol::AvailableDataFetching,
 		}
 	}
@@ -67,6 +71,7 @@ impl Requests {
 		match self {
 			Self::ChunkFetching(r) => r.encode_request(),
 			Self::CollationFetching(r) => r.encode_request(),
+			Self::PoVFetching(r) => r.encode_request(),
 			Self::AvailableDataFetching(r) => r.encode_request(),
 		}
 	}
@@ -96,16 +101,19 @@ pub struct OutgoingRequest<Req> {
 }

 /// Any error that can occur when sending a request.
-#[derive(Debug)]
+#[derive(Debug, Error)]
 pub enum RequestError {
 	/// Response could not be decoded.
-	InvalidResponse(DecodingError),
+	#[error("Response could not be decoded")]
+	InvalidResponse(#[source] DecodingError),

 	/// Some error in substrate/libp2p happened.
-	NetworkError(network::RequestFailure),
+	#[error("Some network error occurred")]
+	NetworkError(#[source] network::RequestFailure),

 	/// Response got canceled by networking.
-	Canceled(oneshot::Canceled),
+	#[error("Response channel got canceled")]
+	Canceled(#[source] oneshot::Canceled),
 }

 /// Responses received for an `OutgoingRequest`.
@@ -114,6 +114,29 @@ impl IsRequest for CollationFetchingRequest {
 	const PROTOCOL: Protocol = Protocol::CollationFetching;
 }

+/// Request the PoV of a given candidate from a validator.
+#[derive(Debug, Clone, Encode, Decode)]
+pub struct PoVFetchingRequest {
+	/// Candidate we want a PoV for.
+	pub candidate_hash: CandidateHash,
+}
+
+/// Responses to `PoVFetchingRequest`.
+#[derive(Debug, Clone, Encode, Decode)]
+pub enum PoVFetchingResponse {
+	/// Deliver requested PoV.
+	#[codec(index = 0)]
+	PoV(CompressedPoV),
+	/// PoV was not found in store.
+	#[codec(index = 1)]
+	NoSuchPoV,
+}
+
+impl IsRequest for PoVFetchingRequest {
+	type Response = PoVFetchingResponse;
+	const PROTOCOL: Protocol = Protocol::PoVFetching;
+}
+
 /// Request the entire available data for a candidate.
 #[derive(Debug, Clone, Encode, Decode)]
 pub struct AvailableDataFetchingRequest {