Request based PoV distribution (#2640)

* Indentation fix.

* Prepare request-response for PoV fetching.

* Drop old PoV distribution.

* WIP: Fetch PoV directly from backing.

* Backing compiles.

* Runtime access and connection management for PoV distribution.

* Get rid of seemingly dead code.

* Implement PoV fetching.

Backing does not yet use it.

* Don't send `ConnectToValidators` for empty list.

* Even better - no need to check over and over again.

* PoV fetching implemented.

+ Typechecks
+ Should work

Missing:

- Guide
- Tests
- Do fallback fetching in case fetching from seconding validator fails.

* Check PoV hash upon reception.

* Implement retry of PoV fetching in backing.

* Avoid pointless validation spawning.

* Add jaeger span to pov requesting.

* Add back tracing.

* Review remarks.

* Whitespace.

* Whitespace again.

* Cleanup + fix tests.

* Log to log target in overseer.

* Fix more tests.

* Don't fail if group cannot be found.

* Simple test for PoV fetcher.

* Handle missing group membership better.

* Add test for retry functionality.

* Fix flaky test.

* Spaces again.

* Guide updates.

* Spaces.
This commit is contained in:
Robert Klotzner
2021-03-28 17:11:38 +02:00
committed by GitHub
parent 27b6d83974
commit c6f07d8f31
35 changed files with 1382 additions and 3184 deletions
+1 -18
View File
@@ -289,7 +289,7 @@ pub mod v1 {
use std::convert::TryFrom;
use polkadot_primitives::v1::{
CandidateIndex, CollatorId, CompressedPoV, Hash, Id as ParaId, SignedAvailabilityBitfield,
CandidateIndex, CollatorId, Hash, Id as ParaId, SignedAvailabilityBitfield,
CollatorSignature,
};
use polkadot_node_primitives::{
@@ -305,19 +305,6 @@ pub mod v1 {
Bitfield(Hash, SignedAvailabilityBitfield),
}
/// Network messages used by the PoV distribution subsystem.
#[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)]
pub enum PoVDistributionMessage {
/// Notification that we are awaiting the given PoVs (by hash) against a
/// specific relay-parent hash.
#[codec(index = 0)]
Awaiting(Hash, Vec<Hash>),
/// Notification of an awaited PoV, in a given relay-parent context.
/// (relay_parent, pov_hash, compressed_pov)
#[codec(index = 1)]
SendPoV(Hash, Hash, CompressedPoV),
}
/// Network messages used by the statement distribution subsystem.
#[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)]
pub enum StatementDistributionMessage {
@@ -361,9 +348,6 @@ pub mod v1 {
/// Bitfield distribution messages
#[codec(index = 1)]
BitfieldDistribution(BitfieldDistributionMessage),
/// PoV Distribution messages
#[codec(index = 2)]
PoVDistribution(PoVDistributionMessage),
/// Statement distribution messages
#[codec(index = 3)]
StatementDistribution(StatementDistributionMessage),
@@ -373,7 +357,6 @@ pub mod v1 {
}
impl_try_from!(ValidationProtocol, BitfieldDistribution, BitfieldDistributionMessage);
impl_try_from!(ValidationProtocol, PoVDistribution, PoVDistributionMessage);
impl_try_from!(ValidationProtocol, StatementDistribution, StatementDistributionMessage);
impl_try_from!(ValidationProtocol, ApprovalDistribution, ApprovalDistributionMessage);
@@ -60,6 +60,8 @@ pub enum Protocol {
ChunkFetching,
/// Protocol for fetching collations from collators.
CollationFetching,
/// Protocol for fetching seconded PoVs from validators of the same group.
PoVFetching,
/// Protocol for fetching available data.
AvailableDataFetching,
}
@@ -107,11 +109,18 @@ impl Protocol {
request_timeout: DEFAULT_REQUEST_TIMEOUT_CONNECTED,
inbound_queue: Some(tx),
},
Protocol::PoVFetching => RequestResponseConfig {
name: p_name,
max_request_size: 1_000,
max_response_size: MAX_COMPRESSED_POV_SIZE as u64,
request_timeout: DEFAULT_REQUEST_TIMEOUT_CONNECTED,
inbound_queue: Some(tx),
},
Protocol::AvailableDataFetching => RequestResponseConfig {
name: p_name,
max_request_size: 1_000,
// Available data size is dominated by the PoV size.
max_response_size: 30_000_000,
max_response_size: MAX_COMPRESSED_POV_SIZE as u64,
request_timeout: DEFAULT_REQUEST_TIMEOUT,
inbound_queue: Some(tx),
},
@@ -130,6 +139,8 @@ impl Protocol {
Protocol::ChunkFetching => 100,
// 10 seems reasonable, considering group sizes of max 10 validators.
Protocol::CollationFetching => 10,
// 10 seems reasonable, considering group sizes of max 10 validators.
Protocol::PoVFetching => 10,
// Validators are constantly self-selecting to request available data which may lead
// to constant load and occasional burstiness.
Protocol::AvailableDataFetching => 100,
@@ -146,6 +157,7 @@ impl Protocol {
match self {
Protocol::ChunkFetching => "/polkadot/req_chunk/1",
Protocol::CollationFetching => "/polkadot/req_collation/1",
Protocol::PoVFetching => "/polkadot/req_pov/1",
Protocol::AvailableDataFetching => "/polkadot/req_available_data/1",
}
}
@@ -17,6 +17,7 @@
use futures::channel::oneshot;
use futures::prelude::Future;
use thiserror::Error;
use parity_scale_codec::{Decode, Encode, Error as DecodingError};
use sc_network as network;
use sc_network::config as netconfig;
@@ -42,6 +43,8 @@ pub enum Requests {
ChunkFetching(OutgoingRequest<v1::ChunkFetchingRequest>),
/// Fetch a collation from a collator which previously announced it.
CollationFetching(OutgoingRequest<v1::CollationFetchingRequest>),
/// Fetch a PoV from a validator which previously sent out a seconded statement.
PoVFetching(OutgoingRequest<v1::PoVFetchingRequest>),
/// Request full available data from a node.
AvailableDataFetching(OutgoingRequest<v1::AvailableDataFetchingRequest>),
}
@@ -52,6 +55,7 @@ impl Requests {
match self {
Self::ChunkFetching(_) => Protocol::ChunkFetching,
Self::CollationFetching(_) => Protocol::CollationFetching,
Self::PoVFetching(_) => Protocol::PoVFetching,
Self::AvailableDataFetching(_) => Protocol::AvailableDataFetching,
}
}
@@ -67,6 +71,7 @@ impl Requests {
match self {
Self::ChunkFetching(r) => r.encode_request(),
Self::CollationFetching(r) => r.encode_request(),
Self::PoVFetching(r) => r.encode_request(),
Self::AvailableDataFetching(r) => r.encode_request(),
}
}
@@ -96,16 +101,19 @@ pub struct OutgoingRequest<Req> {
}
/// Any error that can occur when sending a request.
#[derive(Debug)]
#[derive(Debug, Error)]
pub enum RequestError {
/// Response could not be decoded.
InvalidResponse(DecodingError),
#[error("Response could not be decoded")]
InvalidResponse(#[source] DecodingError),
/// Some error in substrate/libp2p happened.
NetworkError(network::RequestFailure),
#[error("Some network error occurred")]
NetworkError(#[source] network::RequestFailure),
/// Response got canceled by networking.
Canceled(oneshot::Canceled),
#[error("Response channel got canceled")]
Canceled(#[source] oneshot::Canceled),
}
/// Responses received for an `OutgoingRequest`.
@@ -114,6 +114,29 @@ impl IsRequest for CollationFetchingRequest {
const PROTOCOL: Protocol = Protocol::CollationFetching;
}
/// Request the advertised collation at that relay-parent.
#[derive(Debug, Clone, Encode, Decode)]
pub struct PoVFetchingRequest {
/// Candidate we want a PoV for.
pub candidate_hash: CandidateHash,
}
/// Responses to `PoVFetchingRequest`.
#[derive(Debug, Clone, Encode, Decode)]
pub enum PoVFetchingResponse {
/// Deliver requested PoV.
#[codec(index = 0)]
PoV(CompressedPoV),
/// PoV was not found in store.
#[codec(index = 1)]
NoSuchPoV,
}
impl IsRequest for PoVFetchingRequest {
type Response = PoVFetchingResponse;
const PROTOCOL: Protocol = Protocol::PoVFetching;
}
/// Request the entire available data for a candidate.
#[derive(Debug, Clone, Encode, Decode)]
pub struct AvailableDataFetchingRequest {