From 8a2911b85d377cbfda7e25f189b38f1a15e2f131 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bastian=20K=C3=B6cher?= Date: Fri, 6 Nov 2020 09:38:44 +0100 Subject: [PATCH] Distribute a PoV after seconding it (#1924) We need to distribute the PoV after we have seconded it. Other nodes that will receive our `Secondded` statement and want to validate the candidate another time will request this PoV from us. --- polkadot/node/collation-generation/src/lib.rs | 2 +- polkadot/node/core/av-store/src/tests.rs | 12 +++--- polkadot/node/core/backing/src/lib.rs | 38 ++++++++++++++----- .../availability-distribution/src/tests.rs | 4 +- .../node/network/pov-distribution/src/lib.rs | 14 +++---- polkadot/primitives/src/v1.rs | 6 +-- .../src/node/backing/candidate-backing.md | 24 +++++++----- .../src/node/backing/pov-distribution.md | 4 +- .../src/types/availability.md | 2 +- 9 files changed, 63 insertions(+), 43 deletions(-) diff --git a/polkadot/node/collation-generation/src/lib.rs b/polkadot/node/collation-generation/src/lib.rs index 1b4d978589..257f807a1f 100644 --- a/polkadot/node/collation-generation/src/lib.rs +++ b/polkadot/node/collation-generation/src/lib.rs @@ -318,7 +318,7 @@ fn erasure_root( ) -> crate::error::Result { let available_data = AvailableData { validation_data: persisted_validation, - pov, + pov: Arc::new(pov), }; let chunks = polkadot_erasure_coding::obtain_chunks_v1(n_validators, &available_data)?; diff --git a/polkadot/node/core/av-store/src/tests.rs b/polkadot/node/core/av-store/src/tests.rs index acbab53f12..9493e910db 100644 --- a/polkadot/node/core/av-store/src/tests.rs +++ b/polkadot/node/core/av-store/src/tests.rs @@ -282,7 +282,7 @@ fn store_block_works() { }; let available_data = AvailableData { - pov, + pov: Arc::new(pov), validation_data: test_state.persisted_validation_data, }; @@ -335,7 +335,7 @@ fn store_pov_and_query_chunk_works() { }; let available_data = AvailableData { - pov, + pov: Arc::new(pov), validation_data: test_state.persisted_validation_data, }; @@ -433,7 +433,7 @@ fn stored_but_not_included_data_is_pruned() { }; let available_data = AvailableData { - pov, + pov: Arc::new(pov), validation_data: test_state.persisted_validation_data, }; @@ -487,7 +487,7 @@ fn stored_data_kept_until_finalized() { let candidate_hash = candidate.hash(); let available_data = AvailableData { - pov, + pov: Arc::new(pov), validation_data: test_state.persisted_validation_data, }; @@ -726,12 +726,12 @@ fn forkfullness_works() { let candidate_2_hash = candidate_2.hash(); let available_data_1 = AvailableData { - pov: pov_1, + pov: Arc::new(pov_1), validation_data: test_state.persisted_validation_data.clone(), }; let available_data_2 = AvailableData { - pov: pov_2, + pov: Arc::new(pov_2), validation_data: test_state.persisted_validation_data, }; diff --git a/polkadot/node/core/backing/src/lib.rs b/polkadot/node/core/backing/src/lib.rs index 0cb1c40cdc..8a9cb7d034 100644 --- a/polkadot/node/core/backing/src/lib.rs +++ b/polkadot/node/core/backing/src/lib.rs @@ -24,10 +24,7 @@ use std::pin::Pin; use std::sync::Arc; use bitvec::vec::BitVec; -use futures::{ - channel::{mpsc, oneshot}, - Future, FutureExt, SinkExt, StreamExt, -}; +use futures::{channel::{mpsc, oneshot}, Future, FutureExt, SinkExt, StreamExt}; use sp_keystore::SyncCryptoStorePtr; use polkadot_primitives::v1::{ @@ -314,7 +311,7 @@ impl CandidateBackingJob { async fn validate_and_second( &mut self, candidate: &CandidateReceipt, - pov: PoV, + pov: Arc, ) -> Result { // Check that candidate is collated by the right collator. if self.required_collator.as_ref() @@ -326,7 +323,7 @@ impl CandidateBackingJob { let valid = self.request_candidate_validation( candidate.descriptor().clone(), - Arc::new(pov.clone()), + pov.clone(), ).await?; let candidate_hash = candidate.hash(); @@ -491,14 +488,16 @@ impl CandidateBackingJob { if self.seconded.is_none() { // This job has not seconded a candidate yet. let candidate_hash = candidate.hash(); + let pov = Arc::new(pov); if !self.issued_statements.contains(&candidate_hash) { if let Ok(true) = self.validate_and_second( &candidate, - pov, + pov.clone(), ).await { self.metrics.on_candidate_seconded(); self.seconded = Some(candidate_hash); + self.distribute_pov(candidate.descriptor, pov).await?; } } } @@ -559,7 +558,7 @@ impl CandidateBackingJob { ValidationResult::Valid(outputs, validation_data) => { // If validation produces a new set of commitments, we vote the candidate as invalid. let commitments_check = self.make_pov_available( - (&*pov).clone(), + pov, candidate_hash, validation_data, outputs, @@ -633,6 +632,16 @@ impl CandidateBackingJob { Ok(()) } + async fn distribute_pov( + &mut self, + descriptor: CandidateDescriptor, + pov: Arc, + ) -> Result<(), Error> { + self.tx_from.send(FromJob::PoVDistribution( + PoVDistributionMessage::DistributePoV(self.parent, descriptor, pov), + )).await.map_err(Into::into) + } + async fn request_pov_from_distribution( &mut self, descriptor: CandidateDescriptor, @@ -696,7 +705,7 @@ impl CandidateBackingJob { // early without making the PoV available. async fn make_pov_available( &mut self, - pov: PoV, + pov: Arc, candidate_hash: CandidateHash, validation_data: polkadot_primitives::v1::PersistedValidationData, outputs: ValidationOutputs, @@ -1068,7 +1077,7 @@ mod tests { fn make_erasure_root(test: &TestState, pov: PoV) -> Hash { let available_data = AvailableData { validation_data: test.validation_data.persisted.clone(), - pov, + pov: Arc::new(pov), }; let chunks = erasure_coding::obtain_chunks_v1(test.validators.len(), &available_data).unwrap(); @@ -1232,6 +1241,15 @@ mod tests { } ); + assert_matches!( + virtual_overseer.recv().await, + AllMessages::PoVDistribution(PoVDistributionMessage::DistributePoV(hash, descriptor, pov_received)) => { + assert_eq!(test_state.relay_parent, hash); + assert_eq!(candidate.descriptor, descriptor); + assert_eq!(pov, *pov_received); + } + ); + virtual_overseer.send(FromOverseer::Signal( OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::stop_work(test_state.relay_parent))) ).await; diff --git a/polkadot/node/network/availability-distribution/src/tests.rs b/polkadot/node/network/availability-distribution/src/tests.rs index e3e60e1f9e..ba3988d583 100644 --- a/polkadot/node/network/availability-distribution/src/tests.rs +++ b/polkadot/node/network/availability-distribution/src/tests.rs @@ -23,7 +23,7 @@ use polkadot_primitives::v1::{ AvailableData, BlockData, CandidateCommitments, CandidateDescriptor, GroupIndex, GroupRotationInfo, HeadData, OccupiedCore, PersistedValidationData, PoV, ScheduledCore, }; -use polkadot_subsystem_testhelpers::{self as test_helpers}; +use polkadot_subsystem_testhelpers as test_helpers; use futures::{executor, future, Future}; use futures_timer::Delay; @@ -241,7 +241,7 @@ impl Default for TestState { fn make_available_data(test: &TestState, pov: PoV) -> AvailableData { AvailableData { validation_data: test.persisted_validation_data.clone(), - pov, + pov: Arc::new(pov), } } diff --git a/polkadot/node/network/pov-distribution/src/lib.rs b/polkadot/node/network/pov-distribution/src/lib.rs index 3f1581d1c6..e28013a06a 100644 --- a/polkadot/node/network/pov-distribution/src/lib.rs +++ b/polkadot/node/network/pov-distribution/src/lib.rs @@ -30,12 +30,8 @@ use polkadot_subsystem::{ PoVDistributionMessage, RuntimeApiMessage, RuntimeApiRequest, AllMessages, NetworkBridgeMessage, }, }; -use polkadot_node_subsystem_util::{ - metrics::{self, prometheus}, -}; -use polkadot_node_network_protocol::{ - v1 as protocol_v1, ReputationChange as Rep, NetworkBridgeEvent, PeerId, View, -}; +use polkadot_node_subsystem_util::metrics::{self, prometheus}; +use polkadot_node_network_protocol::{v1 as protocol_v1, ReputationChange as Rep, NetworkBridgeEvent, PeerId, View}; use futures::prelude::*; use futures::channel::oneshot; @@ -43,6 +39,9 @@ use futures::channel::oneshot; use std::collections::{hash_map::{Entry, HashMap}, HashSet}; use std::sync::Arc; +#[cfg(test)] +mod tests; + const COST_APPARENT_FLOOD: Rep = Rep::new(-500, "Peer appears to be flooding us with PoV requests"); const COST_UNEXPECTED_POV: Rep = Rep::new(-500, "Peer sent us an unexpected PoV"); const COST_AWAITED_NOT_IN_VIEW: Rep @@ -616,6 +615,3 @@ impl metrics::Metrics for Metrics { Ok(Metrics(Some(metrics))) } } - -#[cfg(test)] -mod tests; diff --git a/polkadot/primitives/src/v1.rs b/polkadot/primitives/src/v1.rs index 6b3a2342f1..67111cff32 100644 --- a/polkadot/primitives/src/v1.rs +++ b/polkadot/primitives/src/v1.rs @@ -494,11 +494,11 @@ pub enum CoreOccupied { } /// This is the data we keep available for each candidate included in the relay chain. -#[derive(Clone, Encode, Decode)] -#[cfg_attr(feature = "std", derive(PartialEq, Debug))] +#[cfg(feature = "std")] +#[derive(Clone, Encode, Decode, PartialEq, Debug)] pub struct AvailableData { /// The Proof-of-Validation of the candidate. - pub pov: PoV, + pub pov: std::sync::Arc, /// The persisted validation data needed for secondary checks. pub validation_data: PersistedValidationData, } diff --git a/polkadot/roadmap/implementers-guide/src/node/backing/candidate-backing.md b/polkadot/roadmap/implementers-guide/src/node/backing/candidate-backing.md index afea5e8ee4..3acf058a7d 100644 --- a/polkadot/roadmap/implementers-guide/src/node/backing/candidate-backing.md +++ b/polkadot/roadmap/implementers-guide/src/node/backing/candidate-backing.md @@ -39,7 +39,7 @@ The subsystem should maintain a set of handles to Candidate Backing Jobs that ar ### On Receiving `CandidateBackingMessage` * If the message is a [`CandidateBackingMessage`][CBM]`::GetBackedCandidates`, get all backable candidates from the statement table and send them back. -* If the message is a [`CandidateBackingMessage`][CBM]`::Second`, sign and dispatch a `Seconded` statement only if we have not seconded any other candidate and have not signed a `Valid` statement for the requested candidate. Signing both a `Seconded` and `Valid` message is a double-voting misbehavior with a heavy penalty, and this could occur if another validator has seconded the same candidate and we've received their message before the internal seconding request. +* If the message is a [`CandidateBackingMessage`][CBM]`::Second`, sign and dispatch a `Seconded` statement only if we have not seconded any other candidate and have not signed a `Valid` statement for the requested candidate. Signing both a `Seconded` and `Valid` message is a double-voting misbehavior with a heavy penalty, and this could occur if another validator has seconded the same candidate and we've received their message before the internal seconding request. After successfully dispatching the `Seconded` statement we have to distribute the PoV. * If the message is a [`CandidateBackingMessage`][CBM]`::Statement`, count the statement to the quorum. If the statement in the message is `Seconded` and it contains a candidate that belongs to our assignment, request the corresponding `PoV` from the `PoVDistribution` and launch validation. Issue our own `Valid` or `Invalid` statement as a result. > big TODO: "contextual execution" @@ -72,16 +72,18 @@ match msg { } CandidateBackingMessage::Second(hash, candidate) => { if candidate is unknown and in local assignment { - spawn_validation_work(candidate, parachain head, validation function) + if spawn_validation_work(candidate, parachain head, validation function).await == Valid { + send(DistributePoV(pov)) + } } } CandidateBackingMessage::Statement(hash, statement) => { // count to the votes on this candidate - if let Statement::Seconded(candidate) = statement { - if candidate.parachain_id == our_assignment { - spawn_validation_work(candidate, parachain head, validation function) - } - } + if let Statement::Seconded(candidate) = statement { + if candidate.parachain_id == our_assignment { + spawn_validation_work(candidate, parachain head, validation function) + } + } } } ``` @@ -110,14 +112,18 @@ fn spawn_validation_work(candidate, parachain head, validation function) { ### Fetch Pov Block Create a `(sender, receiver)` pair. -Dispatch a [`PoVDistributionMessage`][PDM]`::FecthPoV(relay_parent, candidate_hash, sender)` and listen on the receiver for a response. +Dispatch a [`PoVDistributionMessage`][PDM]`::FetchPoV(relay_parent, candidate_hash, sender)` and listen on the receiver for a response. + +### Distribute Pov Block + +Dispatch a [`PoVDistributionMessage`][PDM]`::DistributePoV(relay_parent, candidate_descriptor, pov)`. ### Validate PoV Block Create a `(sender, receiver)` pair. Dispatch a `CandidateValidationMessage::Validate(validation function, candidate, pov, sender)` and listen on the receiver for a response. -### Distribute Signed Statemnet +### Distribute Signed Statement Dispatch a [`StatementDistributionMessage`][PDM]`::Share(relay_parent, SignedFullStatement)`. diff --git a/polkadot/roadmap/implementers-guide/src/node/backing/pov-distribution.md b/polkadot/roadmap/implementers-guide/src/node/backing/pov-distribution.md index 9b32abbd56..3cf6dd995a 100644 --- a/polkadot/roadmap/implementers-guide/src/node/backing/pov-distribution.md +++ b/polkadot/roadmap/implementers-guide/src/node/backing/pov-distribution.md @@ -17,7 +17,7 @@ Output: ## Functionality -This network protocol is responsible for distributing [`PoV`s](../../types/availability.md#proof-of-validity) by gossip. Since PoVs are heavy in practice, gossip is far from the most efficient way to distribute them. In the future, this should be replaced by a better network protocol that finds validators who have validated the block and connects to them directly. This protocol is descrbied. +This network protocol is responsible for distributing [`PoV`s](../../types/availability.md#proof-of-validity) by gossip. Since PoVs are heavy in practice, gossip is far from the most efficient way to distribute them. In the future, this should be replaced by a better network protocol that finds validators who have validated the block and connects to them directly. This protocol is described. This protocol is described in terms of "us" and our peers, with the understanding that this is the procedure that any honest node will run. It has the following goals: - We never have to buffer an unbounded amount of data @@ -25,7 +25,7 @@ This protocol is described in terms of "us" and our peers, with the understandin As we are gossiping, we need to track which PoVs our peers are waiting for to avoid sending them data that they are not expecting. It is not reasonable to expect our peers to buffer unexpected PoVs, just as we will not buffer unexpected PoVs. So notifying our peers about what is being awaited is key. However it is important that the notifications system is also bounded. -For this, in order to avoid reaching into the internals of the [Statement Distribution](statement-distribution.md) Subsystem, we can rely on an expected propery of candidate backing: that each validator can second up to 2 candidates per chain head. This will typically be only one, because they are only supposed to issue one, but they can equivocate if they are willing to be slashed. So we can set a cap on the number of PoVs each peer is allowed to notify us that they are waiting for at a given relay-parent. This cap will be twice the number of validators at that relay-parent. In practice, this is a very lax upper bound that can be reduced much further if desired. +For this, in order to avoid reaching into the internals of the [Statement Distribution](statement-distribution.md) Subsystem, we can rely on an expected property of candidate backing: that each validator can second up to 2 candidates per chain head. This will typically be only one, because they are only supposed to issue one, but they can equivocate if they are willing to be slashed. So we can set a cap on the number of PoVs each peer is allowed to notify us that they are waiting for at a given relay-parent. This cap will be twice the number of validators at that relay-parent. In practice, this is a very lax upper bound that can be reduced much further if desired. The view update mechanism of the [Network Bridge](../utility/network-bridge.md) ensures that peers are only allowed to consider a certain set of relay-parents as live. So this bounding mechanism caps the amount of data we need to store per peer at any time at `sum({ 2 * n_validators_at_head(head) * sizeof(hash) for head in view_heads })`. Additionally, peers should only be allowed to notify us of PoV hashes they are waiting for in the context of relay-parents in our own local view, which means that `n_validators_at_head` is implied to be `0` for relay-parents not in our own local view. diff --git a/polkadot/roadmap/implementers-guide/src/types/availability.md b/polkadot/roadmap/implementers-guide/src/types/availability.md index 0117b174e6..e2b90e86f4 100644 --- a/polkadot/roadmap/implementers-guide/src/types/availability.md +++ b/polkadot/roadmap/implementers-guide/src/types/availability.md @@ -40,7 +40,7 @@ This is the data we want to keep available for each [candidate](candidate.md) in ```rust struct AvailableData { /// The Proof-of-Validation of the candidate. - pov: PoV, + pov: Arc, /// The persisted validation data used to check the candidate. validation_data: PersistedValidationData, }