backing: Remove redundant erasure encoding (#7469)

* Remove redundant erasure encoding

Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>

* Review feedback

Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>

* fix comments

Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>

---------

Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>
This commit is contained in:
Andrei Sandu
2023-07-07 11:20:30 +03:00
committed by GitHub
parent 518773a943
commit e8d567a1f5
9 changed files with 213 additions and 62 deletions
+1
View File
@@ -7190,6 +7190,7 @@ dependencies = [
"parity-scale-codec",
"parking_lot 0.12.1",
"polkadot-erasure-coding",
"polkadot-node-jaeger",
"polkadot-node-primitives",
"polkadot-node-subsystem",
"polkadot-node-subsystem-test-helpers",
+1
View File
@@ -20,6 +20,7 @@ polkadot-overseer = { path = "../../overseer" }
polkadot-primitives = { path = "../../../primitives" }
polkadot-node-primitives = { path = "../../primitives" }
sp-consensus = { git = "https://github.com/paritytech/substrate", branch = "master", default-features = false }
polkadot-node-jaeger = { path = "../../jaeger" }
[dev-dependencies]
log = "0.4.17"
+34 -4
View File
@@ -39,10 +39,11 @@ use polkadot_node_subsystem_util::database::{DBTransaction, Database};
use sp_consensus::SyncOracle;
use bitvec::{order::Lsb0 as BitOrderLsb0, vec::BitVec};
use polkadot_node_jaeger as jaeger;
use polkadot_node_primitives::{AvailableData, ErasureChunk};
use polkadot_node_subsystem::{
errors::{ChainApiError, RuntimeApiError},
messages::{AvailabilityStoreMessage, ChainApiMessage},
messages::{AvailabilityStoreMessage, ChainApiMessage, StoreAvailableDataError},
overseer, ActiveLeavesUpdate, FromOrchestra, OverseerSignal, SpawnedSubsystem, SubsystemError,
};
use polkadot_node_subsystem_util as util;
@@ -372,6 +373,9 @@ pub enum Error {
#[error("Custom databases are not supported")]
CustomDatabase,
#[error("Erasure root does not match expected one")]
InvalidErasureRoot,
}
impl Error {
@@ -1184,21 +1188,34 @@ fn process_message(
candidate_hash,
n_validators,
available_data,
expected_erasure_root,
tx,
} => {
subsystem.metrics.on_chunks_received(n_validators as _);
let _timer = subsystem.metrics.time_store_available_data();
let res =
store_available_data(&subsystem, candidate_hash, n_validators as _, available_data);
let res = store_available_data(
&subsystem,
candidate_hash,
n_validators as _,
available_data,
expected_erasure_root,
);
match res {
Ok(()) => {
let _ = tx.send(Ok(()));
},
Err(Error::InvalidErasureRoot) => {
let _ = tx.send(Err(StoreAvailableDataError::InvalidErasureRoot));
return Err(Error::InvalidErasureRoot)
},
Err(e) => {
let _ = tx.send(Err(()));
// We do not bubble up internal errors to caller subsystems, instead the
// tx channel is dropped and that error is caught by the caller subsystem.
//
// We bubble up the specific error here so `av-store` logs still tell what happened.
return Err(e.into())
},
}
@@ -1250,6 +1267,7 @@ fn store_available_data(
candidate_hash: CandidateHash,
n_validators: usize,
available_data: AvailableData,
expected_erasure_root: Hash,
) -> Result<(), Error> {
let mut tx = DBTransaction::new();
@@ -1276,9 +1294,21 @@ fn store_available_data(
},
};
let erasure_span = jaeger::Span::new(candidate_hash, "erasure-coding")
.with_candidate(candidate_hash)
.with_pov(&available_data.pov);
// Important note: This check below is critical for consensus and the `backing` subsystem relies on it to
// ensure candidate validity.
let chunks = erasure::obtain_chunks_v1(n_validators, &available_data)?;
let branches = erasure::branches(chunks.as_ref());
if branches.root() != expected_erasure_root {
return Err(Error::InvalidErasureRoot)
}
drop(erasure_span);
let erasure_chunks = chunks.iter().zip(branches.map(|(proof, _)| proof)).enumerate().map(
|(index, (chunk, proof))| ErasureChunk {
chunk: chunk.clone(),
+66 -5
View File
@@ -415,6 +415,45 @@ fn query_chunk_checks_meta() {
});
}
// Verifies that a `StoreAvailableData` request is rejected with
// `StoreAvailableDataError::InvalidErasureRoot` when the erasure root computed
// from `available_data` does not match the `expected_erasure_root` supplied in
// the message, and that in that case nothing is persisted in the store.
#[test]
fn store_available_data_erasure_mismatch() {
let store = test_store();
let test_state = TestState::default();
test_harness(test_state.clone(), store.clone(), |mut virtual_overseer| async move {
let candidate_hash = CandidateHash(Hash::repeat_byte(1));
let validator_index = ValidatorIndex(5);
let n_validators = 10;
let pov = PoV { block_data: BlockData(vec![4, 5, 6]) };
let available_data = AvailableData {
pov: Arc::new(pov),
validation_data: test_state.persisted_validation_data.clone(),
};
let (tx, rx) = oneshot::channel();
let block_msg = AvailabilityStoreMessage::StoreAvailableData {
candidate_hash,
n_validators,
available_data: available_data.clone(),
tx,
// A dummy erasure root should lead to failure.
expected_erasure_root: Hash::default(),
};
virtual_overseer.send(FromOrchestra::Communication { msg: block_msg }).await;
// The mismatch must be reported back to the requester...
assert_eq!(rx.await.unwrap(), Err(StoreAvailableDataError::InvalidErasureRoot));
// ...and neither the available data nor any chunk may have been stored.
assert!(query_available_data(&mut virtual_overseer, candidate_hash).await.is_none());
assert!(query_chunk(&mut virtual_overseer, candidate_hash, validator_index)
.await
.is_none());
virtual_overseer
});
}
#[test]
fn store_block_works() {
let store = test_store();
@@ -430,13 +469,17 @@ fn store_block_works() {
pov: Arc::new(pov),
validation_data: test_state.persisted_validation_data.clone(),
};
let (tx, rx) = oneshot::channel();
let chunks = erasure::obtain_chunks_v1(10, &available_data).unwrap();
let mut branches = erasure::branches(chunks.as_ref());
let block_msg = AvailabilityStoreMessage::StoreAvailableData {
candidate_hash,
n_validators,
available_data: available_data.clone(),
tx,
expected_erasure_root: branches.root(),
};
virtual_overseer.send(FromOrchestra::Communication { msg: block_msg }).await;
@@ -449,10 +492,6 @@ fn store_block_works() {
.await
.unwrap();
let chunks = erasure::obtain_chunks_v1(10, &available_data).unwrap();
let mut branches = erasure::branches(chunks.as_ref());
let branch = branches.nth(5).unwrap();
let expected_chunk = ErasureChunk {
chunk: branch.1.to_vec(),
@@ -483,6 +522,7 @@ fn store_pov_and_query_chunk_works() {
let chunks_expected =
erasure::obtain_chunks_v1(n_validators as _, &available_data).unwrap();
let branches = erasure::branches(chunks_expected.as_ref());
let (tx, rx) = oneshot::channel();
let block_msg = AvailabilityStoreMessage::StoreAvailableData {
@@ -490,6 +530,7 @@ fn store_pov_and_query_chunk_works() {
n_validators,
available_data,
tx,
expected_erasure_root: branches.root(),
};
virtual_overseer.send(FromOrchestra::Communication { msg: block_msg }).await;
@@ -530,12 +571,16 @@ fn query_all_chunks_works() {
};
{
let chunks_expected =
erasure::obtain_chunks_v1(n_validators as _, &available_data).unwrap();
let branches = erasure::branches(chunks_expected.as_ref());
let (tx, rx) = oneshot::channel();
let block_msg = AvailabilityStoreMessage::StoreAvailableData {
candidate_hash: candidate_hash_1,
n_validators,
available_data,
tx,
expected_erasure_root: branches.root(),
};
virtual_overseer.send(FromOrchestra::Communication { msg: block_msg }).await;
@@ -619,11 +664,15 @@ fn stored_but_not_included_data_is_pruned() {
};
let (tx, rx) = oneshot::channel();
let chunks = erasure::obtain_chunks_v1(n_validators as _, &available_data).unwrap();
let branches = erasure::branches(chunks.as_ref());
let block_msg = AvailabilityStoreMessage::StoreAvailableData {
candidate_hash,
n_validators,
available_data: available_data.clone(),
tx,
expected_erasure_root: branches.root(),
};
virtual_overseer.send(FromOrchestra::Communication { msg: block_msg }).await;
@@ -670,12 +719,16 @@ fn stored_data_kept_until_finalized() {
let parent = Hash::repeat_byte(2);
let block_number = 10;
let chunks = erasure::obtain_chunks_v1(n_validators as _, &available_data).unwrap();
let branches = erasure::branches(chunks.as_ref());
let (tx, rx) = oneshot::channel();
let block_msg = AvailabilityStoreMessage::StoreAvailableData {
candidate_hash,
n_validators,
available_data: available_data.clone(),
tx,
expected_erasure_root: branches.root(),
};
virtual_overseer.send(FromOrchestra::Communication { msg: block_msg }).await;
@@ -946,24 +999,32 @@ fn forkfullness_works() {
validation_data: test_state.persisted_validation_data.clone(),
};
let chunks = erasure::obtain_chunks_v1(n_validators as _, &available_data_1).unwrap();
let branches = erasure::branches(chunks.as_ref());
let (tx, rx) = oneshot::channel();
let msg = AvailabilityStoreMessage::StoreAvailableData {
candidate_hash: candidate_1_hash,
n_validators,
available_data: available_data_1.clone(),
tx,
expected_erasure_root: branches.root(),
};
virtual_overseer.send(FromOrchestra::Communication { msg }).await;
rx.await.unwrap().unwrap();
let chunks = erasure::obtain_chunks_v1(n_validators as _, &available_data_2).unwrap();
let branches = erasure::branches(chunks.as_ref());
let (tx, rx) = oneshot::channel();
let msg = AvailabilityStoreMessage::StoreAvailableData {
candidate_hash: candidate_2_hash,
n_validators,
available_data: available_data_2.clone(),
tx,
expected_erasure_root: branches.root(),
};
virtual_overseer.send(FromOrchestra::Communication { msg }).await;
+8 -2
View File
@@ -17,7 +17,10 @@
use fatality::Nested;
use futures::channel::{mpsc, oneshot};
use polkadot_node_subsystem::{messages::ValidationFailed, SubsystemError};
use polkadot_node_subsystem::{
messages::{StoreAvailableDataError, ValidationFailed},
SubsystemError,
};
use polkadot_node_subsystem_util::Error as UtilError;
use polkadot_primitives::BackedCandidate;
@@ -50,7 +53,7 @@ pub enum Error {
ValidateFromChainState(#[source] oneshot::Canceled),
#[error("StoreAvailableData channel closed before receipt")]
StoreAvailableData(#[source] oneshot::Canceled),
StoreAvailableDataChannel(#[source] oneshot::Canceled),
#[error("a channel was closed before receipt in try_join!")]
JoinMultiple(#[source] oneshot::Canceled),
@@ -74,6 +77,9 @@ pub enum Error {
#[fatal]
#[error(transparent)]
OverseerExited(SubsystemError),
#[error("Availability store error")]
StoreAvailableData(#[source] StoreAvailableDataError),
}
/// Utility for eating top level errors and log them.
+28 -32
View File
@@ -38,7 +38,7 @@ use polkadot_node_subsystem::{
messages::{
AvailabilityDistributionMessage, AvailabilityStoreMessage, CandidateBackingMessage,
CandidateValidationMessage, CollatorProtocolMessage, ProvisionableData, ProvisionerMessage,
RuntimeApiRequest, StatementDistributionMessage,
RuntimeApiRequest, StatementDistributionMessage, StoreAvailableDataError,
},
overseer, ActiveLeavesUpdate, FromOrchestra, OverseerSignal, PerLeafSpan, SpawnedSubsystem,
Stage, SubsystemError,
@@ -490,8 +490,6 @@ impl TableContextTrait for TableContext {
}
}
struct InvalidErasureRoot;
// It looks like it's not possible to do an `impl From` given the current state of
// the code. So this does the necessary conversion.
fn primitive_statement_to_table(s: &SignedFullStatement) -> TableSignedStatement {
@@ -561,26 +559,34 @@ async fn store_available_data(
n_validators: u32,
candidate_hash: CandidateHash,
available_data: AvailableData,
expected_erasure_root: Hash,
) -> Result<(), Error> {
let (tx, rx) = oneshot::channel();
// Important: the `av-store` subsystem will check if the erasure root of the `available_data` matches `expected_erasure_root`
// which was provided by the collator in the `CandidateReceipt`. This check is consensus critical and the `backing` subsystem
// relies on it for ensuring candidate validity.
sender
.send_message(AvailabilityStoreMessage::StoreAvailableData {
candidate_hash,
n_validators,
available_data,
expected_erasure_root,
tx,
})
.await;
let _ = rx.await.map_err(Error::StoreAvailableData)?;
Ok(())
rx.await
.map_err(Error::StoreAvailableDataChannel)?
.map_err(Error::StoreAvailableData)
}
// Make a `PoV` available.
//
// This will compute the erasure root internally and compare it to the expected erasure root.
// This returns `Err()` iff there is an internal error. Otherwise, it returns either `Ok(Ok(()))` or `Ok(Err(_))`.
// This calls the AV store to write the available data to storage. The AV store also checks the erasure root matches
// the `expected_erasure_root`.
// This returns `Err()` on erasure root mismatch or due to any AV store subsystem error.
//
// Otherwise, it returns `Ok(())`.
async fn make_pov_available(
sender: &mut impl overseer::CandidateBackingSenderTrait,
@@ -590,29 +596,17 @@ async fn make_pov_available(
validation_data: polkadot_primitives::PersistedValidationData,
expected_erasure_root: Hash,
span: Option<&jaeger::Span>,
) -> Result<Result<(), InvalidErasureRoot>, Error> {
let available_data = AvailableData { pov, validation_data };
) -> Result<(), Error> {
let _span = span.as_ref().map(|s| s.child("store-data").with_candidate(candidate_hash));
{
let _span = span.as_ref().map(|s| s.child("erasure-coding").with_candidate(candidate_hash));
let chunks = erasure_coding::obtain_chunks_v1(n_validators, &available_data)?;
let branches = erasure_coding::branches(chunks.as_ref());
let erasure_root = branches.root();
if erasure_root != expected_erasure_root {
return Ok(Err(InvalidErasureRoot))
}
}
{
let _span = span.as_ref().map(|s| s.child("store-data").with_candidate(candidate_hash));
store_available_data(sender, n_validators as u32, candidate_hash, available_data).await?;
}
Ok(Ok(()))
store_available_data(
sender,
n_validators as u32,
candidate_hash,
AvailableData { pov, validation_data },
expected_erasure_root,
)
.await
}
async fn request_pov(
@@ -749,11 +743,11 @@ async fn validate_and_make_available(
candidate.descriptor.erasure_root,
span.as_ref(),
)
.await?;
.await;
match erasure_valid {
Ok(()) => Ok((candidate, commitments, pov.clone())),
Err(InvalidErasureRoot) => {
Err(Error::StoreAvailableData(StoreAvailableDataError::InvalidErasureRoot)) => {
gum::debug!(
target: LOG_TARGET,
candidate_hash = ?candidate.hash(),
@@ -762,6 +756,8 @@ async fn validate_and_make_available(
);
Err(candidate)
},
// Bubble up any other error.
Err(e) => return Err(e),
}
},
ValidationResult::Invalid(InvalidCandidate::CommitmentsHashMismatch) => {
+14 -3
View File
@@ -462,9 +462,10 @@ pub enum AvailabilityStoreMessage {
tx: oneshot::Sender<Result<(), ()>>,
},
/// Store a `AvailableData` and all of its chunks in the AV store.
/// Computes and checks the erasure root of `AvailableData` before storing all of its chunks in
/// the AV store.
///
/// Return `Ok(())` if the store operation succeeded, `Err(())` if it failed.
/// Return `Ok(())` if the store operation succeeded, `Err(StoreAvailableDataError)` if it failed.
StoreAvailableData {
/// A hash of the candidate this `available_data` belongs to.
candidate_hash: CandidateHash,
@@ -472,11 +473,21 @@ pub enum AvailabilityStoreMessage {
n_validators: u32,
/// The `AvailableData` itself.
available_data: AvailableData,
/// Erasure root we expect to get after chunking.
expected_erasure_root: Hash,
/// Sending side of the channel to send result to.
tx: oneshot::Sender<Result<(), ()>>,
tx: oneshot::Sender<Result<(), StoreAvailableDataError>>,
},
}
/// The error result type of a [`AvailabilityStoreMessage::StoreAvailableData`] request.
#[derive(Error, Debug, Clone, PartialEq, Eq)]
#[allow(missing_docs)]
pub enum StoreAvailableDataError {
#[error("The computed erasure root did not match expected one")]
InvalidErasureRoot,
}
/// A response channel for the result of a chain API request.
pub type ChainApiResponseChannel<T> = oneshot::Sender<Result<T, crate::errors::ChainApiError>>;
@@ -155,6 +155,7 @@ On `StoreChunk` message:
On `StoreAvailableData` message:
- Compute the erasure root of the available data and compare it with `expected_erasure_root`. Return `StoreAvailableDataError::InvalidErasureRoot` on mismatch.
- If there is no `CandidateMeta` under the candidate hash, create it with `State::Unavailable(now)`. Load the `CandidateMeta` otherwise.
- Store `data` under `("available", candidate_hash)` and set `data_available` to true.
- Store each chunk under `("chunk", candidate_hash, index)` and set every bit in `chunks_stored` to `1`.
@@ -254,22 +254,66 @@ enum AvailabilityRecoveryMessage {
Messages to and from the availability store.
```rust
enum AvailabilityStoreMessage {
/// Query the `AvailableData` of a candidate by hash.
QueryAvailableData(CandidateHash, ResponseChannel<Option<AvailableData>>),
/// Query whether an `AvailableData` exists within the AV Store.
QueryDataAvailability(CandidateHash, ResponseChannel<bool>),
/// Query a specific availability chunk of the candidate's erasure-coding by validator index.
/// Returns the chunk and its inclusion proof against the candidate's erasure-root.
QueryChunk(CandidateHash, ValidatorIndex, ResponseChannel<Option<ErasureChunk>>),
/// Query all chunks that we have locally for the given candidate hash.
QueryAllChunks(CandidateHash, ResponseChannel<Vec<ErasureChunk>>),
/// Store a specific chunk of the candidate's erasure-coding by validator index, with an
/// accompanying proof.
StoreChunk(CandidateHash, ErasureChunk, ResponseChannel<Result<()>>),
/// Store `AvailableData`. If `ValidatorIndex` is provided, also store this validator's
/// `ErasureChunk`.
StoreAvailableData(CandidateHash, Option<ValidatorIndex>, u32, AvailableData, ResponseChannel<Result<()>>),
pub enum AvailabilityStoreMessage {
/// Query an `AvailableData` from the AV store.
QueryAvailableData(CandidateHash, oneshot::Sender<Option<AvailableData>>),
/// Query whether an `AvailableData` exists within the AV Store.
///
/// This is useful in cases when existence
/// matters, but we don't want to necessarily pass around multiple
/// megabytes of data to get a single bit of information.
QueryDataAvailability(CandidateHash, oneshot::Sender<bool>),
/// Query an `ErasureChunk` from the AV store by the candidate hash and validator index.
QueryChunk(CandidateHash, ValidatorIndex, oneshot::Sender<Option<ErasureChunk>>),
/// Get the size of an `ErasureChunk` from the AV store by the candidate hash.
QueryChunkSize(CandidateHash, oneshot::Sender<Option<usize>>),
/// Query all chunks that we have for the given candidate hash.
QueryAllChunks(CandidateHash, oneshot::Sender<Vec<ErasureChunk>>),
/// Query whether an `ErasureChunk` exists within the AV Store.
///
/// This is useful in cases like bitfield signing, when existence
/// matters, but we don't want to necessarily pass around large
/// quantities of data to get a single bit of information.
QueryChunkAvailability(CandidateHash, ValidatorIndex, oneshot::Sender<bool>),
/// Store an `ErasureChunk` in the AV store.
///
/// Return `Ok(())` if the store operation succeeded, `Err(())` if it failed.
StoreChunk {
/// A hash of the candidate this chunk belongs to.
candidate_hash: CandidateHash,
/// The chunk itself.
chunk: ErasureChunk,
/// Sending side of the channel to send result to.
tx: oneshot::Sender<Result<(), ()>>,
},
/// Computes and checks the erasure root of `AvailableData` before storing all of its chunks in
/// the AV store.
///
/// Return `Ok(())` if the store operation succeeded, `Err(StoreAvailableDataError)` if it failed.
StoreAvailableData {
/// A hash of the candidate this `available_data` belongs to.
candidate_hash: CandidateHash,
/// The number of validators in the session.
n_validators: u32,
/// The `AvailableData` itself.
available_data: AvailableData,
/// Erasure root we expect to get after chunking.
expected_erasure_root: Hash,
/// Sending side of the channel to send result to.
tx: oneshot::Sender<Result<(), StoreAvailableDataError>>,
},
}
/// The error result type of a [`AvailabilityStoreMessage::StoreAvailableData`] request.
pub enum StoreAvailableDataError {
InvalidErasureRoot,
}
```