cumulus-pov-recovery: check pov_hash instead of reencoding data (#2287)

Collators previously re-encoded the available data and checked the
erasure root.
Replace that with just checking the PoV hash, which consumes much less
CPU and takes less time.

We also don't need to check the `PersistedValidationData` hash, as
collators don't use it.

Reason:
https://github.com/paritytech/polkadot-sdk/issues/575#issuecomment-1806572230

Once systematic chunk recovery is merged, collators will no longer
perform any Reed-Solomon encoding/decoding, which has proven to be a
major CPU consumer.

Signed-off-by: alindima <alin@parity.io>
This commit is contained in:
Alin Dima
2023-11-14 10:37:41 +02:00
committed by GitHub
parent 8d2637905b
commit 689b9d91c7
5 changed files with 137 additions and 87 deletions
@@ -16,12 +16,12 @@
use sp_runtime::traits::Block as BlockT;
use polkadot_node_primitives::AvailableData;
use polkadot_node_primitives::PoV;
use polkadot_node_subsystem::messages::AvailabilityRecoveryMessage;
use futures::{channel::oneshot, stream::FuturesUnordered, Future, FutureExt, StreamExt};
use std::{collections::HashSet, pin::Pin};
use std::{collections::HashSet, pin::Pin, sync::Arc};
use crate::RecoveryHandle;
@@ -30,9 +30,8 @@ use crate::RecoveryHandle;
/// This handles the candidate recovery and tracks the active recoveries.
pub(crate) struct ActiveCandidateRecovery<Block: BlockT> {
/// The recoveries that are currently being executed.
recoveries: FuturesUnordered<
Pin<Box<dyn Future<Output = (Block::Hash, Option<AvailableData>)> + Send>>,
>,
recoveries:
FuturesUnordered<Pin<Box<dyn Future<Output = (Block::Hash, Option<Arc<PoV>>)> + Send>>>,
/// The block hashes of the candidates currently being recovered.
candidates: HashSet<Block::Hash>,
recovery_handle: Box<dyn RecoveryHandle>,
@@ -68,7 +67,7 @@ impl<Block: BlockT> ActiveCandidateRecovery<Block> {
self.recoveries.push(
async move {
match rx.await {
Ok(Ok(res)) => (block_hash, Some(res)),
Ok(Ok(res)) => (block_hash, Some(res.pov)),
Ok(Err(error)) => {
tracing::debug!(
target: crate::LOG_TARGET,
@@ -93,8 +92,8 @@ impl<Block: BlockT> ActiveCandidateRecovery<Block> {
/// Waits for the next recovery.
///
/// If the returned [`AvailableData`] is `None`, it means that the recovery failed.
pub async fn wait_for_recovery(&mut self) -> (Block::Hash, Option<AvailableData>) {
/// If the returned [`PoV`] is `None`, it means that the recovery failed.
pub async fn wait_for_recovery(&mut self) -> (Block::Hash, Option<Arc<PoV>>) {
loop {
if let Some(res) = self.recoveries.next().await {
self.candidates.remove(&res.0);
+16 -22
View File
@@ -51,7 +51,7 @@ use sc_consensus::import_queue::{ImportQueueService, IncomingBlock};
use sp_consensus::{BlockOrigin, BlockStatus, SyncOracle};
use sp_runtime::traits::{Block as BlockT, Header as HeaderT, NumberFor};
use polkadot_node_primitives::{AvailableData, POV_BOMB_LIMIT};
use polkadot_node_primitives::{PoV, POV_BOMB_LIMIT};
use polkadot_node_subsystem::messages::AvailabilityRecoveryMessage;
use polkadot_overseer::Handle as OverseerHandle;
use polkadot_primitives::{
@@ -346,15 +346,11 @@ where
}
/// Handle a recovered candidate.
async fn handle_candidate_recovered(
&mut self,
block_hash: Block::Hash,
available_data: Option<AvailableData>,
) {
let available_data = match available_data {
Some(data) => {
async fn handle_candidate_recovered(&mut self, block_hash: Block::Hash, pov: Option<&PoV>) {
let pov = match pov {
Some(pov) => {
self.candidates_in_retry.remove(&block_hash);
data
pov
},
None =>
if self.candidates_in_retry.insert(block_hash) {
@@ -373,18 +369,16 @@ where
},
};
let raw_block_data = match sp_maybe_compressed_blob::decompress(
&available_data.pov.block_data.0,
POV_BOMB_LIMIT,
) {
Ok(r) => r,
Err(error) => {
tracing::debug!(target: LOG_TARGET, ?error, "Failed to decompress PoV");
let raw_block_data =
match sp_maybe_compressed_blob::decompress(&pov.block_data.0, POV_BOMB_LIMIT) {
Ok(r) => r,
Err(error) => {
tracing::debug!(target: LOG_TARGET, ?error, "Failed to decompress PoV");
self.reset_candidate(block_hash);
return
},
};
self.reset_candidate(block_hash);
return
},
};
let block_data = match ParachainBlockData::<Block>::decode(&mut &raw_block_data[..]) {
Ok(d) => d,
@@ -595,10 +589,10 @@ where
next_to_recover = self.candidate_recovery_queue.next_recovery().fuse() => {
self.recover_candidate(next_to_recover).await;
},
(block_hash, available_data) =
(block_hash, pov) =
self.active_candidate_recovery.wait_for_recovery().fuse() =>
{
self.handle_candidate_recovered(block_hash, available_data).await;
self.handle_candidate_recovered(block_hash, pov.as_deref()).await;
},
}
}