Add retry mechanism for pov-recovery, fix full-node pov-recovery (#2164)

* Increase delay for pov-recovery

* Update client/service/src/lib.rs

Co-authored-by: Bastian Köcher <git@kchr.de>

* Comment

* FMT

* Clear waiting_recovery when block is recovered or recovery failed

* Introduce recovery queue that preserves insertion order

* Better error logs

* Decrease slot duration

* Style improvements

* Add option to use unordered queue

* Maintain cache of finalized blocks

* Wait for one relay chain slot before recovery

* Make retries testable

* fmt

* Improve docs

* Improve docs

* Simplify RecoveryQueue

* Remove unwanted changes

* Adjust to comments

* Apply suggestions from code review

Co-authored-by: Bastian Köcher <git@kchr.de>

* Move recovery delay into the queue

* Check for finalized number

* Clean up

* Use timer

Co-authored-by: Bastian Köcher <git@kchr.de>

* Simplify implementation

* Revert "Use timer"

This reverts commit 3809eed840d3a09d54212f99486782ff80cdc1c9.

* Properly clear `to_recover` flag

---------

Co-authored-by: Bastian Köcher <git@kchr.de>
This commit is contained in:
Sebastian Kunert
2023-02-09 14:18:55 +01:00
committed by GitHub
parent b4d0992ca8
commit afcfd2404a
15 changed files with 419 additions and 161 deletions
@@ -18,11 +18,12 @@ use sc_client_api::{
Backend, BlockBackend, BlockImportNotification, BlockchainEvents, Finalizer, UsageProvider,
};
use sc_consensus::{BlockImport, BlockImportParams, ForkChoiceStrategy};
use schnellru::{ByLength, LruMap};
use sp_blockchain::Error as ClientError;
use sp_consensus::{BlockOrigin, BlockStatus};
use sp_runtime::traits::{Block as BlockT, Header as HeaderT};
use cumulus_client_pov_recovery::{RecoveryDelay, RecoveryKind, RecoveryRequest};
use cumulus_client_pov_recovery::{RecoveryKind, RecoveryRequest};
use cumulus_relay_chain_interface::{RelayChainInterface, RelayChainResult};
use polkadot_primitives::{Hash as PHash, Id as ParaId, OccupiedCoreAssumption};
@@ -30,16 +31,60 @@ use polkadot_primitives::{Hash as PHash, Id as ParaId, OccupiedCoreAssumption};
use codec::Decode;
use futures::{channel::mpsc::Sender, pin_mut, select, FutureExt, Stream, StreamExt};
use std::{sync::Arc, time::Duration};
use std::sync::Arc;
const LOG_TARGET: &str = "cumulus-consensus";
const FINALIZATION_CACHE_SIZE: u32 = 40;
// Delay range to trigger explicit requests.
// The chosen value doesn't have any special meaning, a random delay within the order of
// seconds in practice should be a good enough to allow a quick recovery without DOSing
// the relay chain.
const RECOVERY_DELAY: RecoveryDelay =
RecoveryDelay { min: Duration::ZERO, max: Duration::from_secs(30) };
/// Handle a newly observed finalized parachain head coming from the relay chain.
///
/// Decodes the SCALE-encoded `finalized_head` bytes into a `Block::Header`,
/// records the header hash in `last_seen_finalized_hashes` (this cache lets a
/// block that is imported later — e.g. recovered via PoV recovery on a full
/// node — still be finalized by the import-notification path), and, if the
/// local client's finalized number is behind the incoming head, attempts to
/// finalize it.
///
/// Failure modes are deliberately non-fatal:
/// - an undecodable head is logged at `debug` and ignored;
/// - `UnknownBlock` on finalize is expected when the block has not been
///   imported yet (the cache entry covers that case later) and is logged at
///   `debug`; any other finalize error is logged at `warn`.
fn handle_new_finalized_head<P, Block, B>(
parachain: &Arc<P>,
finalized_head: Vec<u8>,
last_seen_finalized_hashes: &mut LruMap<Block::Hash, ()>,
) where
Block: BlockT,
B: Backend<Block>,
P: Finalizer<Block, B> + UsageProvider<Block> + BlockchainEvents<Block>,
{
// The relay chain hands us the head as opaque bytes; decode it into a header.
let header = match Block::Header::decode(&mut &finalized_head[..]) {
Ok(header) => header,
Err(err) => {
tracing::debug!(
target: LOG_TARGET,
error = ?err,
"Could not decode parachain header while following finalized heads.",
);
return
},
};
let hash = header.hash();
// Remember this hash so a late import of the same block can be finalized
// immediately by the caller's import-notification handling.
last_seen_finalized_hashes.insert(hash, ());
// Only finalize if we are below the incoming finalized parachain head
if parachain.usage_info().chain.finalized_number < *header.number() {
tracing::debug!(
target: LOG_TARGET,
block_hash = ?hash,
"Attempting to finalize header.",
);
if let Err(e) = parachain.finalize_block(hash, None, true) {
match e {
// Not imported yet — benign; the LRU entry above lets us finalize
// it when it eventually arrives.
ClientError::UnknownBlock(_) => tracing::debug!(
target: LOG_TARGET,
block_hash = ?hash,
"Could not finalize block because it is unknown.",
),
_ => tracing::warn!(
target: LOG_TARGET,
error = ?e,
block_hash = ?hash,
"Failed to finalize block",
),
}
}
}
}
/// Follow the finalized head of the given parachain.
///
@@ -48,57 +93,75 @@ const RECOVERY_DELAY: RecoveryDelay =
/// Follow the relay chain's finalized heads for `para_id` and mirror them into
/// the local parachain client, also finalizing late-imported blocks that were
/// previously seen as finalized (tracked via an LRU cache).
///
/// NOTE(review): this span is a rendered diff hunk with the `+`/`-` markers
/// stripped, so pre- and post-change lines are interleaved below (e.g. the two
/// `P:` trait-bound lines, the two `Ok(finalized_heads_stream)` match arms,
/// and the leftover pre-change `let hash = header.hash();` section inside the
/// `select!` body). It is NOT valid standalone Rust — consult the original
/// commit (afcfd2404a) for the authoritative post-change code.
async fn follow_finalized_head<P, Block, B, R>(para_id: ParaId, parachain: Arc<P>, relay_chain: R)
where
Block: BlockT,
P: Finalizer<Block, B> + UsageProvider<Block>,
P: Finalizer<Block, B> + UsageProvider<Block> + BlockchainEvents<Block>,
R: RelayChainInterface + Clone,
B: Backend<Block>,
{
// Subscribe to the stream of finalized parachain heads from the relay chain.
// (`.fuse()` is required so the stream can be polled inside `select!`.)
let finalized_heads = match finalized_heads(relay_chain, para_id).await {
Ok(finalized_heads_stream) => finalized_heads_stream,
Ok(finalized_heads_stream) => finalized_heads_stream.fuse(),
Err(err) => {
tracing::error!(target: LOG_TARGET, error = ?err, "Unable to retrieve finalized heads stream.");
return
},
};
let mut imported_blocks = parachain.import_notification_stream().fuse();
pin_mut!(finalized_heads);
// We use this cache to finalize blocks that are imported late.
// For example, a block that has been recovered via PoV-Recovery
// on a full node can have several minutes delay. With this cache
// we have some "memory" of recently finalized blocks.
let mut last_seen_finalized_hashes = LruMap::new(ByLength::new(FINALIZATION_CACHE_SIZE));
loop {
let finalized_head = if let Some(h) = finalized_heads.next().await {
h
} else {
tracing::debug!(target: LOG_TARGET, "Stopping following finalized head.");
return
};
let header = match Block::Header::decode(&mut &finalized_head[..]) {
Ok(header) => header,
Err(err) => {
tracing::debug!(
target: LOG_TARGET,
error = ?err,
"Could not decode parachain header while following finalized heads.",
);
continue
// Race the two event sources: new finalized heads vs. local block imports.
select! {
fin = finalized_heads.next() => {
match fin {
Some(finalized_head) =>
handle_new_finalized_head(&parachain, finalized_head, &mut last_seen_finalized_hashes),
None => {
tracing::debug!(target: LOG_TARGET, "Stopping following finalized head.");
return
}
}
},
};
imported = imported_blocks.next() => {
match imported {
Some(imported_block) => {
// When we see a block import that is already finalized, we immediately finalize it.
if last_seen_finalized_hashes.peek(&imported_block.hash).is_some() {
tracing::debug!(
target: LOG_TARGET,
block_hash = ?imported_block.hash,
"Setting newly imported block as finalized.",
);
let hash = header.hash();
// don't finalize the same block multiple times.
if parachain.usage_info().chain.finalized_hash != hash {
if let Err(e) = parachain.finalize_block(hash, None, true) {
match e {
ClientError::UnknownBlock(_) => tracing::debug!(
target: LOG_TARGET,
block_hash = ?hash,
"Could not finalize block because it is unknown.",
),
_ => tracing::warn!(
target: LOG_TARGET,
error = ?e,
block_hash = ?hash,
"Failed to finalize block",
),
if let Err(e) = parachain.finalize_block(imported_block.hash, None, true) {
match e {
ClientError::UnknownBlock(_) => tracing::debug!(
target: LOG_TARGET,
block_hash = ?imported_block.hash,
"Could not finalize block because it is unknown.",
),
_ => tracing::warn!(
target: LOG_TARGET,
error = ?e,
block_hash = ?imported_block.hash,
"Failed to finalize block",
),
}
}
}
},
None => {
tracing::debug!(
target: LOG_TARGET,
"Stopping following imported blocks.",
);
return
}
}
}
}
@@ -266,7 +329,11 @@ async fn handle_new_block_imported<Block, P>(
let unset_best_header = unset_best_header_opt
.take()
.expect("We checked above that the value is set; qed");
tracing::debug!(
target: LOG_TARGET,
?unset_hash,
"Importing block as new best for parachain.",
);
import_block_as_new_best(unset_hash, unset_best_header, parachain).await;
},
state => tracing::debug!(
@@ -315,7 +382,11 @@ async fn handle_new_best_parachain_head<Block, P>(
match parachain.block_status(hash) {
Ok(BlockStatus::InChainWithState) => {
unset_best_header.take();
tracing::debug!(
target: LOG_TARGET,
?hash,
"Importing block as new best for parachain.",
);
import_block_as_new_best(hash, parachain_head, parachain).await;
},
Ok(BlockStatus::InChainPruned) => {
@@ -338,8 +409,7 @@ async fn handle_new_best_parachain_head<Block, P>(
// Best effort channel to actively encourage block recovery.
// An error here is not fatal; the relay chain continuously re-announces
// the best block, thus we will have other opportunities to retry.
let req =
RecoveryRequest { hash, delay: RECOVERY_DELAY, kind: RecoveryKind::Full };
let req = RecoveryRequest { hash, kind: RecoveryKind::Full };
if let Err(err) = recovery_chan_tx.try_send(req) {
tracing::warn!(
target: LOG_TARGET,