mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-05-30 05:51:02 +00:00
Add retry mechanism for pov-recovery, fix full-node pov-recovery (#2164)
* Increase delay for pov-recovery
* Update client/service/src/lib.rs
  Co-authored-by: Bastian Köcher <git@kchr.de>
* Comment
* FMT
* Clear waiting_recovery when block is recovered or recovery failed
* Introduce recovery queue that preserves insertion order
* Better error logs
* Decrease slot duration
* Style improvements
* Add option to use unordered queue
* Maintain cache of finalized blocks
* Wait for one relay chain slot before recovery
* Make retries testable
* fmt
* Improve docs
* Improve docs
* Simplify RecoveryQueue
* Remove unwanted changes
* Adjust to comments
* Apply suggestions from code review
  Co-authored-by: Bastian Köcher <git@kchr.de>
* Move recovery delay into the queue
* Check for finalized number
* Clean up
* Use timer
  Co-authored-by: Bastian Köcher <git@kchr.de>
* Simplify implementation
* Revert "Use timer"
  This reverts commit 3809eed840d3a09d54212f99486782ff80cdc1c9.
* Properly clear `to_recover` flag

---------

Co-authored-by: Bastian Köcher <git@kchr.de>
This commit is contained in:
Generated
+7
-2
@@ -372,9 +372,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "async-trait"
|
name = "async-trait"
|
||||||
version = "0.1.63"
|
version = "0.1.64"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "eff18d764974428cf3a9328e23fc5c986f5fbed46e6cd4cdf42544df5d297ec1"
|
checksum = "1cd7fce9ba8c3c042128ce72d8b2ddbf3a05747efb67ea0313c635e10bda47a2"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
@@ -1863,6 +1863,7 @@ dependencies = [
|
|||||||
"polkadot-primitives",
|
"polkadot-primitives",
|
||||||
"sc-client-api",
|
"sc-client-api",
|
||||||
"sc-consensus",
|
"sc-consensus",
|
||||||
|
"schnellru",
|
||||||
"sp-blockchain",
|
"sp-blockchain",
|
||||||
"sp-consensus",
|
"sp-consensus",
|
||||||
"sp-runtime",
|
"sp-runtime",
|
||||||
@@ -1931,6 +1932,7 @@ dependencies = [
|
|||||||
name = "cumulus-client-pov-recovery"
|
name = "cumulus-client-pov-recovery"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"async-trait",
|
||||||
"cumulus-primitives-core",
|
"cumulus-primitives-core",
|
||||||
"cumulus-relay-chain-interface",
|
"cumulus-relay-chain-interface",
|
||||||
"cumulus-test-service",
|
"cumulus-test-service",
|
||||||
@@ -2421,6 +2423,7 @@ dependencies = [
|
|||||||
"cumulus-client-consensus-common",
|
"cumulus-client-consensus-common",
|
||||||
"cumulus-client-consensus-relay-chain",
|
"cumulus-client-consensus-relay-chain",
|
||||||
"cumulus-client-network",
|
"cumulus-client-network",
|
||||||
|
"cumulus-client-pov-recovery",
|
||||||
"cumulus-client-service",
|
"cumulus-client-service",
|
||||||
"cumulus-primitives-core",
|
"cumulus-primitives-core",
|
||||||
"cumulus-primitives-parachain-inherent",
|
"cumulus-primitives-parachain-inherent",
|
||||||
@@ -2438,6 +2441,8 @@ dependencies = [
|
|||||||
"parachains-common",
|
"parachains-common",
|
||||||
"parity-scale-codec",
|
"parity-scale-codec",
|
||||||
"polkadot-cli",
|
"polkadot-cli",
|
||||||
|
"polkadot-node-subsystem",
|
||||||
|
"polkadot-overseer",
|
||||||
"polkadot-primitives",
|
"polkadot-primitives",
|
||||||
"polkadot-service",
|
"polkadot-service",
|
||||||
"polkadot-test-service",
|
"polkadot-test-service",
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ polkadot-primitives = { git = "https://github.com/paritytech/polkadot", branch =
|
|||||||
cumulus-primitives-core = { path = "../../../primitives/core" }
|
cumulus-primitives-core = { path = "../../../primitives/core" }
|
||||||
cumulus-relay-chain-interface = { path = "../../relay-chain-interface" }
|
cumulus-relay-chain-interface = { path = "../../relay-chain-interface" }
|
||||||
cumulus-client-pov-recovery = { path = "../../pov-recovery" }
|
cumulus-client-pov-recovery = { path = "../../pov-recovery" }
|
||||||
|
schnellru = "0.2.1"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
futures-timer = "3.0.2"
|
futures-timer = "3.0.2"
|
||||||
|
|||||||
@@ -18,11 +18,12 @@ use sc_client_api::{
|
|||||||
Backend, BlockBackend, BlockImportNotification, BlockchainEvents, Finalizer, UsageProvider,
|
Backend, BlockBackend, BlockImportNotification, BlockchainEvents, Finalizer, UsageProvider,
|
||||||
};
|
};
|
||||||
use sc_consensus::{BlockImport, BlockImportParams, ForkChoiceStrategy};
|
use sc_consensus::{BlockImport, BlockImportParams, ForkChoiceStrategy};
|
||||||
|
use schnellru::{ByLength, LruMap};
|
||||||
use sp_blockchain::Error as ClientError;
|
use sp_blockchain::Error as ClientError;
|
||||||
use sp_consensus::{BlockOrigin, BlockStatus};
|
use sp_consensus::{BlockOrigin, BlockStatus};
|
||||||
use sp_runtime::traits::{Block as BlockT, Header as HeaderT};
|
use sp_runtime::traits::{Block as BlockT, Header as HeaderT};
|
||||||
|
|
||||||
use cumulus_client_pov_recovery::{RecoveryDelay, RecoveryKind, RecoveryRequest};
|
use cumulus_client_pov_recovery::{RecoveryKind, RecoveryRequest};
|
||||||
use cumulus_relay_chain_interface::{RelayChainInterface, RelayChainResult};
|
use cumulus_relay_chain_interface::{RelayChainInterface, RelayChainResult};
|
||||||
|
|
||||||
use polkadot_primitives::{Hash as PHash, Id as ParaId, OccupiedCoreAssumption};
|
use polkadot_primitives::{Hash as PHash, Id as ParaId, OccupiedCoreAssumption};
|
||||||
@@ -30,46 +31,20 @@ use polkadot_primitives::{Hash as PHash, Id as ParaId, OccupiedCoreAssumption};
|
|||||||
use codec::Decode;
|
use codec::Decode;
|
||||||
use futures::{channel::mpsc::Sender, pin_mut, select, FutureExt, Stream, StreamExt};
|
use futures::{channel::mpsc::Sender, pin_mut, select, FutureExt, Stream, StreamExt};
|
||||||
|
|
||||||
use std::{sync::Arc, time::Duration};
|
use std::sync::Arc;
|
||||||
|
|
||||||
const LOG_TARGET: &str = "cumulus-consensus";
|
const LOG_TARGET: &str = "cumulus-consensus";
|
||||||
|
const FINALIZATION_CACHE_SIZE: u32 = 40;
|
||||||
|
|
||||||
// Delay range to trigger explicit requests.
|
fn handle_new_finalized_head<P, Block, B>(
|
||||||
// The chosen value doesn't have any special meaning, a random delay within the order of
|
parachain: &Arc<P>,
|
||||||
// seconds in practice should be a good enough to allow a quick recovery without DOSing
|
finalized_head: Vec<u8>,
|
||||||
// the relay chain.
|
last_seen_finalized_hashes: &mut LruMap<Block::Hash, ()>,
|
||||||
const RECOVERY_DELAY: RecoveryDelay =
|
) where
|
||||||
RecoveryDelay { min: Duration::ZERO, max: Duration::from_secs(30) };
|
|
||||||
|
|
||||||
/// Follow the finalized head of the given parachain.
|
|
||||||
///
|
|
||||||
/// For every finalized block of the relay chain, it will get the included parachain header
|
|
||||||
/// corresponding to `para_id` and will finalize it in the parachain.
|
|
||||||
async fn follow_finalized_head<P, Block, B, R>(para_id: ParaId, parachain: Arc<P>, relay_chain: R)
|
|
||||||
where
|
|
||||||
Block: BlockT,
|
Block: BlockT,
|
||||||
P: Finalizer<Block, B> + UsageProvider<Block>,
|
|
||||||
R: RelayChainInterface + Clone,
|
|
||||||
B: Backend<Block>,
|
B: Backend<Block>,
|
||||||
|
P: Finalizer<Block, B> + UsageProvider<Block> + BlockchainEvents<Block>,
|
||||||
{
|
{
|
||||||
let finalized_heads = match finalized_heads(relay_chain, para_id).await {
|
|
||||||
Ok(finalized_heads_stream) => finalized_heads_stream,
|
|
||||||
Err(err) => {
|
|
||||||
tracing::error!(target: LOG_TARGET, error = ?err, "Unable to retrieve finalized heads stream.");
|
|
||||||
return
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
pin_mut!(finalized_heads);
|
|
||||||
|
|
||||||
loop {
|
|
||||||
let finalized_head = if let Some(h) = finalized_heads.next().await {
|
|
||||||
h
|
|
||||||
} else {
|
|
||||||
tracing::debug!(target: LOG_TARGET, "Stopping following finalized head.");
|
|
||||||
return
|
|
||||||
};
|
|
||||||
|
|
||||||
let header = match Block::Header::decode(&mut &finalized_head[..]) {
|
let header = match Block::Header::decode(&mut &finalized_head[..]) {
|
||||||
Ok(header) => header,
|
Ok(header) => header,
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
@@ -78,14 +53,21 @@ where
|
|||||||
error = ?err,
|
error = ?err,
|
||||||
"Could not decode parachain header while following finalized heads.",
|
"Could not decode parachain header while following finalized heads.",
|
||||||
);
|
);
|
||||||
continue
|
return
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
let hash = header.hash();
|
let hash = header.hash();
|
||||||
|
|
||||||
// don't finalize the same block multiple times.
|
last_seen_finalized_hashes.insert(hash, ());
|
||||||
if parachain.usage_info().chain.finalized_hash != hash {
|
|
||||||
|
// Only finalize if we are below the incoming finalized parachain head
|
||||||
|
if parachain.usage_info().chain.finalized_number < *header.number() {
|
||||||
|
tracing::debug!(
|
||||||
|
target: LOG_TARGET,
|
||||||
|
block_hash = ?hash,
|
||||||
|
"Attempting to finalize header.",
|
||||||
|
);
|
||||||
if let Err(e) = parachain.finalize_block(hash, None, true) {
|
if let Err(e) = parachain.finalize_block(hash, None, true) {
|
||||||
match e {
|
match e {
|
||||||
ClientError::UnknownBlock(_) => tracing::debug!(
|
ClientError::UnknownBlock(_) => tracing::debug!(
|
||||||
@@ -102,6 +84,87 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Follow the finalized head of the given parachain.
|
||||||
|
///
|
||||||
|
/// For every finalized block of the relay chain, it will get the included parachain header
|
||||||
|
/// corresponding to `para_id` and will finalize it in the parachain.
|
||||||
|
async fn follow_finalized_head<P, Block, B, R>(para_id: ParaId, parachain: Arc<P>, relay_chain: R)
|
||||||
|
where
|
||||||
|
Block: BlockT,
|
||||||
|
P: Finalizer<Block, B> + UsageProvider<Block> + BlockchainEvents<Block>,
|
||||||
|
R: RelayChainInterface + Clone,
|
||||||
|
B: Backend<Block>,
|
||||||
|
{
|
||||||
|
let finalized_heads = match finalized_heads(relay_chain, para_id).await {
|
||||||
|
Ok(finalized_heads_stream) => finalized_heads_stream.fuse(),
|
||||||
|
Err(err) => {
|
||||||
|
tracing::error!(target: LOG_TARGET, error = ?err, "Unable to retrieve finalized heads stream.");
|
||||||
|
return
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut imported_blocks = parachain.import_notification_stream().fuse();
|
||||||
|
|
||||||
|
pin_mut!(finalized_heads);
|
||||||
|
|
||||||
|
// We use this cache to finalize blocks that are imported late.
|
||||||
|
// For example, a block that has been recovered via PoV-Recovery
|
||||||
|
// on a full node can have several minutes delay. With this cache
|
||||||
|
// we have some "memory" of recently finalized blocks.
|
||||||
|
let mut last_seen_finalized_hashes = LruMap::new(ByLength::new(FINALIZATION_CACHE_SIZE));
|
||||||
|
|
||||||
|
loop {
|
||||||
|
select! {
|
||||||
|
fin = finalized_heads.next() => {
|
||||||
|
match fin {
|
||||||
|
Some(finalized_head) =>
|
||||||
|
handle_new_finalized_head(&parachain, finalized_head, &mut last_seen_finalized_hashes),
|
||||||
|
None => {
|
||||||
|
tracing::debug!(target: LOG_TARGET, "Stopping following finalized head.");
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
imported = imported_blocks.next() => {
|
||||||
|
match imported {
|
||||||
|
Some(imported_block) => {
|
||||||
|
// When we see a block import that is already finalized, we immediately finalize it.
|
||||||
|
if last_seen_finalized_hashes.peek(&imported_block.hash).is_some() {
|
||||||
|
tracing::debug!(
|
||||||
|
target: LOG_TARGET,
|
||||||
|
block_hash = ?imported_block.hash,
|
||||||
|
"Setting newly imported block as finalized.",
|
||||||
|
);
|
||||||
|
|
||||||
|
if let Err(e) = parachain.finalize_block(imported_block.hash, None, true) {
|
||||||
|
match e {
|
||||||
|
ClientError::UnknownBlock(_) => tracing::debug!(
|
||||||
|
target: LOG_TARGET,
|
||||||
|
block_hash = ?imported_block.hash,
|
||||||
|
"Could not finalize block because it is unknown.",
|
||||||
|
),
|
||||||
|
_ => tracing::warn!(
|
||||||
|
target: LOG_TARGET,
|
||||||
|
error = ?e,
|
||||||
|
block_hash = ?imported_block.hash,
|
||||||
|
"Failed to finalize block",
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
None => {
|
||||||
|
tracing::debug!(
|
||||||
|
target: LOG_TARGET,
|
||||||
|
"Stopping following imported blocks.",
|
||||||
|
);
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -266,7 +329,11 @@ async fn handle_new_block_imported<Block, P>(
|
|||||||
let unset_best_header = unset_best_header_opt
|
let unset_best_header = unset_best_header_opt
|
||||||
.take()
|
.take()
|
||||||
.expect("We checked above that the value is set; qed");
|
.expect("We checked above that the value is set; qed");
|
||||||
|
tracing::debug!(
|
||||||
|
target: LOG_TARGET,
|
||||||
|
?unset_hash,
|
||||||
|
"Importing block as new best for parachain.",
|
||||||
|
);
|
||||||
import_block_as_new_best(unset_hash, unset_best_header, parachain).await;
|
import_block_as_new_best(unset_hash, unset_best_header, parachain).await;
|
||||||
},
|
},
|
||||||
state => tracing::debug!(
|
state => tracing::debug!(
|
||||||
@@ -315,7 +382,11 @@ async fn handle_new_best_parachain_head<Block, P>(
|
|||||||
match parachain.block_status(hash) {
|
match parachain.block_status(hash) {
|
||||||
Ok(BlockStatus::InChainWithState) => {
|
Ok(BlockStatus::InChainWithState) => {
|
||||||
unset_best_header.take();
|
unset_best_header.take();
|
||||||
|
tracing::debug!(
|
||||||
|
target: LOG_TARGET,
|
||||||
|
?hash,
|
||||||
|
"Importing block as new best for parachain.",
|
||||||
|
);
|
||||||
import_block_as_new_best(hash, parachain_head, parachain).await;
|
import_block_as_new_best(hash, parachain_head, parachain).await;
|
||||||
},
|
},
|
||||||
Ok(BlockStatus::InChainPruned) => {
|
Ok(BlockStatus::InChainPruned) => {
|
||||||
@@ -338,8 +409,7 @@ async fn handle_new_best_parachain_head<Block, P>(
|
|||||||
// Best effort channel to actively encourage block recovery.
|
// Best effort channel to actively encourage block recovery.
|
||||||
// An error here is not fatal; the relay chain continuously re-announces
|
// An error here is not fatal; the relay chain continuously re-announces
|
||||||
// the best block, thus we will have other opportunities to retry.
|
// the best block, thus we will have other opportunities to retry.
|
||||||
let req =
|
let req = RecoveryRequest { hash, kind: RecoveryKind::Full };
|
||||||
RecoveryRequest { hash, delay: RECOVERY_DELAY, kind: RecoveryKind::Full };
|
|
||||||
if let Err(err) = recovery_chan_tx.try_send(req) {
|
if let Err(err) = recovery_chan_tx.try_send(req) {
|
||||||
tracing::warn!(
|
tracing::warn!(
|
||||||
target: LOG_TARGET,
|
target: LOG_TARGET,
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ polkadot-primitives = { git = "https://github.com/paritytech/polkadot", branch =
|
|||||||
# Cumulus
|
# Cumulus
|
||||||
cumulus-primitives-core = { path = "../../primitives/core" }
|
cumulus-primitives-core = { path = "../../primitives/core" }
|
||||||
cumulus-relay-chain-interface = {path = "../relay-chain-interface"}
|
cumulus-relay-chain-interface = {path = "../relay-chain-interface"}
|
||||||
|
async-trait = "0.1.64"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
tokio = { version = "1.25.0", features = ["macros"] }
|
tokio = { version = "1.25.0", features = ["macros"] }
|
||||||
|
|||||||
@@ -18,12 +18,13 @@ use sp_runtime::traits::Block as BlockT;
|
|||||||
|
|
||||||
use polkadot_node_primitives::AvailableData;
|
use polkadot_node_primitives::AvailableData;
|
||||||
use polkadot_node_subsystem::messages::AvailabilityRecoveryMessage;
|
use polkadot_node_subsystem::messages::AvailabilityRecoveryMessage;
|
||||||
use polkadot_overseer::Handle as OverseerHandle;
|
|
||||||
|
|
||||||
use futures::{channel::oneshot, stream::FuturesUnordered, Future, FutureExt, StreamExt};
|
use futures::{channel::oneshot, stream::FuturesUnordered, Future, FutureExt, StreamExt};
|
||||||
|
|
||||||
use std::{collections::HashSet, pin::Pin};
|
use std::{collections::HashSet, pin::Pin};
|
||||||
|
|
||||||
|
use crate::RecoveryHandle;
|
||||||
|
|
||||||
/// The active candidate recovery.
|
/// The active candidate recovery.
|
||||||
///
|
///
|
||||||
/// This handles the candidate recovery and tracks the active recoveries.
|
/// This handles the candidate recovery and tracks the active recoveries.
|
||||||
@@ -34,12 +35,12 @@ pub(crate) struct ActiveCandidateRecovery<Block: BlockT> {
|
|||||||
>,
|
>,
|
||||||
/// The block hashes of the candidates currently being recovered.
|
/// The block hashes of the candidates currently being recovered.
|
||||||
candidates: HashSet<Block::Hash>,
|
candidates: HashSet<Block::Hash>,
|
||||||
overseer_handle: OverseerHandle,
|
recovery_handle: Box<dyn RecoveryHandle>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<Block: BlockT> ActiveCandidateRecovery<Block> {
|
impl<Block: BlockT> ActiveCandidateRecovery<Block> {
|
||||||
pub fn new(overseer_handle: OverseerHandle) -> Self {
|
pub fn new(recovery_handle: Box<dyn RecoveryHandle>) -> Self {
|
||||||
Self { recoveries: Default::default(), candidates: Default::default(), overseer_handle }
|
Self { recoveries: Default::default(), candidates: Default::default(), recovery_handle }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Recover the given `candidate`.
|
/// Recover the given `candidate`.
|
||||||
@@ -50,8 +51,8 @@ impl<Block: BlockT> ActiveCandidateRecovery<Block> {
|
|||||||
) {
|
) {
|
||||||
let (tx, rx) = oneshot::channel();
|
let (tx, rx) = oneshot::channel();
|
||||||
|
|
||||||
self.overseer_handle
|
self.recovery_handle
|
||||||
.send_msg(
|
.send_recovery_msg(
|
||||||
AvailabilityRecoveryMessage::RecoverAvailableData(
|
AvailabilityRecoveryMessage::RecoverAvailableData(
|
||||||
candidate.receipt.clone(),
|
candidate.receipt.clone(),
|
||||||
candidate.session_index,
|
candidate.session_index,
|
||||||
@@ -90,11 +91,6 @@ impl<Block: BlockT> ActiveCandidateRecovery<Block> {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns if the given `candidate` is being recovered.
|
|
||||||
pub fn is_being_recovered(&self, candidate: &Block::Hash) -> bool {
|
|
||||||
self.candidates.contains(candidate)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Waits for the next recovery.
|
/// Waits for the next recovery.
|
||||||
///
|
///
|
||||||
/// If the returned [`AvailableData`] is `None`, it means that the recovery failed.
|
/// If the returned [`AvailableData`] is `None`, it means that the recovery failed.
|
||||||
|
|||||||
@@ -29,14 +29,18 @@
|
|||||||
//!
|
//!
|
||||||
//! 1. For every included relay chain block we note the backed candidate of our parachain. If the
|
//! 1. For every included relay chain block we note the backed candidate of our parachain. If the
|
||||||
//! block belonging to the PoV is already known, we do nothing. Otherwise we start
|
//! block belonging to the PoV is already known, we do nothing. Otherwise we start
|
||||||
//! a timer that waits a random time between 0..relay_chain_slot_length before starting to recover
|
//! a timer that waits for a randomized time inside a specified interval before starting to recover
|
||||||
//! the PoV.
|
//! the PoV.
|
||||||
//!
|
//!
|
||||||
//! 2. If between starting and firing the timer the block is imported, we skip the recovery of the
|
//! 2. If between starting and firing the timer the block is imported, we skip the recovery of the
|
||||||
//! PoV.
|
//! PoV.
|
||||||
//!
|
//!
|
||||||
//! 3. If the timer fired we recover the PoV using the relay chain PoV recovery protocol. After it
|
//! 3. If the timer fired we recover the PoV using the relay chain PoV recovery protocol.
|
||||||
//! is recovered, we restore the block and import it.
|
//!
|
||||||
|
//! 4a. After it is recovered, we restore the block and import it.
|
||||||
|
//!
|
||||||
|
//! 4b. Since we are trying to recover pending candidates, availability is not guaranteed. If the block
|
||||||
|
//! PoV is not yet available, we retry.
|
||||||
//!
|
//!
|
||||||
//! If we need to recover multiple PoV blocks (which should hopefully not happen in real life), we
|
//! If we need to recover multiple PoV blocks (which should hopefully not happen in real life), we
|
||||||
//! make sure that the blocks are imported in the correct order.
|
//! make sure that the blocks are imported in the correct order.
|
||||||
@@ -47,6 +51,7 @@ use sp_consensus::{BlockOrigin, BlockStatus};
|
|||||||
use sp_runtime::traits::{Block as BlockT, Header as HeaderT, NumberFor};
|
use sp_runtime::traits::{Block as BlockT, Header as HeaderT, NumberFor};
|
||||||
|
|
||||||
use polkadot_node_primitives::{AvailableData, POV_BOMB_LIMIT};
|
use polkadot_node_primitives::{AvailableData, POV_BOMB_LIMIT};
|
||||||
|
use polkadot_node_subsystem::messages::AvailabilityRecoveryMessage;
|
||||||
use polkadot_overseer::Handle as OverseerHandle;
|
use polkadot_overseer::Handle as OverseerHandle;
|
||||||
use polkadot_primitives::{
|
use polkadot_primitives::{
|
||||||
CandidateReceipt, CommittedCandidateReceipt, Id as ParaId, SessionIndex,
|
CandidateReceipt, CommittedCandidateReceipt, Id as ParaId, SessionIndex,
|
||||||
@@ -60,10 +65,10 @@ use futures::{
|
|||||||
channel::mpsc::Receiver, select, stream::FuturesUnordered, Future, FutureExt, Stream, StreamExt,
|
channel::mpsc::Receiver, select, stream::FuturesUnordered, Future, FutureExt, Stream, StreamExt,
|
||||||
};
|
};
|
||||||
use futures_timer::Delay;
|
use futures_timer::Delay;
|
||||||
use rand::{thread_rng, Rng};
|
use rand::{distributions::Uniform, prelude::Distribution, thread_rng};
|
||||||
|
|
||||||
use std::{
|
use std::{
|
||||||
collections::{HashMap, VecDeque},
|
collections::{HashMap, HashSet, VecDeque},
|
||||||
pin::Pin,
|
pin::Pin,
|
||||||
sync::Arc,
|
sync::Arc,
|
||||||
time::Duration,
|
time::Duration,
|
||||||
@@ -74,6 +79,28 @@ use active_candidate_recovery::ActiveCandidateRecovery;
|
|||||||
|
|
||||||
const LOG_TARGET: &str = "cumulus-pov-recovery";
|
const LOG_TARGET: &str = "cumulus-pov-recovery";
|
||||||
|
|
||||||
|
/// Test-friendly wrapper trait for the overseer handle.
|
||||||
|
/// Can be used to simulate failing recovery requests.
|
||||||
|
#[async_trait::async_trait]
|
||||||
|
pub trait RecoveryHandle: Send {
|
||||||
|
async fn send_recovery_msg(
|
||||||
|
&mut self,
|
||||||
|
message: AvailabilityRecoveryMessage,
|
||||||
|
origin: &'static str,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait::async_trait]
|
||||||
|
impl RecoveryHandle for OverseerHandle {
|
||||||
|
async fn send_recovery_msg(
|
||||||
|
&mut self,
|
||||||
|
message: AvailabilityRecoveryMessage,
|
||||||
|
origin: &'static str,
|
||||||
|
) {
|
||||||
|
self.send_msg(message, origin).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Type of recovery to trigger.
|
/// Type of recovery to trigger.
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub enum RecoveryKind {
|
pub enum RecoveryKind {
|
||||||
@@ -87,24 +114,30 @@ pub enum RecoveryKind {
|
|||||||
pub struct RecoveryRequest<Block: BlockT> {
|
pub struct RecoveryRequest<Block: BlockT> {
|
||||||
/// Hash of the last block to recover.
|
/// Hash of the last block to recover.
|
||||||
pub hash: Block::Hash,
|
pub hash: Block::Hash,
|
||||||
/// Recovery delay range. Randomizing the start of the recovery within this interval
|
|
||||||
/// can be used to prevent self-DOSing if the recovery request is part of a
|
|
||||||
/// distributed protocol and there is the possibility that multiple actors are
|
|
||||||
/// requiring to perform the recovery action at approximately the same time.
|
|
||||||
pub delay: RecoveryDelay,
|
|
||||||
/// Recovery type.
|
/// Recovery type.
|
||||||
pub kind: RecoveryKind,
|
pub kind: RecoveryKind,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The delay between observing an unknown block and triggering the recovery of a block.
|
/// The delay between observing an unknown block and triggering the recovery of a block.
|
||||||
|
/// Randomizing the start of the recovery within this interval
|
||||||
|
/// can be used to prevent self-DOSing if the recovery request is part of a
|
||||||
|
/// distributed protocol and there is the possibility that multiple actors are
|
||||||
|
/// requiring to perform the recovery action at approximately the same time.
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
pub struct RecoveryDelay {
|
pub struct RecoveryDelayRange {
|
||||||
/// Start recovering after `min` delay.
|
/// Start recovering after `min` delay.
|
||||||
pub min: Duration,
|
pub min: Duration,
|
||||||
/// Start recovering before `max` delay.
|
/// Start recovering before `max` delay.
|
||||||
pub max: Duration,
|
pub max: Duration,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl RecoveryDelayRange {
|
||||||
|
/// Produce a randomized duration between `min` and `max`.
|
||||||
|
fn duration(&self) -> Duration {
|
||||||
|
Uniform::from(self.min..=self.max).sample(&mut thread_rng())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Represents an outstanding block candidate.
|
/// Represents an outstanding block candidate.
|
||||||
struct Candidate<Block: BlockT> {
|
struct Candidate<Block: BlockT> {
|
||||||
receipt: CandidateReceipt,
|
receipt: CandidateReceipt,
|
||||||
@@ -112,9 +145,66 @@ struct Candidate<Block: BlockT> {
|
|||||||
block_number: NumberFor<Block>,
|
block_number: NumberFor<Block>,
|
||||||
parent_hash: Block::Hash,
|
parent_hash: Block::Hash,
|
||||||
// Lazy recovery has been submitted.
|
// Lazy recovery has been submitted.
|
||||||
|
// Should be true iff a block is either queued to be recovered or
|
||||||
|
// recovery is currently in progress.
|
||||||
waiting_recovery: bool,
|
waiting_recovery: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Queue that is used to decide when to start PoV-recovery operations.
|
||||||
|
struct RecoveryQueue<Block: BlockT> {
|
||||||
|
recovery_delay_range: RecoveryDelayRange,
|
||||||
|
// Queue that keeps the hashes of blocks to be recovered.
|
||||||
|
recovery_queue: VecDeque<Block::Hash>,
|
||||||
|
// Futures that resolve when a new recovery should be started.
|
||||||
|
signaling_queue: FuturesUnordered<Pin<Box<dyn Future<Output = ()> + Send>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<Block: BlockT> RecoveryQueue<Block> {
|
||||||
|
pub fn new(recovery_delay_range: RecoveryDelayRange) -> Self {
|
||||||
|
Self {
|
||||||
|
recovery_delay_range,
|
||||||
|
recovery_queue: Default::default(),
|
||||||
|
signaling_queue: Default::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add hash of a block that should go to the end of the recovery queue.
|
||||||
|
/// A new recovery will be signaled after a randomized delay (drawn from `recovery_delay_range`) has passed.
|
||||||
|
pub fn push_recovery(&mut self, hash: Block::Hash) {
|
||||||
|
let delay = self.recovery_delay_range.duration();
|
||||||
|
tracing::debug!(
|
||||||
|
target: LOG_TARGET,
|
||||||
|
block_hash = ?hash,
|
||||||
|
"Adding block to queue and adding new recovery slot in {:?} sec",
|
||||||
|
delay.as_secs(),
|
||||||
|
);
|
||||||
|
self.recovery_queue.push_back(hash);
|
||||||
|
self.signaling_queue.push(
|
||||||
|
async move {
|
||||||
|
Delay::new(delay).await;
|
||||||
|
}
|
||||||
|
.boxed(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the next hash for block recovery.
|
||||||
|
pub async fn next_recovery(&mut self) -> Block::Hash {
|
||||||
|
loop {
|
||||||
|
if let Some(_) = self.signaling_queue.next().await {
|
||||||
|
if let Some(hash) = self.recovery_queue.pop_front() {
|
||||||
|
return hash
|
||||||
|
} else {
|
||||||
|
tracing::error!(
|
||||||
|
target: LOG_TARGET,
|
||||||
|
"Recovery was signaled, but no candidate hash available. This is a bug."
|
||||||
|
);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
futures::pending!()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Encapsulates the logic of the pov recovery.
|
/// Encapsulates the logic of the pov recovery.
|
||||||
pub struct PoVRecovery<Block: BlockT, PC, RC> {
|
pub struct PoVRecovery<Block: BlockT, PC, RC> {
|
||||||
/// All the pending candidates that we are waiting for to be imported or that need to be
|
/// All the pending candidates that we are waiting for to be imported or that need to be
|
||||||
@@ -122,21 +212,22 @@ pub struct PoVRecovery<Block: BlockT, PC, RC> {
|
|||||||
candidates: HashMap<Block::Hash, Candidate<Block>>,
|
candidates: HashMap<Block::Hash, Candidate<Block>>,
|
||||||
/// A stream of futures that resolve to hashes of candidates that need to be recovered.
|
/// A stream of futures that resolve to hashes of candidates that need to be recovered.
|
||||||
///
|
///
|
||||||
/// The candidates to the hashes are stored in `pending_candidates`. If a candidate is not
|
/// The candidates to the hashes are stored in `candidates`. If a candidate is not
|
||||||
/// available anymore in this map, it means that it was already imported.
|
/// available anymore in this map, it means that it was already imported.
|
||||||
next_candidate_to_recover: FuturesUnordered<Pin<Box<dyn Future<Output = Block::Hash> + Send>>>,
|
candidate_recovery_queue: RecoveryQueue<Block>,
|
||||||
active_candidate_recovery: ActiveCandidateRecovery<Block>,
|
active_candidate_recovery: ActiveCandidateRecovery<Block>,
|
||||||
/// Blocks that are waiting for their parent to be imported.
|
/// Blocks that are waiting for their parent to be imported.
|
||||||
///
|
///
|
||||||
/// Uses parent -> blocks mapping.
|
/// Uses parent -> blocks mapping.
|
||||||
waiting_for_parent: HashMap<Block::Hash, Vec<Block>>,
|
waiting_for_parent: HashMap<Block::Hash, Vec<Block>>,
|
||||||
recovery_delay: RecoveryDelay,
|
|
||||||
parachain_client: Arc<PC>,
|
parachain_client: Arc<PC>,
|
||||||
parachain_import_queue: Box<dyn ImportQueueService<Block>>,
|
parachain_import_queue: Box<dyn ImportQueueService<Block>>,
|
||||||
relay_chain_interface: RC,
|
relay_chain_interface: RC,
|
||||||
para_id: ParaId,
|
para_id: ParaId,
|
||||||
/// Explicit block recovery requests channel.
|
/// Explicit block recovery requests channel.
|
||||||
recovery_chan_rx: Receiver<RecoveryRequest<Block>>,
|
recovery_chan_rx: Receiver<RecoveryRequest<Block>>,
|
||||||
|
/// Blocks that we are retrying currently
|
||||||
|
candidates_in_retry: HashSet<Block::Hash>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<Block: BlockT, PC, RCInterface> PoVRecovery<Block, PC, RCInterface>
|
impl<Block: BlockT, PC, RCInterface> PoVRecovery<Block, PC, RCInterface>
|
||||||
@@ -146,8 +237,8 @@ where
|
|||||||
{
|
{
|
||||||
/// Create a new instance.
|
/// Create a new instance.
|
||||||
pub fn new(
|
pub fn new(
|
||||||
overseer_handle: OverseerHandle,
|
recovery_handle: Box<dyn RecoveryHandle>,
|
||||||
recovery_delay: RecoveryDelay,
|
recovery_delay_range: RecoveryDelayRange,
|
||||||
parachain_client: Arc<PC>,
|
parachain_client: Arc<PC>,
|
||||||
parachain_import_queue: Box<dyn ImportQueueService<Block>>,
|
parachain_import_queue: Box<dyn ImportQueueService<Block>>,
|
||||||
relay_chain_interface: RCInterface,
|
relay_chain_interface: RCInterface,
|
||||||
@@ -156,14 +247,14 @@ where
|
|||||||
) -> Self {
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
candidates: HashMap::new(),
|
candidates: HashMap::new(),
|
||||||
next_candidate_to_recover: Default::default(),
|
candidate_recovery_queue: RecoveryQueue::new(recovery_delay_range),
|
||||||
active_candidate_recovery: ActiveCandidateRecovery::new(overseer_handle),
|
active_candidate_recovery: ActiveCandidateRecovery::new(recovery_handle),
|
||||||
recovery_delay,
|
|
||||||
waiting_for_parent: HashMap::new(),
|
waiting_for_parent: HashMap::new(),
|
||||||
parachain_client,
|
parachain_client,
|
||||||
parachain_import_queue,
|
parachain_import_queue,
|
||||||
relay_chain_interface,
|
relay_chain_interface,
|
||||||
para_id,
|
para_id,
|
||||||
|
candidates_in_retry: HashSet::new(),
|
||||||
recovery_chan_rx,
|
recovery_chan_rx,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -210,15 +301,11 @@ where
|
|||||||
|
|
||||||
// If required, triggers a lazy recovery request that will eventually be blocked
|
// If required, triggers a lazy recovery request that will eventually be blocked
|
||||||
// if in the meantime the block is imported.
|
// if in the meantime the block is imported.
|
||||||
self.recover(RecoveryRequest {
|
self.recover(RecoveryRequest { hash, kind: RecoveryKind::Simple });
|
||||||
hash,
|
|
||||||
delay: self.recovery_delay,
|
|
||||||
kind: RecoveryKind::Simple,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Handle an imported block.
|
/// Block is no longer waiting for recovery
|
||||||
fn handle_block_imported(&mut self, block_hash: &Block::Hash) {
|
fn clear_waiting_recovery(&mut self, block_hash: &Block::Hash) {
|
||||||
self.candidates.get_mut(block_hash).map(|candidate| {
|
self.candidates.get_mut(block_hash).map(|candidate| {
|
||||||
// Prevents triggering an already enqueued recovery request
|
// Prevents triggering an already enqueued recovery request
|
||||||
candidate.waiting_recovery = false;
|
candidate.waiting_recovery = false;
|
||||||
@@ -241,9 +328,9 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Clear `waiting_for_parent` from the given `hash` and do this recursively for all child
|
/// Clear `waiting_for_parent` and `waiting_recovery` for the candidate with `hash`.
|
||||||
/// blocks.
|
/// Also clears children blocks waiting for this parent.
|
||||||
fn clear_waiting_for_parent(&mut self, hash: Block::Hash) {
|
fn reset_candidate(&mut self, hash: Block::Hash) {
|
||||||
let mut blocks_to_delete = vec![hash];
|
let mut blocks_to_delete = vec![hash];
|
||||||
|
|
||||||
while let Some(delete) = blocks_to_delete.pop() {
|
while let Some(delete) = blocks_to_delete.pop() {
|
||||||
@@ -251,6 +338,7 @@ where
|
|||||||
blocks_to_delete.extend(childs.iter().map(BlockT::hash));
|
blocks_to_delete.extend(childs.iter().map(BlockT::hash));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
self.clear_waiting_recovery(&hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Handle a recovered candidate.
|
/// Handle a recovered candidate.
|
||||||
@@ -260,9 +348,23 @@ where
|
|||||||
available_data: Option<AvailableData>,
|
available_data: Option<AvailableData>,
|
||||||
) {
|
) {
|
||||||
let available_data = match available_data {
|
let available_data = match available_data {
|
||||||
Some(data) => data,
|
Some(data) => {
|
||||||
None => {
|
self.candidates_in_retry.remove(&block_hash);
|
||||||
self.clear_waiting_for_parent(block_hash);
|
data
|
||||||
|
},
|
||||||
|
None =>
|
||||||
|
if self.candidates_in_retry.insert(block_hash) {
|
||||||
|
tracing::debug!(target: LOG_TARGET, ?block_hash, "Recovery failed, retrying.");
|
||||||
|
self.candidate_recovery_queue.push_recovery(block_hash);
|
||||||
|
return
|
||||||
|
} else {
|
||||||
|
tracing::warn!(
|
||||||
|
target: LOG_TARGET,
|
||||||
|
?block_hash,
|
||||||
|
"Unable to recover block after retry.",
|
||||||
|
);
|
||||||
|
self.candidates_in_retry.remove(&block_hash);
|
||||||
|
self.reset_candidate(block_hash);
|
||||||
return
|
return
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
@@ -275,8 +377,7 @@ where
|
|||||||
Err(error) => {
|
Err(error) => {
|
||||||
tracing::debug!(target: LOG_TARGET, ?error, "Failed to decompress PoV");
|
tracing::debug!(target: LOG_TARGET, ?error, "Failed to decompress PoV");
|
||||||
|
|
||||||
self.clear_waiting_for_parent(block_hash);
|
self.reset_candidate(block_hash);
|
||||||
|
|
||||||
return
|
return
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
@@ -290,8 +391,7 @@ where
|
|||||||
"Failed to decode parachain block data from recovered PoV",
|
"Failed to decode parachain block data from recovered PoV",
|
||||||
);
|
);
|
||||||
|
|
||||||
self.clear_waiting_for_parent(block_hash);
|
self.reset_candidate(block_hash);
|
||||||
|
|
||||||
return
|
return
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
@@ -302,12 +402,17 @@ where
|
|||||||
|
|
||||||
match self.parachain_client.block_status(parent) {
|
match self.parachain_client.block_status(parent) {
|
||||||
Ok(BlockStatus::Unknown) => {
|
Ok(BlockStatus::Unknown) => {
|
||||||
if self.active_candidate_recovery.is_being_recovered(&parent) {
|
// If the parent block is currently being recovered or is scheduled to be recovered,
|
||||||
|
// we want to wait for the parent.
|
||||||
|
let parent_scheduled_for_recovery =
|
||||||
|
self.candidates.get(&parent).map_or(false, |parent| parent.waiting_recovery);
|
||||||
|
if parent_scheduled_for_recovery {
|
||||||
tracing::debug!(
|
tracing::debug!(
|
||||||
target: LOG_TARGET,
|
target: LOG_TARGET,
|
||||||
?block_hash,
|
?block_hash,
|
||||||
parent_hash = ?parent,
|
parent_hash = ?parent,
|
||||||
"Parent is still being recovered, waiting.",
|
parent_scheduled_for_recovery,
|
||||||
|
"Waiting for recovery of parent.",
|
||||||
);
|
);
|
||||||
|
|
||||||
self.waiting_for_parent.entry(parent).or_default().push(block);
|
self.waiting_for_parent.entry(parent).or_default().push(block);
|
||||||
@@ -320,8 +425,7 @@ where
|
|||||||
"Parent not found while trying to import recovered block.",
|
"Parent not found while trying to import recovered block.",
|
||||||
);
|
);
|
||||||
|
|
||||||
self.clear_waiting_for_parent(block_hash);
|
self.reset_candidate(block_hash);
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -333,8 +437,7 @@ where
|
|||||||
"Error while checking block status",
|
"Error while checking block status",
|
||||||
);
|
);
|
||||||
|
|
||||||
self.clear_waiting_for_parent(block_hash);
|
self.reset_candidate(block_hash);
|
||||||
|
|
||||||
return
|
return
|
||||||
},
|
},
|
||||||
// Any other status is fine to "ignore/accept"
|
// Any other status is fine to "ignore/accept"
|
||||||
@@ -383,10 +486,10 @@ where
|
|||||||
|
|
||||||
/// Attempts an explicit recovery of one or more blocks.
|
/// Attempts an explicit recovery of one or more blocks.
|
||||||
pub fn recover(&mut self, req: RecoveryRequest<Block>) {
|
pub fn recover(&mut self, req: RecoveryRequest<Block>) {
|
||||||
let RecoveryRequest { mut hash, delay, kind } = req;
|
let RecoveryRequest { mut hash, kind } = req;
|
||||||
let mut to_recover = Vec::new();
|
let mut to_recover = Vec::new();
|
||||||
|
|
||||||
let do_recover = loop {
|
loop {
|
||||||
let candidate = match self.candidates.get_mut(&hash) {
|
let candidate = match self.candidates.get_mut(&hash) {
|
||||||
Some(candidate) => candidate,
|
Some(candidate) => candidate,
|
||||||
None => {
|
None => {
|
||||||
@@ -395,7 +498,7 @@ where
|
|||||||
block_hash = ?hash,
|
block_hash = ?hash,
|
||||||
"Cound not recover. Block was never announced as candidate"
|
"Cound not recover. Block was never announced as candidate"
|
||||||
);
|
);
|
||||||
break false
|
return
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -404,7 +507,7 @@ where
|
|||||||
candidate.waiting_recovery = true;
|
candidate.waiting_recovery = true;
|
||||||
to_recover.push(hash);
|
to_recover.push(hash);
|
||||||
},
|
},
|
||||||
Ok(_) => break true,
|
Ok(_) => break,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
tracing::error!(
|
tracing::error!(
|
||||||
target: LOG_TARGET,
|
target: LOG_TARGET,
|
||||||
@@ -412,36 +515,22 @@ where
|
|||||||
block_hash = ?hash,
|
block_hash = ?hash,
|
||||||
"Failed to get block status",
|
"Failed to get block status",
|
||||||
);
|
);
|
||||||
break false
|
for hash in to_recover {
|
||||||
|
self.clear_waiting_recovery(&hash);
|
||||||
|
}
|
||||||
|
return
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
if kind == RecoveryKind::Simple {
|
if kind == RecoveryKind::Simple {
|
||||||
break true
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
hash = candidate.parent_hash;
|
hash = candidate.parent_hash;
|
||||||
};
|
}
|
||||||
|
|
||||||
if do_recover {
|
|
||||||
for hash in to_recover.into_iter().rev() {
|
for hash in to_recover.into_iter().rev() {
|
||||||
let delay =
|
self.candidate_recovery_queue.push_recovery(hash);
|
||||||
delay.min + delay.max.saturating_sub(delay.min).mul_f64(thread_rng().gen());
|
|
||||||
tracing::debug!(
|
|
||||||
target: LOG_TARGET,
|
|
||||||
block_hash = ?hash,
|
|
||||||
"Starting {:?} block recovery in {:?} sec",
|
|
||||||
kind,
|
|
||||||
delay.as_secs(),
|
|
||||||
);
|
|
||||||
self.next_candidate_to_recover.push(
|
|
||||||
async move {
|
|
||||||
Delay::new(delay).await;
|
|
||||||
hash
|
|
||||||
}
|
|
||||||
.boxed(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -480,7 +569,7 @@ where
|
|||||||
},
|
},
|
||||||
imported = imported_blocks.next() => {
|
imported = imported_blocks.next() => {
|
||||||
if let Some(imported) = imported {
|
if let Some(imported) = imported {
|
||||||
self.handle_block_imported(&imported.hash);
|
self.clear_waiting_recovery(&imported.hash);
|
||||||
} else {
|
} else {
|
||||||
tracing::debug!(target: LOG_TARGET, "Imported blocks stream ended");
|
tracing::debug!(target: LOG_TARGET, "Imported blocks stream ended");
|
||||||
return;
|
return;
|
||||||
@@ -494,10 +583,8 @@ where
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
next_to_recover = self.next_candidate_to_recover.next() => {
|
next_to_recover = self.candidate_recovery_queue.next_recovery().fuse() => {
|
||||||
if let Some(block_hash) = next_to_recover {
|
self.recover_candidate(next_to_recover).await;
|
||||||
self.recover_candidate(block_hash).await;
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
(block_hash, available_data) =
|
(block_hash, available_data) =
|
||||||
self.active_candidate_recovery.wait_for_recovery().fuse() =>
|
self.active_candidate_recovery.wait_for_recovery().fuse() =>
|
||||||
|
|||||||
@@ -20,7 +20,7 @@
|
|||||||
|
|
||||||
use cumulus_client_cli::CollatorOptions;
|
use cumulus_client_cli::CollatorOptions;
|
||||||
use cumulus_client_consensus_common::ParachainConsensus;
|
use cumulus_client_consensus_common::ParachainConsensus;
|
||||||
use cumulus_client_pov_recovery::{PoVRecovery, RecoveryDelay};
|
use cumulus_client_pov_recovery::{PoVRecovery, RecoveryDelayRange, RecoveryHandle};
|
||||||
use cumulus_primitives_core::{CollectCollationInfo, ParaId};
|
use cumulus_primitives_core::{CollectCollationInfo, ParaId};
|
||||||
use cumulus_relay_chain_inprocess_interface::build_inprocess_relay_chain;
|
use cumulus_relay_chain_inprocess_interface::build_inprocess_relay_chain;
|
||||||
use cumulus_relay_chain_interface::{RelayChainInterface, RelayChainResult};
|
use cumulus_relay_chain_interface::{RelayChainInterface, RelayChainResult};
|
||||||
@@ -59,6 +59,7 @@ pub struct StartCollatorParams<'a, Block: BlockT, BS, Client, RCInterface, Spawn
|
|||||||
pub import_queue: Box<dyn ImportQueueService<Block>>,
|
pub import_queue: Box<dyn ImportQueueService<Block>>,
|
||||||
pub collator_key: CollatorPair,
|
pub collator_key: CollatorPair,
|
||||||
pub relay_chain_slot_duration: Duration,
|
pub relay_chain_slot_duration: Duration,
|
||||||
|
pub recovery_handle: Box<dyn RecoveryHandle>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Start a collator node for a parachain.
|
/// Start a collator node for a parachain.
|
||||||
@@ -79,6 +80,7 @@ pub async fn start_collator<'a, Block, BS, Client, Backend, RCInterface, Spawner
|
|||||||
import_queue,
|
import_queue,
|
||||||
collator_key,
|
collator_key,
|
||||||
relay_chain_slot_duration,
|
relay_chain_slot_duration,
|
||||||
|
recovery_handle,
|
||||||
}: StartCollatorParams<'a, Block, BS, Client, RCInterface, Spawner>,
|
}: StartCollatorParams<'a, Block, BS, Client, RCInterface, Spawner>,
|
||||||
) -> sc_service::error::Result<()>
|
) -> sc_service::error::Result<()>
|
||||||
where
|
where
|
||||||
@@ -113,15 +115,12 @@ where
|
|||||||
.spawn_essential_handle()
|
.spawn_essential_handle()
|
||||||
.spawn("cumulus-consensus", None, consensus);
|
.spawn("cumulus-consensus", None, consensus);
|
||||||
|
|
||||||
let overseer_handle = relay_chain_interface
|
|
||||||
.overseer_handle()
|
|
||||||
.map_err(|e| sc_service::Error::Application(Box::new(e)))?;
|
|
||||||
|
|
||||||
let pov_recovery = PoVRecovery::new(
|
let pov_recovery = PoVRecovery::new(
|
||||||
overseer_handle.clone(),
|
recovery_handle,
|
||||||
// We want that collators wait at maximum the relay chain slot duration before starting
|
// We want that collators wait at maximum the relay chain slot duration before starting
|
||||||
// to recover blocks.
|
// to recover blocks. Additionally, we wait at least half the slot time to give the
|
||||||
RecoveryDelay { min: core::time::Duration::ZERO, max: relay_chain_slot_duration },
|
// relay chain the chance to increase availability.
|
||||||
|
RecoveryDelayRange { min: relay_chain_slot_duration / 2, max: relay_chain_slot_duration },
|
||||||
client.clone(),
|
client.clone(),
|
||||||
import_queue,
|
import_queue,
|
||||||
relay_chain_interface.clone(),
|
relay_chain_interface.clone(),
|
||||||
@@ -132,6 +131,10 @@ where
|
|||||||
task_manager
|
task_manager
|
||||||
.spawn_essential_handle()
|
.spawn_essential_handle()
|
||||||
.spawn("cumulus-pov-recovery", None, pov_recovery.run());
|
.spawn("cumulus-pov-recovery", None, pov_recovery.run());
|
||||||
|
|
||||||
|
let overseer_handle = relay_chain_interface
|
||||||
|
.overseer_handle()
|
||||||
|
.map_err(|e| sc_service::Error::Application(Box::new(e)))?;
|
||||||
cumulus_client_collator::start_collator(cumulus_client_collator::StartCollatorParams {
|
cumulus_client_collator::start_collator(cumulus_client_collator::StartCollatorParams {
|
||||||
runtime_api: client,
|
runtime_api: client,
|
||||||
block_status,
|
block_status,
|
||||||
@@ -156,6 +159,7 @@ pub struct StartFullNodeParams<'a, Block: BlockT, Client, RCInterface> {
|
|||||||
pub announce_block: Arc<dyn Fn(Block::Hash, Option<Vec<u8>>) + Send + Sync>,
|
pub announce_block: Arc<dyn Fn(Block::Hash, Option<Vec<u8>>) + Send + Sync>,
|
||||||
pub relay_chain_slot_duration: Duration,
|
pub relay_chain_slot_duration: Duration,
|
||||||
pub import_queue: Box<dyn ImportQueueService<Block>>,
|
pub import_queue: Box<dyn ImportQueueService<Block>>,
|
||||||
|
pub recovery_handle: Box<dyn RecoveryHandle>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Start a full node for a parachain.
|
/// Start a full node for a parachain.
|
||||||
@@ -171,6 +175,7 @@ pub fn start_full_node<Block, Client, Backend, RCInterface>(
|
|||||||
para_id,
|
para_id,
|
||||||
relay_chain_slot_duration,
|
relay_chain_slot_duration,
|
||||||
import_queue,
|
import_queue,
|
||||||
|
recovery_handle,
|
||||||
}: StartFullNodeParams<Block, Client, RCInterface>,
|
}: StartFullNodeParams<Block, Client, RCInterface>,
|
||||||
) -> sc_service::error::Result<()>
|
) -> sc_service::error::Result<()>
|
||||||
where
|
where
|
||||||
@@ -200,18 +205,17 @@ where
|
|||||||
.spawn_essential_handle()
|
.spawn_essential_handle()
|
||||||
.spawn("cumulus-consensus", None, consensus);
|
.spawn("cumulus-consensus", None, consensus);
|
||||||
|
|
||||||
let overseer_handle = relay_chain_interface
|
|
||||||
.overseer_handle()
|
|
||||||
.map_err(|e| sc_service::Error::Application(Box::new(e)))?;
|
|
||||||
|
|
||||||
let pov_recovery = PoVRecovery::new(
|
let pov_recovery = PoVRecovery::new(
|
||||||
overseer_handle,
|
recovery_handle,
|
||||||
// Full nodes should at least wait 2.5 minutes (assuming 6 seconds slot duration) and
|
// Full nodes should at least wait 2.5 minutes (assuming 6 seconds slot duration) and
|
||||||
// in maximum 5 minutes before starting to recover blocks. Collators should already start
|
// in maximum 5 minutes before starting to recover blocks. Collators should already start
|
||||||
// the recovery way before full nodes try to recover a certain block and then share the
|
// the recovery way before full nodes try to recover a certain block and then share the
|
||||||
// block with the network using "the normal way". Full nodes are just the "last resort"
|
// block with the network using "the normal way". Full nodes are just the "last resort"
|
||||||
// for block recovery.
|
// for block recovery.
|
||||||
RecoveryDelay { min: relay_chain_slot_duration * 25, max: relay_chain_slot_duration * 50 },
|
RecoveryDelayRange {
|
||||||
|
min: relay_chain_slot_duration * 25,
|
||||||
|
max: relay_chain_slot_duration * 50,
|
||||||
|
},
|
||||||
client,
|
client,
|
||||||
import_queue,
|
import_queue,
|
||||||
relay_chain_interface,
|
relay_chain_interface,
|
||||||
|
|||||||
@@ -256,6 +256,10 @@ async fn start_node_impl(
|
|||||||
|
|
||||||
let relay_chain_slot_duration = Duration::from_secs(6);
|
let relay_chain_slot_duration = Duration::from_secs(6);
|
||||||
|
|
||||||
|
let overseer_handle = relay_chain_interface
|
||||||
|
.overseer_handle()
|
||||||
|
.map_err(|e| sc_service::Error::Application(Box::new(e)))?;
|
||||||
|
|
||||||
if validator {
|
if validator {
|
||||||
let parachain_consensus = build_consensus(
|
let parachain_consensus = build_consensus(
|
||||||
client.clone(),
|
client.clone(),
|
||||||
@@ -284,6 +288,7 @@ async fn start_node_impl(
|
|||||||
import_queue: import_queue_service,
|
import_queue: import_queue_service,
|
||||||
collator_key: collator_key.expect("Command line arguments do not allow this. qed"),
|
collator_key: collator_key.expect("Command line arguments do not allow this. qed"),
|
||||||
relay_chain_slot_duration,
|
relay_chain_slot_duration,
|
||||||
|
recovery_handle: Box::new(overseer_handle),
|
||||||
};
|
};
|
||||||
|
|
||||||
start_collator(params).await?;
|
start_collator(params).await?;
|
||||||
@@ -296,6 +301,7 @@ async fn start_node_impl(
|
|||||||
relay_chain_interface,
|
relay_chain_interface,
|
||||||
relay_chain_slot_duration,
|
relay_chain_slot_duration,
|
||||||
import_queue: import_queue_service,
|
import_queue: import_queue_service,
|
||||||
|
recovery_handle: Box::new(overseer_handle),
|
||||||
};
|
};
|
||||||
|
|
||||||
start_full_node(params)?;
|
start_full_node(params)?;
|
||||||
|
|||||||
@@ -452,6 +452,10 @@ where
|
|||||||
|
|
||||||
let relay_chain_slot_duration = Duration::from_secs(6);
|
let relay_chain_slot_duration = Duration::from_secs(6);
|
||||||
|
|
||||||
|
let overseer_handle = relay_chain_interface
|
||||||
|
.overseer_handle()
|
||||||
|
.map_err(|e| sc_service::Error::Application(Box::new(e)))?;
|
||||||
|
|
||||||
if validator {
|
if validator {
|
||||||
let parachain_consensus = build_consensus(
|
let parachain_consensus = build_consensus(
|
||||||
client.clone(),
|
client.clone(),
|
||||||
@@ -480,6 +484,7 @@ where
|
|||||||
import_queue: import_queue_service,
|
import_queue: import_queue_service,
|
||||||
collator_key: collator_key.expect("Command line arguments do not allow this. qed"),
|
collator_key: collator_key.expect("Command line arguments do not allow this. qed"),
|
||||||
relay_chain_slot_duration,
|
relay_chain_slot_duration,
|
||||||
|
recovery_handle: Box::new(overseer_handle),
|
||||||
};
|
};
|
||||||
|
|
||||||
start_collator(params).await?;
|
start_collator(params).await?;
|
||||||
@@ -492,6 +497,7 @@ where
|
|||||||
relay_chain_interface,
|
relay_chain_interface,
|
||||||
relay_chain_slot_duration,
|
relay_chain_slot_duration,
|
||||||
import_queue: import_queue_service,
|
import_queue: import_queue_service,
|
||||||
|
recovery_handle: Box::new(overseer_handle),
|
||||||
};
|
};
|
||||||
|
|
||||||
start_full_node(params)?;
|
start_full_node(params)?;
|
||||||
@@ -652,6 +658,9 @@ where
|
|||||||
|
|
||||||
let relay_chain_slot_duration = Duration::from_secs(6);
|
let relay_chain_slot_duration = Duration::from_secs(6);
|
||||||
|
|
||||||
|
let overseer_handle = relay_chain_interface
|
||||||
|
.overseer_handle()
|
||||||
|
.map_err(|e| sc_service::Error::Application(Box::new(e)))?;
|
||||||
if validator {
|
if validator {
|
||||||
let parachain_consensus = build_consensus(
|
let parachain_consensus = build_consensus(
|
||||||
client.clone(),
|
client.clone(),
|
||||||
@@ -680,6 +689,7 @@ where
|
|||||||
import_queue: import_queue_service,
|
import_queue: import_queue_service,
|
||||||
collator_key: collator_key.expect("Command line arguments do not allow this. qed"),
|
collator_key: collator_key.expect("Command line arguments do not allow this. qed"),
|
||||||
relay_chain_slot_duration,
|
relay_chain_slot_duration,
|
||||||
|
recovery_handle: Box::new(overseer_handle),
|
||||||
};
|
};
|
||||||
|
|
||||||
start_collator(params).await?;
|
start_collator(params).await?;
|
||||||
@@ -692,6 +702,7 @@ where
|
|||||||
relay_chain_interface,
|
relay_chain_interface,
|
||||||
relay_chain_slot_duration,
|
relay_chain_slot_duration,
|
||||||
import_queue: import_queue_service,
|
import_queue: import_queue_service,
|
||||||
|
recovery_handle: Box::new(overseer_handle),
|
||||||
};
|
};
|
||||||
|
|
||||||
start_full_node(params)?;
|
start_full_node(params)?;
|
||||||
@@ -1425,6 +1436,9 @@ where
|
|||||||
|
|
||||||
let relay_chain_slot_duration = Duration::from_secs(6);
|
let relay_chain_slot_duration = Duration::from_secs(6);
|
||||||
|
|
||||||
|
let overseer_handle = relay_chain_interface
|
||||||
|
.overseer_handle()
|
||||||
|
.map_err(|e| sc_service::Error::Application(Box::new(e)))?;
|
||||||
if validator {
|
if validator {
|
||||||
let parachain_consensus = build_consensus(
|
let parachain_consensus = build_consensus(
|
||||||
client.clone(),
|
client.clone(),
|
||||||
@@ -1453,6 +1467,7 @@ where
|
|||||||
import_queue: import_queue_service,
|
import_queue: import_queue_service,
|
||||||
collator_key: collator_key.expect("Command line arguments do not allow this. qed"),
|
collator_key: collator_key.expect("Command line arguments do not allow this. qed"),
|
||||||
relay_chain_slot_duration,
|
relay_chain_slot_duration,
|
||||||
|
recovery_handle: Box::new(overseer_handle),
|
||||||
};
|
};
|
||||||
|
|
||||||
start_collator(params).await?;
|
start_collator(params).await?;
|
||||||
@@ -1465,6 +1480,7 @@ where
|
|||||||
relay_chain_interface,
|
relay_chain_interface,
|
||||||
relay_chain_slot_duration,
|
relay_chain_slot_duration,
|
||||||
import_queue: import_queue_service,
|
import_queue: import_queue_service,
|
||||||
|
recovery_handle: Box::new(overseer_handle),
|
||||||
};
|
};
|
||||||
|
|
||||||
start_full_node(params)?;
|
start_full_node(params)?;
|
||||||
|
|||||||
@@ -54,6 +54,8 @@ polkadot-primitives = { git = "https://github.com/paritytech/polkadot", branch =
|
|||||||
polkadot-service = { git = "https://github.com/paritytech/polkadot", branch = "master" }
|
polkadot-service = { git = "https://github.com/paritytech/polkadot", branch = "master" }
|
||||||
polkadot-test-service = { git = "https://github.com/paritytech/polkadot", branch = "master" }
|
polkadot-test-service = { git = "https://github.com/paritytech/polkadot", branch = "master" }
|
||||||
polkadot-cli = { git = "https://github.com/paritytech/polkadot", branch = "master" }
|
polkadot-cli = { git = "https://github.com/paritytech/polkadot", branch = "master" }
|
||||||
|
polkadot-node-subsystem = { git = "https://github.com/paritytech/polkadot", branch = "master" }
|
||||||
|
polkadot-overseer = { git = "https://github.com/paritytech/polkadot", branch = "master" }
|
||||||
|
|
||||||
# Cumulus
|
# Cumulus
|
||||||
cumulus-client-cli = { path = "../../client/cli" }
|
cumulus-client-cli = { path = "../../client/cli" }
|
||||||
@@ -70,6 +72,7 @@ cumulus-relay-chain-rpc-interface = { path = "../../client/relay-chain-rpc-inter
|
|||||||
cumulus-test-relay-validation-worker-provider = { path = "../relay-validation-worker-provider" }
|
cumulus-test-relay-validation-worker-provider = { path = "../relay-validation-worker-provider" }
|
||||||
cumulus-test-runtime = { path = "../runtime" }
|
cumulus-test-runtime = { path = "../runtime" }
|
||||||
cumulus-relay-chain-minimal-node = { path = "../../client/relay-chain-minimal-node" }
|
cumulus-relay-chain-minimal-node = { path = "../../client/relay-chain-minimal-node" }
|
||||||
|
cumulus-client-pov-recovery = { path = "../../client/pov-recovery" }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
futures = "0.3.26"
|
futures = "0.3.26"
|
||||||
|
|||||||
@@ -49,6 +49,9 @@ pub struct TestCollatorCli {
|
|||||||
|
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
pub disable_block_announcements: bool,
|
pub disable_block_announcements: bool,
|
||||||
|
|
||||||
|
#[arg(long)]
|
||||||
|
pub fail_pov_recovery: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, clap::Subcommand)]
|
#[derive(Debug, clap::Subcommand)]
|
||||||
|
|||||||
@@ -34,6 +34,7 @@ use cumulus_client_consensus_common::{
|
|||||||
ParachainBlockImport as TParachainBlockImport, ParachainCandidate, ParachainConsensus,
|
ParachainBlockImport as TParachainBlockImport, ParachainCandidate, ParachainConsensus,
|
||||||
};
|
};
|
||||||
use cumulus_client_network::BlockAnnounceValidator;
|
use cumulus_client_network::BlockAnnounceValidator;
|
||||||
|
use cumulus_client_pov_recovery::RecoveryHandle;
|
||||||
use cumulus_client_service::{
|
use cumulus_client_service::{
|
||||||
prepare_node_config, start_collator, start_full_node, StartCollatorParams, StartFullNodeParams,
|
prepare_node_config, start_collator, start_full_node, StartCollatorParams, StartFullNodeParams,
|
||||||
};
|
};
|
||||||
@@ -45,6 +46,8 @@ use cumulus_relay_chain_minimal_node::build_minimal_relay_chain_node;
|
|||||||
use cumulus_test_runtime::{Hash, Header, NodeBlock as Block, RuntimeApi};
|
use cumulus_test_runtime::{Hash, Header, NodeBlock as Block, RuntimeApi};
|
||||||
|
|
||||||
use frame_system_rpc_runtime_api::AccountNonceApi;
|
use frame_system_rpc_runtime_api::AccountNonceApi;
|
||||||
|
use polkadot_node_subsystem::{errors::RecoveryError, messages::AvailabilityRecoveryMessage};
|
||||||
|
use polkadot_overseer::Handle as OverseerHandle;
|
||||||
use polkadot_primitives::{CollatorPair, Hash as PHash, PersistedValidationData};
|
use polkadot_primitives::{CollatorPair, Hash as PHash, PersistedValidationData};
|
||||||
use polkadot_service::ProvideRuntimeApi;
|
use polkadot_service::ProvideRuntimeApi;
|
||||||
use sc_client_api::execution_extensions::ExecutionStrategies;
|
use sc_client_api::execution_extensions::ExecutionStrategies;
|
||||||
@@ -76,6 +79,8 @@ pub use cumulus_test_runtime as runtime;
|
|||||||
pub use genesis::*;
|
pub use genesis::*;
|
||||||
pub use sp_keyring::Sr25519Keyring as Keyring;
|
pub use sp_keyring::Sr25519Keyring as Keyring;
|
||||||
|
|
||||||
|
const LOG_TARGET: &str = "cumulus-test-service";
|
||||||
|
|
||||||
/// A consensus that will never produce any block.
|
/// A consensus that will never produce any block.
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
struct NullConsensus;
|
struct NullConsensus;
|
||||||
@@ -126,6 +131,41 @@ pub type ParachainBlockImport = TParachainBlockImport<Block, Arc<Client>, Backen
|
|||||||
/// Transaction pool type used by the test service
|
/// Transaction pool type used by the test service
|
||||||
pub type TransactionPool = Arc<sc_transaction_pool::FullPool<Block, Client>>;
|
pub type TransactionPool = Arc<sc_transaction_pool::FullPool<Block, Client>>;
|
||||||
|
|
||||||
|
/// Recovery handle that fails regularly to simulate unavailable povs.
|
||||||
|
pub struct FailingRecoveryHandle {
|
||||||
|
overseer_handle: OverseerHandle,
|
||||||
|
counter: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FailingRecoveryHandle {
|
||||||
|
/// Create a new FailingRecoveryHandle
|
||||||
|
pub fn new(overseer_handle: OverseerHandle) -> Self {
|
||||||
|
Self { overseer_handle, counter: 0 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait::async_trait]
|
||||||
|
impl RecoveryHandle for FailingRecoveryHandle {
|
||||||
|
async fn send_recovery_msg(
|
||||||
|
&mut self,
|
||||||
|
message: AvailabilityRecoveryMessage,
|
||||||
|
origin: &'static str,
|
||||||
|
) {
|
||||||
|
// For every 5th block we immediately signal unavailability to trigger
|
||||||
|
// a retry.
|
||||||
|
if self.counter % 5 == 0 {
|
||||||
|
let AvailabilityRecoveryMessage::RecoverAvailableData(_, _, _, back_sender) = message;
|
||||||
|
tracing::info!(target: LOG_TARGET, "Failing pov recovery.");
|
||||||
|
back_sender
|
||||||
|
.send(Err(RecoveryError::Unavailable))
|
||||||
|
.expect("Return channel should work here.");
|
||||||
|
} else {
|
||||||
|
self.overseer_handle.send_msg(message, origin).await;
|
||||||
|
}
|
||||||
|
self.counter += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Starts a `ServiceBuilder` for a full service.
|
/// Starts a `ServiceBuilder` for a full service.
|
||||||
///
|
///
|
||||||
/// Use this macro if you don't actually need the full service, but just the builder in order to
|
/// Use this macro if you don't actually need the full service, but just the builder in order to
|
||||||
@@ -236,6 +276,7 @@ pub async fn start_node_impl<RB>(
|
|||||||
relay_chain_config: Configuration,
|
relay_chain_config: Configuration,
|
||||||
para_id: ParaId,
|
para_id: ParaId,
|
||||||
wrap_announce_block: Option<Box<dyn FnOnce(AnnounceBlockFn) -> AnnounceBlockFn>>,
|
wrap_announce_block: Option<Box<dyn FnOnce(AnnounceBlockFn) -> AnnounceBlockFn>>,
|
||||||
|
fail_pov_recovery: bool,
|
||||||
rpc_ext_builder: RB,
|
rpc_ext_builder: RB,
|
||||||
consensus: Consensus,
|
consensus: Consensus,
|
||||||
collator_options: CollatorOptions,
|
collator_options: CollatorOptions,
|
||||||
@@ -320,6 +361,17 @@ where
|
|||||||
.unwrap_or_else(|| announce_block);
|
.unwrap_or_else(|| announce_block);
|
||||||
|
|
||||||
let relay_chain_interface_for_closure = relay_chain_interface.clone();
|
let relay_chain_interface_for_closure = relay_chain_interface.clone();
|
||||||
|
|
||||||
|
let overseer_handle = relay_chain_interface
|
||||||
|
.overseer_handle()
|
||||||
|
.map_err(|e| sc_service::Error::Application(Box::new(e)))?;
|
||||||
|
|
||||||
|
let recovery_handle: Box<dyn RecoveryHandle> = if fail_pov_recovery {
|
||||||
|
Box::new(FailingRecoveryHandle::new(overseer_handle))
|
||||||
|
} else {
|
||||||
|
Box::new(overseer_handle)
|
||||||
|
};
|
||||||
|
|
||||||
if let Some(collator_key) = collator_key {
|
if let Some(collator_key) = collator_key {
|
||||||
let parachain_consensus: Box<dyn ParachainConsensus<Block>> = match consensus {
|
let parachain_consensus: Box<dyn ParachainConsensus<Block>> = match consensus {
|
||||||
Consensus::RelayChain => {
|
Consensus::RelayChain => {
|
||||||
@@ -374,6 +426,7 @@ where
|
|||||||
collator_key,
|
collator_key,
|
||||||
import_queue: import_queue_service,
|
import_queue: import_queue_service,
|
||||||
relay_chain_slot_duration: Duration::from_secs(6),
|
relay_chain_slot_duration: Duration::from_secs(6),
|
||||||
|
recovery_handle,
|
||||||
};
|
};
|
||||||
|
|
||||||
start_collator(params).await?;
|
start_collator(params).await?;
|
||||||
@@ -385,10 +438,8 @@ where
|
|||||||
para_id,
|
para_id,
|
||||||
relay_chain_interface,
|
relay_chain_interface,
|
||||||
import_queue: import_queue_service,
|
import_queue: import_queue_service,
|
||||||
// The slot duration is currently used internally only to configure
|
relay_chain_slot_duration: Duration::from_secs(6),
|
||||||
// the recovery delay of pov-recovery. We don't want to wait for too
|
recovery_handle,
|
||||||
// long on the full node to recover, so we reduce this time here.
|
|
||||||
relay_chain_slot_duration: Duration::from_millis(6),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
start_full_node(params)?;
|
start_full_node(params)?;
|
||||||
@@ -600,6 +651,7 @@ impl TestNodeBuilder {
|
|||||||
relay_chain_config,
|
relay_chain_config,
|
||||||
self.para_id,
|
self.para_id,
|
||||||
self.wrap_announce_block,
|
self.wrap_announce_block,
|
||||||
|
false,
|
||||||
|_| Ok(jsonrpsee::RpcModule::new(())),
|
|_| Ok(jsonrpsee::RpcModule::new(())),
|
||||||
self.consensus,
|
self.consensus,
|
||||||
collator_options,
|
collator_options,
|
||||||
|
|||||||
@@ -123,6 +123,9 @@ fn main() -> Result<(), sc_cli::Error> {
|
|||||||
"Is collating: {}",
|
"Is collating: {}",
|
||||||
if config.role.is_authority() { "yes" } else { "no" }
|
if config.role.is_authority() { "yes" } else { "no" }
|
||||||
);
|
);
|
||||||
|
if cli.fail_pov_recovery {
|
||||||
|
tracing::info!("PoV recovery failure enabled");
|
||||||
|
}
|
||||||
|
|
||||||
let collator_key = config.role.is_authority().then(|| CollatorPair::generate().0);
|
let collator_key = config.role.is_authority().then(|| CollatorPair::generate().0);
|
||||||
|
|
||||||
@@ -141,6 +144,7 @@ fn main() -> Result<(), sc_cli::Error> {
|
|||||||
polkadot_config,
|
polkadot_config,
|
||||||
parachain_id,
|
parachain_id,
|
||||||
cli.disable_block_announcements.then(wrap_announce_block),
|
cli.disable_block_announcements.then(wrap_announce_block),
|
||||||
|
cli.fail_pov_recovery,
|
||||||
|_| Ok(jsonrpsee::RpcModule::new(())),
|
|_| Ok(jsonrpsee::RpcModule::new(())),
|
||||||
consensus,
|
consensus,
|
||||||
collator_options,
|
collator_options,
|
||||||
|
|||||||
@@ -12,9 +12,10 @@ bob: is up within 60 seconds
|
|||||||
charlie: is up within 60 seconds
|
charlie: is up within 60 seconds
|
||||||
one: is up within 60 seconds
|
one: is up within 60 seconds
|
||||||
two: is up within 60 seconds
|
two: is up within 60 seconds
|
||||||
|
eve: is up within 60 seconds
|
||||||
|
|
||||||
# wait 30 blocks and register parachain
|
# wait 20 blocks and register parachain
|
||||||
validator-3: reports block height is at least 30 within 250 seconds
|
validator-3: reports block height is at least 20 within 250 seconds
|
||||||
validator-0: js-script ./register-para.js with "2000" within 240 seconds
|
validator-0: js-script ./register-para.js with "2000" within 240 seconds
|
||||||
validator-0: parachain 2000 is registered within 300 seconds
|
validator-0: parachain 2000 is registered within 300 seconds
|
||||||
|
|
||||||
@@ -22,5 +23,6 @@ validator-0: parachain 2000 is registered within 300 seconds
|
|||||||
bob: reports block height is at least 20 within 600 seconds
|
bob: reports block height is at least 20 within 600 seconds
|
||||||
alice: reports block height is at least 20 within 600 seconds
|
alice: reports block height is at least 20 within 600 seconds
|
||||||
charlie: reports block height is at least 20 within 600 seconds
|
charlie: reports block height is at least 20 within 600 seconds
|
||||||
one: reports block height is at least 20 within 600 seconds
|
one: reports block height is at least 20 within 800 seconds
|
||||||
two: reports block height is at least 20 within 600 seconds
|
two: reports block height is at least 20 within 800 seconds
|
||||||
|
eve: reports block height is at least 20 within 800 seconds
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ add_to_genesis = false
|
|||||||
validator = true # collator
|
validator = true # collator
|
||||||
image = "{{COL_IMAGE}}"
|
image = "{{COL_IMAGE}}"
|
||||||
command = "test-parachain"
|
command = "test-parachain"
|
||||||
args = ["-lparachain::availability=trace,sync=debug,parachain=debug,cumulus-pov-recovery=debug", "--use-null-consensus", "--disable-block-announcements", "--bootnodes {{'bob'|zombie('multiAddress')}}", "--", "--reserved-only", "--reserved-nodes {{'ferdie'|zombie('multiAddress')}}"]
|
args = ["-lparachain::availability=trace,sync=debug,parachain=debug,cumulus-pov-recovery=debug,cumulus-consensus=debug", "--use-null-consensus", "--disable-block-announcements", "--bootnodes {{'bob'|zombie('multiAddress')}}", "--", "--reserved-only", "--reserved-nodes {{'ferdie'|zombie('multiAddress')}}"]
|
||||||
|
|
||||||
# run eve as a parachain full node
|
# run eve as a parachain full node
|
||||||
[[parachains.collators]]
|
[[parachains.collators]]
|
||||||
@@ -48,7 +48,15 @@ add_to_genesis = false
|
|||||||
validator = false # full node
|
validator = false # full node
|
||||||
image = "{{COL_IMAGE}}"
|
image = "{{COL_IMAGE}}"
|
||||||
command = "test-parachain"
|
command = "test-parachain"
|
||||||
args = ["-lparachain::availability=trace,sync=debug,parachain=debug,cumulus-pov-recovery=debug", "--disable-block-announcements", "--bootnodes {{'bob'|zombie('multiAddress')}}","--", "--reserved-only", "--reserved-nodes {{'ferdie'|zombie('multiAddress')}}"]
|
args = ["-lparachain::availability=trace,sync=debug,parachain=debug,cumulus-pov-recovery=debug,cumulus-consensus=debug", "--disable-block-announcements", "--bootnodes {{'bob'|zombie('multiAddress')}}","--", "--reserved-only", "--reserved-nodes {{'ferdie'|zombie('multiAddress')}}"]
|
||||||
|
|
||||||
|
# we fail recovery for eve from time to time to test retries
|
||||||
|
[[parachains.collators]]
|
||||||
|
name = "eve"
|
||||||
|
validator = true # collator
|
||||||
|
image = "{{COL_IMAGE}}"
|
||||||
|
command = "test-parachain"
|
||||||
|
args = ["-lparachain::availability=trace,sync=debug,parachain=debug,cumulus-pov-recovery=debug,cumulus-consensus=debug", "--fail-pov-recovery", "--use-null-consensus", "--disable-block-announcements", "--bootnodes {{'bob'|zombie('multiAddress')}}", "--", "--reserved-only", "--reserved-nodes {{'ferdie'|zombie('multiAddress')}}"]
|
||||||
|
|
||||||
# run one as a RPC collator who does not produce blocks
|
# run one as a RPC collator who does not produce blocks
|
||||||
[[parachains.collators]]
|
[[parachains.collators]]
|
||||||
@@ -56,7 +64,7 @@ add_to_genesis = false
|
|||||||
validator = true # collator
|
validator = true # collator
|
||||||
image = "{{COL_IMAGE}}"
|
image = "{{COL_IMAGE}}"
|
||||||
command = "test-parachain"
|
command = "test-parachain"
|
||||||
args = ["-lparachain::availability=trace,sync=debug,parachain=debug,cumulus-pov-recovery=debug", "--use-null-consensus", "--disable-block-announcements", "--bootnodes {{'bob'|zombie('multiAddress')}}", "--relay-chain-rpc-url {{'ferdie'|zombie('wsUri')}}", "--", "--reserved-only", "--reserved-nodes {{'ferdie'|zombie('multiAddress')}}"]
|
args = ["-lparachain::availability=trace,sync=debug,parachain=debug,cumulus-pov-recovery=debug,cumulus-consensus=debug", "--use-null-consensus", "--disable-block-announcements", "--bootnodes {{'bob'|zombie('multiAddress')}}", "--relay-chain-rpc-url {{'ferdie'|zombie('wsUri')}}", "--", "--reserved-only", "--reserved-nodes {{'ferdie'|zombie('multiAddress')}}"]
|
||||||
|
|
||||||
# run two as a RPC parachain full node
|
# run two as a RPC parachain full node
|
||||||
[[parachains.collators]]
|
[[parachains.collators]]
|
||||||
@@ -64,4 +72,4 @@ add_to_genesis = false
|
|||||||
validator = false # full node
|
validator = false # full node
|
||||||
image = "{{COL_IMAGE}}"
|
image = "{{COL_IMAGE}}"
|
||||||
command = "test-parachain"
|
command = "test-parachain"
|
||||||
args = ["-lparachain::availability=trace,sync=debug,parachain=debug,cumulus-pov-recovery=debug", "--disable-block-announcements", "--bootnodes {{'bob'|zombie('multiAddress')}}", "--relay-chain-rpc-url {{'ferdie'|zombie('wsUri')}}", "--", "--reserved-only", "--reserved-nodes {{'ferdie'|zombie('multiAddress')}}"]
|
args = ["-lparachain::availability=trace,sync=debug,parachain=debug,cumulus-pov-recovery=debug,cumulus-consensus=debug", "--disable-block-announcements", "--bootnodes {{'bob'|zombie('multiAddress')}}", "--relay-chain-rpc-url {{'ferdie'|zombie('wsUri')}}", "--", "--reserved-only", "--reserved-nodes {{'ferdie'|zombie('multiAddress')}}"]
|
||||||
|
|||||||
Reference in New Issue
Block a user