diff --git a/polkadot/Cargo.lock b/polkadot/Cargo.lock index 6bf618976a..3283c95694 100644 --- a/polkadot/Cargo.lock +++ b/polkadot/Cargo.lock @@ -6260,6 +6260,7 @@ dependencies = [ "futures 0.3.14", "futures-timer 3.0.2", "kv-log-macro", + "lru", "polkadot-node-network-protocol", "polkadot-node-primitives", "polkadot-node-subsystem", diff --git a/polkadot/node/core/av-store/src/tests.rs b/polkadot/node/core/av-store/src/tests.rs index 78c080f367..718b52306a 100644 --- a/polkadot/node/core/av-store/src/tests.rs +++ b/polkadot/node/core/av-store/src/tests.rs @@ -33,6 +33,7 @@ use polkadot_node_primitives::{AvailableData, BlockData, PoV}; use polkadot_node_subsystem_util::TimeoutExt; use polkadot_subsystem::{ ActiveLeavesUpdate, errors::RuntimeApiError, jaeger, messages::AllMessages, ActivatedLeaf, + LeafStatus, }; use polkadot_node_subsystem_test_helpers as test_helpers; use sp_keyring::Sr25519Keyring; @@ -258,6 +259,7 @@ fn runtime_api_error_does_not_stop_the_subsystem() { activated: vec![ActivatedLeaf { hash: new_leaf, number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }].into(), deactivated: vec![].into(), @@ -994,6 +996,7 @@ async fn import_leaf( activated: vec![ActivatedLeaf { hash: new_leaf, number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }].into(), deactivated: vec![].into(), diff --git a/polkadot/node/core/backing/src/lib.rs b/polkadot/node/core/backing/src/lib.rs index 549659e380..67f635a774 100644 --- a/polkadot/node/core/backing/src/lib.rs +++ b/polkadot/node/core/backing/src/lib.rs @@ -1333,7 +1333,7 @@ mod tests { use polkadot_primitives::v1::{GroupRotationInfo, HeadData, PersistedValidationData, ScheduledCore}; use polkadot_subsystem::{ messages::{RuntimeApiRequest, RuntimeApiMessage}, - ActiveLeavesUpdate, FromOverseer, OverseerSignal, ActivatedLeaf, + ActiveLeavesUpdate, FromOverseer, OverseerSignal, ActivatedLeaf, LeafStatus, }; use polkadot_node_primitives::{InvalidCandidate, BlockData}; use polkadot_node_subsystem_test_helpers as test_helpers; @@ -1526,6 +1526,7 @@ mod tests { OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(ActivatedLeaf { hash: test_state.relay_parent, number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }))) ).await; diff --git a/polkadot/node/network/availability-distribution/src/tests/state.rs b/polkadot/node/network/availability-distribution/src/tests/state.rs index 7364b7fb3f..11680c38b9 100644 --- a/polkadot/node/network/availability-distribution/src/tests/state.rs +++ b/polkadot/node/network/availability-distribution/src/tests/state.rs @@ -29,7 +29,8 @@ use sc_network as network; use sc_network::IfDisconnected; use sc_network::config as netconfig; -use polkadot_subsystem::{ActiveLeavesUpdate, FromOverseer, OverseerSignal, ActivatedLeaf, +use polkadot_subsystem::{ + ActiveLeavesUpdate, FromOverseer, OverseerSignal, ActivatedLeaf, LeafStatus, messages::{ AllMessages, AvailabilityDistributionMessage, AvailabilityStoreMessage, NetworkBridgeMessage, RuntimeApiMessage, RuntimeApiRequest, @@ -173,6 +174,7 @@ impl TestState { activated: smallvec![ActivatedLeaf { hash: new.clone(), number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }], deactivated: smallvec![old.clone()], diff --git a/polkadot/node/network/availability-recovery/src/tests.rs b/polkadot/node/network/availability-recovery/src/tests.rs index a20f3829c5..81d9a53c9a 100644 --- a/polkadot/node/network/availability-recovery/src/tests.rs +++ b/polkadot/node/network/availability-recovery/src/tests.rs @@ -33,7 +33,9 @@ use polkadot_node_primitives::{PoV, BlockData}; use polkadot_erasure_coding::{branches, obtain_chunks_v1 as obtain_chunks}; use polkadot_node_subsystem_util::TimeoutExt; use polkadot_subsystem_testhelpers as test_helpers; -use polkadot_subsystem::{messages::{RuntimeApiMessage, RuntimeApiRequest}, jaeger, ActivatedLeaf}; +use polkadot_subsystem::{ + messages::{RuntimeApiMessage, RuntimeApiRequest}, jaeger, ActivatedLeaf, LeafStatus, +}; type VirtualOverseer = test_helpers::TestSubsystemContextHandle; @@ -448,6 +450,7 @@ fn availability_is_recovered_from_chunks_if_no_group_provided() { activated: smallvec![ActivatedLeaf { hash: test_state.current.clone(), number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }], deactivated: smallvec![], @@ -529,6 +532,7 @@ fn availability_is_recovered_from_chunks_even_if_backing_group_supplied_if_chunk activated: smallvec![ActivatedLeaf { hash: test_state.current.clone(), number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }], deactivated: smallvec![], @@ -610,6 +614,7 @@ fn bad_merkle_path_leads_to_recovery_error() { activated: smallvec![ActivatedLeaf { hash: test_state.current.clone(), number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }], deactivated: smallvec![], @@ -666,6 +671,7 @@ fn wrong_chunk_index_leads_to_recovery_error() { activated: smallvec![ActivatedLeaf { hash: test_state.current.clone(), number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }], deactivated: smallvec![], @@ -739,6 +745,7 @@ fn invalid_erasure_coding_leads_to_invalid_error() { activated: smallvec![ActivatedLeaf { hash: test_state.current.clone(), number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }], deactivated: smallvec![], @@ -786,6 +793,7 @@ fn fast_path_backing_group_recovers() { activated: smallvec![ActivatedLeaf { hash: test_state.current.clone(), number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }], deactivated: smallvec![], @@ -838,6 +846,7 @@ fn no_answers_in_fast_path_causes_chunk_requests() { activated: smallvec![ActivatedLeaf { hash: test_state.current.clone(), number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }], deactivated: smallvec![], @@ -900,6 +909,7 @@ fn task_canceled_when_receivers_dropped() { activated: smallvec![ActivatedLeaf { hash: test_state.current.clone(), number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }], deactivated: smallvec![], @@ -942,6 +952,7 @@ fn chunks_retry_until_all_nodes_respond() { activated: smallvec![ActivatedLeaf { hash: test_state.current.clone(), number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }], deactivated: smallvec![], @@ -1000,6 +1011,7 @@ fn returns_early_if_we_have_the_data() { activated: smallvec![ActivatedLeaf { hash: test_state.current.clone(), number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }], deactivated: smallvec![], @@ -1037,6 +1049,7 @@ fn does_not_query_local_validator() { activated: smallvec![ActivatedLeaf { hash: test_state.current.clone(), number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }], deactivated: smallvec![], diff --git a/polkadot/node/network/bridge/src/lib.rs b/polkadot/node/network/bridge/src/lib.rs index 28aea415a3..5d2808bb5e 100644 --- a/polkadot/node/network/bridge/src/lib.rs +++ b/polkadot/node/network/bridge/src/lib.rs @@ -1157,7 +1157,7 @@ mod tests { use sc_network::{Event as NetworkEvent, IfDisconnected}; - use polkadot_subsystem::{jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal}; + use polkadot_subsystem::{jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, LeafStatus}; use polkadot_subsystem::messages::{ ApprovalDistributionMessage, BitfieldDistributionMessage, @@ -1471,6 +1471,7 @@ mod tests { ActiveLeavesUpdate::start_work(ActivatedLeaf { hash: head, number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }) )) @@ -1568,6 +1569,7 @@ mod tests { ActiveLeavesUpdate::start_work(ActivatedLeaf { hash: hash_a, number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }) )) @@ -1652,6 +1654,7 @@ mod tests { ActiveLeavesUpdate::start_work(ActivatedLeaf { hash: hash_a, number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }) )) @@ -1667,6 +1670,7 @@ mod tests { ActiveLeavesUpdate::start_work(ActivatedLeaf { hash: hash_b, number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }) )) @@ -1739,6 +1743,7 @@ mod tests { ActiveLeavesUpdate::start_work(ActivatedLeaf { hash: hash_a, number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }) )) @@ -1956,6 +1961,7 @@ mod tests { ActiveLeavesUpdate::start_work(ActivatedLeaf { hash: hash_a, number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }) )) @@ -2171,6 +2177,7 @@ mod tests { ActiveLeavesUpdate::start_work(ActivatedLeaf { hash: hash_b, number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }) )) @@ -2400,6 +2407,7 @@ mod tests { activated: hashes.enumerate().map(|(i, h)| ActivatedLeaf { hash: h, number: i as _, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }).rev().collect(), deactivated: Default::default(), diff --git a/polkadot/node/network/collator-protocol/src/collator_side.rs b/polkadot/node/network/collator-protocol/src/collator_side.rs index 69d26298fd..87b88a15f4 100644 --- a/polkadot/node/network/collator-protocol/src/collator_side.rs +++ b/polkadot/node/network/collator-protocol/src/collator_side.rs @@ -921,7 +921,7 @@ mod tests { use polkadot_subsystem::{ jaeger, messages::{RuntimeApiMessage, RuntimeApiRequest}, - ActiveLeavesUpdate, ActivatedLeaf, + ActiveLeavesUpdate, ActivatedLeaf, LeafStatus, }; use polkadot_subsystem_testhelpers as test_helpers; @@ -1159,6 +1159,7 @@ mod tests { activated: vec![ActivatedLeaf { hash: test_state.relay_parent, number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }].into(), deactivated: [][..].into(), diff --git a/polkadot/node/network/gossip-support/src/tests.rs b/polkadot/node/network/gossip-support/src/tests.rs index 72b71d0d65..3fa99e9e07 100644 --- a/polkadot/node/network/gossip-support/src/tests.rs +++ b/polkadot/node/network/gossip-support/src/tests.rs @@ -18,7 +18,7 @@ use super::*; use polkadot_node_subsystem::{ - jaeger, ActivatedLeaf, + jaeger, ActivatedLeaf, LeafStatus, messages::{RuntimeApiMessage, RuntimeApiRequest}, }; use polkadot_node_subsystem_test_helpers as test_helpers; @@ -73,6 +73,7 @@ async fn overseer_signal_active_leaves( let leaf = ActivatedLeaf { hash: leaf, number: 0xdeadcafe, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }; overseer diff --git a/polkadot/node/network/statement-distribution/src/lib.rs b/polkadot/node/network/statement-distribution/src/lib.rs index 2b35103205..6b1324efac 100644 --- a/polkadot/node/network/statement-distribution/src/lib.rs +++ b/polkadot/node/network/statement-distribution/src/lib.rs @@ -903,7 +903,7 @@ async fn circulate_statement_and_dependents( fn statement_message(relay_parent: Hash, statement: SignedFullStatement) -> protocol_v1::ValidationProtocol -{ +{ let msg = if is_statement_large(&statement) { protocol_v1::StatementDistributionMessage::LargeStatement( StatementMetadata { @@ -1198,7 +1198,7 @@ async fn retrieve_statement_from_message<'a>( ).await { vacant.insert(new_status); } - } + } protocol_v1::StatementDistributionMessage::Statement(_, s) => { // No fetch in progress, safe to return any statement immediately (we don't bother // about normal network jitter which might cause `Valid` statements to arrive early @@ -1594,7 +1594,7 @@ impl StatementDistribution { match result { Ok(true) => break, Ok(false) => {} - Err(Error(Fault::Fatal(f))) => return Err(f), + Err(Error(Fault::Fatal(f))) => return Err(f), Err(Error(Fault::Err(error))) => tracing::debug!(target: LOG_TARGET, ?error) } @@ -2072,7 +2072,9 @@ mod tests { use sp_keystore::{CryptoStore, SyncCryptoStorePtr, SyncCryptoStore}; use sc_keystore::LocalKeystore; use polkadot_node_network_protocol::{view, ObservedRole, request_response::Recipient}; - use polkadot_subsystem::{jaeger, ActivatedLeaf, messages::{RuntimeApiMessage, RuntimeApiRequest}}; + use polkadot_subsystem::{ + jaeger, ActivatedLeaf, messages::{RuntimeApiMessage, RuntimeApiRequest}, LeafStatus, + }; use polkadot_node_network_protocol::request_response::{ Requests, v1::{ @@ -2690,6 +2692,7 @@ mod tests { activated: vec![ActivatedLeaf { hash: hash_a, number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }].into(), deactivated: vec![].into(), @@ -2865,6 +2868,7 @@ mod tests { activated: vec![ActivatedLeaf { hash: hash_a, number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }].into(), deactivated: vec![].into(), @@ -3336,6 +3340,7 @@ mod tests { activated: vec![ActivatedLeaf { hash: hash_a, number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }].into(), deactivated: vec![].into(), @@ -3591,6 +3596,7 @@ mod tests { activated: vec![ActivatedLeaf { hash: hash_a, number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }].into(), deactivated: vec![].into(), @@ -3634,7 +3640,7 @@ mod tests { NetworkBridgeEvent::PeerViewChange(peer_a.clone(), view![hash_a]) ) }).await; - + // receive a seconded statement from peer A. let statement = { let signing_context = SigningContext { diff --git a/polkadot/node/overseer/Cargo.toml b/polkadot/node/overseer/Cargo.toml index c9b23b5e70..f62f67e64a 100644 --- a/polkadot/node/overseer/Cargo.toml +++ b/polkadot/node/overseer/Cargo.toml @@ -16,6 +16,7 @@ polkadot-procmacro-overseer-subsystems-gen = { path = "./subsystems-gen" } polkadot-primitives = { path = "../../primitives" } polkadot-subsystem = { package = "polkadot-node-subsystem", path = "../subsystem" } tracing = "0.1.26" +lru = "0.6" [dev-dependencies] sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" } diff --git a/polkadot/node/overseer/src/lib.rs b/polkadot/node/overseer/src/lib.rs index 7a6176d8df..2c656b6396 100644 --- a/polkadot/node/overseer/src/lib.rs +++ b/polkadot/node/overseer/src/lib.rs @@ -74,6 +74,7 @@ use futures::{ Future, FutureExt, StreamExt, }; use futures_timer::Delay; +use lru::LruCache; use polkadot_primitives::v1::{Block, BlockId,BlockNumber, Hash, ParachainHost}; use client::{BlockImportNotification, BlockchainEvents, FinalityNotification}; @@ -91,6 +92,7 @@ use polkadot_subsystem::messages::{ pub use polkadot_subsystem::{ Subsystem, SubsystemContext, SubsystemSender, OverseerSignal, FromOverseer, SubsystemError, SubsystemResult, SpawnedSubsystem, ActiveLeavesUpdate, ActivatedLeaf, DummySubsystem, jaeger, + LeafStatus, }; use polkadot_node_subsystem_util::{TimeoutExt, metrics::{self, prometheus}, metered, Metronome}; use polkadot_node_primitives::SpawnNamed; @@ -1045,6 +1047,11 @@ struct SubsystemMeterReadouts { signals: metered::Readout, } + +/// Store 2 days worth of blocks, not accounting for forks, +/// in the LRU cache. Assumes a 6-second block time. +const KNOWN_LEAVES_CACHE_SIZE: usize = 2 * 24 * 3600 / 6; + /// The `Overseer` itself. pub struct Overseer { /// Handles to all subsystems. @@ -1098,6 +1105,9 @@ pub struct Overseer { /// An implementation for checking whether a header supports parachain consensus. supports_parachains: SupportsParachains, + /// An LRU cache for keeping track of relay-chain heads that have already been seen. + known_leaves: LruCache, + /// Various Prometheus metrics. metrics: Metrics, } @@ -1832,6 +1842,7 @@ where active_leaves, metrics, span_per_active_leaf: Default::default(), + known_leaves: LruCache::new(KNOWN_LEAVES_CACHE_SIZE), supports_parachains, }; @@ -1881,10 +1892,11 @@ where for (hash, number) in std::mem::take(&mut self.leaves) { let _ = self.active_leaves.insert(hash, number); - if let Some(span) = self.on_head_activated(&hash, None) { + if let Some((span, status)) = self.on_head_activated(&hash, None) { update.activated.push(ActivatedLeaf { hash, number, + status, span, }); } @@ -1967,9 +1979,10 @@ where }; let mut update = match self.on_head_activated(&block.hash, Some(block.parent_hash)) { - Some(span) => ActiveLeavesUpdate::start_work(ActivatedLeaf { + Some((span, status)) => ActiveLeavesUpdate::start_work(ActivatedLeaf { hash: block.hash, number: block.number, + status, span }), None => ActiveLeavesUpdate::default(), @@ -2110,7 +2123,7 @@ where /// this returns `None`. #[tracing::instrument(level = "trace", skip(self), fields(subsystem = LOG_TARGET))] fn on_head_activated(&mut self, hash: &Hash, parent_hash: Option) - -> Option> + -> Option<(Arc, LeafStatus)> { if !self.supports_parachains.head_supports_parachains(hash) { return None; @@ -2132,7 +2145,14 @@ where let span = Arc::new(span); self.span_per_active_leaf.insert(*hash, span.clone()); - Some(span) + + let status = if let Some(_) = self.known_leaves.put(*hash, ()) { + LeafStatus::Stale + } else { + LeafStatus::Fresh + }; + + Some((span, status)) } #[tracing::instrument(level = "trace", skip(self), fields(subsystem = LOG_TARGET))] @@ -2620,12 +2640,14 @@ mod tests { OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(ActivatedLeaf { hash: first_block_hash, number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), })), OverseerSignal::ActiveLeaves(ActiveLeavesUpdate { activated: [ActivatedLeaf { hash: second_block_hash, number: 2, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }].as_ref().into(), deactivated: [first_block_hash].as_ref().into(), @@ -2634,6 +2656,7 @@ mod tests { activated: [ActivatedLeaf { hash: third_block_hash, number: 3, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }].as_ref().into(), deactivated: [second_block_hash].as_ref().into(), @@ -2728,11 +2751,13 @@ mod tests { ActivatedLeaf { hash: first_block_hash, number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }, ActivatedLeaf { hash: second_block_hash, number: 2, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }, ].as_ref().into(), @@ -2825,6 +2850,7 @@ mod tests { ActivatedLeaf { hash: imported_block.hash, number: imported_block.number, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled) } ].as_ref().into(), @@ -2859,6 +2885,146 @@ mod tests { }); } + // Tests that duplicate leaves have an attached 'Stale' status. + #[test] + fn overseer_stale_detection() { + let spawner = sp_core::testing::TaskExecutor::new(); + + executor::block_on(async move { + let a1_hash = [1; 32].into(); + let b1_hash = [2; 32].into(); + + let a2_hash = [3; 32].into(); + let b2_hash = [4; 32].into(); + + let first_block = BlockInfo { + hash: a1_hash, + parent_hash: [0; 32].into(), + number: 1, + }; + let second_block = BlockInfo { + hash: b1_hash, + parent_hash: [0; 32].into(), + number: 1, + }; + + let third_block = BlockInfo { + hash: a2_hash, + parent_hash: a1_hash, + number: 2, + }; + + let fourth_block = BlockInfo { + hash: b2_hash, + parent_hash: b1_hash, + number: 2, + }; + + let (tx_5, mut rx_5) = metered::channel(64); + let (tx_6, mut rx_6) = metered::channel(64); + let all_subsystems = AllSubsystems::<()>::dummy() + .replace_candidate_validation(TestSubsystem5(tx_5)) + .replace_candidate_backing(TestSubsystem6(tx_6)); + + let (overseer, mut handler) = Overseer::new( + vec![first_block.clone()], + all_subsystems, + None, + MockSupportsParachains, + spawner, + ).unwrap(); + + let overseer_fut = overseer.run().fuse(); + pin_mut!(overseer_fut); + + let mut ss5_results = Vec::new(); + let mut ss6_results = Vec::new(); + + handler.block_imported(second_block.clone()).await; + + // import the second block of each chain to deactivate the heads. + handler.block_imported(third_block).await; + handler.block_imported(fourth_block).await; + + // import the first blocks again (emulating a revert) + handler.block_imported(first_block).await; + handler.block_imported(second_block).await; + + let expected_heartbeats = vec![ + OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(ActivatedLeaf { + hash: a1_hash, + number: 1, + status: LeafStatus::Fresh, + span: Arc::new(jaeger::Span::Disabled), + })), + OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(ActivatedLeaf { + hash: b1_hash, + number: 1, + status: LeafStatus::Fresh, + span: Arc::new(jaeger::Span::Disabled), + })), + OverseerSignal::ActiveLeaves(ActiveLeavesUpdate { + activated: [ActivatedLeaf { + hash: a2_hash, + number: 2, + status: LeafStatus::Fresh, + span: Arc::new(jaeger::Span::Disabled), + }].as_ref().into(), + deactivated: [a1_hash].as_ref().into(), + }), + OverseerSignal::ActiveLeaves(ActiveLeavesUpdate { + activated: [ActivatedLeaf { + hash: b2_hash, + number: 2, + status: LeafStatus::Fresh, + span: Arc::new(jaeger::Span::Disabled), + }].as_ref().into(), + deactivated: [b1_hash].as_ref().into(), + }), + OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(ActivatedLeaf { + hash: a1_hash, + number: 1, + status: LeafStatus::Stale, + span: Arc::new(jaeger::Span::Disabled), + })), + OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(ActivatedLeaf { + hash: b1_hash, + number: 1, + status: LeafStatus::Stale, + span: Arc::new(jaeger::Span::Disabled), + })), + ]; + + loop { + select! { + res = overseer_fut => { + assert!(res.is_ok()); + break; + }, + res = rx_5.next() => { + if let Some(res) = res { + ss5_results.push(res); + } + } + res = rx_6.next() => { + if let Some(res) = res { + ss6_results.push(res); + } + } + complete => break, + } + + if ss5_results.len() == expected_heartbeats.len() && + ss6_results.len() == expected_heartbeats.len() { + handler.stop().await; + } + } + + assert_eq!(ss5_results, expected_heartbeats); + assert_eq!(ss6_results, expected_heartbeats); + }); + } + #[derive(Clone)] struct CounterSubsystem { stop_signals_received: Arc, diff --git a/polkadot/node/subsystem-util/src/lib.rs b/polkadot/node/subsystem-util/src/lib.rs index 0a7f85196d..0664d70438 100644 --- a/polkadot/node/subsystem-util/src/lib.rs +++ b/polkadot/node/subsystem-util/src/lib.rs @@ -868,7 +868,7 @@ mod tests { use polkadot_node_jaeger as jaeger; use polkadot_node_subsystem::{ messages::{AllMessages, CandidateSelectionMessage}, ActiveLeavesUpdate, FromOverseer, OverseerSignal, - SpawnedSubsystem, ActivatedLeaf, + SpawnedSubsystem, ActivatedLeaf, LeafStatus, }; use assert_matches::assert_matches; use futures::{channel::mpsc, executor, StreamExt, future, Future, FutureExt, SinkExt}; @@ -998,6 +998,7 @@ mod tests { ActiveLeavesUpdate::start_work(ActivatedLeaf { hash: relay_parent, number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }), ))) @@ -1028,6 +1029,7 @@ mod tests { ActiveLeavesUpdate::start_work(ActivatedLeaf { hash: relay_parent, number: 1, + status: LeafStatus::Fresh, span: Arc::new(jaeger::Span::Disabled), }), ))) diff --git a/polkadot/node/subsystem/src/lib.rs b/polkadot/node/subsystem/src/lib.rs index 7cc21beeb9..871b6f2c80 100644 --- a/polkadot/node/subsystem/src/lib.rs +++ b/polkadot/node/subsystem/src/lib.rs @@ -46,6 +46,36 @@ use self::messages::AllMessages; /// If there are greater than this number of slots, then we fall back to a heap vector. const ACTIVE_LEAVES_SMALLVEC_CAPACITY: usize = 8; + +/// The status of an activated leaf. +#[derive(Debug, Clone)] +pub enum LeafStatus { + /// A leaf is fresh when it's the first time the leaf has been encountered. + /// Most leaves should be fresh. + Fresh, + /// A leaf is stale when it's encountered for a subsequent time. This will happen + /// when the chain is reverted or the fork-choice rule abandons some chain. + Stale, +} + +impl LeafStatus { + /// Returns a bool indicating fresh status. + pub fn is_fresh(&self) -> bool { + match *self { + LeafStatus::Fresh => true, + LeafStatus::Stale => false, + } + } + + /// Returns a bool indicating stale status. + pub fn is_stale(&self) -> bool { + match *self { + LeafStatus::Fresh => false, + LeafStatus::Stale => true, + } + } +} + /// Activated leaf. #[derive(Debug, Clone)] pub struct ActivatedLeaf { @@ -53,6 +83,8 @@ pub struct ActivatedLeaf { pub hash: Hash, /// The block number. pub number: BlockNumber, + /// The status of the leaf. + pub status: LeafStatus, /// An associated [`jaeger::Span`]. /// /// NOTE: Each span should only be kept active as long as the leaf is considered active and should be dropped diff --git a/polkadot/roadmap/implementers-guide/src/node/approval/approval-voting.md b/polkadot/roadmap/implementers-guide/src/node/approval/approval-voting.md index 02f0073a6b..89042ebd7e 100644 --- a/polkadot/roadmap/implementers-guide/src/node/approval/approval-voting.md +++ b/polkadot/roadmap/implementers-guide/src/node/approval/approval-voting.md @@ -170,7 +170,7 @@ On receiving an `OverseerSignal::BlockFinalized(h)`, we fetch the block number ` #### `OverseerSignal::ActiveLeavesUpdate` On receiving an `OverseerSignal::ActiveLeavesUpdate(update)`: - * We determine the set of new blocks that were not in our previous view. This is done by querying the ancestry of all new items in the view and contrasting against the stored `BlockNumber`s. Typically, there will be only one new block. We fetch the headers and information on these blocks from the ChainApi subsystem. + * We determine the set of new blocks that were not in our previous view. This is done by querying the ancestry of all new items in the view and contrasting against the stored `BlockNumber`s. Typically, there will be only one new block. We fetch the headers and information on these blocks from the ChainApi subsystem. Stale leaves in the update can be ignored. * We update the `StoredBlockRange` and the `BlockNumber` maps. * We use the RuntimeApiSubsystem to determine information about these blocks. It is generally safe to assume that runtime state is available for recent, unfinalized blocks. In the case that it isn't, it means that we are catching up to the head of the chain and needn't worry about assignments to those blocks anyway, as the security assumption of the protocol tolerates nodes being temporarily offline or out-of-date. * We fetch the set of candidates included by each block by dispatching a `RuntimeApiRequest::CandidateEvents` and checking the `CandidateIncluded` events. diff --git a/polkadot/roadmap/implementers-guide/src/node/availability/bitfield-signing.md b/polkadot/roadmap/implementers-guide/src/node/availability/bitfield-signing.md index f3ef3a4e75..4fdea331db 100644 --- a/polkadot/roadmap/implementers-guide/src/node/availability/bitfield-signing.md +++ b/polkadot/roadmap/implementers-guide/src/node/availability/bitfield-signing.md @@ -15,7 +15,7 @@ Output: ## Functionality -Upon receipt of an `ActiveLeavesUpdate`, launch bitfield signing job for each `activated` head. Stop the job for each `deactivated` head. +Upon receipt of an `ActiveLeavesUpdate`, launch bitfield signing job for each `activated` head referring to a fresh leaf. Stop the job for each `deactivated` head. ## Bitfield Signing Job diff --git a/polkadot/roadmap/implementers-guide/src/node/backing/candidate-backing.md b/polkadot/roadmap/implementers-guide/src/node/backing/candidate-backing.md index 8cd51b8bed..363320d1a7 100644 --- a/polkadot/roadmap/implementers-guide/src/node/backing/candidate-backing.md +++ b/polkadot/roadmap/implementers-guide/src/node/backing/candidate-backing.md @@ -32,7 +32,7 @@ The subsystem should maintain a set of handles to Candidate Backing Jobs that ar ### On Overseer Signal * If the signal is an [`OverseerSignal`][OverseerSignal]`::ActiveLeavesUpdate`: - * spawn a Candidate Backing Job for each `activated` head, storing a bidirectional channel with the Candidate Backing Job in the set of handles. + * spawn a Candidate Backing Job for each `activated` head referring to a fresh leaf, storing a bidirectional channel with the Candidate Backing Job in the set of handles. * cease the Candidate Backing Job for each `deactivated` head, if any. * If the signal is an [`OverseerSignal`][OverseerSignal]`::Conclude`: Forward conclude messages to all jobs, wait a small amount of time for them to join, and then exit. diff --git a/polkadot/roadmap/implementers-guide/src/node/overseer.md b/polkadot/roadmap/implementers-guide/src/node/overseer.md index 0466185430..35b69ff283 100644 --- a/polkadot/roadmap/implementers-guide/src/node/overseer.md +++ b/polkadot/roadmap/implementers-guide/src/node/overseer.md @@ -26,6 +26,8 @@ The hierarchy of subsystems: The overseer determines work to do based on block import events and block finalization events. It does this by keeping track of the set of relay-parents for which work is currently being done. This is known as the "active leaves" set. It determines an initial set of active leaves on startup based on the data on-disk, and uses events about blockchain import to update the active leaves. Updates lead to [`OverseerSignal`](../types/overseer-protocol.md#overseer-signal)`::ActiveLeavesUpdate` being sent according to new relay-parents, as well as relay-parents to stop considering. Block import events inform the overseer of leaves that no longer need to be built on, now that they have children, and inform us to begin building on those children. Block finalization events inform us when we can stop focusing on blocks that appear to have been orphaned. +The overseer is also responsible for tracking the freshness of active leaves. Leaves are fresh when they're encountered for the first time, and stale when they're encountered for subsequent times. This can occur after chain reversions or when the fork-choice rule abandons some chain. This distinction is used to manage **Reversion Safety**. Consensus messages are often localized to a specific relay-parent, and it is often a misbehavior to equivocate or sign two conflicting messages. When reverting the chain, we may begin work on a leaf that subsystems have already signed messages for. Subsystems which need to account for reversion safety should avoid performing work on stale leaves. + The overseer's logic can be described with these functions: ## On Startup @@ -38,6 +40,7 @@ The overseer's logic can be described with these functions: ## On Block Import Event * Apply the block import event to the active leaves. A new block should lead to its addition to the active leaves set and its parent being deactivated. +* Mark any stale leaves as stale. The overseer should track all leaves it activates to determine whether leaves are fresh or stale. * Send an `OverseerSignal::ActiveLeavesUpdate` message to all subsystems containing all activated and deactivated leaves. * Ensure all `ActiveLeavesUpdate` messages are flushed before resuming activity as a message router. diff --git a/polkadot/roadmap/implementers-guide/src/types/overseer-protocol.md b/polkadot/roadmap/implementers-guide/src/types/overseer-protocol.md index 6222744cbf..891be333ef 100644 --- a/polkadot/roadmap/implementers-guide/src/types/overseer-protocol.md +++ b/polkadot/roadmap/implementers-guide/src/types/overseer-protocol.md @@ -29,8 +29,18 @@ Either way, there will be some top-level type encapsulating messages from the ov Indicates a change in active leaves. Activated leaves should have jobs, whereas deactivated leaves should lead to winding-down of work based on those leaves. ```rust +enum LeafStatus { + // A leaf is fresh when it's the first time the leaf has been encountered. + // Most leaves should be fresh. + Fresh, + // A leaf is stale when it's encountered for a subsequent time. This will + // happen when the chain is reverted or the fork-choice rule abandons some + // chain. + Stale, +} + struct ActiveLeavesUpdate { - activated: [(Hash, Number)], // in practice, these should probably be a SmallVec + activated: [(Hash, Number, LeafStatus)], // in practice, these should probably be a SmallVec deactivated: [Hash], } ```