Reversion Safety tools for overseer and subsystems (#3104)

* guide: reversion safety

* guide: manage reversion safety in subsystems

* add leaf status to ActivatedLeaf

* add an LRU-cache to overseer for staleness detection

* update ActivatedLeaf usages in tests to contain status field

* add variant where missed accidentally

* add some helpers to LeafStatus

* address grumbles
This commit is contained in:
Robert Habermeier
2021-05-31 13:54:05 -05:00
committed by GitHub
parent 6b166a7a1f
commit 963993d288
18 changed files with 270 additions and 20 deletions
+1
View File
@@ -6260,6 +6260,7 @@ dependencies = [
"futures 0.3.14",
"futures-timer 3.0.2",
"kv-log-macro",
"lru",
"polkadot-node-network-protocol",
"polkadot-node-primitives",
"polkadot-node-subsystem",
+3
View File
@@ -33,6 +33,7 @@ use polkadot_node_primitives::{AvailableData, BlockData, PoV};
use polkadot_node_subsystem_util::TimeoutExt;
use polkadot_subsystem::{
ActiveLeavesUpdate, errors::RuntimeApiError, jaeger, messages::AllMessages, ActivatedLeaf,
LeafStatus,
};
use polkadot_node_subsystem_test_helpers as test_helpers;
use sp_keyring::Sr25519Keyring;
@@ -258,6 +259,7 @@ fn runtime_api_error_does_not_stop_the_subsystem() {
activated: vec![ActivatedLeaf {
hash: new_leaf,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}].into(),
deactivated: vec![].into(),
@@ -994,6 +996,7 @@ async fn import_leaf(
activated: vec![ActivatedLeaf {
hash: new_leaf,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}].into(),
deactivated: vec![].into(),
+2 -1
View File
@@ -1333,7 +1333,7 @@ mod tests {
use polkadot_primitives::v1::{GroupRotationInfo, HeadData, PersistedValidationData, ScheduledCore};
use polkadot_subsystem::{
messages::{RuntimeApiRequest, RuntimeApiMessage},
ActiveLeavesUpdate, FromOverseer, OverseerSignal, ActivatedLeaf,
ActiveLeavesUpdate, FromOverseer, OverseerSignal, ActivatedLeaf, LeafStatus,
};
use polkadot_node_primitives::{InvalidCandidate, BlockData};
use polkadot_node_subsystem_test_helpers as test_helpers;
@@ -1526,6 +1526,7 @@ mod tests {
OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(ActivatedLeaf {
hash: test_state.relay_parent,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
})))
).await;
@@ -29,7 +29,8 @@ use sc_network as network;
use sc_network::IfDisconnected;
use sc_network::config as netconfig;
use polkadot_subsystem::{ActiveLeavesUpdate, FromOverseer, OverseerSignal, ActivatedLeaf,
use polkadot_subsystem::{
ActiveLeavesUpdate, FromOverseer, OverseerSignal, ActivatedLeaf, LeafStatus,
messages::{
AllMessages, AvailabilityDistributionMessage, AvailabilityStoreMessage, NetworkBridgeMessage,
RuntimeApiMessage, RuntimeApiRequest,
@@ -173,6 +174,7 @@ impl TestState {
activated: smallvec![ActivatedLeaf {
hash: new.clone(),
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}],
deactivated: smallvec![old.clone()],
@@ -33,7 +33,9 @@ use polkadot_node_primitives::{PoV, BlockData};
use polkadot_erasure_coding::{branches, obtain_chunks_v1 as obtain_chunks};
use polkadot_node_subsystem_util::TimeoutExt;
use polkadot_subsystem_testhelpers as test_helpers;
use polkadot_subsystem::{messages::{RuntimeApiMessage, RuntimeApiRequest}, jaeger, ActivatedLeaf};
use polkadot_subsystem::{
messages::{RuntimeApiMessage, RuntimeApiRequest}, jaeger, ActivatedLeaf, LeafStatus,
};
type VirtualOverseer = test_helpers::TestSubsystemContextHandle<AvailabilityRecoveryMessage>;
@@ -448,6 +450,7 @@ fn availability_is_recovered_from_chunks_if_no_group_provided() {
activated: smallvec![ActivatedLeaf {
hash: test_state.current.clone(),
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}],
deactivated: smallvec![],
@@ -529,6 +532,7 @@ fn availability_is_recovered_from_chunks_even_if_backing_group_supplied_if_chunk
activated: smallvec![ActivatedLeaf {
hash: test_state.current.clone(),
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}],
deactivated: smallvec![],
@@ -610,6 +614,7 @@ fn bad_merkle_path_leads_to_recovery_error() {
activated: smallvec![ActivatedLeaf {
hash: test_state.current.clone(),
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}],
deactivated: smallvec![],
@@ -666,6 +671,7 @@ fn wrong_chunk_index_leads_to_recovery_error() {
activated: smallvec![ActivatedLeaf {
hash: test_state.current.clone(),
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}],
deactivated: smallvec![],
@@ -739,6 +745,7 @@ fn invalid_erasure_coding_leads_to_invalid_error() {
activated: smallvec![ActivatedLeaf {
hash: test_state.current.clone(),
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}],
deactivated: smallvec![],
@@ -786,6 +793,7 @@ fn fast_path_backing_group_recovers() {
activated: smallvec![ActivatedLeaf {
hash: test_state.current.clone(),
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}],
deactivated: smallvec![],
@@ -838,6 +846,7 @@ fn no_answers_in_fast_path_causes_chunk_requests() {
activated: smallvec![ActivatedLeaf {
hash: test_state.current.clone(),
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}],
deactivated: smallvec![],
@@ -900,6 +909,7 @@ fn task_canceled_when_receivers_dropped() {
activated: smallvec![ActivatedLeaf {
hash: test_state.current.clone(),
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}],
deactivated: smallvec![],
@@ -942,6 +952,7 @@ fn chunks_retry_until_all_nodes_respond() {
activated: smallvec![ActivatedLeaf {
hash: test_state.current.clone(),
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}],
deactivated: smallvec![],
@@ -1000,6 +1011,7 @@ fn returns_early_if_we_have_the_data() {
activated: smallvec![ActivatedLeaf {
hash: test_state.current.clone(),
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}],
deactivated: smallvec![],
@@ -1037,6 +1049,7 @@ fn does_not_query_local_validator() {
activated: smallvec![ActivatedLeaf {
hash: test_state.current.clone(),
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}],
deactivated: smallvec![],
+9 -1
View File
@@ -1157,7 +1157,7 @@ mod tests {
use sc_network::{Event as NetworkEvent, IfDisconnected};
use polkadot_subsystem::{jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal};
use polkadot_subsystem::{jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, LeafStatus};
use polkadot_subsystem::messages::{
ApprovalDistributionMessage,
BitfieldDistributionMessage,
@@ -1471,6 +1471,7 @@ mod tests {
ActiveLeavesUpdate::start_work(ActivatedLeaf {
hash: head,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
})
))
@@ -1568,6 +1569,7 @@ mod tests {
ActiveLeavesUpdate::start_work(ActivatedLeaf {
hash: hash_a,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
})
))
@@ -1652,6 +1654,7 @@ mod tests {
ActiveLeavesUpdate::start_work(ActivatedLeaf {
hash: hash_a,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
})
))
@@ -1667,6 +1670,7 @@ mod tests {
ActiveLeavesUpdate::start_work(ActivatedLeaf {
hash: hash_b,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
})
))
@@ -1739,6 +1743,7 @@ mod tests {
ActiveLeavesUpdate::start_work(ActivatedLeaf {
hash: hash_a,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
})
))
@@ -1956,6 +1961,7 @@ mod tests {
ActiveLeavesUpdate::start_work(ActivatedLeaf {
hash: hash_a,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
})
))
@@ -2171,6 +2177,7 @@ mod tests {
ActiveLeavesUpdate::start_work(ActivatedLeaf {
hash: hash_b,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
})
))
@@ -2400,6 +2407,7 @@ mod tests {
activated: hashes.enumerate().map(|(i, h)| ActivatedLeaf {
hash: h,
number: i as _,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}).rev().collect(),
deactivated: Default::default(),
@@ -921,7 +921,7 @@ mod tests {
use polkadot_subsystem::{
jaeger,
messages::{RuntimeApiMessage, RuntimeApiRequest},
ActiveLeavesUpdate, ActivatedLeaf,
ActiveLeavesUpdate, ActivatedLeaf, LeafStatus,
};
use polkadot_subsystem_testhelpers as test_helpers;
@@ -1159,6 +1159,7 @@ mod tests {
activated: vec![ActivatedLeaf {
hash: test_state.relay_parent,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}].into(),
deactivated: [][..].into(),
@@ -18,7 +18,7 @@
use super::*;
use polkadot_node_subsystem::{
jaeger, ActivatedLeaf,
jaeger, ActivatedLeaf, LeafStatus,
messages::{RuntimeApiMessage, RuntimeApiRequest},
};
use polkadot_node_subsystem_test_helpers as test_helpers;
@@ -73,6 +73,7 @@ async fn overseer_signal_active_leaves(
let leaf = ActivatedLeaf {
hash: leaf,
number: 0xdeadcafe,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
};
overseer
@@ -903,7 +903,7 @@ async fn circulate_statement_and_dependents(
fn statement_message(relay_parent: Hash, statement: SignedFullStatement)
-> protocol_v1::ValidationProtocol
{
{
let msg = if is_statement_large(&statement) {
protocol_v1::StatementDistributionMessage::LargeStatement(
StatementMetadata {
@@ -1198,7 +1198,7 @@ async fn retrieve_statement_from_message<'a>(
).await {
vacant.insert(new_status);
}
}
}
protocol_v1::StatementDistributionMessage::Statement(_, s) => {
// No fetch in progress, safe to return any statement immediately (we don't bother
// about normal network jitter which might cause `Valid` statements to arrive early
@@ -1594,7 +1594,7 @@ impl StatementDistribution {
match result {
Ok(true) => break,
Ok(false) => {}
Err(Error(Fault::Fatal(f))) => return Err(f),
Err(Error(Fault::Fatal(f))) => return Err(f),
Err(Error(Fault::Err(error))) =>
tracing::debug!(target: LOG_TARGET, ?error)
}
@@ -2072,7 +2072,9 @@ mod tests {
use sp_keystore::{CryptoStore, SyncCryptoStorePtr, SyncCryptoStore};
use sc_keystore::LocalKeystore;
use polkadot_node_network_protocol::{view, ObservedRole, request_response::Recipient};
use polkadot_subsystem::{jaeger, ActivatedLeaf, messages::{RuntimeApiMessage, RuntimeApiRequest}};
use polkadot_subsystem::{
jaeger, ActivatedLeaf, messages::{RuntimeApiMessage, RuntimeApiRequest}, LeafStatus,
};
use polkadot_node_network_protocol::request_response::{
Requests,
v1::{
@@ -2690,6 +2692,7 @@ mod tests {
activated: vec![ActivatedLeaf {
hash: hash_a,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}].into(),
deactivated: vec![].into(),
@@ -2865,6 +2868,7 @@ mod tests {
activated: vec![ActivatedLeaf {
hash: hash_a,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}].into(),
deactivated: vec![].into(),
@@ -3336,6 +3340,7 @@ mod tests {
activated: vec![ActivatedLeaf {
hash: hash_a,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}].into(),
deactivated: vec![].into(),
@@ -3591,6 +3596,7 @@ mod tests {
activated: vec![ActivatedLeaf {
hash: hash_a,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}].into(),
deactivated: vec![].into(),
@@ -3634,7 +3640,7 @@ mod tests {
NetworkBridgeEvent::PeerViewChange(peer_a.clone(), view![hash_a])
)
}).await;
// receive a seconded statement from peer A.
let statement = {
let signing_context = SigningContext {
+1
View File
@@ -16,6 +16,7 @@ polkadot-procmacro-overseer-subsystems-gen = { path = "./subsystems-gen" }
polkadot-primitives = { path = "../../primitives" }
polkadot-subsystem = { package = "polkadot-node-subsystem", path = "../subsystem" }
tracing = "0.1.26"
lru = "0.6"
[dev-dependencies]
sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" }
+170 -4
View File
@@ -74,6 +74,7 @@ use futures::{
Future, FutureExt, StreamExt,
};
use futures_timer::Delay;
use lru::LruCache;
use polkadot_primitives::v1::{Block, BlockId,BlockNumber, Hash, ParachainHost};
use client::{BlockImportNotification, BlockchainEvents, FinalityNotification};
@@ -91,6 +92,7 @@ use polkadot_subsystem::messages::{
pub use polkadot_subsystem::{
Subsystem, SubsystemContext, SubsystemSender, OverseerSignal, FromOverseer, SubsystemError,
SubsystemResult, SpawnedSubsystem, ActiveLeavesUpdate, ActivatedLeaf, DummySubsystem, jaeger,
LeafStatus,
};
use polkadot_node_subsystem_util::{TimeoutExt, metrics::{self, prometheus}, metered, Metronome};
use polkadot_node_primitives::SpawnNamed;
@@ -1045,6 +1047,11 @@ struct SubsystemMeterReadouts {
signals: metered::Readout,
}
/// Store 2 days worth of blocks, not accounting for forks,
/// in the LRU cache. Assumes a 6-second block time.
const KNOWN_LEAVES_CACHE_SIZE: usize = 2 * 24 * 3600 / 6;
/// The `Overseer` itself.
pub struct Overseer<S, SupportsParachains> {
/// Handles to all subsystems.
@@ -1098,6 +1105,9 @@ pub struct Overseer<S, SupportsParachains> {
/// An implementation for checking whether a header supports parachain consensus.
supports_parachains: SupportsParachains,
/// An LRU cache for keeping track of relay-chain heads that have already been seen.
known_leaves: LruCache<Hash, ()>,
/// Various Prometheus metrics.
metrics: Metrics,
}
@@ -1832,6 +1842,7 @@ where
active_leaves,
metrics,
span_per_active_leaf: Default::default(),
known_leaves: LruCache::new(KNOWN_LEAVES_CACHE_SIZE),
supports_parachains,
};
@@ -1881,10 +1892,11 @@ where
for (hash, number) in std::mem::take(&mut self.leaves) {
let _ = self.active_leaves.insert(hash, number);
if let Some(span) = self.on_head_activated(&hash, None) {
if let Some((span, status)) = self.on_head_activated(&hash, None) {
update.activated.push(ActivatedLeaf {
hash,
number,
status,
span,
});
}
@@ -1967,9 +1979,10 @@ where
};
let mut update = match self.on_head_activated(&block.hash, Some(block.parent_hash)) {
Some(span) => ActiveLeavesUpdate::start_work(ActivatedLeaf {
Some((span, status)) => ActiveLeavesUpdate::start_work(ActivatedLeaf {
hash: block.hash,
number: block.number,
status,
span
}),
None => ActiveLeavesUpdate::default(),
@@ -2110,7 +2123,7 @@ where
/// this returns `None`.
#[tracing::instrument(level = "trace", skip(self), fields(subsystem = LOG_TARGET))]
fn on_head_activated(&mut self, hash: &Hash, parent_hash: Option<Hash>)
-> Option<Arc<jaeger::Span>>
-> Option<(Arc<jaeger::Span>, LeafStatus)>
{
if !self.supports_parachains.head_supports_parachains(hash) {
return None;
@@ -2132,7 +2145,14 @@ where
let span = Arc::new(span);
self.span_per_active_leaf.insert(*hash, span.clone());
Some(span)
let status = if let Some(_) = self.known_leaves.put(*hash, ()) {
LeafStatus::Stale
} else {
LeafStatus::Fresh
};
Some((span, status))
}
#[tracing::instrument(level = "trace", skip(self), fields(subsystem = LOG_TARGET))]
@@ -2620,12 +2640,14 @@ mod tests {
OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(ActivatedLeaf {
hash: first_block_hash,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
})),
OverseerSignal::ActiveLeaves(ActiveLeavesUpdate {
activated: [ActivatedLeaf {
hash: second_block_hash,
number: 2,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}].as_ref().into(),
deactivated: [first_block_hash].as_ref().into(),
@@ -2634,6 +2656,7 @@ mod tests {
activated: [ActivatedLeaf {
hash: third_block_hash,
number: 3,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}].as_ref().into(),
deactivated: [second_block_hash].as_ref().into(),
@@ -2728,11 +2751,13 @@ mod tests {
ActivatedLeaf {
hash: first_block_hash,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
},
ActivatedLeaf {
hash: second_block_hash,
number: 2,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
},
].as_ref().into(),
@@ -2825,6 +2850,7 @@ mod tests {
ActivatedLeaf {
hash: imported_block.hash,
number: imported_block.number,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled)
}
].as_ref().into(),
@@ -2859,6 +2885,146 @@ mod tests {
});
}
// Tests that duplicate leaves have an attached 'Stale' status.
#[test]
fn overseer_stale_detection() {
let spawner = sp_core::testing::TaskExecutor::new();
executor::block_on(async move {
let a1_hash = [1; 32].into();
let b1_hash = [2; 32].into();
let a2_hash = [3; 32].into();
let b2_hash = [4; 32].into();
let first_block = BlockInfo {
hash: a1_hash,
parent_hash: [0; 32].into(),
number: 1,
};
let second_block = BlockInfo {
hash: b1_hash,
parent_hash: [0; 32].into(),
number: 1,
};
let third_block = BlockInfo {
hash: a2_hash,
parent_hash: a1_hash,
number: 2,
};
let fourth_block = BlockInfo {
hash: b2_hash,
parent_hash: b1_hash,
number: 2,
};
let (tx_5, mut rx_5) = metered::channel(64);
let (tx_6, mut rx_6) = metered::channel(64);
let all_subsystems = AllSubsystems::<()>::dummy()
.replace_candidate_validation(TestSubsystem5(tx_5))
.replace_candidate_backing(TestSubsystem6(tx_6));
let (overseer, mut handler) = Overseer::new(
vec![first_block.clone()],
all_subsystems,
None,
MockSupportsParachains,
spawner,
).unwrap();
let overseer_fut = overseer.run().fuse();
pin_mut!(overseer_fut);
let mut ss5_results = Vec::new();
let mut ss6_results = Vec::new();
handler.block_imported(second_block.clone()).await;
// import the second block of each chain to deactivate the heads.
handler.block_imported(third_block).await;
handler.block_imported(fourth_block).await;
// import the first blocks again (emulating a revert)
handler.block_imported(first_block).await;
handler.block_imported(second_block).await;
let expected_heartbeats = vec![
OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(ActivatedLeaf {
hash: a1_hash,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
})),
OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(ActivatedLeaf {
hash: b1_hash,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
})),
OverseerSignal::ActiveLeaves(ActiveLeavesUpdate {
activated: [ActivatedLeaf {
hash: a2_hash,
number: 2,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}].as_ref().into(),
deactivated: [a1_hash].as_ref().into(),
}),
OverseerSignal::ActiveLeaves(ActiveLeavesUpdate {
activated: [ActivatedLeaf {
hash: b2_hash,
number: 2,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}].as_ref().into(),
deactivated: [b1_hash].as_ref().into(),
}),
OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(ActivatedLeaf {
hash: a1_hash,
number: 1,
status: LeafStatus::Stale,
span: Arc::new(jaeger::Span::Disabled),
})),
OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(ActivatedLeaf {
hash: b1_hash,
number: 1,
status: LeafStatus::Stale,
span: Arc::new(jaeger::Span::Disabled),
})),
];
loop {
select! {
res = overseer_fut => {
assert!(res.is_ok());
break;
},
res = rx_5.next() => {
if let Some(res) = res {
ss5_results.push(res);
}
}
res = rx_6.next() => {
if let Some(res) = res {
ss6_results.push(res);
}
}
complete => break,
}
if ss5_results.len() == expected_heartbeats.len() &&
ss6_results.len() == expected_heartbeats.len() {
handler.stop().await;
}
}
assert_eq!(ss5_results, expected_heartbeats);
assert_eq!(ss6_results, expected_heartbeats);
});
}
#[derive(Clone)]
struct CounterSubsystem {
stop_signals_received: Arc<atomic::AtomicUsize>,
+3 -1
View File
@@ -868,7 +868,7 @@ mod tests {
use polkadot_node_jaeger as jaeger;
use polkadot_node_subsystem::{
messages::{AllMessages, CandidateSelectionMessage}, ActiveLeavesUpdate, FromOverseer, OverseerSignal,
SpawnedSubsystem, ActivatedLeaf,
SpawnedSubsystem, ActivatedLeaf, LeafStatus,
};
use assert_matches::assert_matches;
use futures::{channel::mpsc, executor, StreamExt, future, Future, FutureExt, SinkExt};
@@ -998,6 +998,7 @@ mod tests {
ActiveLeavesUpdate::start_work(ActivatedLeaf {
hash: relay_parent,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}),
)))
@@ -1028,6 +1029,7 @@ mod tests {
ActiveLeavesUpdate::start_work(ActivatedLeaf {
hash: relay_parent,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}),
)))
+32
View File
@@ -46,6 +46,36 @@ use self::messages::AllMessages;
/// If there are greater than this number of slots, then we fall back to a heap vector.
const ACTIVE_LEAVES_SMALLVEC_CAPACITY: usize = 8;
/// The status of an activated leaf.
#[derive(Debug, Clone)]
pub enum LeafStatus {
/// A leaf is fresh when it's the first time the leaf has been encountered.
/// Most leaves should be fresh.
Fresh,
/// A leaf is stale when it's encountered for a subsequent time. This will happen
/// when the chain is reverted or the fork-choice rule abandons some chain.
Stale,
}
impl LeafStatus {
/// Returns a bool indicating fresh status.
pub fn is_fresh(&self) -> bool {
match *self {
LeafStatus::Fresh => true,
LeafStatus::Stale => false,
}
}
/// Returns a bool indicating stale status.
pub fn is_stale(&self) -> bool {
match *self {
LeafStatus::Fresh => false,
LeafStatus::Stale => true,
}
}
}
/// Activated leaf.
#[derive(Debug, Clone)]
pub struct ActivatedLeaf {
@@ -53,6 +83,8 @@ pub struct ActivatedLeaf {
pub hash: Hash,
/// The block number.
pub number: BlockNumber,
/// The status of the leaf.
pub status: LeafStatus,
/// An associated [`jaeger::Span`].
///
/// NOTE: Each span should only be kept active as long as the leaf is considered active and should be dropped
@@ -170,7 +170,7 @@ On receiving an `OverseerSignal::BlockFinalized(h)`, we fetch the block number `
#### `OverseerSignal::ActiveLeavesUpdate`
On receiving an `OverseerSignal::ActiveLeavesUpdate(update)`:
* We determine the set of new blocks that were not in our previous view. This is done by querying the ancestry of all new items in the view and contrasting against the stored `BlockNumber`s. Typically, there will be only one new block. We fetch the headers and information on these blocks from the ChainApi subsystem.
* We determine the set of new blocks that were not in our previous view. This is done by querying the ancestry of all new items in the view and contrasting against the stored `BlockNumber`s. Typically, there will be only one new block. We fetch the headers and information on these blocks from the ChainApi subsystem. Stale leaves in the update can be ignored.
* We update the `StoredBlockRange` and the `BlockNumber` maps.
* We use the RuntimeApiSubsystem to determine information about these blocks. It is generally safe to assume that runtime state is available for recent, unfinalized blocks. In the case that it isn't, it means that we are catching up to the head of the chain and needn't worry about assignments to those blocks anyway, as the security assumption of the protocol tolerates nodes being temporarily offline or out-of-date.
* We fetch the set of candidates included by each block by dispatching a `RuntimeApiRequest::CandidateEvents` and checking the `CandidateIncluded` events.
@@ -15,7 +15,7 @@ Output:
## Functionality
Upon receipt of an `ActiveLeavesUpdate`, launch bitfield signing job for each `activated` head. Stop the job for each `deactivated` head.
Upon receipt of an `ActiveLeavesUpdate`, launch bitfield signing job for each `activated` head referring to a fresh leaf. Stop the job for each `deactivated` head.
## Bitfield Signing Job
@@ -32,7 +32,7 @@ The subsystem should maintain a set of handles to Candidate Backing Jobs that ar
### On Overseer Signal
* If the signal is an [`OverseerSignal`][OverseerSignal]`::ActiveLeavesUpdate`:
* spawn a Candidate Backing Job for each `activated` head, storing a bidirectional channel with the Candidate Backing Job in the set of handles.
* spawn a Candidate Backing Job for each `activated` head referring to a fresh leaf, storing a bidirectional channel with the Candidate Backing Job in the set of handles.
* cease the Candidate Backing Job for each `deactivated` head, if any.
* If the signal is an [`OverseerSignal`][OverseerSignal]`::Conclude`: Forward conclude messages to all jobs, wait a small amount of time for them to join, and then exit.
@@ -26,6 +26,8 @@ The hierarchy of subsystems:
The overseer determines work to do based on block import events and block finalization events. It does this by keeping track of the set of relay-parents for which work is currently being done. This is known as the "active leaves" set. It determines an initial set of active leaves on startup based on the data on-disk, and uses events about blockchain import to update the active leaves. Updates lead to [`OverseerSignal`](../types/overseer-protocol.md#overseer-signal)`::ActiveLeavesUpdate` being sent according to new relay-parents, as well as relay-parents to stop considering. Block import events inform the overseer of leaves that no longer need to be built on, now that they have children, and inform us to begin building on those children. Block finalization events inform us when we can stop focusing on blocks that appear to have been orphaned.
The overseer is also responsible for tracking the freshness of active leaves. Leaves are fresh when they're encountered for the first time, and stale when they're encountered for subsequent times. This can occur after chain reversions or when the fork-choice rule abandons some chain. This distinction is used to manage **Reversion Safety**. Consensus messages are often localized to a specific relay-parent, and it is often a misbehavior to equivocate or sign two conflicting messages. When reverting the chain, we may begin work on a leaf that subsystems have already signed messages for. Subsystems which need to account for reversion safety should avoid performing work on stale leaves.
The overseer's logic can be described with these functions:
## On Startup
@@ -38,6 +40,7 @@ The overseer's logic can be described with these functions:
## On Block Import Event
* Apply the block import event to the active leaves. A new block should lead to its addition to the active leaves set and its parent being deactivated.
* Mark any stale leaves as stale. The overseer should track all leaves it activates to determine whether leaves are fresh or stale.
* Send an `OverseerSignal::ActiveLeavesUpdate` message to all subsystems containing all activated and deactivated leaves.
* Ensure all `ActiveLeavesUpdate` messages are flushed before resuming activity as a message router.
@@ -29,8 +29,18 @@ Either way, there will be some top-level type encapsulating messages from the ov
Indicates a change in active leaves. Activated leaves should have jobs, whereas deactivated leaves should lead to winding-down of work based on those leaves.
```rust
enum LeafStatus {
// A leaf is fresh when it's the first time the leaf has been encountered.
// Most leaves should be fresh.
Fresh,
// A leaf is stale when it's encountered for a subsequent time. This will
// happen when the chain is reverted or the fork-choice rule abandons some
// chain.
Stale,
}
struct ActiveLeavesUpdate {
activated: [(Hash, Number)], // in practice, these should probably be a SmallVec
activated: [(Hash, Number, LeafStatus)], // in practice, these should probably be a SmallVec
deactivated: [Hash],
}
```