Reversion Safety tools for overseer and subsystems (#3104)

* guide: reversion safety

* guide: manage reversion safety in subsystems

* add leaf status to ActivatedLeaf

* add an LRU cache to overseer for staleness detection

* update ActivatedLeaf usages in tests to contain status field

* add variant where it was accidentally missed

* add some helpers to LeafStatus

* address grumbles
This commit is contained in:
Robert Habermeier
2021-05-31 13:54:05 -05:00
committed by GitHub
parent 6b166a7a1f
commit 963993d288
18 changed files with 270 additions and 20 deletions
@@ -29,7 +29,8 @@ use sc_network as network;
use sc_network::IfDisconnected;
use sc_network::config as netconfig;
use polkadot_subsystem::{ActiveLeavesUpdate, FromOverseer, OverseerSignal, ActivatedLeaf,
use polkadot_subsystem::{
ActiveLeavesUpdate, FromOverseer, OverseerSignal, ActivatedLeaf, LeafStatus,
messages::{
AllMessages, AvailabilityDistributionMessage, AvailabilityStoreMessage, NetworkBridgeMessage,
RuntimeApiMessage, RuntimeApiRequest,
@@ -173,6 +174,7 @@ impl TestState {
activated: smallvec![ActivatedLeaf {
hash: new.clone(),
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}],
deactivated: smallvec![old.clone()],
@@ -33,7 +33,9 @@ use polkadot_node_primitives::{PoV, BlockData};
use polkadot_erasure_coding::{branches, obtain_chunks_v1 as obtain_chunks};
use polkadot_node_subsystem_util::TimeoutExt;
use polkadot_subsystem_testhelpers as test_helpers;
use polkadot_subsystem::{messages::{RuntimeApiMessage, RuntimeApiRequest}, jaeger, ActivatedLeaf};
use polkadot_subsystem::{
messages::{RuntimeApiMessage, RuntimeApiRequest}, jaeger, ActivatedLeaf, LeafStatus,
};
type VirtualOverseer = test_helpers::TestSubsystemContextHandle<AvailabilityRecoveryMessage>;
@@ -448,6 +450,7 @@ fn availability_is_recovered_from_chunks_if_no_group_provided() {
activated: smallvec![ActivatedLeaf {
hash: test_state.current.clone(),
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}],
deactivated: smallvec![],
@@ -529,6 +532,7 @@ fn availability_is_recovered_from_chunks_even_if_backing_group_supplied_if_chunk
activated: smallvec![ActivatedLeaf {
hash: test_state.current.clone(),
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}],
deactivated: smallvec![],
@@ -610,6 +614,7 @@ fn bad_merkle_path_leads_to_recovery_error() {
activated: smallvec![ActivatedLeaf {
hash: test_state.current.clone(),
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}],
deactivated: smallvec![],
@@ -666,6 +671,7 @@ fn wrong_chunk_index_leads_to_recovery_error() {
activated: smallvec![ActivatedLeaf {
hash: test_state.current.clone(),
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}],
deactivated: smallvec![],
@@ -739,6 +745,7 @@ fn invalid_erasure_coding_leads_to_invalid_error() {
activated: smallvec![ActivatedLeaf {
hash: test_state.current.clone(),
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}],
deactivated: smallvec![],
@@ -786,6 +793,7 @@ fn fast_path_backing_group_recovers() {
activated: smallvec![ActivatedLeaf {
hash: test_state.current.clone(),
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}],
deactivated: smallvec![],
@@ -838,6 +846,7 @@ fn no_answers_in_fast_path_causes_chunk_requests() {
activated: smallvec![ActivatedLeaf {
hash: test_state.current.clone(),
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}],
deactivated: smallvec![],
@@ -900,6 +909,7 @@ fn task_canceled_when_receivers_dropped() {
activated: smallvec![ActivatedLeaf {
hash: test_state.current.clone(),
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}],
deactivated: smallvec![],
@@ -942,6 +952,7 @@ fn chunks_retry_until_all_nodes_respond() {
activated: smallvec![ActivatedLeaf {
hash: test_state.current.clone(),
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}],
deactivated: smallvec![],
@@ -1000,6 +1011,7 @@ fn returns_early_if_we_have_the_data() {
activated: smallvec![ActivatedLeaf {
hash: test_state.current.clone(),
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}],
deactivated: smallvec![],
@@ -1037,6 +1049,7 @@ fn does_not_query_local_validator() {
activated: smallvec![ActivatedLeaf {
hash: test_state.current.clone(),
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}],
deactivated: smallvec![],
+9 -1
View File
@@ -1157,7 +1157,7 @@ mod tests {
use sc_network::{Event as NetworkEvent, IfDisconnected};
use polkadot_subsystem::{jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal};
use polkadot_subsystem::{jaeger, ActiveLeavesUpdate, FromOverseer, OverseerSignal, LeafStatus};
use polkadot_subsystem::messages::{
ApprovalDistributionMessage,
BitfieldDistributionMessage,
@@ -1471,6 +1471,7 @@ mod tests {
ActiveLeavesUpdate::start_work(ActivatedLeaf {
hash: head,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
})
))
@@ -1568,6 +1569,7 @@ mod tests {
ActiveLeavesUpdate::start_work(ActivatedLeaf {
hash: hash_a,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
})
))
@@ -1652,6 +1654,7 @@ mod tests {
ActiveLeavesUpdate::start_work(ActivatedLeaf {
hash: hash_a,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
})
))
@@ -1667,6 +1670,7 @@ mod tests {
ActiveLeavesUpdate::start_work(ActivatedLeaf {
hash: hash_b,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
})
))
@@ -1739,6 +1743,7 @@ mod tests {
ActiveLeavesUpdate::start_work(ActivatedLeaf {
hash: hash_a,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
})
))
@@ -1956,6 +1961,7 @@ mod tests {
ActiveLeavesUpdate::start_work(ActivatedLeaf {
hash: hash_a,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
})
))
@@ -2171,6 +2177,7 @@ mod tests {
ActiveLeavesUpdate::start_work(ActivatedLeaf {
hash: hash_b,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
})
))
@@ -2400,6 +2407,7 @@ mod tests {
activated: hashes.enumerate().map(|(i, h)| ActivatedLeaf {
hash: h,
number: i as _,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}).rev().collect(),
deactivated: Default::default(),
@@ -921,7 +921,7 @@ mod tests {
use polkadot_subsystem::{
jaeger,
messages::{RuntimeApiMessage, RuntimeApiRequest},
ActiveLeavesUpdate, ActivatedLeaf,
ActiveLeavesUpdate, ActivatedLeaf, LeafStatus,
};
use polkadot_subsystem_testhelpers as test_helpers;
@@ -1159,6 +1159,7 @@ mod tests {
activated: vec![ActivatedLeaf {
hash: test_state.relay_parent,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}].into(),
deactivated: [][..].into(),
@@ -18,7 +18,7 @@
use super::*;
use polkadot_node_subsystem::{
jaeger, ActivatedLeaf,
jaeger, ActivatedLeaf, LeafStatus,
messages::{RuntimeApiMessage, RuntimeApiRequest},
};
use polkadot_node_subsystem_test_helpers as test_helpers;
@@ -73,6 +73,7 @@ async fn overseer_signal_active_leaves(
let leaf = ActivatedLeaf {
hash: leaf,
number: 0xdeadcafe,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
};
overseer
@@ -903,7 +903,7 @@ async fn circulate_statement_and_dependents(
fn statement_message(relay_parent: Hash, statement: SignedFullStatement)
-> protocol_v1::ValidationProtocol
{
{
let msg = if is_statement_large(&statement) {
protocol_v1::StatementDistributionMessage::LargeStatement(
StatementMetadata {
@@ -1198,7 +1198,7 @@ async fn retrieve_statement_from_message<'a>(
).await {
vacant.insert(new_status);
}
}
}
protocol_v1::StatementDistributionMessage::Statement(_, s) => {
// No fetch in progress, safe to return any statement immediately (we don't bother
// about normal network jitter which might cause `Valid` statements to arrive early
@@ -1594,7 +1594,7 @@ impl StatementDistribution {
match result {
Ok(true) => break,
Ok(false) => {}
Err(Error(Fault::Fatal(f))) => return Err(f),
Err(Error(Fault::Fatal(f))) => return Err(f),
Err(Error(Fault::Err(error))) =>
tracing::debug!(target: LOG_TARGET, ?error)
}
@@ -2072,7 +2072,9 @@ mod tests {
use sp_keystore::{CryptoStore, SyncCryptoStorePtr, SyncCryptoStore};
use sc_keystore::LocalKeystore;
use polkadot_node_network_protocol::{view, ObservedRole, request_response::Recipient};
use polkadot_subsystem::{jaeger, ActivatedLeaf, messages::{RuntimeApiMessage, RuntimeApiRequest}};
use polkadot_subsystem::{
jaeger, ActivatedLeaf, messages::{RuntimeApiMessage, RuntimeApiRequest}, LeafStatus,
};
use polkadot_node_network_protocol::request_response::{
Requests,
v1::{
@@ -2690,6 +2692,7 @@ mod tests {
activated: vec![ActivatedLeaf {
hash: hash_a,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}].into(),
deactivated: vec![].into(),
@@ -2865,6 +2868,7 @@ mod tests {
activated: vec![ActivatedLeaf {
hash: hash_a,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}].into(),
deactivated: vec![].into(),
@@ -3336,6 +3340,7 @@ mod tests {
activated: vec![ActivatedLeaf {
hash: hash_a,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}].into(),
deactivated: vec![].into(),
@@ -3591,6 +3596,7 @@ mod tests {
activated: vec![ActivatedLeaf {
hash: hash_a,
number: 1,
status: LeafStatus::Fresh,
span: Arc::new(jaeger::Span::Disabled),
}].into(),
deactivated: vec![].into(),
@@ -3634,7 +3640,7 @@ mod tests {
NetworkBridgeEvent::PeerViewChange(peer_a.clone(), view![hash_a])
)
}).await;
// receive a seconded statement from peer A.
let statement = {
let signing_context = SigningContext {