A fast-path for requesting AvailableData from backing validators (#2453)

* guide changes for a fast-path requesting from backing validators

* add backing group to availability recovery message

* add new phase to interaction

* typos

* add full data messages

* handle new network messages

* dispatch full data requests

* cleanup

* check chunk index

* test for invalid recovery

* tests

* Typos.

* fix some grumbles

* be more explicit about error handling and control flow

* fast-path param

* use with_chunks_only in Service

Co-authored-by: Robert Klotzner <robert.klotzner@gmx.at>
This commit is contained in:
Robert Habermeier
2021-02-17 13:51:50 -06:00
committed by GitHub
parent 4a5e5f13ae
commit b7aac51341
11 changed files with 950 additions and 196 deletions
@@ -38,7 +38,7 @@ struct TestHarness {
virtual_overseer: VirtualOverseer,
}
fn test_harness<T: Future<Output = ()>>(
fn test_harness_fast_path<T: Future<Output = ()>>(
test: impl FnOnce(TestHarness) -> T,
) {
let _ = env_logger::builder()
@@ -53,7 +53,33 @@ fn test_harness<T: Future<Output = ()>>(
let (context, virtual_overseer) = test_helpers::make_subsystem_context(pool.clone());
let subsystem = AvailabilityRecoverySubsystem::new();
let subsystem = AvailabilityRecoverySubsystem::with_fast_path();
let subsystem = subsystem.run(context);
let test_fut = test(TestHarness { virtual_overseer });
futures::pin_mut!(test_fut);
futures::pin_mut!(subsystem);
executor::block_on(future::select(test_fut, subsystem));
}
fn test_harness_chunks_only<T: Future<Output = ()>>(
test: impl FnOnce(TestHarness) -> T,
) {
let _ = env_logger::builder()
.is_test(true)
.filter(
Some("polkadot_availability_recovery"),
log::LevelFilter::Trace,
)
.try_init();
let pool = sp_core::testing::TaskExecutor::new();
let (context, virtual_overseer) = test_helpers::make_subsystem_context(pool.clone());
let subsystem = AvailabilityRecoverySubsystem::with_chunks_only();
let subsystem = subsystem.run(context);
let test_fut = test(TestHarness { virtual_overseer });
@@ -112,6 +138,14 @@ async fn overseer_recv(
use sp_keyring::Sr25519Keyring;
/// Scripted reaction of a single validator to a `RequestFullData` message, as interpreted by
/// `TestState::test_full_data_requests`.
#[derive(Debug, Clone)]
enum HasAvailableData {
/// The validator answers with `FullData(request_id, None)`.
No,
/// The validator answers with the test state's own `available_data`; the helper stops
/// issuing further expectations after this answer.
Yes,
/// The validator sends no answer; the helper waits `FULL_DATA_REQUEST_TIMEOUT` instead.
Timeout,
/// The validator answers with the wrapped data; the helper then expects the peer to be
/// reported with `COST_INVALID_AVAILABLE_DATA`.
Other(AvailableData),
}
#[derive(Clone)]
struct TestState {
validators: Vec<Sr25519Keyring>,
@@ -149,21 +183,31 @@ impl TestState {
tx.send(Ok(Some(SessionInfo {
validators: self.validator_public.clone(),
discovery_keys: self.validator_authority_id.clone(),
// all validators in the same group.
validator_groups: vec![(0..self.validators.len()).map(|i| i as ValidatorIndex).collect()],
..Default::default()
}))).unwrap();
}
);
}
/// Runs `test_connect_to_validators` with a count equal to the number of entries in
/// `validator_public`.
async fn test_connect_to_all_validators(
    &self,
    virtual_overseer: &mut VirtualOverseer,
) {
    let validator_count = self.validator_public.len();
    self.test_connect_to_validators(virtual_overseer, validator_count).await;
}
async fn test_connect_to_validators(
&self,
virtual_overseer: &mut VirtualOverseer,
n: usize,
) {
// Channels by AuthorityDiscoveryId to send results to.
// Gather them here and send in batch after the loop not to race.
let mut results = HashMap::new();
for _ in 0..self.validator_public.len() {
for _ in 0..n {
// Connect to shuffled validators one by one.
assert_matches!(
overseer_recv(virtual_overseer).await,
@@ -305,6 +349,84 @@ impl TestState {
}
}
}
/// Plays the remote side of the full-data ("fast path") request phase.
///
/// For at most `validator_public.len()` rounds this expects one validator connection followed
/// by a `RequestFullData` wire message addressed to exactly one peer for `candidate_hash`.
/// The peer is mapped to its position in `validator_peer_id`, and `who_has` at that position
/// decides what happens next:
///
/// * `No` / `Yes` / `Other(_)`: a `FullData` answer is sent back from that peer.
/// * `Timeout`: nothing is sent and the helper waits `FULL_DATA_REQUEST_TIMEOUT`.
/// * `Other(_)`: additionally a `ReportPeer` with `COST_INVALID_AVAILABLE_DATA` is expected.
/// * `Yes`: the loop ends.
///
/// Panics if an unexpected message arrives, the peer is unknown, or `who_has` is too short.
async fn test_full_data_requests(
    &self,
    candidate_hash: CandidateHash,
    virtual_overseer: &mut VirtualOverseer,
    who_has: &[HasAvailableData],
) {
    for _ in 0..self.validator_public.len() {
        self.test_connect_to_validators(virtual_overseer, 1).await;

        // Receive a request for the full available data (not for a single chunk).
        assert_matches!(
            overseer_recv(virtual_overseer).await,
            AllMessages::NetworkBridge(
                NetworkBridgeMessage::SendValidationMessage(
                    peers,
                    protocol_v1::ValidationProtocol::AvailabilityRecovery(wire_message),
                )
            ) => {
                let (request_id, validator_index) = assert_matches!(
                    wire_message,
                    protocol_v1::AvailabilityRecoveryMessage::RequestFullData(
                        req_id,
                        requested_hash,
                    ) => {
                        assert_eq!(requested_hash, candidate_hash);
                        assert_eq!(peers.len(), 1);

                        let asked_peer = &peers[0];
                        let index = self.validator_peer_id
                            .iter()
                            .position(|known| known == asked_peer)
                            .unwrap();

                        (req_id, index)
                    }
                );

                // Scripted behaviour of the validator that was just asked.
                let policy = &who_has[validator_index];

                // Outer `None` means "stay silent"; inner `None` means "I don't have it".
                let reply = match policy {
                    HasAvailableData::No => Some(None),
                    HasAvailableData::Yes => Some(Some(self.available_data.clone())),
                    HasAvailableData::Timeout => None,
                    HasAvailableData::Other(substitute) => Some(Some(substitute.clone())),
                };

                if let Some(maybe_data) = reply {
                    let answer = protocol_v1::AvailabilityRecoveryMessage::FullData(
                        request_id,
                        maybe_data,
                    );
                    let event = NetworkBridgeEvent::PeerMessage(
                        self.validator_peer_id[validator_index].clone(),
                        answer,
                    );

                    overseer_send(
                        virtual_overseer,
                        AvailabilityRecoveryMessage::NetworkBridgeUpdateV1(event),
                    ).await;
                }

                match policy {
                    // The data was handed over, nothing more to expect.
                    HasAvailableData::Yes => break,
                    HasAvailableData::No => {}
                    HasAvailableData::Timeout => {
                        Delay::new(FULL_DATA_REQUEST_TIMEOUT).await;
                    }
                    HasAvailableData::Other(_) => {
                        // Data other than the test state's own is expected to cost the
                        // sender reputation.
                        assert_matches!(
                            overseer_recv(virtual_overseer).await,
                            AllMessages::NetworkBridge(
                                NetworkBridgeMessage::ReportPeer(
                                    reported_peer,
                                    reputation_change,
                                )
                            ) => {
                                assert_eq!(reported_peer, self.validator_peer_id[validator_index]);
                                assert_eq!(reputation_change, COST_INVALID_AVAILABLE_DATA);
                            }
                        );
                    }
                }
            }
        );
    }
}
}
@@ -319,8 +441,13 @@ fn validator_authority_id(val_ids: &[Sr25519Keyring]) -> Vec<AuthorityDiscoveryI
fn derive_erasure_chunks_with_proofs_and_root(
n_validators: usize,
available_data: &AvailableData,
alter_chunk: impl Fn(usize, &mut Vec<u8>),
) -> (Vec<ErasureChunk>, Hash) {
let chunks: Vec<Vec<u8>> = obtain_chunks(n_validators, available_data).unwrap();
let mut chunks: Vec<Vec<u8>> = obtain_chunks(n_validators, available_data).unwrap();
for (i, chunk) in chunks.iter_mut().enumerate() {
alter_chunk(i, chunk)
}
// create proofs for each erasure chunk
let branches = branches(chunks.as_ref());
@@ -379,6 +506,7 @@ impl Default for TestState {
let (chunks, erasure_root) = derive_erasure_chunks_with_proofs_and_root(
validators.len(),
&available_data,
|_, _| {},
);
candidate.descriptor.erasure_root = erasure_root;
@@ -400,10 +528,10 @@ impl Default for TestState {
}
#[test]
fn availability_is_recovered() {
fn availability_is_recovered_from_chunks_if_no_group_provided() {
let test_state = TestState::default();
test_harness(|test_harness| async move {
test_harness_fast_path(|test_harness| async move {
let TestHarness { mut virtual_overseer } = test_harness;
overseer_signal(
@@ -421,13 +549,14 @@ fn availability_is_recovered() {
AvailabilityRecoveryMessage::RecoverAvailableData(
test_state.candidate.clone(),
test_state.session_index,
None,
tx,
)
).await;
test_state.test_runtime_api(&mut virtual_overseer).await;
test_state.test_connect_to_validators(&mut virtual_overseer).await;
test_state.test_connect_to_all_validators(&mut virtual_overseer).await;
let candidate_hash = test_state.candidate.hash();
@@ -448,13 +577,14 @@ fn availability_is_recovered() {
AvailabilityRecoveryMessage::RecoverAvailableData(
new_candidate,
test_state.session_index,
None,
tx,
)
).await;
test_state.test_runtime_api(&mut virtual_overseer).await;
test_state.test_connect_to_validators(&mut virtual_overseer).await;
test_state.test_connect_to_all_validators(&mut virtual_overseer).await;
// A request times out with `Unavailable` error.
assert_eq!(rx.await.unwrap().unwrap_err(), RecoveryError::Unavailable);
@@ -462,10 +592,10 @@ fn availability_is_recovered() {
}
#[test]
fn a_faulty_chunk_leads_to_recovery_error() {
let mut test_state = TestState::default();
fn availability_is_recovered_from_chunks_even_if_backing_group_supplied_if_chunks_only() {
let test_state = TestState::default();
test_harness(|test_harness| async move {
test_harness_chunks_only(|test_harness| async move {
let TestHarness { mut virtual_overseer } = test_harness;
overseer_signal(
@@ -483,22 +613,90 @@ fn a_faulty_chunk_leads_to_recovery_error() {
AvailabilityRecoveryMessage::RecoverAvailableData(
test_state.candidate.clone(),
test_state.session_index,
Some(GroupIndex(0)),
tx,
)
).await;
test_state.test_runtime_api(&mut virtual_overseer).await;
test_state.test_connect_to_all_validators(&mut virtual_overseer).await;
let candidate_hash = test_state.candidate.hash();
test_state.test_chunk_requests(candidate_hash, &mut virtual_overseer).await;
// Recovered data should match the original one.
assert_eq!(rx.await.unwrap().unwrap(), test_state.available_data);
let (tx, rx) = oneshot::channel();
// Test another candidate, send no chunks.
let mut new_candidate = CandidateReceipt::default();
new_candidate.descriptor.relay_parent = test_state.candidate.descriptor.relay_parent;
overseer_send(
&mut virtual_overseer,
AvailabilityRecoveryMessage::RecoverAvailableData(
new_candidate,
test_state.session_index,
None,
tx,
)
).await;
test_state.test_runtime_api(&mut virtual_overseer).await;
test_state.test_connect_to_validators(&mut virtual_overseer).await;
test_state.test_connect_to_all_validators(&mut virtual_overseer).await;
// A request times out with `Unavailable` error.
assert_eq!(rx.await.unwrap().unwrap_err(), RecoveryError::Unavailable);
});
}
#[test]
fn bad_merkle_path_leads_to_recovery_error() {
let mut test_state = TestState::default();
test_harness_fast_path(|test_harness| async move {
let TestHarness { mut virtual_overseer } = test_harness;
overseer_signal(
&mut virtual_overseer,
OverseerSignal::ActiveLeaves(ActiveLeavesUpdate {
activated: smallvec![(test_state.current.clone(), Arc::new(JaegerSpan::Disabled))],
deactivated: smallvec![],
}),
).await;
let (tx, rx) = oneshot::channel();
overseer_send(
&mut virtual_overseer,
AvailabilityRecoveryMessage::RecoverAvailableData(
test_state.candidate.clone(),
test_state.session_index,
None,
tx,
)
).await;
test_state.test_runtime_api(&mut virtual_overseer).await;
test_state.test_connect_to_all_validators(&mut virtual_overseer).await;
let candidate_hash = test_state.candidate.hash();
// Create some faulty chunks.
test_state.chunks[0].chunk = vec![1; 32];
test_state.chunks[1].chunk = vec![2; 32];
test_state.chunks[0].chunk = vec![0; 32];
test_state.chunks[1].chunk = vec![1; 32];
test_state.chunks[2].chunk = vec![2; 32];
test_state.chunks[3].chunk = vec![3; 32];
let mut faulty = vec![false; test_state.chunks.len()];
faulty[0] = true;
faulty[1] = true;
faulty[2] = true;
faulty[3] = true;
test_state.test_faulty_chunk_requests(
candidate_hash,
@@ -507,15 +705,15 @@ fn a_faulty_chunk_leads_to_recovery_error() {
).await;
// A request times out with `Unavailable` error.
assert_eq!(rx.await.unwrap().unwrap_err(), RecoveryError::Invalid);
assert_eq!(rx.await.unwrap().unwrap_err(), RecoveryError::Unavailable);
});
}
#[test]
fn a_wrong_chunk_leads_to_recovery_error() {
fn wrong_chunk_index_leads_to_recovery_error() {
let mut test_state = TestState::default();
test_harness(|test_harness| async move {
test_harness_fast_path(|test_harness| async move {
let TestHarness { mut virtual_overseer } = test_harness;
overseer_signal(
@@ -533,23 +731,25 @@ fn a_wrong_chunk_leads_to_recovery_error() {
AvailabilityRecoveryMessage::RecoverAvailableData(
test_state.candidate.clone(),
test_state.session_index,
None,
tx,
)
).await;
test_state.test_runtime_api(&mut virtual_overseer).await;
test_state.test_connect_to_validators(&mut virtual_overseer).await;
test_state.test_connect_to_all_validators(&mut virtual_overseer).await;
let candidate_hash = test_state.candidate.hash();
// Send a wrong chunk so it passes proof check but fails to reconstruct.
// These chunks should fail the index check as they don't have the correct index for validator.
test_state.chunks[1] = test_state.chunks[0].clone();
test_state.chunks[2] = test_state.chunks[0].clone();
test_state.chunks[3] = test_state.chunks[0].clone();
test_state.chunks[4] = test_state.chunks[0].clone();
let faulty = vec![false; test_state.chunks.len()];
let mut faulty = vec![true; test_state.chunks.len()];
faulty[0] = false;
test_state.test_faulty_chunk_requests(
candidate_hash,
@@ -557,7 +757,206 @@ fn a_wrong_chunk_leads_to_recovery_error() {
&faulty,
).await;
// A request times out with `Unavailable` error.
// A request times out with `Unavailable` error as there are no good peers.
assert_eq!(rx.await.unwrap().unwrap_err(), RecoveryError::Unavailable);
});
}
/// Chunks that were overwritten before their merkle proofs and root were derived must make
/// recovery end with `RecoveryError::Invalid`.
#[test]
fn invalid_erasure_coding_leads_to_invalid_error() {
    let mut test_state = TestState::default();

    test_harness_fast_path(|harness| async move {
        let mut overseer = harness.virtual_overseer;

        // Derive chunks for some data, but replace every chunk with 32 filler bytes before
        // the proofs and the erasure root are computed over them.
        let source_data = AvailableData {
            validation_data: test_state.persisted_validation_data.clone(),
            pov: Arc::new(PoV { block_data: BlockData(vec![69; 64]) }),
        };
        let (garbage_chunks, garbage_root) = derive_erasure_chunks_with_proofs_and_root(
            test_state.chunks.len(),
            &source_data,
            |index, bytes| *bytes = vec![index as u8; 32],
        );

        test_state.chunks = garbage_chunks;
        test_state.candidate.descriptor.erasure_root = garbage_root;

        let candidate_hash = test_state.candidate.hash();

        let activation = ActiveLeavesUpdate {
            activated: smallvec![(test_state.current.clone(), Arc::new(JaegerSpan::Disabled))],
            deactivated: smallvec![],
        };
        overseer_signal(&mut overseer, OverseerSignal::ActiveLeaves(activation)).await;

        let (response_tx, response_rx) = oneshot::channel();
        let request = AvailabilityRecoveryMessage::RecoverAvailableData(
            test_state.candidate.clone(),
            test_state.session_index,
            None,
            response_tx,
        );
        overseer_send(&mut overseer, request).await;

        test_state.test_runtime_api(&mut overseer).await;
        test_state.test_connect_to_all_validators(&mut overseer).await;
        test_state.test_chunk_requests(candidate_hash, &mut overseer).await;

        // The request must be answered with the `Invalid` error (not `Unavailable`).
        let outcome = response_rx.await.unwrap();
        assert_eq!(outcome.unwrap_err(), RecoveryError::Invalid);
    });
}
/// With a backing group supplied and the fast path enabled, the data is recovered from a
/// full-data answer: only the validator at index 3 is scripted to have it.
#[test]
fn fast_path_backing_group_recovers() {
    let test_state = TestState::default();

    test_harness_fast_path(|harness| async move {
        let mut overseer = harness.virtual_overseer;

        let activation = ActiveLeavesUpdate {
            activated: smallvec![(test_state.current.clone(), Arc::new(JaegerSpan::Disabled))],
            deactivated: smallvec![],
        };
        overseer_signal(&mut overseer, OverseerSignal::ActiveLeaves(activation)).await;

        let (response_tx, response_rx) = oneshot::channel();
        let request = AvailabilityRecoveryMessage::RecoverAvailableData(
            test_state.candidate.clone(),
            test_state.session_index,
            Some(GroupIndex(0)),
            response_tx,
        );
        overseer_send(&mut overseer, request).await;

        test_state.test_runtime_api(&mut overseer).await;

        let candidate_hash = test_state.candidate.hash();

        // Everybody answers "don't have it" except validator 3.
        let mut who_has = vec![HasAvailableData::No; test_state.validators.len()];
        who_has[3] = HasAvailableData::Yes;

        test_state
            .test_full_data_requests(candidate_hash, &mut overseer, &who_has)
            .await;

        // Recovered data should match the original one.
        let recovered = response_rx.await.unwrap().unwrap();
        assert_eq!(recovered, test_state.available_data);
    });
}
/// Every validator answers the full-data request with data that differs from the test
/// state's own; the helper driving the exchange expects a peer report after each answer.
#[test]
fn wrong_data_from_fast_path_peer_leads_to_punishment() {
    let test_state = TestState::default();

    test_harness_fast_path(|harness| async move {
        let mut overseer = harness.virtual_overseer;

        let activation = ActiveLeavesUpdate {
            activated: smallvec![(test_state.current.clone(), Arc::new(JaegerSpan::Disabled))],
            deactivated: smallvec![],
        };
        overseer_signal(&mut overseer, OverseerSignal::ActiveLeaves(activation)).await;

        // The receiving half is never awaited, but it is bound to a name so that it stays
        // alive until the end of the test.
        let (response_tx, _response_rx) = oneshot::channel();
        let request = AvailabilityRecoveryMessage::RecoverAvailableData(
            test_state.candidate.clone(),
            test_state.session_index,
            Some(GroupIndex(0)),
            response_tx,
        );
        overseer_send(&mut overseer, request).await;

        test_state.test_runtime_api(&mut overseer).await;

        let candidate_hash = test_state.candidate.hash();

        // Same as the real data except for a different PoV.
        let bogus_data = AvailableData {
            pov: Arc::new(PoV { block_data: BlockData(vec![69; 420]) }),
            ..test_state.available_data.clone()
        };
        let who_has = vec![HasAvailableData::Other(bogus_data); test_state.validators.len()];

        // This function implicitly checks that each answering peer is punished.
        test_state
            .test_full_data_requests(candidate_hash, &mut overseer, &who_has)
            .await;
    });
}
/// When no validator hands over the full data (they either stay silent or say they don't
/// have it), the chunk requests that follow must still recover the data.
#[test]
fn no_answers_in_fast_path_causes_chunk_requests() {
    let test_state = TestState::default();

    test_harness_fast_path(|harness| async move {
        let mut overseer = harness.virtual_overseer;

        let activation = ActiveLeavesUpdate {
            activated: smallvec![(test_state.current.clone(), Arc::new(JaegerSpan::Disabled))],
            deactivated: smallvec![],
        };
        overseer_signal(&mut overseer, OverseerSignal::ActiveLeaves(activation)).await;

        let (response_tx, response_rx) = oneshot::channel();
        let request = AvailabilityRecoveryMessage::RecoverAvailableData(
            test_state.candidate.clone(),
            test_state.session_index,
            Some(GroupIndex(0)),
            response_tx,
        );
        overseer_send(&mut overseer, request).await;

        test_state.test_runtime_api(&mut overseer).await;

        let candidate_hash = test_state.candidate.hash();

        // Mix of timeout and no: validators 0 and 3 decline, everyone else stays silent.
        let mut who_has = vec![HasAvailableData::Timeout; test_state.validators.len()];
        who_has[0] = HasAvailableData::No;
        who_has[3] = HasAvailableData::No;

        test_state
            .test_full_data_requests(candidate_hash, &mut overseer, &who_has)
            .await;

        // After the full-data phase, the chunk phase is expected to run.
        test_state.test_connect_to_all_validators(&mut overseer).await;
        test_state.test_chunk_requests(candidate_hash, &mut overseer).await;

        // Recovered data should match the original one.
        let recovered = response_rx.await.unwrap().unwrap();
        assert_eq!(recovered, test_state.available_data);
    });
}