Retry availability until the receiver of the request is dropped (#2763)

* guide updates

* keep interactions alive until receivers drop

* retry indefinitely

* cancel approval tasks on finality

* use swap_remove instead of remove
This commit is contained in:
Robert Habermeier
2021-03-30 17:33:38 +02:00
committed by GitHub
parent 6514e00144
commit 08d5b268a0
6 changed files with 341 additions and 266 deletions
@@ -141,11 +141,19 @@ async fn overseer_recv(
use sp_keyring::Sr25519Keyring;
/// How a mocked validator answers a data request in these tests.
// NOTE(review): two `derive` attributes appear back to back below; this looks like the old and
// the new attribute line of a diff rendered together — confirm which one the real file keeps.
#[derive(Debug, Clone)]
#[derive(Debug)]
enum Has {
/// The validator answers, but without the requested data (mapped to `Ok(None)` by the helpers).
No,
/// The validator answers with the requested data (mapped to `Ok(Some(..))` by the helpers).
Yes,
/// The request fails with a network timeout (`RequestFailure::Network(OutboundFailure::Timeout)`).
Timeout,
/// The request fails with the wrapped network error, which the helpers pass through as `Err`.
NetworkError(sc_network::RequestFailure),
}
impl Has {
    /// Builds the `Has::NetworkError` value that represents an outbound request timing out.
    fn timeout() -> Self {
        let failure = sc_network::RequestFailure::Network(sc_network::OutboundFailure::Timeout);
        Self::NetworkError(failure)
    }
}
#[derive(Clone)]
@@ -172,18 +180,6 @@ impl TestState {
self.validators.len() - self.threshold() + 1
}
/// Returns one `Has::Yes` entry per validator in this test state.
fn all_have(&self) -> Vec<Has> {
    let validator_count = self.validators.len();
    std::iter::repeat_with(|| Has::Yes).take(validator_count).collect()
}
/// Returns one `Has::No` entry per validator in this test state.
///
/// Callers use this as the "nobody has the data" baseline and then overwrite individual
/// entries, so every slot must start out as `Has::No` rather than `Has::Yes`.
fn all_dont_have(&self) -> Vec<Has> {
    (0..self.validators.len()).map(|_| Has::No).collect()
}
/// Returns one `Has::Timeout` entry per validator in this test state.
fn all_timeout(&self) -> Vec<Has> {
    let mut answers = Vec::new();
    answers.resize_with(self.validators.len(), || Has::Timeout);
    answers
}
async fn test_runtime_api(
&self,
virtual_overseer: &mut VirtualOverseer,
@@ -216,7 +212,7 @@ impl TestState {
candidate_hash: CandidateHash,
virtual_overseer: &mut VirtualOverseer,
n: usize,
who_has: &[Has],
who_has: impl Fn(usize) -> Has,
) {
// arbitrary order.
for _ in 0..n {
@@ -237,14 +233,10 @@ impl TestState {
assert_eq!(req.payload.candidate_hash, candidate_hash);
let validator_index = req.payload.index.0 as usize;
let available_data = match who_has[validator_index] {
let available_data = match who_has(validator_index) {
Has::No => Ok(None),
Has::Yes => Ok(Some(self.chunks[validator_index].clone().into())),
Has::Timeout => {
Err(sc_network::RequestFailure::Network(
sc_network::OutboundFailure::Timeout
))
}
Has::NetworkError(e) => Err(e),
};
let _ = req.pending_response.send(
@@ -263,7 +255,7 @@ impl TestState {
&self,
candidate_hash: CandidateHash,
virtual_overseer: &mut VirtualOverseer,
who_has: &[Has],
who_has: impl Fn(usize) -> Has,
) {
for _ in 0..self.validators.len() {
// Receive a request for a chunk.
@@ -286,27 +278,21 @@ impl TestState {
.position(|a| Recipient::Authority(a.clone()) == req.peer)
.unwrap();
let available_data = match who_has[validator_index] {
let available_data = match who_has(validator_index) {
Has::No => Ok(None),
Has::Yes => Ok(Some(self.available_data.clone())),
Has::Timeout => {
Err(sc_network::RequestFailure::Network(
sc_network::OutboundFailure::Timeout
))
}
Has::NetworkError(e) => Err(e),
};
let done = available_data.as_ref().ok().map_or(false, |x| x.is_some());
let _ = req.pending_response.send(
available_data.map(|r|
req_res::v1::AvailableDataFetchingResponse::from(r).encode()
)
);
match who_has[validator_index].clone() {
Has::Yes => break, // done
Has::No => {}
Has::Timeout => {}
}
if done { break }
}
)
}
@@ -448,7 +434,7 @@ fn availability_is_recovered_from_chunks_if_no_group_provided() {
candidate_hash,
&mut virtual_overseer,
test_state.threshold(),
&test_state.all_have(),
|_| Has::Yes,
).await;
// Recovered data should match the original one.
@@ -477,7 +463,7 @@ fn availability_is_recovered_from_chunks_if_no_group_provided() {
new_candidate.hash(),
&mut virtual_overseer,
test_state.impossibility_threshold(),
&test_state.all_dont_have(),
|_| Has::No,
).await;
// A request times out with `Unavailable` error.
@@ -524,7 +510,7 @@ fn availability_is_recovered_from_chunks_even_if_backing_group_supplied_if_chunk
candidate_hash,
&mut virtual_overseer,
test_state.threshold(),
&test_state.all_have(),
|_| Has::Yes,
).await;
// Recovered data should match the original one.
@@ -553,7 +539,7 @@ fn availability_is_recovered_from_chunks_even_if_backing_group_supplied_if_chunk
new_candidate.hash(),
&mut virtual_overseer,
test_state.impossibility_threshold(),
&test_state.all_dont_have(),
|_| Has::No,
).await;
// A request times out with `Unavailable` error.
@@ -607,7 +593,7 @@ fn bad_merkle_path_leads_to_recovery_error() {
candidate_hash,
&mut virtual_overseer,
test_state.impossibility_threshold(),
&test_state.all_have(),
|_| Has::Yes,
).await;
// A request times out with `Unavailable` error.
@@ -656,14 +642,11 @@ fn wrong_chunk_index_leads_to_recovery_error() {
test_state.chunks[3] = test_state.chunks[0].clone();
test_state.chunks[4] = test_state.chunks[0].clone();
let mut have = test_state.all_dont_have();
have[0] = Has::No;
test_state.test_chunk_requests(
candidate_hash,
&mut virtual_overseer,
test_state.impossibility_threshold(),
&have,
|_| Has::No,
).await;
// A request times out with `Unavailable` error as there are no good peers.
@@ -726,7 +709,7 @@ fn invalid_erasure_coding_leads_to_invalid_error() {
candidate_hash,
&mut virtual_overseer,
test_state.threshold(),
&test_state.all_have(),
|_| Has::Yes,
).await;
// f+1 'valid' chunks can't produce correct data.
@@ -769,13 +752,15 @@ fn fast_path_backing_group_recovers() {
let candidate_hash = test_state.candidate.hash();
let mut who_has = test_state.all_dont_have();
who_has[3] = Has::Yes;
let who_has = |i| match i {
3 => Has::Yes,
_ => Has::No,
};
test_state.test_full_data_requests(
candidate_hash,
&mut virtual_overseer,
&who_has,
who_has,
).await;
// Recovered data should match the original one.
@@ -819,24 +804,124 @@ fn no_answers_in_fast_path_causes_chunk_requests() {
let candidate_hash = test_state.candidate.hash();
// mix of timeout and no.
let mut who_has = test_state.all_timeout();
who_has[0] = Has::No;
who_has[3] = Has::No;
let who_has = |i| match i {
0 | 3 => Has::No,
_ => Has::timeout(),
};
test_state.test_full_data_requests(
candidate_hash,
&mut virtual_overseer,
&who_has,
who_has,
).await;
test_state.test_chunk_requests(
candidate_hash,
&mut virtual_overseer,
test_state.threshold(),
&test_state.all_have(),
|_| Has::Yes,
).await;
// Recovered data should match the original one.
assert_eq!(rx.await.unwrap().unwrap(), test_state.available_data);
});
}
/// Checks that a recovery whose response receiver is already gone does not keep going forever.
///
/// The receiving half of the oneshot channel is discarded immediately, so nobody is waiting for
/// the result. The test succeeds as soon as the overseer side yields no message, and fails if one
/// message per validator arrives without that ever happening.
#[test]
fn task_canceled_when_receivers_dropped() {
    let test_state = TestState::default();

    test_harness_chunks_only(|test_harness| async move {
        let TestHarness { mut virtual_overseer } = test_harness;

        let leaves_update = ActiveLeavesUpdate {
            activated: smallvec![ActivatedLeaf {
                hash: test_state.current.clone(),
                number: 1,
                span: Arc::new(jaeger::Span::Disabled),
            }],
            deactivated: smallvec![],
        };
        overseer_signal(
            &mut virtual_overseer,
            OverseerSignal::ActiveLeaves(leaves_update),
        ).await;

        // Only the sender is kept; the receiver is dropped right here.
        let (tx, _) = oneshot::channel();
        let recovery_request = AvailabilityRecoveryMessage::RecoverAvailableData(
            test_state.candidate.clone(),
            test_state.session_index,
            None,
            tx,
        );
        overseer_send(&mut virtual_overseer, recovery_request).await;

        test_state.test_runtime_api(&mut virtual_overseer).await;

        for _ in 0..test_state.validators.len() {
            // `None` (presumably: nothing arrived within `TIMEOUT`) means the task went quiet.
            if virtual_overseer.recv().timeout(TIMEOUT).await.is_none() {
                return;
            }
        }

        panic!("task requested all validators without concluding")
    });
}
/// Checks that chunk recovery asks again after a full round of network timeouts.
///
/// First one request per validator is answered with a timeout. Further requests are then
/// expected, and once `impossibility_threshold()` of them are answered with "no data" the
/// recovery must resolve to `RecoveryError::Unavailable`.
#[test]
fn chunks_retry_until_all_nodes_respond() {
    let test_state = TestState::default();

    test_harness_chunks_only(|test_harness| async move {
        let TestHarness { mut virtual_overseer } = test_harness;

        overseer_signal(
            &mut virtual_overseer,
            OverseerSignal::ActiveLeaves(ActiveLeavesUpdate {
                activated: smallvec![ActivatedLeaf {
                    hash: test_state.current.clone(),
                    number: 1,
                    span: Arc::new(jaeger::Span::Disabled),
                }],
                deactivated: smallvec![],
            }),
        ).await;

        let (tx, rx) = oneshot::channel();

        overseer_send(
            &mut virtual_overseer,
            AvailabilityRecoveryMessage::RecoverAvailableData(
                test_state.candidate.clone(),
                test_state.session_index,
                Some(GroupIndex(0)),
                tx,
            )
        ).await;

        test_state.test_runtime_api(&mut virtual_overseer).await;

        let candidate_hash = test_state.candidate.hash();

        // Round one: a request for every validator, each failing with a network timeout.
        test_state.test_chunk_requests(
            candidate_hash,
            &mut virtual_overseer,
            test_state.validators.len(),
            |_| Has::timeout(),
        ).await;

        // we get to go another round!
        test_state.test_chunk_requests(
            candidate_hash,
            &mut virtual_overseer,
            test_state.impossibility_threshold(),
            |_| Has::No,
        ).await;

        // With that many validators reporting no chunk, recovery must fail as unavailable.
        assert_eq!(rx.await.unwrap().unwrap_err(), RecoveryError::Unavailable);
    });
}