PoV Distribution optimization (#1990)

* Initial commit

* Remove unnecessary struct

* Some review nits

* Update node/network/pov-distribution/src/lib.rs

* Update parachain/test-parachains/adder/collator/tests/integration.rs

* Review nits

* notify_all_we_are_awaiting

* Both ways of peers connections should work the same

* Add mod-level docs to error.rs

* Avoid multiple connection requests at same parent

* Dont bail on errors

* FusedStream for ConnectionRequests

* Fix build after merge

* Improve error handling

* Remove whitespace formatting
This commit is contained in:
Fedor Sakharov
2020-11-25 12:20:54 +03:00
committed by GitHub
parent 51f6cb1979
commit 994d621f2c
6 changed files with 900 additions and 80 deletions
@@ -1,7 +1,21 @@
use super::*;
use futures::executor;
use polkadot_primitives::v1::BlockData;
use std::time::Duration;
use assert_matches::assert_matches;
use futures::executor;
use tracing::trace;
use smallvec::smallvec;
use sp_keyring::Sr25519Keyring;
use polkadot_primitives::v1::{
AuthorityDiscoveryId, BlockData, CoreState, GroupRotationInfo, Id as ParaId,
ScheduledCore, ValidatorIndex,
};
use polkadot_subsystem::messages::{RuntimeApiMessage, RuntimeApiRequest};
use polkadot_node_subsystem_test_helpers as test_helpers;
use polkadot_node_subsystem_util::TimeoutExt;
fn make_pov(data: Vec<u8>) -> PoV {
PoV { block_data: BlockData(data) }
@@ -15,6 +29,482 @@ fn make_peer_state(awaited: Vec<(Hash, Vec<Hash>)>)
}
}
fn validator_pubkeys(val_ids: &[Sr25519Keyring]) -> Vec<ValidatorId> {
val_ids.iter().map(|v| v.public().into()).collect()
}
fn validator_authority_id(val_ids: &[Sr25519Keyring]) -> Vec<AuthorityDiscoveryId> {
val_ids.iter().map(|v| v.public().into()).collect()
}
struct TestHarness {
virtual_overseer: test_helpers::TestSubsystemContextHandle<PoVDistributionMessage>,
}
fn test_harness<T: Future<Output = ()>>(
test: impl FnOnce(TestHarness) -> T,
) {
let _ = env_logger::builder()
.is_test(true)
.filter(
Some("polkadot_pov_distribution"),
log::LevelFilter::Trace,
)
.filter(
Some(LOG_TARGET),
log::LevelFilter::Trace,
)
.try_init();
let pool = sp_core::testing::TaskExecutor::new();
let (context, virtual_overseer) = test_helpers::make_subsystem_context(pool.clone());
let subsystem = super::PoVDistribution::new(Metrics::default());
let subsystem = subsystem.run(context);
let test_fut = test(TestHarness { virtual_overseer });
futures::pin_mut!(test_fut);
futures::pin_mut!(subsystem);
executor::block_on(future::select(test_fut, subsystem));
}
const TIMEOUT: Duration = Duration::from_millis(100);
async fn overseer_send(
overseer: &mut test_helpers::TestSubsystemContextHandle<PoVDistributionMessage>,
msg: PoVDistributionMessage,
) {
trace!("Sending message:\n{:?}", &msg);
overseer
.send(FromOverseer::Communication { msg })
.timeout(TIMEOUT)
.await
.expect(&format!("{:?} is more than enough for sending messages.", TIMEOUT));
}
async fn overseer_recv(
overseer: &mut test_helpers::TestSubsystemContextHandle<PoVDistributionMessage>,
) -> AllMessages {
let msg = overseer_recv_with_timeout(overseer, TIMEOUT)
.await
.expect(&format!("{:?} is more than enough to receive messages", TIMEOUT));
trace!("Received message:\n{:?}", &msg);
msg
}
async fn overseer_recv_with_timeout(
overseer: &mut test_helpers::TestSubsystemContextHandle<PoVDistributionMessage>,
timeout: Duration,
) -> Option<AllMessages> {
trace!("Waiting for message...");
overseer
.recv()
.timeout(timeout)
.await
}
async fn overseer_signal(
overseer: &mut test_helpers::TestSubsystemContextHandle<PoVDistributionMessage>,
signal: OverseerSignal,
) {
overseer
.send(FromOverseer::Signal(signal))
.timeout(TIMEOUT)
.await
.expect(&format!("{:?} is more than enough for sending signals.", TIMEOUT));
}
#[derive(Clone)]
struct TestState {
chain_ids: Vec<ParaId>,
validators: Vec<Sr25519Keyring>,
validator_public: Vec<ValidatorId>,
validator_authority_id: Vec<AuthorityDiscoveryId>,
validator_peer_id: Vec<PeerId>,
validator_groups: (Vec<Vec<ValidatorIndex>>, GroupRotationInfo),
relay_parent: Hash,
availability_cores: Vec<CoreState>,
}
impl Default for TestState {
fn default() -> Self {
let chain_a = ParaId::from(1);
let chain_b = ParaId::from(2);
let chain_ids = vec![chain_a, chain_b];
let validators = vec![
Sr25519Keyring::Alice,
Sr25519Keyring::Bob,
Sr25519Keyring::Charlie,
Sr25519Keyring::Dave,
Sr25519Keyring::Ferdie,
];
let validator_public = validator_pubkeys(&validators);
let validator_authority_id = validator_authority_id(&validators);
let validator_peer_id = std::iter::repeat_with(|| PeerId::random())
.take(validator_public.len())
.collect();
let validator_groups = vec![vec![2, 0, 4], vec![1], vec![3]];
let group_rotation_info = GroupRotationInfo {
session_start_block: 0,
group_rotation_frequency: 100,
now: 1,
};
let validator_groups = (validator_groups, group_rotation_info);
let availability_cores = vec![
CoreState::Scheduled(ScheduledCore {
para_id: chain_ids[0],
collator: None,
}),
CoreState::Scheduled(ScheduledCore {
para_id: chain_ids[1],
collator: None,
}),
];
let relay_parent = Hash::repeat_byte(0x05);
Self {
chain_ids,
validators,
validator_public,
validator_authority_id,
validator_peer_id,
validator_groups,
relay_parent,
availability_cores,
}
}
}
#[test]
fn ask_validators_for_povs() {
let test_state = TestState::default();
test_harness(|test_harness| async move {
let mut virtual_overseer = test_harness.virtual_overseer;
let pov_block = PoV {
block_data: BlockData(vec![42, 43, 44]),
};
let pov_hash = pov_block.hash();
let mut candidate = CandidateDescriptor::default();
let current = test_state.relay_parent.clone();
candidate.para_id = test_state.chain_ids[0];
candidate.pov_hash = pov_hash;
candidate.relay_parent = test_state.relay_parent;
overseer_signal(
&mut virtual_overseer,
OverseerSignal::ActiveLeaves(ActiveLeavesUpdate {
activated: smallvec![test_state.relay_parent.clone()],
deactivated: smallvec![],
}),
).await;
// first subsystem will try to obtain validators.
assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::Validators(tx),
)) => {
assert_eq!(relay_parent, current);
tx.send(Ok(test_state.validator_public.clone())).unwrap();
}
);
let (tx, pov_fetch_result) = oneshot::channel();
overseer_send(
&mut virtual_overseer,
PoVDistributionMessage::FetchPoV(test_state.relay_parent.clone(), candidate, tx),
).await;
// obtain the availability cores.
assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::AvailabilityCores(tx)
)) => {
assert_eq!(relay_parent, current);
tx.send(Ok(test_state.availability_cores.clone())).unwrap();
}
);
// Obtain the validator groups
assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::ValidatorGroups(tx)
)) => {
assert_eq!(relay_parent, current);
tx.send(Ok(test_state.validator_groups.clone())).unwrap();
}
);
// obtain the validators per relay parent
assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::Validators(tx),
)) => {
assert_eq!(relay_parent, current);
tx.send(Ok(test_state.validator_public.clone())).unwrap();
}
);
// obtain the validator_id to authority_id mapping
assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::ValidatorDiscovery(validators, tx),
)) => {
assert_eq!(relay_parent, current);
assert_eq!(validators.len(), 3);
assert!(validators.iter().all(|v| test_state.validator_public.contains(&v)));
let result = vec![
Some(test_state.validator_authority_id[2].clone()),
Some(test_state.validator_authority_id[0].clone()),
Some(test_state.validator_authority_id[4].clone()),
];
tx.send(Ok(result)).unwrap();
}
);
// We now should connect to our validator group.
assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::NetworkBridge(
NetworkBridgeMessage::ConnectToValidators {
validator_ids,
mut connected,
..
}
) => {
assert_eq!(validator_ids.len(), 3);
assert!(validator_ids.iter().all(|id| test_state.validator_authority_id.contains(id)));
let result = vec![
(test_state.validator_authority_id[2].clone(), test_state.validator_peer_id[2].clone()),
(test_state.validator_authority_id[0].clone(), test_state.validator_peer_id[0].clone()),
(test_state.validator_authority_id[4].clone(), test_state.validator_peer_id[4].clone()),
];
result.into_iter().for_each(|r| connected.try_send(r).unwrap());
}
);
for i in vec![2, 0, 4] {
overseer_send(
&mut virtual_overseer,
PoVDistributionMessage::NetworkBridgeUpdateV1(
NetworkBridgeEvent::PeerViewChange(
test_state.validator_peer_id[i].clone(),
View(vec![current]),
)
)
).await;
assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage(
to_peers,
payload,
)) => {
assert_eq!(to_peers, vec![test_state.validator_peer_id[i].clone()]);
assert_eq!(payload, awaiting_message(current.clone(), vec![pov_hash.clone()]));
}
);
}
overseer_send(
&mut virtual_overseer,
PoVDistributionMessage::NetworkBridgeUpdateV1(
NetworkBridgeEvent::PeerMessage(
test_state.validator_peer_id[2].clone(),
protocol_v1::PoVDistributionMessage::SendPoV(current, pov_hash, pov_block.clone()),
)
)
).await;
assert_eq!(*pov_fetch_result.await.unwrap(), pov_block);
assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::NetworkBridge(NetworkBridgeMessage::ReportPeer(id, benefit)) => {
assert_eq!(benefit, BENEFIT_FRESH_POV);
assert_eq!(id, test_state.validator_peer_id[2].clone());
}
);
// Now let's test that if some peer is ahead of us we would still
// send `Await` on `FetchPoV` message to it.
let next_leaf = Hash::repeat_byte(10);
// A validator's view changes and now is lets say ahead of us.
overseer_send(
&mut virtual_overseer,
PoVDistributionMessage::NetworkBridgeUpdateV1(
NetworkBridgeEvent::PeerViewChange(
test_state.validator_peer_id[2].clone(),
View(vec![next_leaf]),
)
)
).await;
let pov_block = PoV {
block_data: BlockData(vec![45, 46, 47]),
};
let pov_hash = pov_block.hash();
let candidate = CandidateDescriptor {
para_id: test_state.chain_ids[0],
pov_hash,
relay_parent: next_leaf.clone(),
..Default::default()
};
let (tx, _pov_fetch_result) = oneshot::channel();
overseer_signal(
&mut virtual_overseer,
OverseerSignal::ActiveLeaves(ActiveLeavesUpdate {
activated: smallvec![next_leaf.clone()],
deactivated: smallvec![current.clone()],
})
).await;
assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::Validators(tx),
)) => {
assert_eq!(relay_parent, next_leaf);
tx.send(Ok(test_state.validator_public.clone())).unwrap();
}
);
overseer_send(
&mut virtual_overseer,
PoVDistributionMessage::FetchPoV(next_leaf.clone(), candidate, tx),
).await;
// Obtain the availability cores.
assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::AvailabilityCores(tx)
)) => {
assert_eq!(relay_parent, next_leaf);
tx.send(Ok(test_state.availability_cores.clone())).unwrap();
}
);
// Obtain the validator groups
assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::ValidatorGroups(tx)
)) => {
assert_eq!(relay_parent, next_leaf);
tx.send(Ok(test_state.validator_groups.clone())).unwrap();
}
);
// obtain the validators per relay parent
assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::Validators(tx),
)) => {
assert_eq!(relay_parent, next_leaf);
tx.send(Ok(test_state.validator_public.clone())).unwrap();
}
);
// obtain the validator_id to authority_id mapping
assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::ValidatorDiscovery(validators, tx),
)) => {
assert_eq!(relay_parent, next_leaf);
assert_eq!(validators.len(), 3);
assert!(validators.iter().all(|v| test_state.validator_public.contains(&v)));
let result = vec![
Some(test_state.validator_authority_id[2].clone()),
Some(test_state.validator_authority_id[0].clone()),
Some(test_state.validator_authority_id[4].clone()),
];
tx.send(Ok(result)).unwrap();
}
);
// We now should connect to our validator group.
assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::NetworkBridge(
NetworkBridgeMessage::ConnectToValidators {
validator_ids,
mut connected,
..
}
) => {
assert_eq!(validator_ids.len(), 3);
assert!(validator_ids.iter().all(|id| test_state.validator_authority_id.contains(id)));
let result = vec![
(test_state.validator_authority_id[2].clone(), test_state.validator_peer_id[2].clone()),
(test_state.validator_authority_id[0].clone(), test_state.validator_peer_id[0].clone()),
(test_state.validator_authority_id[4].clone(), test_state.validator_peer_id[4].clone()),
];
result.into_iter().for_each(|r| connected.try_send(r).unwrap());
}
);
// We already know that the leaf in question in the peer's view so we request
// a chunk from them right away.
assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage(
to_peers,
payload,
)) => {
assert_eq!(to_peers, vec![test_state.validator_peer_id[2].clone()]);
assert_eq!(payload, awaiting_message(next_leaf.clone(), vec![pov_hash.clone()]));
}
);
});
}
#[test]
fn distributes_to_those_awaiting_and_completes_local() {
let hash_a: Hash = [0; 32].into();
@@ -66,6 +556,7 @@ fn distributes_to_those_awaiting_and_completes_local() {
},
our_view: View(vec![hash_a, hash_b]),
metrics: Default::default(),
connection_requests: Default::default(),
};
let pool = sp_core::testing::TaskExecutor::new();
@@ -103,8 +594,10 @@ fn distributes_to_those_awaiting_and_completes_local() {
});
}
#[test]
fn we_inform_peers_with_same_view_we_are_awaiting() {
let hash_a: Hash = [0; 32].into();
let hash_b: Hash = [1; 32].into();
@@ -146,6 +639,7 @@ fn we_inform_peers_with_same_view_we_are_awaiting() {
},
our_view: View(vec![hash_a]),
metrics: Default::default(),
connection_requests: Default::default(),
};
let pool = sp_core::testing::TaskExecutor::new();
@@ -153,29 +647,135 @@ fn we_inform_peers_with_same_view_we_are_awaiting() {
let mut descriptor = CandidateDescriptor::default();
descriptor.pov_hash = pov_hash;
let para_id_1 = ParaId::from(1);
let para_id_2 = ParaId::from(2);
descriptor.para_id = para_id_1;
let availability_cores = vec![
CoreState::Scheduled(ScheduledCore {
para_id: para_id_1,
collator: None,
}),
CoreState::Scheduled(ScheduledCore {
para_id: para_id_2,
collator: None,
}),
];
let validators = vec![
Sr25519Keyring::Alice,
Sr25519Keyring::Bob,
Sr25519Keyring::Charlie,
Sr25519Keyring::Dave,
Sr25519Keyring::Ferdie,
];
let validator_authority_id = validator_authority_id(&validators);
let validators = validator_pubkeys(&validators);
let validator_peer_id: Vec<_> = std::iter::repeat_with(|| PeerId::random())
.take(validators.len())
.collect();
let validator_groups = vec![vec![2, 0, 4], vec![1], vec![3]];
let group_rotation_info = GroupRotationInfo {
session_start_block: 0,
group_rotation_frequency: 100,
now: 1,
};
let validator_groups = (validator_groups, group_rotation_info);
executor::block_on(async move {
handle_fetch(
let handle_future = handle_fetch(
&mut state,
&mut ctx,
hash_a,
descriptor,
pov_send,
).await;
);
assert_eq!(state.relay_parent_state[&hash_a].fetching[&pov_hash].len(), 1);
let check_future = async move {
//assert_eq!(state.relay_parent_state[&hash_a].fetching[&pov_hash].len(), 1);
assert_matches!(
handle.recv().await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::AvailabilityCores(tx)
)) => {
assert_eq!(relay_parent, hash_a);
tx.send(Ok(availability_cores)).unwrap();
}
);
assert_matches!(
handle.recv().await,
AllMessages::NetworkBridge(
NetworkBridgeMessage::SendValidationMessage(peers, message)
) => {
assert_eq!(peers, vec![peer_a.clone()]);
assert_eq!(
message,
awaiting_message(hash_a, vec![pov_hash]),
);
}
)
assert_matches!(
handle.recv().await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::ValidatorGroups(tx)
)) => {
assert_eq!(relay_parent, hash_a);
tx.send(Ok(validator_groups)).unwrap();
}
);
assert_matches!(
handle.recv().await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::Validators(tx),
)) => {
assert_eq!(relay_parent, hash_a);
tx.send(Ok(validators.clone())).unwrap();
}
);
assert_matches!(
handle.recv().await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::ValidatorDiscovery(validators_res, tx),
)) => {
assert_eq!(relay_parent, hash_a);
assert_eq!(validators_res.len(), 3);
assert!(validators_res.iter().all(|v| validators.contains(&v)));
let result = vec![
Some(validator_authority_id[2].clone()),
Some(validator_authority_id[0].clone()),
Some(validator_authority_id[4].clone()),
];
tx.send(Ok(result)).unwrap();
}
);
assert_matches!(
handle.recv().await,
AllMessages::NetworkBridge(
NetworkBridgeMessage::ConnectToValidators {
validator_ids,
mut connected,
..
}
) => {
assert_eq!(validator_ids.len(), 3);
assert!(validator_ids.iter().all(|id| validator_authority_id.contains(id)));
let result = vec![
(validator_authority_id[2].clone(), validator_peer_id[2].clone()),
(validator_authority_id[0].clone(), validator_peer_id[0].clone()),
(validator_authority_id[4].clone(), validator_peer_id[4].clone()),
];
result.into_iter().for_each(|r| connected.try_send(r).unwrap());
}
);
};
futures::join!(handle_future, check_future);
});
}
@@ -224,6 +824,7 @@ fn peer_view_change_leads_to_us_informing() {
},
our_view: View(vec![hash_a]),
metrics: Default::default(),
connection_requests: Default::default(),
};
let pool = sp_core::testing::TaskExecutor::new();
@@ -296,6 +897,7 @@ fn peer_complete_fetch_and_is_rewarded() {
},
our_view: View(vec![hash_a]),
metrics: Default::default(),
connection_requests: Default::default(),
};
let pool = sp_core::testing::TaskExecutor::new();
@@ -385,6 +987,7 @@ fn peer_punished_for_sending_bad_pov() {
},
our_view: View(vec![hash_a]),
metrics: Default::default(),
connection_requests: Default::default(),
};
let pool = sp_core::testing::TaskExecutor::new();
@@ -449,6 +1052,7 @@ fn peer_punished_for_sending_unexpected_pov() {
},
our_view: View(vec![hash_a]),
metrics: Default::default(),
connection_requests: Default::default(),
};
let pool = sp_core::testing::TaskExecutor::new();
@@ -511,6 +1115,7 @@ fn peer_punished_for_sending_pov_out_of_our_view() {
},
our_view: View(vec![hash_a]),
metrics: Default::default(),
connection_requests: Default::default(),
};
let pool = sp_core::testing::TaskExecutor::new();
@@ -570,6 +1175,7 @@ fn peer_reported_for_awaiting_too_much() {
},
our_view: View(vec![hash_a]),
metrics: Default::default(),
connection_requests: Default::default(),
};
let pool = sp_core::testing::TaskExecutor::new();
@@ -656,6 +1262,7 @@ fn peer_reported_for_awaiting_outside_their_view() {
},
our_view: View(vec![hash_a, hash_b]),
metrics: Default::default(),
connection_requests: Default::default(),
};
let pool = sp_core::testing::TaskExecutor::new();
@@ -719,6 +1326,7 @@ fn peer_reported_for_awaiting_outside_our_view() {
},
our_view: View(vec![hash_a]),
metrics: Default::default(),
connection_requests: Default::default(),
};
let pool = sp_core::testing::TaskExecutor::new();
@@ -797,6 +1405,7 @@ fn peer_complete_fetch_leads_to_us_completing_others() {
},
our_view: View(vec![hash_a]),
metrics: Default::default(),
connection_requests: Default::default(),
};
let pool = sp_core::testing::TaskExecutor::new();
@@ -880,6 +1489,7 @@ fn peer_completing_request_no_longer_awaiting() {
},
our_view: View(vec![hash_a]),
metrics: Default::default(),
connection_requests: Default::default(),
};
let pool = sp_core::testing::TaskExecutor::new();