Whole subsystem test for new availability-distribution (#2552)

* WIP: Whole subsystem test.

* New tests compile.

* Avoid needless runtime queries for no validator nodes.

* Make tx and rx publicly accessible in virtual overseer.

This simplifies mocking in some cases, as tx can be cloned, but rx can
not.

* Whole subsystem test working.

* Update node/network/availability-distribution/src/session_cache.rs

Co-authored-by: Andronik Ordian <write@reusable.software>

* Update node/network/availability-distribution/src/session_cache.rs

Co-authored-by: Andronik Ordian <write@reusable.software>

* Document better what `None` return value means.

* Get rid of BitVec dependency.

* Update Cargo.lock

* Hopefully fixed implementers guide build.

Co-authored-by: Andronik Ordian <write@reusable.software>
This commit is contained in:
Robert Klotzner
2021-03-03 16:23:15 +01:00
committed by GitHub
parent ae3ee5ed7f
commit 78ac4b7add
12 changed files with 596 additions and 1320 deletions
@@ -0,0 +1,148 @@
// Copyright 2021 Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! Helper functions and tools to generate mock data useful for testing this subsystem.
use std::sync::Arc;
use sp_keyring::Sr25519Keyring;
use polkadot_erasure_coding::{branches, obtain_chunks_v1 as obtain_chunks};
use polkadot_primitives::v1::{AvailableData, BlockData, CandidateCommitments, CandidateDescriptor,
CandidateHash, CommittedCandidateReceipt, ErasureChunk, GroupIndex, Hash, HeadData, Id
as ParaId, OccupiedCore, PersistedValidationData, PoV, SessionInfo,
ValidatorIndex
};
/// Create dummy session info with two validator groups.
pub fn make_session_info() -> SessionInfo {
let validators = vec![
Sr25519Keyring::Ferdie, // <- this node, role: validator
Sr25519Keyring::Alice,
Sr25519Keyring::Bob,
Sr25519Keyring::Charlie,
Sr25519Keyring::Dave,
Sr25519Keyring::Eve,
Sr25519Keyring::One,
];
let validator_groups: Vec<Vec<ValidatorIndex>> = [vec![5, 0, 3], vec![1, 6, 2, 4]]
.iter().map(|g| g.into_iter().map(|v| ValidatorIndex(*v)).collect()).collect();
SessionInfo {
discovery_keys: validators.iter().map(|k| k.public().into()).collect(),
// Not used:
n_cores: validator_groups.len() as u32,
validator_groups,
// Not used values:
validators: validators.iter().map(|k| k.public().into()).collect(),
assignment_keys: Vec::new(),
zeroth_delay_tranche_width: 0,
relay_vrf_modulo_samples: 0,
n_delay_tranches: 0,
no_show_slots: 0,
needed_approvals: 0,
}
}
/// Builder for constructing occupied cores.
///
/// Takes all the values we care about and fills the rest with dummy values on `build`.
pub struct OccupiedCoreBuilder {
pub group_responsible: GroupIndex,
pub para_id: ParaId,
pub relay_parent: Hash,
}
impl OccupiedCoreBuilder {
pub fn build(self) -> (OccupiedCore, (CandidateHash, ErasureChunk)) {
let pov = PoV {
block_data: BlockData(vec![45, 46, 47]),
};
let pov_hash = pov.hash();
let (erasure_root, chunk) = get_valid_chunk_data(pov.clone());
let candidate_receipt = TestCandidateBuilder {
para_id: self.para_id,
pov_hash,
relay_parent: self.relay_parent,
erasure_root,
..Default::default()
}.build();
let core = OccupiedCore {
next_up_on_available: None,
occupied_since: 0,
time_out_at: 0,
next_up_on_time_out: None,
availability: Default::default(),
group_responsible: self.group_responsible,
candidate_hash: candidate_receipt.hash(),
candidate_descriptor: candidate_receipt.descriptor().clone(),
};
(core, (candidate_receipt.hash(), chunk))
}
}
#[derive(Default)]
pub struct TestCandidateBuilder {
para_id: ParaId,
head_data: HeadData,
pov_hash: Hash,
relay_parent: Hash,
erasure_root: Hash,
}
impl TestCandidateBuilder {
pub fn build(self) -> CommittedCandidateReceipt {
CommittedCandidateReceipt {
descriptor: CandidateDescriptor {
para_id: self.para_id,
pov_hash: self.pov_hash,
relay_parent: self.relay_parent,
erasure_root: self.erasure_root,
..Default::default()
},
commitments: CandidateCommitments {
head_data: self.head_data,
..Default::default()
},
}
}
}
pub fn get_valid_chunk_data(pov: PoV) -> (Hash, ErasureChunk) {
let fake_validator_count = 10;
let persisted = PersistedValidationData {
parent_head: HeadData(vec![7, 8, 9]),
relay_parent_number: Default::default(),
max_pov_size: 1024,
relay_parent_storage_root: Default::default(),
};
let available_data = AvailableData {
validation_data: persisted, pov: Arc::new(pov),
};
let chunks = obtain_chunks(fake_validator_count, &available_data).unwrap();
let branches = branches(chunks.as_ref());
let root = branches.root();
let chunk = branches.enumerate()
.map(|(index, (proof, chunk))| ErasureChunk {
chunk: chunk.to_vec(),
index: ValidatorIndex(index as _),
proof,
})
.next().expect("There really should be 10 chunks.");
(root, chunk)
}
@@ -0,0 +1,63 @@
// Copyright 2021 Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
use futures::{executor, future, Future};
use sp_keystore::SyncCryptoStorePtr;
use polkadot_subsystem_testhelpers as test_helpers;
use super::*;
mod state;
/// State for test harnesses.
use state::{TestState, TestHarness};
/// Mock data useful for testing.
pub(crate) mod mock;
fn test_harness<T: Future<Output = ()>>(
keystore: SyncCryptoStorePtr,
test_fx: impl FnOnce(TestHarness) -> T,
) {
sp_tracing::try_init_simple();
let pool = sp_core::testing::TaskExecutor::new();
let (context, virtual_overseer) = test_helpers::make_subsystem_context(pool.clone());
let subsystem = AvailabilityDistributionSubsystem::new(keystore, Default::default());
{
let subsystem = subsystem.run(context);
let test_fut = test_fx(TestHarness { virtual_overseer, pool });
futures::pin_mut!(test_fut);
futures::pin_mut!(subsystem);
executor::block_on(future::select(test_fut, subsystem));
}
}
/// Simple basic check, whether the subsystem works as expected.
///
/// Exceptional cases are tested as unit tests in `fetch_task`.
#[test]
fn check_basic() {
let state = TestState::default();
test_harness(state.keystore.clone(), move |harness| {
state.run(harness)
});
}
@@ -0,0 +1,317 @@
// Copyright 2021 Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
use std::{collections::{HashMap, HashSet}, sync::Arc, time::Duration};
use polkadot_node_subsystem_util::TimeoutExt;
use polkadot_subsystem_testhelpers::TestSubsystemContextHandle;
use smallvec::smallvec;
use futures::{FutureExt, channel::oneshot, SinkExt, channel::mpsc, StreamExt};
use futures_timer::Delay;
use sc_keystore::LocalKeystore;
use sp_application_crypto::AppKey;
use sp_keystore::{SyncCryptoStore, SyncCryptoStorePtr};
use sp_keyring::Sr25519Keyring;
use sp_core::{traits::SpawnNamed, testing::TaskExecutor};
use sc_network as network;
use sc_network::config as netconfig;
use polkadot_subsystem::{ActiveLeavesUpdate, FromOverseer, OverseerSignal, messages::{AllMessages,
AvailabilityDistributionMessage, AvailabilityStoreMessage, NetworkBridgeMessage, RuntimeApiMessage,
RuntimeApiRequest}
};
use polkadot_primitives::v1::{CandidateHash, CoreState, ErasureChunk, GroupIndex, Hash, Id
as ParaId, ScheduledCore, SessionInfo, ValidatorId,
ValidatorIndex
};
use polkadot_node_network_protocol::{jaeger,
request_response::{IncomingRequest, OutgoingRequest, Requests, v1}
};
use polkadot_subsystem_testhelpers as test_helpers;
use test_helpers::SingleItemSink;
use super::mock::{make_session_info, OccupiedCoreBuilder, };
use crate::LOG_TARGET;
pub struct TestHarness {
pub virtual_overseer: test_helpers::TestSubsystemContextHandle<AvailabilityDistributionMessage>,
pub pool: TaskExecutor,
}
/// TestState for mocking execution of this subsystem.
///
/// The `Default` instance provides data, which makes the system succeed by providing a couple of
/// valid occupied cores. You can tune the data before calling `TestState::run`. E.g. modify some
/// chunks to be invalid, the test will then still pass if you remove that chunk from
/// `valid_chunks`.
#[derive(Clone)]
pub struct TestState {
// Simulated relay chain heads:
pub relay_chain: Vec<Hash>,
pub chunks: HashMap<(CandidateHash, ValidatorIndex), ErasureChunk>,
/// All chunks that are valid and should be accepted.
pub valid_chunks: HashSet<(CandidateHash, ValidatorIndex)>,
pub session_info: SessionInfo,
/// Cores per relay chain block.
pub cores: HashMap<Hash, Vec<CoreState>>,
pub keystore: SyncCryptoStorePtr,
}
impl Default for TestState {
fn default() -> Self {
let relay_chain: Vec<_> = (1u8..10).map(Hash::repeat_byte).collect();
let chain_a = ParaId::from(1);
let chain_b = ParaId::from(2);
let chain_ids = vec![chain_a, chain_b];
let keystore: SyncCryptoStorePtr = Arc::new(LocalKeystore::in_memory());
let session_info = make_session_info();
SyncCryptoStore::sr25519_generate_new(
&*keystore,
ValidatorId::ID,
Some(&Sr25519Keyring::Ferdie.to_seed()),
)
.expect("Insert key into keystore");
let (cores, chunks) = {
let mut cores = HashMap::new();
let mut chunks = HashMap::new();
cores.insert(relay_chain[0],
vec![
CoreState::Scheduled(ScheduledCore {
para_id: chain_ids[0],
collator: None,
}),
CoreState::Scheduled(ScheduledCore {
para_id: chain_ids[1],
collator: None,
}),
]
);
let heads = {
let mut advanced = relay_chain.iter();
advanced.next();
relay_chain.iter().zip(advanced)
};
for (relay_parent, relay_child) in heads {
let (p_cores, p_chunks): (Vec<_>, Vec<_>) = chain_ids.iter().enumerate()
.map(|(i, para_id)| {
let (core, chunk) = OccupiedCoreBuilder {
group_responsible: GroupIndex(i as _),
para_id: *para_id,
relay_parent: relay_parent.clone(),
}.build();
(CoreState::Occupied(core), chunk)
}
)
.unzip();
cores.insert(relay_child.clone(), p_cores);
// Skip chunks for our own group (won't get fetched):
let mut chunks_other_groups = p_chunks.into_iter();
chunks_other_groups.next();
for (validator_index, chunk) in chunks_other_groups {
chunks.insert((validator_index, chunk.index), chunk);
}
}
(cores, chunks)
};
Self {
relay_chain,
valid_chunks: chunks.clone().keys().map(Clone::clone).collect(),
chunks,
session_info,
cores,
keystore,
}
}
}
impl TestState {
/// Run, but fail after some timeout.
pub async fn run(self, harness: TestHarness) {
// Make sure test won't run forever.
let f = self.run_inner(harness.pool, harness.virtual_overseer).timeout(Duration::from_secs(10));
assert!(f.await.is_some(), "Test ran into timeout");
}
/// Run tests with the given mock values in `TestState`.
///
/// This will simply advance through the simulated chain and examines whether the subsystem
/// behaves as expected: It will succeed if all valid chunks of other backing groups get stored
/// and no other.
async fn run_inner(self, executor: TaskExecutor, virtual_overseer: TestSubsystemContextHandle<AvailabilityDistributionMessage>) {
// We skip genesis here (in reality ActiveLeavesUpdate can also skip a block:
let updates = {
let mut advanced = self.relay_chain.iter();
advanced.next();
self
.relay_chain.iter().zip(advanced)
.map(|(old, new)| ActiveLeavesUpdate {
activated: smallvec![(new.clone(), Arc::new(jaeger::Span::Disabled))],
deactivated: smallvec![old.clone()],
}).collect::<Vec<_>>()
};
// We should be storing all valid chunks during execution:
//
// Test will fail if this does not happen until timeout.
let mut remaining_stores = self.valid_chunks.len();
let TestSubsystemContextHandle { tx, mut rx } = virtual_overseer;
// Spawning necessary as incoming queue can only hold a single item, we don't want to dead
// lock ;-)
let update_tx = tx.clone();
executor.spawn("Sending active leaves updates", async move {
for update in updates {
overseer_signal(
update_tx.clone(),
OverseerSignal::ActiveLeaves(update)
).await;
// We need to give the subsystem a little time to do its job, otherwise it will
// cancel jobs as obsolete:
Delay::new(Duration::from_millis(20)).await;
}
}.boxed()
);
while remaining_stores > 0
{
tracing::trace!(target: LOG_TARGET, remaining_stores, "Stores left to go");
let msg = overseer_recv(&mut rx).await;
match msg {
AllMessages::NetworkBridge(NetworkBridgeMessage::SendRequests(reqs)) => {
for req in reqs {
// Forward requests:
let in_req = to_incoming_req(&executor, req);
executor.spawn("Request forwarding",
overseer_send(
tx.clone(),
AvailabilityDistributionMessage::AvailabilityFetchingRequest(in_req)
).boxed()
);
}
}
AllMessages::AvailabilityStore(AvailabilityStoreMessage::QueryChunk(candidate_hash, validator_index, tx)) => {
let chunk = self.chunks.get(&(candidate_hash, validator_index));
tx.send(chunk.map(Clone::clone))
.expect("Receiver is expected to be alive");
}
AllMessages::AvailabilityStore(AvailabilityStoreMessage::StoreChunk{candidate_hash, chunk, tx, ..}) => {
assert!(
self.valid_chunks.contains(&(candidate_hash, chunk.index)),
"Only valid chunks should ever get stored."
);
tx.send(Ok(()))
.expect("Receiver is expected to be alive");
tracing::trace!(target: LOG_TARGET, "'Stored' fetched chunk.");
remaining_stores -= 1;
}
AllMessages::RuntimeApi(RuntimeApiMessage::Request(hash, req)) => {
match req {
RuntimeApiRequest::SessionIndexForChild(tx) => {
// Always session index 1 for now:
tx.send(Ok(1))
.expect("Receiver should still be alive");
}
RuntimeApiRequest::SessionInfo(_, tx) => {
tx.send(Ok(Some(self.session_info.clone())))
.expect("Receiver should be alive.");
}
RuntimeApiRequest::AvailabilityCores(tx) => {
tracing::trace!(target: LOG_TARGET, cores= ?self.cores[&hash], hash = ?hash, "Sending out cores for hash");
tx.send(Ok(self.cores[&hash].clone()))
.expect("Receiver should still be alive");
}
_ => {
panic!("Unexpected runtime request: {:?}", req);
}
}
}
_ => {
panic!("Unexpected message received: {:?}", msg);
}
}
}
}
}
async fn overseer_signal(
mut tx: SingleItemSink<FromOverseer<AvailabilityDistributionMessage>>,
msg: impl Into<OverseerSignal>,
) {
let msg = msg.into();
tracing::trace!(target: LOG_TARGET, msg = ?msg, "sending message");
tx.send(FromOverseer::Signal(msg))
.await
.expect("Test subsystem no longer live");
}
async fn overseer_send(
mut tx: SingleItemSink<FromOverseer<AvailabilityDistributionMessage>>,
msg: impl Into<AvailabilityDistributionMessage>,
) {
let msg = msg.into();
tracing::trace!(target: LOG_TARGET, msg = ?msg, "sending message");
tx.send(FromOverseer::Communication { msg }).await
.expect("Test subsystem no longer live");
tracing::trace!(target: LOG_TARGET, "sent message");
}
async fn overseer_recv(
rx: &mut mpsc::UnboundedReceiver<AllMessages>,
) -> AllMessages {
tracing::trace!(target: LOG_TARGET, "waiting for message ...");
rx.next().await.expect("Test subsystem no longer live")
}
fn to_incoming_req(
executor: &TaskExecutor,
outgoing: Requests
) -> IncomingRequest<v1::AvailabilityFetchingRequest> {
match outgoing {
Requests::AvailabilityFetching(OutgoingRequest { payload, pending_response, .. }) => {
let (tx, rx): (oneshot::Sender<netconfig::OutgoingResponse>, oneshot::Receiver<_>)
= oneshot::channel();
executor.spawn("Message forwarding", async {
let response = rx.await;
let payload = response.expect("Unexpected canceled request").result;
pending_response.send(payload.map_err(|_| network::RequestFailure::Refused))
.expect("Sending response is expected to work");
}.boxed()
);
IncomingRequest::new(
// We don't really care:
network::PeerId::random(),
payload,
tx
)
}
}
}