Whole subsystem test for new availability-distribution (#2552)

* WIP: Whole subsystem test. * New tests compile. * Avoid needless runtime queries for no validator nodes. * Make tx and rx publicly accessible in virtual overseer. This simplifies mocking in some cases, as tx can be cloned, but rx can not. * Whole subsystem test working. * Update node/network/availability-distribution/src/session_cache.rs Co-authored-by: Andronik Ordian <write@reusable.software> * Update node/network/availability-distribution/src/session_cache.rs Co-authored-by: Andronik Ordian <write@reusable.software> * Document better what `None` return value means. * Get rid of BitVec dependency. * Update Cargo.lock * Hopefully fixed implementers guide build. Co-authored-by: Andronik Ordian <write@reusable.software>
2026-04-28 01:38:04 +00:00 · 2021-03-03 16:23:15 +01:00
parent ae3ee5ed7f
commit 78ac4b7add
12 changed files with 596 additions and 1320 deletions
@@ -0,0 +1,148 @@
+// Copyright 2021 Parity Technologies (UK) Ltd.
+// This file is part of Polkadot.
+
+// Polkadot is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Polkadot is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.
+
+
+//! Helper functions and tools to generate mock data useful for testing this subsystem.
+
+use std::sync::Arc;
+
+use sp_keyring::Sr25519Keyring;
+
+use polkadot_erasure_coding::{branches, obtain_chunks_v1 as obtain_chunks};
+use polkadot_primitives::v1::{AvailableData, BlockData, CandidateCommitments, CandidateDescriptor,
+	CandidateHash, CommittedCandidateReceipt, ErasureChunk, GroupIndex, Hash, HeadData, Id
+	as ParaId, OccupiedCore, PersistedValidationData, PoV, SessionInfo,
+	ValidatorIndex
+};
+
+/// Build the `SessionInfo` fixture used by the tests: seven validators split into two groups.
+///
+/// The position of a key in the validator list is its validator index, so index 0 (Ferdie) is
+/// the node under test; it is a member of the first group.
+pub fn make_session_info() -> SessionInfo {
+	let keyrings = vec![
+		Sr25519Keyring::Ferdie, // <- this node, role: validator
+		Sr25519Keyring::Alice,
+		Sr25519Keyring::Bob,
+		Sr25519Keyring::Charlie,
+		Sr25519Keyring::Dave,
+		Sr25519Keyring::Eve,
+		Sr25519Keyring::One,
+	];
+
+	// Group membership expressed as indices into `keyrings`.
+	let raw_groups = vec![vec![5, 0, 3], vec![1, 6, 2, 4]];
+	let validator_groups: Vec<Vec<ValidatorIndex>> = raw_groups
+		.into_iter()
+		.map(|members| members.into_iter().map(ValidatorIndex).collect())
+		.collect();
+
+	let discovery_keys = keyrings.iter().map(|keyring| keyring.public().into()).collect();
+	let validators = keyrings.iter().map(|keyring| keyring.public().into()).collect();
+	// One core per validator group.
+	let n_cores = validator_groups.len() as u32;
+
+	SessionInfo {
+		discovery_keys,
+		// Not used:
+		n_cores,
+		validator_groups,
+		// Not used values:
+		validators,
+		assignment_keys: Vec::new(),
+		zeroth_delay_tranche_width: 0,
+		relay_vrf_modulo_samples: 0,
+		n_delay_tranches: 0,
+		no_show_slots: 0,
+		needed_approvals: 0,
+	}
+}
+
+/// Builder for constructing occupied cores.
+///
+/// Takes all the values we care about and fills the rest with dummy values on `build`.
+pub struct OccupiedCoreBuilder {
+	/// Index of the validator group responsible for the core.
+	pub group_responsible: GroupIndex,
+	/// Para the candidate occupying the core belongs to.
+	pub para_id: ParaId,
+	/// Relay parent of the candidate occupying the core.
+	pub relay_parent: Hash,
+}
+
+impl OccupiedCoreBuilder {
+	/// Build the occupied core together with the candidate hash and one valid erasure chunk.
+	///
+	/// The candidate is derived from a fixed dummy PoV; its descriptor carries the erasure root
+	/// that the returned chunk belongs to.
+	pub fn build(self) -> (OccupiedCore, (CandidateHash, ErasureChunk)) {
+		let pov = PoV {
+			block_data: BlockData(vec![45, 46, 47]),
+		};
+		let pov_hash = pov.hash();
+		// The PoV is not needed after this point, so hand it over instead of cloning it.
+		let (erasure_root, chunk) = get_valid_chunk_data(pov);
+		let candidate_receipt = TestCandidateBuilder {
+			para_id: self.para_id,
+			pov_hash,
+			relay_parent: self.relay_parent,
+			erasure_root,
+			..Default::default()
+		}.build();
+		// Hash the receipt once and reuse the result for the core and the returned key.
+		let candidate_hash = candidate_receipt.hash();
+		let core = OccupiedCore {
+			next_up_on_available: None,
+			occupied_since: 0,
+			time_out_at: 0,
+			next_up_on_time_out: None,
+			availability: Default::default(),
+			group_responsible: self.group_responsible,
+			candidate_hash,
+			candidate_descriptor: candidate_receipt.descriptor,
+		};
+		(core, (candidate_hash, chunk))
+	}
+}
+
+/// Builder for candidate receipts; every field that is not set explicitly takes its default.
+#[derive(Default)]
+pub struct TestCandidateBuilder {
+	para_id: ParaId,
+	head_data: HeadData,
+	pov_hash: Hash,
+	relay_parent: Hash,
+	erasure_root: Hash,
+}
+
+impl TestCandidateBuilder {
+	/// Assemble a `CommittedCandidateReceipt` from the builder's values.
+	///
+	/// Descriptor and commitment fields not covered by the builder are filled with defaults.
+	pub fn build(self) -> CommittedCandidateReceipt {
+		let Self { para_id, head_data, pov_hash, relay_parent, erasure_root } = self;
+
+		let descriptor = CandidateDescriptor {
+			para_id,
+			pov_hash,
+			relay_parent,
+			erasure_root,
+			..Default::default()
+		};
+		let commitments = CandidateCommitments {
+			head_data,
+			..Default::default()
+		};
+
+		CommittedCandidateReceipt { descriptor, commitments }
+	}
+}
+
+/// Erasure-code the given PoV for ten fake validators.
+///
+/// Returns the erasure root together with the first chunk and its proof.
+pub fn get_valid_chunk_data(pov: PoV) -> (Hash, ErasureChunk) {
+	let fake_validator_count = 10;
+	let available_data = AvailableData {
+		validation_data: PersistedValidationData {
+			parent_head: HeadData(vec![7, 8, 9]),
+			relay_parent_number: Default::default(),
+			max_pov_size: 1024,
+			relay_parent_storage_root: Default::default(),
+		},
+		pov: Arc::new(pov),
+	};
+	let chunks = obtain_chunks(fake_validator_count, &available_data).unwrap();
+	let chunk_branches = branches(chunks.as_ref());
+	let root = chunk_branches.root();
+	// Only the very first branch is needed; its position doubles as the validator index.
+	let (index, (proof, data)) = chunk_branches
+		.enumerate()
+		.next()
+		.expect("There really should be 10 chunks.");
+	let chunk = ErasureChunk {
+		chunk: data.to_vec(),
+		index: ValidatorIndex(index as _),
+		proof,
+	};
+	(root, chunk)
+}
@@ -0,0 +1,63 @@
+// Copyright 2021 Parity Technologies (UK) Ltd.
+// This file is part of Polkadot.
+
+// Polkadot is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Polkadot is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.
+
+use futures::{executor, future, Future};
+
+use sp_keystore::SyncCryptoStorePtr;
+
+use polkadot_subsystem_testhelpers as test_helpers;
+
+use super::*;
+
+mod state;
+/// State for test harnesses.
+use state::{TestState, TestHarness};
+
+/// Mock data useful for testing.
+pub(crate) mod mock;
+
+/// Run `test_fx` against a freshly created availability distribution subsystem.
+///
+/// The subsystem future and the test future are driven together; this function returns as soon
+/// as either of the two completes.
+fn test_harness<T>(
+	keystore: SyncCryptoStorePtr,
+	test_fx: impl FnOnce(TestHarness) -> T,
+)
+where
+	T: Future<Output = ()>,
+{
+	sp_tracing::try_init_simple();
+
+	let pool = sp_core::testing::TaskExecutor::new();
+	let (context, virtual_overseer) = test_helpers::make_subsystem_context(pool.clone());
+
+	let subsystem_fut = AvailabilityDistributionSubsystem::new(keystore, Default::default())
+		.run(context);
+	let test_fut = test_fx(TestHarness { virtual_overseer, pool });
+
+	futures::pin_mut!(test_fut);
+	futures::pin_mut!(subsystem_fut);
+
+	executor::block_on(future::select(test_fut, subsystem_fut));
+}
+
+/// Smoke test: run the default `TestState` against the subsystem.
+///
+/// Exceptional cases are tested as unit tests in `fetch_task`.
+#[test]
+fn check_basic() {
+	let state = TestState::default();
+	// The subsystem shares the keystore the test state was set up with.
+	let keystore = state.keystore.clone();
+	test_harness(keystore, move |harness| state.run(harness));
+}
@@ -0,0 +1,317 @@
+// Copyright 2021 Parity Technologies (UK) Ltd.
+// This file is part of Polkadot.
+
+// Polkadot is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Polkadot is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.
+
+use std::{collections::{HashMap, HashSet}, sync::Arc, time::Duration};
+
+use polkadot_node_subsystem_util::TimeoutExt;
+use polkadot_subsystem_testhelpers::TestSubsystemContextHandle;
+use smallvec::smallvec;
+
+use futures::{FutureExt, channel::oneshot, SinkExt, channel::mpsc, StreamExt};
+use futures_timer::Delay;
+
+use sc_keystore::LocalKeystore;
+use sp_application_crypto::AppKey;
+use sp_keystore::{SyncCryptoStore, SyncCryptoStorePtr};
+use sp_keyring::Sr25519Keyring;
+use sp_core::{traits::SpawnNamed, testing::TaskExecutor};
+use sc_network as network;
+use sc_network::config as netconfig;
+
+use polkadot_subsystem::{ActiveLeavesUpdate, FromOverseer, OverseerSignal, messages::{AllMessages,
+	AvailabilityDistributionMessage, AvailabilityStoreMessage, NetworkBridgeMessage, RuntimeApiMessage,
+	RuntimeApiRequest}
+};
+use polkadot_primitives::v1::{CandidateHash, CoreState, ErasureChunk, GroupIndex, Hash, Id
+	as ParaId, ScheduledCore, SessionInfo, ValidatorId,
+	ValidatorIndex
+};
+use polkadot_node_network_protocol::{jaeger,
+	request_response::{IncomingRequest, OutgoingRequest, Requests, v1}
+};
+use polkadot_subsystem_testhelpers as test_helpers;
+use test_helpers::SingleItemSink;
+
+use super::mock::{make_session_info, OccupiedCoreBuilder, };
+use crate::LOG_TARGET;
+
+/// Everything a test needs to interact with the subsystem under test.
+pub struct TestHarness {
+	/// Handle for sending messages and signals to the subsystem and receiving its requests.
+	pub virtual_overseer: TestSubsystemContextHandle<AvailabilityDistributionMessage>,
+	/// Executor used for spawning helper tasks.
+	pub pool: TaskExecutor,
+}
+
+/// TestState for mocking execution of this subsystem.
+///
+/// The `Default` instance provides data, which makes the system succeed by providing a couple of
+/// valid occupied cores. You can tune the data before calling `TestState::run`. E.g. modify some
+/// chunks to be invalid, the test will then still pass if you remove that chunk from
+/// `valid_chunks`.
+#[derive(Clone)]
+pub struct TestState {
+	/// Simulated relay chain heads.
+	pub relay_chain: Vec<Hash>,
+	/// Chunks handed out in response to chunk queries, keyed by candidate hash and chunk index.
+	pub chunks: HashMap<(CandidateHash, ValidatorIndex), ErasureChunk>,
+	/// All chunks that are valid and should be accepted.
+	pub valid_chunks: HashSet<(CandidateHash, ValidatorIndex)>,
+	/// Session info returned for session info runtime requests.
+	pub session_info: SessionInfo,
+	/// Cores per relay chain block.
+	pub cores: HashMap<Hash, Vec<CoreState>>,
+	/// Keystore; the `Default` instance holds Ferdie's validator key.
+	pub keystore: SyncCryptoStorePtr,
+}
+
+impl Default for TestState {
+	/// Set up a simulated chain of nine relay blocks with two paras.
+	///
+	/// The first block only has scheduled cores. Every later block has one occupied core per
+	/// para, whose candidate has the preceding block as its relay parent. All chunks except the
+	/// ones of the first core (our own group) are made available and marked valid.
+	fn default() -> Self {
+		let relay_chain: Vec<_> = (1u8..10).map(Hash::repeat_byte).collect();
+		let chain_a = ParaId::from(1);
+		let chain_b = ParaId::from(2);
+
+		let chain_ids = vec![chain_a, chain_b];
+
+		let keystore: SyncCryptoStorePtr = Arc::new(LocalKeystore::in_memory());
+
+		let session_info = make_session_info();
+
+		SyncCryptoStore::sr25519_generate_new(
+			&*keystore,
+			ValidatorId::ID,
+			Some(&Sr25519Keyring::Ferdie.to_seed()),
+		)
+		.expect("Insert key into keystore");
+
+		let (cores, chunks) = {
+			let mut cores = HashMap::new();
+			let mut chunks = HashMap::new();
+
+			cores.insert(
+				relay_chain[0],
+				vec![
+					CoreState::Scheduled(ScheduledCore {
+						para_id: chain_ids[0],
+						collator: None,
+					}),
+					CoreState::Scheduled(ScheduledCore {
+						para_id: chain_ids[1],
+						collator: None,
+					}),
+				],
+			);
+
+			// Pair every block with its successor.
+			let heads = relay_chain.iter().zip(relay_chain.iter().skip(1));
+			for (relay_parent, relay_child) in heads {
+				let (p_cores, p_chunks): (Vec<_>, Vec<_>) = chain_ids.iter().enumerate()
+					.map(|(i, para_id)| {
+						let (core, chunk) = OccupiedCoreBuilder {
+							group_responsible: GroupIndex(i as _),
+							para_id: *para_id,
+							relay_parent: *relay_parent,
+						}.build();
+						(CoreState::Occupied(core), chunk)
+					})
+					.unzip();
+				cores.insert(*relay_child, p_cores);
+				// Skip chunks for our own group (won't get fetched):
+				for (candidate_hash, chunk) in p_chunks.into_iter().skip(1) {
+					chunks.insert((candidate_hash, chunk.index), chunk);
+				}
+			}
+			(cores, chunks)
+		};
+		Self {
+			relay_chain,
+			// Only the keys are needed here, no need to copy the chunks themselves.
+			valid_chunks: chunks.keys().cloned().collect(),
+			chunks,
+			session_info,
+			cores,
+			keystore,
+		}
+	}
+}
+
+impl TestState {
+	/// Run the test, but fail if it has not finished after ten seconds.
+	pub async fn run(self, harness: TestHarness) {
+		let TestHarness { virtual_overseer, pool } = harness;
+		// Make sure test won't run forever.
+		let outcome = self
+			.run_inner(pool, virtual_overseer)
+			.timeout(Duration::from_secs(10))
+			.await;
+		assert!(outcome.is_some(), "Test ran into timeout");
+	}
+
+	/// Run tests with the given mock values in `TestState`.
+	///
+	/// Advances through the simulated chain and answers the subsystem's requests from the mock
+	/// data. It returns once as many chunks as there are entries in `valid_chunks` got stored and
+	/// panics if a chunk gets stored that is not in `valid_chunks`.
+	async fn run_inner(self, executor: TaskExecutor, virtual_overseer: TestSubsystemContextHandle<AvailabilityDistributionMessage>) {
+		// We skip genesis here (in reality ActiveLeavesUpdate can also skip a block):
+		let updates: Vec<_> = self.relay_chain.iter()
+			.zip(self.relay_chain.iter().skip(1))
+			.map(|(old, new)| ActiveLeavesUpdate {
+				activated: smallvec![(*new, Arc::new(jaeger::Span::Disabled))],
+				deactivated: smallvec![*old],
+			})
+			.collect();
+
+		// We should be storing all valid chunks during execution:
+		//
+		// Test will fail if this does not happen until timeout.
+		let mut remaining_stores = self.valid_chunks.len();
+
+		let TestSubsystemContextHandle { tx, mut rx } = virtual_overseer;
+
+		// Spawning necessary as incoming queue can only hold a single item, we don't want to
+		// deadlock ;-)
+		let update_tx = tx.clone();
+		let send_updates = async move {
+			for update in updates {
+				overseer_signal(update_tx.clone(), OverseerSignal::ActiveLeaves(update)).await;
+				// We need to give the subsystem a little time to do its job, otherwise it will
+				// cancel jobs as obsolete:
+				Delay::new(Duration::from_millis(20)).await;
+			}
+		};
+		executor.spawn("Sending active leaves updates", send_updates.boxed());
+
+		while remaining_stores > 0 {
+			tracing::trace!(target: LOG_TARGET, remaining_stores, "Stores left to go");
+			match overseer_recv(&mut rx).await {
+				AllMessages::NetworkBridge(NetworkBridgeMessage::SendRequests(reqs)) => {
+					// Forward requests:
+					for req in reqs {
+						let in_req = to_incoming_req(&executor, req);
+						let forward = overseer_send(
+							tx.clone(),
+							AvailabilityDistributionMessage::AvailabilityFetchingRequest(in_req)
+						);
+						executor.spawn("Request forwarding", forward.boxed());
+					}
+				}
+				AllMessages::AvailabilityStore(
+					AvailabilityStoreMessage::QueryChunk(candidate_hash, validator_index, response_tx)
+				) => {
+					let chunk = self.chunks.get(&(candidate_hash, validator_index)).cloned();
+					response_tx.send(chunk)
+						.expect("Receiver is expected to be alive");
+				}
+				AllMessages::AvailabilityStore(
+					AvailabilityStoreMessage::StoreChunk { candidate_hash, chunk, tx: response_tx, .. }
+				) => {
+					let key = (candidate_hash, chunk.index);
+					assert!(
+						self.valid_chunks.contains(&key),
+						"Only valid chunks should ever get stored."
+					);
+					response_tx.send(Ok(()))
+						.expect("Receiver is expected to be alive");
+					tracing::trace!(target: LOG_TARGET, "'Stored' fetched chunk.");
+					remaining_stores -= 1;
+				}
+				AllMessages::RuntimeApi(RuntimeApiMessage::Request(hash, req)) => match req {
+					RuntimeApiRequest::SessionIndexForChild(response_tx) => {
+						// Always session index 1 for now:
+						response_tx.send(Ok(1))
+							.expect("Receiver should still be alive");
+					}
+					RuntimeApiRequest::SessionInfo(_, response_tx) => {
+						response_tx.send(Ok(Some(self.session_info.clone())))
+							.expect("Receiver should be alive.");
+					}
+					RuntimeApiRequest::AvailabilityCores(response_tx) => {
+						tracing::trace!(target: LOG_TARGET, cores= ?self.cores[&hash], hash = ?hash, "Sending out cores for hash");
+						response_tx.send(Ok(self.cores[&hash].clone()))
+							.expect("Receiver should still be alive");
+					}
+					other => panic!("Unexpected runtime request: {:?}", other),
+				},
+				other => panic!("Unexpected message received: {:?}", other),
+			}
+		}
+	}
+}
+
+
+
+/// Send `msg` to the subsystem as an overseer signal.
+///
+/// Panics if the signal cannot be delivered.
+async fn overseer_signal(
+	mut tx: SingleItemSink<FromOverseer<AvailabilityDistributionMessage>>,
+	msg: impl Into<OverseerSignal>,
+) {
+	let signal: OverseerSignal = msg.into();
+	tracing::trace!(target: LOG_TARGET, msg = ?signal, "sending message");
+	let delivery = tx.send(FromOverseer::Signal(signal)).await;
+	delivery.expect("Test subsystem no longer live");
+}
+
+/// Send `msg` to the subsystem as a regular message.
+///
+/// Panics if the message cannot be delivered.
+async fn overseer_send(
+	mut tx: SingleItemSink<FromOverseer<AvailabilityDistributionMessage>>,
+	msg: impl Into<AvailabilityDistributionMessage>,
+) {
+	let message: AvailabilityDistributionMessage = msg.into();
+	tracing::trace!(target: LOG_TARGET, msg = ?message, "sending message");
+	let delivery = tx.send(FromOverseer::Communication { msg: message }).await;
+	delivery.expect("Test subsystem no longer live");
+	tracing::trace!(target: LOG_TARGET, "sent message");
+}
+
+
+/// Wait for the next message sent out by the subsystem.
+///
+/// Panics if the stream has ended.
+async fn overseer_recv(
+	rx: &mut mpsc::UnboundedReceiver<AllMessages>,
+) -> AllMessages {
+	tracing::trace!(target: LOG_TARGET, "waiting for message ...");
+	let received = rx.next().await;
+	received.expect("Test subsystem no longer live")
+}
+
+/// Turn an outgoing availability fetching request into the corresponding incoming request.
+///
+/// A task is spawned on `executor` which waits for the response given to the incoming request
+/// and passes its result on to the outgoing request's `pending_response`. Any error result is
+/// reported as `RequestFailure::Refused`.
+fn to_incoming_req(
+	executor: &TaskExecutor,
+	outgoing: Requests
+) -> IncomingRequest<v1::AvailabilityFetchingRequest> {
+	match outgoing {
+		Requests::AvailabilityFetching(OutgoingRequest { payload, pending_response, .. }) => {
+			let (response_tx, response_rx) = oneshot::channel::<netconfig::OutgoingResponse>();
+
+			let forward_response = async move {
+				let response = response_rx.await.expect("Unexpected canceled request");
+				let result = response.result.map_err(|_| network::RequestFailure::Refused);
+				pending_response.send(result)
+					.expect("Sending response is expected to work");
+			};
+			executor.spawn("Message forwarding", forward_response.boxed());
+
+			// We don't really care about the peer id:
+			let peer = network::PeerId::random();
+			IncomingRequest::new(peer, payload, response_tx)
+		}
+	}
+}