feat: initialize Kurdistan SDK - independent fork of Polkadot SDK

This commit is contained in:
2025-12-13 15:44:15 +03:00
commit e4778b4576
6838 changed files with 1847450 additions and 0 deletions
@@ -0,0 +1,126 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Pezkuwi.
// Pezkuwi is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Pezkuwi is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
//
//! Error handling related code and Error/Result definitions.
use fatality::Nested;
use pezkuwi_node_network_protocol::request_response::outgoing::RequestError;
use pezkuwi_primitives::SessionIndex;
use futures::channel::oneshot;
use pezkuwi_node_subsystem::{ChainApiError, RuntimeApiError, SubsystemError};
use pezkuwi_node_subsystem_util::runtime;
use crate::LOG_TARGET;
#[allow(missing_docs)]
#[fatality::fatality(splitable)]
pub enum Error {
	// --- Fatal variants: the subsystem cannot reasonably continue. ---
	#[fatal]
	#[error("Spawning subsystem task failed: {0}")]
	SpawnTask(#[source] SubsystemError),
	#[fatal]
	#[error("Erasure chunk requester stream exhausted")]
	RequesterExhausted,
	#[fatal]
	#[error("Receive channel closed: {0}")]
	IncomingMessageChannel(#[source] SubsystemError),
	// `forward` lets the wrapped `runtime::Error` decide fatality itself.
	#[fatal(forward)]
	#[error("Error while accessing runtime information: {0}")]
	Runtime(#[from] runtime::Error),
	#[fatal]
	#[error("Oneshot for receiving response from Chain API got cancelled")]
	ChainApiSenderDropped(#[from] oneshot::Canceled),
	#[fatal]
	#[error("Retrieving response from Chain API unexpectedly failed with error: {0}")]
	ChainApi(#[from] ChainApiError),
	// --- Non-fatal ("JFYI") variants: logged by `log_error`, processing continues. ---
	#[error("Failed to get node features from the runtime")]
	FailedNodeFeatures(#[source] RuntimeApiError),
	// av-store will drop the sender on any error that happens.
	#[error("Response channel to obtain chunk failed")]
	QueryChunkResponseChannel(#[source] oneshot::Canceled),
	// av-store will drop the sender on any error that happens.
	#[error("Response channel to obtain available data failed")]
	QueryAvailableDataResponseChannel(#[source] oneshot::Canceled),
	// We tried accessing a session that was not cached.
	#[error("Session {missing_session} is not cached, cached sessions: {available_sessions:?}.")]
	NoSuchCachedSession { available_sessions: Vec<SessionIndex>, missing_session: SessionIndex },
	// Sending request response failed (Can happen on timeouts for example).
	#[error("Sending a request's response failed.")]
	SendResponse,
	#[error("FetchPoV request error: {0}")]
	FetchPoV(#[source] RequestError),
	#[error("Fetched PoV does not match expected hash")]
	UnexpectedPoV,
	#[error("Remote responded with `NoSuchPoV`")]
	NoSuchPoV,
	#[error("Given validator index could not be found in current session")]
	InvalidValidatorIndex,
	#[error("Erasure coding error: {0}")]
	ErasureCoding(#[from] pezkuwi_erasure_coding::Error),
}
/// General result abbreviation type alias.
///
/// Defaults the error type to this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
/// Utility for eating top level errors and log them.
///
/// We basically always want to try and continue on error. This utility function is meant to
/// consume top-level errors by simply logging them
pub fn log_error(
result: Result<()>,
ctx: &'static str,
warn_freq: &mut gum::Freq,
) -> std::result::Result<(), FatalError> {
match result.into_nested()? {
Ok(()) => Ok(()),
Err(jfyi) => {
match jfyi {
JfyiError::UnexpectedPoV |
JfyiError::InvalidValidatorIndex |
JfyiError::NoSuchCachedSession { .. } |
JfyiError::QueryAvailableDataResponseChannel(_) |
JfyiError::QueryChunkResponseChannel(_) |
JfyiError::FailedNodeFeatures(_) |
JfyiError::ErasureCoding(_) => gum::warn!(target: LOG_TARGET, error = %jfyi, ctx),
JfyiError::FetchPoV(_) |
JfyiError::SendResponse |
JfyiError::NoSuchPoV |
JfyiError::Runtime(_) => {
gum::warn_if_frequent!(freq: warn_freq, max_rate: gum::Times::PerHour(100), target: LOG_TARGET, error = ?jfyi, ctx)
},
}
Ok(())
},
}
}
@@ -0,0 +1,199 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Pezkuwi.
// Pezkuwi is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Pezkuwi is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
use futures::{future::Either, FutureExt, StreamExt, TryFutureExt};
use sp_keystore::KeystorePtr;
use pezkuwi_node_network_protocol::request_response::{
v1, v2, IncomingRequestReceiver, ReqProtocolNames,
};
use pezkuwi_node_subsystem::{
messages::AvailabilityDistributionMessage, overseer, FromOrchestra, OverseerSignal,
SpawnedSubsystem, SubsystemError,
};
/// Error and [`Result`] type for this subsystem.
mod error;
use error::{log_error, FatalError, Result};
use pezkuwi_node_subsystem_util::runtime::RuntimeInfo;
/// `Requester` taking care of requesting chunks for candidates pending availability.
mod requester;
use requester::Requester;
/// Handing requests for PoVs during backing.
mod pov_requester;
/// Responding to erasure chunk requests:
mod responder;
use responder::{run_chunk_receivers, run_pov_receiver};
mod metrics;
/// Prometheus `Metrics` for availability distribution.
pub use metrics::Metrics;
#[cfg(test)]
mod tests;
const LOG_TARGET: &'static str = "teyrchain::availability-distribution";
/// The availability distribution subsystem.
///
/// Spawns responders for incoming PoV/chunk requests and drives the chunk
/// `Requester` from its main loop (see [`Self::run`]).
pub struct AvailabilityDistributionSubsystem {
	/// Easy and efficient runtime access for this subsystem.
	runtime: RuntimeInfo,
	/// Receivers to receive messages from.
	recvs: IncomingRequestReceivers,
	/// Mapping of the req-response protocols to the full protocol names.
	req_protocol_names: ReqProtocolNames,
	/// Prometheus metrics.
	metrics: Metrics,
}
/// Receivers to be passed into availability distribution.
///
/// Fields are public so the caller (overseer setup code) can construct this directly.
pub struct IncomingRequestReceivers {
	/// Receiver for incoming PoV requests.
	pub pov_req_receiver: IncomingRequestReceiver<v1::PoVFetchingRequest>,
	/// Receiver for incoming v1 availability chunk requests.
	pub chunk_req_v1_receiver: IncomingRequestReceiver<v1::ChunkFetchingRequest>,
	/// Receiver for incoming v2 availability chunk requests.
	pub chunk_req_v2_receiver: IncomingRequestReceiver<v2::ChunkFetchingRequest>,
}
#[overseer::subsystem(AvailabilityDistribution, error=SubsystemError, prefix=self::overseer)]
impl<Context> AvailabilityDistributionSubsystem {
	// Overseer entry point: box the main loop and tag errors with this
	// subsystem's origin so failures are attributable in overseer logs.
	fn start(self, ctx: Context) -> SpawnedSubsystem {
		let future = self
			.run(ctx)
			.map_err(|e| SubsystemError::with_origin("availability-distribution", e))
			.boxed();
		SpawnedSubsystem { name: "availability-distribution-subsystem", future }
	}
}
#[overseer::contextbounds(AvailabilityDistribution, prefix = self::overseer)]
impl AvailabilityDistributionSubsystem {
	/// Create a new instance of the availability distribution.
	pub fn new(
		keystore: KeystorePtr,
		recvs: IncomingRequestReceivers,
		req_protocol_names: ReqProtocolNames,
		metrics: Metrics,
	) -> Self {
		let runtime = RuntimeInfo::new(Some(keystore));
		Self { runtime, recvs, req_protocol_names, metrics }
	}

	/// Start processing work as passed on from the Overseer.
	///
	/// Spawns the PoV and chunk responder tasks, then loops forever multiplexing
	/// between overseer messages and output of the chunk `Requester`. Returns
	/// `Ok(())` on `Conclude` and `Err` only on fatal conditions.
	async fn run<Context>(self, mut ctx: Context) -> std::result::Result<(), FatalError> {
		let Self { mut runtime, recvs, metrics, req_protocol_names } = self;
		let IncomingRequestReceivers {
			pov_req_receiver,
			chunk_req_v1_receiver,
			chunk_req_v2_receiver,
		} = recvs;
		let mut requester = Requester::new(req_protocol_names, metrics.clone()).fuse();
		let mut warn_freq = gum::Freq::new();
		// Spawn long-running responder tasks serving incoming requests.
		{
			let sender = ctx.sender().clone();
			ctx.spawn(
				"pov-receiver",
				run_pov_receiver(sender.clone(), pov_req_receiver, metrics.clone()).boxed(),
			)
			.map_err(FatalError::SpawnTask)?;
			ctx.spawn(
				"chunk-receiver",
				run_chunk_receivers(
					sender,
					chunk_req_v1_receiver,
					chunk_req_v2_receiver,
					metrics.clone(),
				)
				.boxed(),
			)
			.map_err(FatalError::SpawnTask)?;
		}
		loop {
			let action = {
				let mut subsystem_next = ctx.recv().fuse();
				futures::select! {
					subsystem_msg = subsystem_next => Either::Left(subsystem_msg),
					from_task = requester.next() => Either::Right(from_task),
				}
			};
			// Handle task messages sending:
			let message = match action {
				Either::Left(subsystem_msg) =>
					// Point-free variant; `|e| FatalError::IncomingMessageChannel(e)`
					// was a redundant closure (clippy::redundant_closure).
					subsystem_msg.map_err(FatalError::IncomingMessageChannel)?,
				Either::Right(from_task) => {
					// The requester stream ending is a bug — treat as fatal.
					let from_task = from_task.ok_or(FatalError::RequesterExhausted)?;
					ctx.send_message(from_task).await;
					continue;
				},
			};
			match message {
				FromOrchestra::Signal(OverseerSignal::ActiveLeaves(update)) => {
					log_error(
						requester
							.get_mut()
							.update_fetching_heads(&mut ctx, &mut runtime, update)
							.await,
						"Error in Requester::update_fetching_heads",
						&mut warn_freq,
					)?;
				},
				// Finality is irrelevant to availability distribution.
				FromOrchestra::Signal(OverseerSignal::BlockFinalized(_hash, _finalized_number)) => {
				},
				FromOrchestra::Signal(OverseerSignal::Conclude) => return Ok(()),
				FromOrchestra::Communication {
					msg:
						AvailabilityDistributionMessage::FetchPoV {
							relay_parent,
							from_validator,
							para_id,
							candidate_hash,
							pov_hash,
							tx,
						},
				} => {
					log_error(
						pov_requester::fetch_pov(
							&mut ctx,
							&mut runtime,
							relay_parent,
							from_validator,
							para_id,
							candidate_hash,
							pov_hash,
							tx,
							metrics.clone(),
						)
						.await,
						"pov_requester::fetch_pov",
						&mut warn_freq,
					)?;
				},
			}
		}
	}
}
@@ -0,0 +1,156 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Pezkuwi.
// Pezkuwi is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Pezkuwi is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
use pezkuwi_node_subsystem_util::{
metrics,
metrics::{
prometheus,
prometheus::{Counter, CounterVec, Opts, PrometheusError, Registry, U64},
},
};
// `'static` is implied on const references; spelling it out trips
// clippy::redundant_static_lifetimes.
/// Label for success counters.
pub const SUCCEEDED: &str = "succeeded";
/// Label for fail counters.
pub const FAILED: &str = "failed";
/// Label for chunks/PoVs that could not be served, because they were not available.
pub const NOT_FOUND: &str = "not-found";
/// Availability Distribution metrics.
///
/// The inner value is `None` for a "dummy" instance (see `new_dummy`), in which
/// case all recording methods are no-ops.
#[derive(Clone, Default)]
pub struct Metrics(Option<MetricsInner>);
// Actual counters; only present once registered via `try_register`.
#[derive(Clone)]
struct MetricsInner {
	/// Number of chunks fetched.
	///
	/// Note: The failed count gets incremented, when we were not able to fetch the chunk at all.
	/// For times, where we failed downloading, but succeeded on the next try (with different
	/// backers), see `retries`.
	fetched_chunks: CounterVec<U64>,
	/// Number of chunks served.
	served_chunks: CounterVec<U64>,
	/// Number of received fetch PoV responses.
	fetched_povs: CounterVec<U64>,
	/// Number of PoVs served.
	served_povs: CounterVec<U64>,
	/// Number of times our first set of validators did not provide the needed chunk and we had to
	/// query further validators.
	retries: Counter<U64>,
}
impl Metrics {
	/// Create new dummy metrics, not reporting anything.
	pub fn new_dummy() -> Self {
		Metrics(None)
	}
	/// Record the outcome of a chunk fetch under the given label.
	pub fn on_fetch(&self, label: &'static str) {
		match &self.0 {
			Some(inner) => inner.fetched_chunks.with_label_values(&[label]).inc(),
			None => (),
		}
	}
	/// Record a served chunk under the given label.
	pub fn on_served_chunk(&self, label: &'static str) {
		match &self.0 {
			Some(inner) => inner.served_chunks.with_label_values(&[label]).inc(),
			None => (),
		}
	}
	/// Record a received PoV fetch response under the given label.
	pub fn on_fetched_pov(&self, label: &'static str) {
		match &self.0 {
			Some(inner) => inner.fetched_povs.with_label_values(&[label]).inc(),
			None => (),
		}
	}
	/// Record a served PoV under the given label.
	pub fn on_served_pov(&self, label: &'static str) {
		match &self.0 {
			Some(inner) => inner.served_povs.with_label_values(&[label]).inc(),
			None => (),
		}
	}
	/// Record that we had to ask additional validators for a chunk.
	pub fn on_retry(&self) {
		match &self.0 {
			Some(inner) => inner.retries.inc(),
			None => (),
		}
	}
}
impl metrics::Metrics for Metrics {
	// Register every counter with the given Prometheus registry; on success the
	// returned `Metrics` will actually record values.
	// NOTE(review): the label dimension is named "success" while the recorded values
	// are "succeeded"/"failed"/"not-found" — presumably historical; confirm before
	// renaming, as dashboards may depend on it.
	fn try_register(registry: &Registry) -> Result<Self, PrometheusError> {
		let metrics = MetricsInner {
			fetched_chunks: prometheus::register(
				CounterVec::new(
					Opts::new(
						"pezkuwi_teyrchain_fetched_chunks_total",
						"Total number of fetched chunks.",
					),
					&["success"]
				)?,
				registry,
			)?,
			served_chunks: prometheus::register(
				CounterVec::new(
					Opts::new(
						"pezkuwi_teyrchain_served_chunks_total",
						"Total number of chunks served by this backer.",
					),
					&["success"]
				)?,
				registry,
			)?,
			fetched_povs: prometheus::register(
				CounterVec::new(
					Opts::new(
						"pezkuwi_teyrchain_fetched_povs_total",
						"Total number of povs fetches by this backer.",
					),
					&["success"]
				)?,
				registry,
			)?,
			served_povs: prometheus::register(
				CounterVec::new(
					Opts::new(
						"pezkuwi_teyrchain_served_povs_total",
						"Total number of povs served by this backer.",
					),
					&["success"]
				)?,
				registry,
			)?,
			retries: prometheus::register(
				Counter::new(
					"pezkuwi_teyrchain_fetch_retries_total",
					"Number of times we did not succeed in fetching a chunk and needed to try more backers.",
				)?,
				registry,
			)?,
		};
		Ok(Metrics(Some(metrics)))
	}
}
@@ -0,0 +1,239 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Pezkuwi.
// Pezkuwi is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Pezkuwi is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
//! PoV requester takes care of requesting PoVs from validators of a backing group.
use futures::{channel::oneshot, future::BoxFuture, FutureExt};
use pezkuwi_node_network_protocol::request_response::{
outgoing::{RequestError, Requests},
v1::{PoVFetchingRequest, PoVFetchingResponse},
OutgoingRequest, Recipient,
};
use pezkuwi_node_primitives::PoV;
use pezkuwi_node_subsystem::{
messages::{IfDisconnected, NetworkBridgeTxMessage},
overseer,
};
use pezkuwi_node_subsystem_util::runtime::RuntimeInfo;
use pezkuwi_primitives::{AuthorityDiscoveryId, CandidateHash, Hash, Id as ParaId, ValidatorIndex};
use crate::{
error::{Error, FatalError, JfyiError, Result},
metrics::{FAILED, NOT_FOUND, SUCCEEDED},
Metrics, LOG_TARGET,
};
/// Start background worker for taking care of fetching the requested `PoV` from the network.
///
/// Sends a `PoVFetchingV1` request to the validator given by `from_validator` and
/// spawns a "pov-fetcher" background task awaiting the response; on success the
/// fetched `PoV` is delivered through `tx`.
///
/// Errors:
/// - Non-fatal `InvalidValidatorIndex` when `from_validator` is not found in the
///   session's discovery keys.
/// - Fatal `SpawnTask` when the background task could not be spawned.
#[overseer::contextbounds(AvailabilityDistribution, prefix = self::overseer)]
pub async fn fetch_pov<Context>(
	ctx: &mut Context,
	runtime: &mut RuntimeInfo,
	parent: Hash,
	from_validator: ValidatorIndex,
	para_id: ParaId,
	candidate_hash: CandidateHash,
	pov_hash: Hash,
	tx: oneshot::Sender<PoV>,
	metrics: Metrics,
) -> Result<()> {
	let info = &runtime.get_session_info(ctx.sender(), parent).await?.session_info;
	let authority_id = info
		.discovery_keys
		.get(from_validator.0 as usize)
		.ok_or(JfyiError::InvalidValidatorIndex)?
		.clone();
	let (req, pending_response) = OutgoingRequest::new(
		Recipient::Authority(authority_id.clone()),
		PoVFetchingRequest { candidate_hash },
	);
	let full_req = Requests::PoVFetchingV1(req);
	ctx.send_message(NetworkBridgeTxMessage::SendRequests(
		vec![full_req],
		IfDisconnected::ImmediateError,
	))
	.await;
	ctx.spawn(
		"pov-fetcher",
		fetch_pov_job(para_id, pov_hash, authority_id, pending_response.boxed(), tx, metrics)
			.boxed(),
	)
	// Point-free form; `|e| FatalError::SpawnTask(e)` was a redundant closure
	// (clippy::redundant_closure).
	.map_err(FatalError::SpawnTask)?;
	Ok(())
}
/// Background future handling reception of a PoV response and forwarding the result.
///
/// Any error is logged together with enough context to identify the request.
async fn fetch_pov_job(
	para_id: ParaId,
	pov_hash: Hash,
	authority_id: AuthorityDiscoveryId,
	pending_response: BoxFuture<'static, std::result::Result<PoVFetchingResponse, RequestError>>,
	tx: oneshot::Sender<PoV>,
	metrics: Metrics,
) {
	let outcome = do_fetch_pov(pov_hash, pending_response, tx, metrics).await;
	match outcome {
		Ok(()) => {},
		Err(err) =>
			gum::warn!(target: LOG_TARGET, ?err, ?para_id, ?pov_hash, ?authority_id, "fetch_pov_job"),
	}
}
/// Do the actual work of waiting for the response.
async fn do_fetch_pov(
pov_hash: Hash,
pending_response: BoxFuture<'static, std::result::Result<PoVFetchingResponse, RequestError>>,
tx: oneshot::Sender<PoV>,
metrics: Metrics,
) -> Result<()> {
let response = pending_response.await.map_err(Error::FetchPoV);
let pov = match response {
Ok(PoVFetchingResponse::PoV(pov)) => pov,
Ok(PoVFetchingResponse::NoSuchPoV) => {
metrics.on_fetched_pov(NOT_FOUND);
return Err(Error::NoSuchPoV);
},
Err(err) => {
metrics.on_fetched_pov(FAILED);
return Err(err);
},
};
if pov.hash() == pov_hash {
metrics.on_fetched_pov(SUCCEEDED);
tx.send(pov).map_err(|_| Error::SendResponse)
} else {
metrics.on_fetched_pov(FAILED);
Err(Error::UnexpectedPoV)
}
}
#[cfg(test)]
mod tests {
	use assert_matches::assert_matches;
	use futures::{executor, future};
	use codec::Encode;
	use sc_network::ProtocolName;
	use sp_core::testing::TaskExecutor;
	use pezkuwi_node_primitives::BlockData;
	use pezkuwi_node_subsystem::messages::{
		AllMessages, AvailabilityDistributionMessage, RuntimeApiMessage, RuntimeApiRequest,
	};
	use pezkuwi_node_subsystem_test_helpers as test_helpers;
	use pezkuwi_primitives::{CandidateHash, ExecutorParams, Hash, NodeFeatures, ValidatorIndex};
	use test_helpers::mock::make_ferdie_keystore;
	use super::*;
	use crate::{tests::mock::make_session_info, LOG_TARGET};

	/// A PoV whose hash differs from the expected one must not be delivered:
	/// the response channel is dropped instead (see `test_run`).
	#[test]
	fn rejects_invalid_pov() {
		sp_tracing::try_init_simple();
		let pov = PoV { block_data: BlockData(vec![1, 2, 3, 4, 5, 6]) };
		// `Hash::default()` cannot be the real hash of `pov`.
		test_run(Hash::default(), pov);
	}

	/// A PoV matching the expected hash is forwarded to the requester.
	#[test]
	fn accepts_valid_pov() {
		sp_tracing::try_init_simple();
		let pov = PoV { block_data: BlockData(vec![1, 2, 3, 4, 5, 6]) };
		test_run(pov.hash(), pov);
	}

	/// Drive `fetch_pov` against a mocked overseer: answer the runtime API
	/// queries it performs, then serve the network request with `pov` and
	/// assert delivery (or non-delivery) based on whether `pov_hash` matches.
	fn test_run(pov_hash: Hash, pov: PoV) {
		let pool = TaskExecutor::new();
		let (mut context, mut virtual_overseer) =
			pezkuwi_node_subsystem_test_helpers::make_subsystem_context::<
				AvailabilityDistributionMessage,
				TaskExecutor,
			>(pool.clone());
		let keystore = make_ferdie_keystore();
		let mut runtime = pezkuwi_node_subsystem_util::runtime::RuntimeInfo::new(Some(keystore));
		let (tx, rx) = oneshot::channel();
		let testee = async {
			fetch_pov(
				&mut context,
				&mut runtime,
				Hash::default(),
				ValidatorIndex(0),
				ParaId::default(),
				CandidateHash::default(),
				pov_hash,
				tx,
				Metrics::new_dummy(),
			)
			.await
			.expect("Should succeed");
		};
		let tester = async move {
			// Answer whatever runtime API requests `fetch_pov` makes until the
			// network request appears; order is not assumed.
			loop {
				match virtual_overseer.recv().await {
					AllMessages::RuntimeApi(RuntimeApiMessage::Request(
						_,
						RuntimeApiRequest::SessionIndexForChild(tx),
					)) => {
						tx.send(Ok(0)).unwrap();
					},
					AllMessages::RuntimeApi(RuntimeApiMessage::Request(
						_,
						RuntimeApiRequest::SessionInfo(_, tx),
					)) => {
						tx.send(Ok(Some(make_session_info()))).unwrap();
					},
					AllMessages::RuntimeApi(RuntimeApiMessage::Request(
						_,
						RuntimeApiRequest::SessionExecutorParams(_, tx),
					)) => {
						tx.send(Ok(Some(ExecutorParams::default()))).unwrap();
					},
					AllMessages::RuntimeApi(RuntimeApiMessage::Request(
						_,
						RuntimeApiRequest::NodeFeatures(_, si_tx),
					)) => {
						si_tx.send(Ok(NodeFeatures::EMPTY)).unwrap();
					},
					AllMessages::NetworkBridgeTx(NetworkBridgeTxMessage::SendRequests(
						mut reqs,
						_,
					)) => {
						// Serve the outgoing PoV request with our test PoV.
						let req = assert_matches!(
							reqs.pop(),
							Some(Requests::PoVFetchingV1(outgoing)) => {outgoing}
						);
						req.pending_response
							.send(Ok((
								PoVFetchingResponse::PoV(pov.clone()).encode(),
								ProtocolName::from(""),
							)))
							.unwrap();
						break;
					},
					msg => gum::debug!(target: LOG_TARGET, msg = ?msg, "Received msg"),
				}
			}
			if pov.hash() == pov_hash {
				assert_eq!(rx.await, Ok(pov));
			} else {
				// Mismatching PoV: the fetcher drops `tx` without sending.
				assert_eq!(rx.await, Err(oneshot::Canceled));
			}
		};
		futures::pin_mut!(testee);
		futures::pin_mut!(tester);
		executor::block_on(future::join(testee, tester));
	}
}
@@ -0,0 +1,560 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Pezkuwi.
// Pezkuwi is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Pezkuwi is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
use std::collections::HashSet;
use futures::{
channel::{mpsc, oneshot},
future::select,
FutureExt, SinkExt,
};
use codec::Decode;
use pezkuwi_erasure_coding::branch_hash;
use pezkuwi_node_network_protocol::request_response::{
outgoing::{OutgoingRequest, Recipient, RequestError, Requests},
v1::{self, ChunkResponse},
v2,
};
use pezkuwi_node_primitives::ErasureChunk;
use pezkuwi_node_subsystem::{
messages::{AvailabilityStoreMessage, IfDisconnected, NetworkBridgeTxMessage},
overseer,
};
use pezkuwi_primitives::{
AuthorityDiscoveryId, BlakeTwo256, CandidateHash, ChunkIndex, GroupIndex, Hash, HashT,
OccupiedCore, SessionIndex,
};
use sc_network::ProtocolName;
use crate::{
error::{FatalError, Result},
metrics::{Metrics, FAILED, SUCCEEDED},
requester::session_cache::{BadValidators, SessionInfo},
LOG_TARGET,
};
#[cfg(test)]
mod tests;
/// Configuration for a `FetchTask`
///
/// This exists to separate preparation of a `FetchTask` from actually starting it, which is
/// beneficial as this allows for taking session info by reference.
pub struct FetchTaskConfig {
	// The prepared task; `None` when our own backing group is responsible for the
	// candidate (nothing needs fetching then — see `FetchTaskConfig::new`).
	prepared_running: Option<RunningTask>,
	// Relay parents for which this candidate is considered live.
	live_in: HashSet<Hash>,
}
/// Information about a task fetching an erasure chunk.
pub struct FetchTask {
	/// For what relay parents this task is relevant.
	///
	/// In other words, for which relay chain parents this candidate is considered live.
	/// This is updated on every `ActiveLeavesUpdate` and enables us to know when we can safely
	/// stop keeping track of that candidate/chunk.
	pub(crate) live_in: HashSet<Hash>,
	/// We keep the task around until `live_in` becomes empty, to make
	/// sure we won't re-fetch an already fetched candidate.
	state: FetchedState,
}
/// State of a particular candidate chunk fetching process.
enum FetchedState {
	/// Chunk fetch has started.
	///
	/// Once the contained `Sender` is dropped, any still running task will be canceled.
	Started(oneshot::Sender<()>),
	/// All relevant `live_in` have been removed, before we were able to get our chunk.
	Canceled,
}
/// Messages sent from `FetchTask`s to be handled/forwarded.
pub enum FromFetchTask {
	/// Message to other subsystem.
	Message(overseer::AvailabilityDistributionOutgoingMessages),
	/// Concluded with result.
	///
	/// In case of `None` everything was fine, in case of `Some`, some validators in the group
	/// did not serve us our chunk as expected.
	Concluded(Option<BadValidators>),
	/// We were not able to fetch the desired chunk for the given `CandidateHash`.
	Failed(CandidateHash),
}
/// Information a running task needs.
struct RunningTask {
	/// For what session we have been spawned.
	session_index: SessionIndex,
	/// Index of validator group to fetch the chunk from.
	///
	/// Needed for reporting bad validators.
	group_index: GroupIndex,
	/// Validators to request the chunk from.
	///
	/// This vector gets drained during execution of the task (it will be empty afterwards).
	group: Vec<AuthorityDiscoveryId>,
	/// The request to send. We can store it as either v1 or v2, they have the same payload.
	request: v2::ChunkFetchingRequest,
	/// Root hash, for verifying the chunks validity.
	erasure_root: Hash,
	/// Relay parent of the candidate to fetch.
	relay_parent: Hash,
	/// Sender for communicating with other subsystems and reporting results.
	sender: mpsc::Sender<FromFetchTask>,
	/// Prometheus metrics for reporting results.
	metrics: Metrics,
	/// Expected chunk index. We'll validate that the remote did send us the correct chunk (only
	/// important for v2 requests).
	chunk_index: ChunkIndex,
	/// Full protocol name for ChunkFetchingV1.
	req_v1_protocol_name: ProtocolName,
	/// Full protocol name for ChunkFetchingV2.
	req_v2_protocol_name: ProtocolName,
}
impl FetchTaskConfig {
/// Create a new configuration for a [`FetchTask`].
///
/// The result of this function can be passed into [`FetchTask::start`].
pub fn new(
leaf: Hash,
core: &OccupiedCore,
sender: mpsc::Sender<FromFetchTask>,
metrics: Metrics,
session_info: &SessionInfo,
chunk_index: ChunkIndex,
req_v1_protocol_name: ProtocolName,
req_v2_protocol_name: ProtocolName,
) -> Self {
let live_in = vec![leaf].into_iter().collect();
// Don't run tasks for our backing group:
if session_info.our_group == Some(core.group_responsible) {
return FetchTaskConfig { live_in, prepared_running: None };
}
let prepared_running = RunningTask {
session_index: session_info.session_index,
group_index: core.group_responsible,
group: session_info.validator_groups.get(core.group_responsible.0 as usize)
.expect("The responsible group of a candidate should be available in the corresponding session. qed.")
.clone(),
request: v2::ChunkFetchingRequest {
candidate_hash: core.candidate_hash,
index: session_info.our_index,
},
erasure_root: core.candidate_descriptor.erasure_root(),
relay_parent: core.candidate_descriptor.relay_parent(),
metrics,
sender,
chunk_index,
req_v1_protocol_name,
req_v2_protocol_name
};
FetchTaskConfig { live_in, prepared_running: Some(prepared_running) }
}
}
#[overseer::contextbounds(AvailabilityDistribution, prefix = self::overseer)]
impl FetchTask {
	/// Start fetching a chunk.
	///
	/// A task handling the fetching of the configured chunk will be spawned,
	/// unless the config carried no prepared task (our own group is responsible).
	pub async fn start<Context>(config: FetchTaskConfig, ctx: &mut Context) -> Result<Self> {
		let FetchTaskConfig { prepared_running, live_in } = config;
		let state = match prepared_running {
			Some(running) => {
				// Dropping `handle` later cancels the spawned task via `kill`.
				let (handle, kill) = oneshot::channel();
				ctx.spawn("chunk-fetcher", running.run(kill).boxed())
					.map_err(FatalError::SpawnTask)?;
				FetchedState::Started(handle)
			},
			None => FetchedState::Canceled,
		};
		Ok(FetchTask { live_in, state })
	}
	/// Add the given leaf to the relay parents which are making this task relevant.
	///
	/// This is for book keeping, so we know we are already fetching a given chunk.
	pub fn add_leaf(&mut self, leaf: Hash) {
		self.live_in.insert(leaf);
	}
	/// Remove leaves and cancel the task, if it was the last one and the task has still been
	/// fetching.
	pub fn remove_leaves(&mut self, leaves: &HashSet<Hash>) {
		self.live_in.retain(|leaf| !leaves.contains(leaf));
		if self.live_in.is_empty() && !self.is_finished() {
			self.state = FetchedState::Canceled
		}
	}
	/// Whether there are still relay parents around with this candidate pending
	/// availability.
	pub fn is_live(&self) -> bool {
		!self.live_in.is_empty()
	}
	/// Whether this task can be considered finished.
	///
	/// That is, it is either canceled, succeeded or failed.
	pub fn is_finished(&self) -> bool {
		match &self.state {
			FetchedState::Canceled => true,
			// The spawned task dropped its receiver once it concluded.
			FetchedState::Started(sender) => sender.is_canceled(),
		}
	}
}
/// Things that can go wrong in task execution.
#[derive(Debug)]
enum TaskError {
	/// The peer failed to deliver a correct chunk for some reason (has been reported as
	/// appropriate). The task moves on to the next validator in the group.
	PeerError,
	/// This very node is seemingly shutting down (sending of message failed).
	ShuttingDown,
}
impl RunningTask {
/// Run the fetch until it completes or the kill-switch fires.
async fn run(self, kill: oneshot::Receiver<()>) {
	// Wait for completion/or cancel.
	let run_it = self.run_inner();
	futures::pin_mut!(run_it);
	// Whichever resolves first wins; dropping the `kill` sender cancels the fetch.
	let _ = select(run_it, kill).await;
}
/// Fetch and store chunk.
///
/// Try validators in backing group in order, collecting misbehaving validators
/// along the way, until one serves a valid chunk or the group is exhausted.
async fn run_inner(mut self) {
	let mut bad_validators = Vec::new();
	let mut succeeded = false;
	let mut count: u32 = 0;
	let mut network_error_freq = gum::Freq::new();
	let mut canceled_freq = gum::Freq::new();
	// Try validators in reverse order:
	while let Some(validator) = self.group.pop() {
		// Report retries:
		if count > 0 {
			self.metrics.on_retry();
		}
		count += 1;
		// Send request:
		let resp = match self
			.do_request(&validator, &mut network_error_freq, &mut canceled_freq)
			.await
		{
			Ok(resp) => resp,
			Err(TaskError::ShuttingDown) => {
				gum::info!(
					target: LOG_TARGET,
					"Node seems to be shutting down, canceling fetch task"
				);
				self.metrics.on_fetch(FAILED);
				return;
			},
			// Misbehaving/unreachable peer: remember it and try the next one.
			Err(TaskError::PeerError) => {
				bad_validators.push(validator);
				continue;
			},
		};
		let chunk = match resp {
			Some(chunk) => chunk,
			// Valid "don't have it" answer — still counts against the validator.
			None => {
				gum::debug!(
					target: LOG_TARGET,
					validator = ?validator,
					relay_parent = ?self.relay_parent,
					group_index = ?self.group_index,
					session_index = ?self.session_index,
					chunk_index = ?self.request.index,
					candidate_hash = ?self.request.candidate_hash,
					"Validator did not have our chunk"
				);
				bad_validators.push(validator);
				continue;
			},
		};
		// Data genuine?
		if !self.validate_chunk(&validator, &chunk, self.chunk_index) {
			bad_validators.push(validator);
			continue;
		}
		// Ok, let's store it and be happy:
		self.store_chunk(chunk).await;
		succeeded = true;
		break;
	}
	if succeeded {
		self.metrics.on_fetch(SUCCEEDED);
		self.conclude(bad_validators).await;
	} else {
		self.metrics.on_fetch(FAILED);
		self.conclude_fail().await
	}
}
/// Do request and return response, if successful.
///
/// Sends the v2 chunk request with a v1 fallback, then decodes the response
/// according to the protocol the peer answered on. Network and cancellation
/// errors are rate-limited in the logs via the passed `Freq` trackers.
async fn do_request(
	&mut self,
	validator: &AuthorityDiscoveryId,
	network_error_freq: &mut gum::Freq,
	canceled_freq: &mut gum::Freq,
) -> std::result::Result<Option<ErasureChunk>, TaskError> {
	gum::trace!(
		target: LOG_TARGET,
		origin = ?validator,
		relay_parent = ?self.relay_parent,
		group_index = ?self.group_index,
		session_index = ?self.session_index,
		chunk_index = ?self.request.index,
		candidate_hash = ?self.request.candidate_hash,
		"Starting chunk request",
	);
	let (full_request, response_recv) = OutgoingRequest::new_with_fallback(
		Recipient::Authority(validator.clone()),
		self.request,
		// Fallback to v1, for backwards compatibility.
		v1::ChunkFetchingRequest::from(self.request),
	);
	let requests = Requests::ChunkFetching(full_request);
	// A failed send here means the subsystem main loop is gone — treat as shutdown.
	self.sender
		.send(FromFetchTask::Message(
			NetworkBridgeTxMessage::SendRequests(
				vec![requests],
				IfDisconnected::ImmediateError,
			)
			.into(),
		))
		.await
		.map_err(|_| TaskError::ShuttingDown)?;
	match response_recv.await {
		// Got a response: dispatch decoding on the protocol it arrived over.
		Ok((bytes, protocol)) => match protocol {
			_ if protocol == self.req_v2_protocol_name =>
				match v2::ChunkFetchingResponse::decode(&mut &bytes[..]) {
					Ok(chunk_response) => Ok(Option::<ErasureChunk>::from(chunk_response)),
					Err(e) => {
						gum::warn!(
							target: LOG_TARGET,
							origin = ?validator,
							relay_parent = ?self.relay_parent,
							group_index = ?self.group_index,
							session_index = ?self.session_index,
							chunk_index = ?self.request.index,
							candidate_hash = ?self.request.candidate_hash,
							err = ?e,
							"Peer sent us invalid erasure chunk data (v2)"
						);
						Err(TaskError::PeerError)
					},
				},
			_ if protocol == self.req_v1_protocol_name =>
				// v1 responses carry no chunk index; recombine it from the request.
				match v1::ChunkFetchingResponse::decode(&mut &bytes[..]) {
					Ok(chunk_response) => Ok(Option::<ChunkResponse>::from(chunk_response)
						.map(|c| c.recombine_into_chunk(&self.request.into()))),
					Err(e) => {
						gum::warn!(
							target: LOG_TARGET,
							origin = ?validator,
							relay_parent = ?self.relay_parent,
							group_index = ?self.group_index,
							session_index = ?self.session_index,
							chunk_index = ?self.request.index,
							candidate_hash = ?self.request.candidate_hash,
							err = ?e,
							"Peer sent us invalid erasure chunk data"
						);
						Err(TaskError::PeerError)
					},
				},
			// Answer arrived over a protocol we never requested.
			_ => {
				gum::warn!(
					target: LOG_TARGET,
					origin = ?validator,
					relay_parent = ?self.relay_parent,
					group_index = ?self.group_index,
					session_index = ?self.session_index,
					chunk_index = ?self.request.index,
					candidate_hash = ?self.request.candidate_hash,
					"Peer sent us invalid erasure chunk data - unknown protocol"
				);
				Err(TaskError::PeerError)
			},
		},
		Err(RequestError::InvalidResponse(err)) => {
			gum::warn!(
				target: LOG_TARGET,
				origin = ?validator,
				relay_parent = ?self.relay_parent,
				group_index = ?self.group_index,
				session_index = ?self.session_index,
				chunk_index = ?self.request.index,
				candidate_hash = ?self.request.candidate_hash,
				err = ?err,
				"Peer sent us invalid erasure chunk data"
			);
			Err(TaskError::PeerError)
		},
		// Network errors can come in bursts — rate-limit the warnings.
		Err(RequestError::NetworkError(err)) => {
			gum::warn_if_frequent!(
				freq: network_error_freq,
				max_rate: gum::Times::PerHour(100),
				target: LOG_TARGET,
				origin = ?validator,
				relay_parent = ?self.relay_parent,
				group_index = ?self.group_index,
				session_index = ?self.session_index,
				chunk_index = ?self.request.index,
				candidate_hash = ?self.request.candidate_hash,
				err = ?err,
				"Some network error occurred when fetching erasure chunk"
			);
			Err(TaskError::PeerError)
		},
		Err(RequestError::Canceled(oneshot::Canceled)) => {
			gum::warn_if_frequent!(
				freq: canceled_freq,
				max_rate: gum::Times::PerHour(100),
				target: LOG_TARGET,
				origin = ?validator,
				relay_parent = ?self.relay_parent,
				group_index = ?self.group_index,
				session_index = ?self.session_index,
				chunk_index = ?self.request.index,
				candidate_hash = ?self.request.candidate_hash,
				"Erasure chunk request got canceled"
			);
			Err(TaskError::PeerError)
		},
	}
}
/// Check that a chunk received from `validator` is the one we asked for and is consistent
/// with our known erasure root.
///
/// Returns `true` iff the chunk carries the expected index and its data hashes to the leaf
/// derived from `self.erasure_root` via the supplied merkle proof.
fn validate_chunk(
	&self,
	validator: &AuthorityDiscoveryId,
	chunk: &ErasureChunk,
	expected_chunk_index: ChunkIndex,
) -> bool {
	// The peer must deliver exactly the chunk index we requested.
	if chunk.index != expected_chunk_index {
		gum::warn!(
			target: LOG_TARGET,
			candidate_hash = ?self.request.candidate_hash,
			origin = ?validator,
			chunk_index = ?chunk.index,
			expected_chunk_index = ?expected_chunk_index,
			"Validator sent the wrong chunk",
		);
		return false
	}
	// Derive the leaf hash this chunk should have, according to its merkle proof and our
	// trusted erasure root.
	let expected_hash =
		match branch_hash(&self.erasure_root, chunk.proof(), chunk.index.0 as usize) {
			Ok(hash) => hash,
			Err(e) => {
				gum::warn!(
					target: LOG_TARGET,
					candidate_hash = ?self.request.candidate_hash,
					origin = ?validator,
					error = ?e,
					"Failed to calculate chunk merkle proof",
				);
				return false
			},
		};
	let actual_hash = BlakeTwo256::hash(&chunk.chunk);
	if expected_hash == actual_hash {
		true
	} else {
		gum::warn!(target: LOG_TARGET, origin = ?validator, "Received chunk does not match merkle tree");
		false
	}
}
/// Store given chunk and log any error.
async fn store_chunk(&mut self, chunk: ErasureChunk) {
let (tx, rx) = oneshot::channel();
let r = self
.sender
.send(FromFetchTask::Message(
AvailabilityStoreMessage::StoreChunk {
candidate_hash: self.request.candidate_hash,
chunk,
validator_index: self.request.index,
tx,
}
.into(),
))
.await;
if let Err(err) = r {
gum::error!(target: LOG_TARGET, err= ?err, "Storing erasure chunk failed, system shutting down?");
}
if let Err(oneshot::Canceled) = rx.await {
gum::error!(target: LOG_TARGET, "Storing erasure chunk failed");
}
}
/// Tell subsystem we are done.
///
/// An empty `bad_validators` list results in a `Concluded(None)` message; otherwise the
/// misbehaving validators are reported together with our session and group.
async fn conclude(&mut self, bad_validators: Vec<AuthorityDiscoveryId>) {
	// Only attach a payload when there is actually something to report.
	let payload = (!bad_validators.is_empty()).then(|| BadValidators {
		session_index: self.session_index,
		group_index: self.group_index,
		bad_validators,
	});
	if let Err(err) = self.sender.send(FromFetchTask::Concluded(payload)).await {
		gum::warn!(
			target: LOG_TARGET,
			err = ?err,
			"Sending concluded message for task failed"
		);
	}
}
/// Tell subsystem that this task failed for its candidate.
async fn conclude_fail(&mut self) {
	let failed = FromFetchTask::Failed(self.request.candidate_hash);
	if let Err(err) = self.sender.send(failed).await {
		gum::warn!(target: LOG_TARGET, ?err, "Sending `Failed` message for task failed");
	}
}
}
@@ -0,0 +1,400 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Pezkuwi.
// Pezkuwi is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Pezkuwi is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
use std::collections::HashMap;
use codec::Encode;
use futures::{
channel::{mpsc, oneshot},
executor, select,
task::{noop_waker, Context, Poll},
Future, FutureExt, StreamExt,
};
use rstest::rstest;
use sc_network::{self as network, ProtocolName};
use sp_keyring::Sr25519Keyring;
use pezkuwi_node_network_protocol::request_response::{
v1::{self, ChunkResponse},
Protocol, Recipient, ReqProtocolNames,
};
use pezkuwi_node_primitives::{BlockData, PoV, Proof};
use pezkuwi_node_subsystem::messages::AllMessages;
use pezkuwi_primitives::{CandidateHash, ChunkIndex, ValidatorIndex};
use super::*;
use crate::{metrics::Metrics, tests::mock::get_valid_chunk_data};
/// Dropping the kill handle must make the task finish immediately.
#[test]
fn task_can_be_canceled() {
	let protocol_names = ReqProtocolNames::new(&Hash::repeat_byte(0xff), None);
	let (task, _rx) = get_test_running_task(&protocol_names, 0.into(), 0.into());
	// Create the cancellation channel and immediately drop the sender side.
	let (kill_handle, kill) = oneshot::channel();
	drop(kill_handle);
	let fut = task.run(kill);
	futures::pin_mut!(fut);
	let waker = noop_waker();
	let mut cx = Context::from_waker(&waker);
	// A single poll must already yield `Ready` - no network activity may block it.
	assert_eq!(fut.poll(&mut cx), Poll::Ready(()), "Task is immediately finished");
}
/// Make sure task won't accept a chunk that is invalid.
#[rstest]
#[case(Protocol::ChunkFetchingV1)]
#[case(Protocol::ChunkFetchingV2)]
fn task_does_not_accept_invalid_chunk(#[case] protocol: Protocol) {
let req_protocol_names = ReqProtocolNames::new(&Hash::repeat_byte(0xff), None);
let chunk_index = ChunkIndex(1);
let validator_index = ValidatorIndex(0);
let (mut task, rx) = get_test_running_task(&req_protocol_names, validator_index, chunk_index);
let validators = vec![Sr25519Keyring::Alice.public().into()];
task.group = validators;
let protocol_name = req_protocol_names.get_name(protocol);
let test = TestRun {
chunk_responses: {
[(
Recipient::Authority(Sr25519Keyring::Alice.public().into()),
get_response(
protocol,
protocol_name.clone(),
Some((
vec![1, 2, 3],
Proof::try_from(vec![vec![9, 8, 2], vec![2, 3, 4]]).unwrap(),
chunk_index,
)),
),
)]
.into_iter()
.collect()
},
valid_chunks: HashSet::new(),
req_protocol_names,
};
test.run(task, rx);
}
/// A chunk that verifies against the task's erasure root must be stored.
#[rstest]
#[case(Protocol::ChunkFetchingV1)]
#[case(Protocol::ChunkFetchingV2)]
fn task_stores_valid_chunk(#[case] protocol: Protocol) {
	let req_protocol_names = ReqProtocolNames::new(&Hash::repeat_byte(0xff), None);
	// In order for protocol version 1 to work, the chunk index needs to be equal to the validator
	// index.
	let chunk_index = ChunkIndex(0);
	let validator_index =
		if protocol == Protocol::ChunkFetchingV1 { ValidatorIndex(0) } else { ValidatorIndex(1) };
	let (mut task, rx) = get_test_running_task(&req_protocol_names, validator_index, chunk_index);
	let validators = vec![Sr25519Keyring::Alice.public().into()];
	let pov = PoV { block_data: BlockData(vec![45, 46, 47]) };
	// Derive a chunk plus a matching erasure root, so validation inside the task succeeds.
	let (root_hash, chunk) = get_valid_chunk_data(pov, 10, chunk_index);
	task.erasure_root = root_hash;
	task.group = validators;
	let protocol_name = req_protocol_names.get_name(protocol);
	let test = TestRun {
		chunk_responses: {
			[(
				Recipient::Authority(Sr25519Keyring::Alice.public().into()),
				get_response(
					protocol,
					protocol_name.clone(),
					Some((chunk.chunk.clone(), chunk.proof, chunk_index)),
				),
			)]
			.into_iter()
			.collect()
		},
		// The served chunk is expected to be accepted and stored.
		valid_chunks: [(chunk.chunk)].into_iter().collect(),
		req_protocol_names,
	};
	test.run(task, rx);
}
/// A chunk served under the wrong index must be rejected, even if its data is otherwise valid.
#[rstest]
#[case(Protocol::ChunkFetchingV1)]
#[case(Protocol::ChunkFetchingV2)]
fn task_does_not_accept_wrongly_indexed_chunk(#[case] protocol: Protocol) {
	let req_protocol_names = ReqProtocolNames::new(&Hash::repeat_byte(0xff), None);
	// In order for protocol version 1 to work, the chunk index needs to be equal to the validator
	// index.
	let chunk_index = ChunkIndex(0);
	let validator_index =
		if protocol == Protocol::ChunkFetchingV1 { ValidatorIndex(0) } else { ValidatorIndex(1) };
	let (mut task, rx) = get_test_running_task(&req_protocol_names, validator_index, chunk_index);
	let validators = vec![Sr25519Keyring::Alice.public().into()];
	let pov = PoV { block_data: BlockData(vec![45, 46, 47]) };
	// `other_chunk` carries index 3, while the task expects the chunk with index 0.
	let (_, other_chunk) = get_valid_chunk_data(pov.clone(), 10, ChunkIndex(3));
	let (root_hash, chunk) = get_valid_chunk_data(pov, 10, ChunkIndex(0));
	task.erasure_root = root_hash;
	task.request.index = chunk.index.into();
	task.group = validators;
	let protocol_name = req_protocol_names.get_name(protocol);
	let test = TestRun {
		chunk_responses: {
			[(
				Recipient::Authority(Sr25519Keyring::Alice.public().into()),
				// Data and index taken from the wrong chunk, proof from the right one.
				get_response(
					protocol,
					protocol_name.clone(),
					Some((other_chunk.chunk.clone(), chunk.proof, other_chunk.index)),
				),
			)]
			.into_iter()
			.collect()
		},
		// Nothing must be stored.
		valid_chunks: HashSet::new(),
		req_protocol_names,
	};
	test.run(task, rx);
}
/// Task stores chunk, if there is at least one validator having a valid chunk.
#[rstest]
#[case(Protocol::ChunkFetchingV1)]
#[case(Protocol::ChunkFetchingV2)]
fn task_stores_valid_chunk_if_there_is_one(#[case] protocol: Protocol) {
	let req_protocol_names = ReqProtocolNames::new(&Hash::repeat_byte(0xff), None);
	// In order for protocol version 1 to work, the chunk index needs to be equal to the validator
	// index.
	let chunk_index = ChunkIndex(1);
	let validator_index =
		if protocol == Protocol::ChunkFetchingV1 { ValidatorIndex(1) } else { ValidatorIndex(2) };
	let (mut task, rx) = get_test_running_task(&req_protocol_names, validator_index, chunk_index);
	let pov = PoV { block_data: BlockData(vec![45, 46, 47]) };
	let validators = [
		// Only Alice has valid chunk - should succeed, even though she is tried last.
		Sr25519Keyring::Alice,
		Sr25519Keyring::Bob,
		Sr25519Keyring::Charlie,
		Sr25519Keyring::Dave,
		Sr25519Keyring::Eve,
	]
	.iter()
	.map(|v| v.public().into())
	.collect::<Vec<_>>();
	let (root_hash, chunk) = get_valid_chunk_data(pov, 10, chunk_index);
	task.erasure_root = root_hash;
	task.group = validators;
	let protocol_name = req_protocol_names.get_name(protocol);
	let test = TestRun {
		chunk_responses: {
			[
				// Alice serves the correct chunk.
				(
					Recipient::Authority(Sr25519Keyring::Alice.public().into()),
					get_response(
						protocol,
						protocol_name.clone(),
						Some((chunk.chunk.clone(), chunk.proof, chunk_index)),
					),
				),
				// Bob answers with `NoSuchChunk`.
				(
					Recipient::Authority(Sr25519Keyring::Bob.public().into()),
					get_response(protocol, protocol_name.clone(), None),
				),
				// Charlie serves garbage data with a bogus proof.
				// Dave and Eve have no entry, so their requests fail outright.
				(
					Recipient::Authority(Sr25519Keyring::Charlie.public().into()),
					get_response(
						protocol,
						protocol_name.clone(),
						Some((
							vec![1, 2, 3],
							Proof::try_from(vec![vec![9, 8, 2], vec![2, 3, 4]]).unwrap(),
							chunk_index,
						)),
					),
				),
			]
			.into_iter()
			.collect()
		},
		valid_chunks: [(chunk.chunk)].into_iter().collect(),
		req_protocol_names,
	};
	test.run(task, rx);
}
/// Fixture for driving a `RunningTask` against a set of scripted network responses.
struct TestRun {
	/// Response to deliver for a given recipient.
	///
	/// Recipients not present in this map are answered with `RequestFailure::Refused`
	/// (see `handle_message`).
	chunk_responses: HashMap<Recipient, (Vec<u8>, ProtocolName)>,
	/// Set of chunks that should be considered valid:
	///
	/// Only these chunks are expected (and allowed) to reach the availability store.
	valid_chunks: HashSet<Vec<u8>>,
	/// Request protocol names, used to tell v1 from v2 responses when decoding.
	req_protocol_names: ReqProtocolNames,
}
impl TestRun {
	/// Drive `task` to completion against the scripted responses.
	///
	/// Panics if the task ends without a valid chunk having been stored, unless no valid
	/// chunks were expected in the first place.
	fn run(self, task: RunningTask, rx: mpsc::Receiver<FromFetchTask>) {
		sp_tracing::init_for_tests();
		let mut rx = rx.fuse();
		let task = task.run_inner().fuse();
		futures::pin_mut!(task);
		executor::block_on(async {
			let mut end_ok = false;
			loop {
				// Either the task sends us a message, or it finishes.
				let msg = select!(
					from_task = rx.next() => {
						match from_task {
							Some(msg) => msg,
							None => break,
						}
					},
					() = task =>
						break,
				);
				match msg {
					FromFetchTask::Concluded(_) => break,
					FromFetchTask::Failed(_) => break,
					FromFetchTask::Message(msg) => end_ok = self.handle_message(msg).await,
				}
			}
			if !end_ok {
				panic!("Task ended prematurely (failed to store valid chunk)!");
			}
		});
	}
	/// Returns true, if after processing of the given message it would be OK for the stream to
	/// end.
	async fn handle_message(
		&self,
		msg: overseer::AvailabilityDistributionOutgoingMessages,
	) -> bool {
		let msg = AllMessages::from(msg);
		match msg {
			AllMessages::NetworkBridgeTx(NetworkBridgeTxMessage::SendRequests(
				reqs,
				IfDisconnected::ImmediateError,
			)) => {
				let mut valid_responses = 0;
				for req in reqs {
					let req = match req {
						Requests::ChunkFetching(req) => req,
						_ => panic!("Unexpected request"),
					};
					// Look up the scripted answer; unknown recipients get `Refused`.
					let response =
						self.chunk_responses.get(&req.peer).ok_or(network::RequestFailure::Refused);
					// NOTE(review): in the `Err` case nothing is ever sent on
					// `req.pending_response`, so the requesting side observes a dropped
					// sender rather than an explicit `Refused` - confirm this is intended.
					if let Ok((resp, protocol)) = response {
						// Decode using the response type matching the protocol name, so we
						// can count deliveries of valid chunks below.
						let chunk = if protocol ==
							&self.req_protocol_names.get_name(Protocol::ChunkFetchingV1)
						{
							Into::<Option<v1::ChunkResponse>>::into(
								v1::ChunkFetchingResponse::decode(&mut &resp[..]).unwrap(),
							)
							.map(|c| c.chunk)
						} else if protocol ==
							&self.req_protocol_names.get_name(Protocol::ChunkFetchingV2)
						{
							Into::<Option<ErasureChunk>>::into(
								v2::ChunkFetchingResponse::decode(&mut &resp[..]).unwrap(),
							)
							.map(|c| c.chunk)
						} else {
							unreachable!()
						};
						if let Some(chunk) = chunk {
							if self.valid_chunks.contains(&chunk) {
								valid_responses += 1;
							}
						}
						req.pending_response
							.send(response.cloned())
							.expect("Sending response should succeed");
					}
				}
				// The stream may only end here if we are not waiting for any valid chunk
				// to be delivered and stored.
				return (valid_responses == 0) && self.valid_chunks.is_empty();
			},
			AllMessages::AvailabilityStore(AvailabilityStoreMessage::StoreChunk {
				chunk,
				tx,
				..
			}) => {
				// Only chunks we marked as valid may ever reach the store.
				assert!(self.valid_chunks.contains(&chunk.chunk));
				tx.send(Ok(())).expect("Answering fetching task should work");
				return true;
			},
			_ => {
				gum::debug!(target: LOG_TARGET, "Unexpected message");
				return false;
			},
		}
	}
}
/// Get a `RunningTask` filled with (mostly) dummy values.
///
/// The task requests chunk `chunk_index` on behalf of `validator_index`, with an empty backing
/// group and dummy hashes. Returns the task together with the receiving end of its feedback
/// channel.
fn get_test_running_task(
	req_protocol_names: &ReqProtocolNames,
	validator_index: ValidatorIndex,
	chunk_index: ChunkIndex,
) -> (RunningTask, mpsc::Receiver<FromFetchTask>) {
	// Minimal buffer; the `futures` mpsc channel still grants one slot per sender.
	let (tx, rx) = mpsc::channel(0);
	(
		RunningTask {
			session_index: 0,
			group_index: GroupIndex(0),
			// Empty group - tests fill this in as needed.
			group: Vec::new(),
			request: v2::ChunkFetchingRequest {
				candidate_hash: CandidateHash([43u8; 32].into()),
				index: validator_index,
			},
			erasure_root: Hash::repeat_byte(99),
			relay_parent: Hash::repeat_byte(71),
			sender: tx,
			metrics: Metrics::new_dummy(),
			req_v1_protocol_name: req_protocol_names.get_name(Protocol::ChunkFetchingV1),
			req_v2_protocol_name: req_protocol_names.get_name(Protocol::ChunkFetchingV2),
			chunk_index,
		},
		rx,
	)
}
/// Make a versioned ChunkFetchingResponse.
///
/// Encodes either a `Chunk` payload (when `chunk` is `Some`) or `NoSuchChunk`, using the
/// response type matching `protocol`, and pairs the encoding with `protocol_name`.
fn get_response(
	protocol: Protocol,
	protocol_name: ProtocolName,
	chunk: Option<(Vec<u8>, Proof, ChunkIndex)>,
) -> (Vec<u8>, ProtocolName) {
	let encoded = match protocol {
		Protocol::ChunkFetchingV1 => match chunk {
			// V1 responses carry no chunk index, hence it is dropped here.
			Some((chunk, proof, _)) =>
				v1::ChunkFetchingResponse::Chunk(ChunkResponse { chunk, proof }).encode(),
			None => v1::ChunkFetchingResponse::NoSuchChunk.encode(),
		},
		Protocol::ChunkFetchingV2 => match chunk {
			Some((chunk, proof, index)) =>
				v2::ChunkFetchingResponse::Chunk(ErasureChunk { chunk, index, proof }).encode(),
			None => v2::ChunkFetchingResponse::NoSuchChunk.encode(),
		},
		_ => unreachable!(),
	};
	(encoded, protocol_name)
}
@@ -0,0 +1,349 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Pezkuwi.
// Pezkuwi is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Pezkuwi is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
//! Requester takes care of requesting erasure chunks for candidates that are pending
//! availability.
use std::{
collections::{hash_map::HashMap, hash_set::HashSet},
iter::IntoIterator,
pin::Pin,
};
use futures::{
channel::{mpsc, oneshot},
task::{Context, Poll},
Stream,
};
use pezkuwi_node_network_protocol::request_response::{v1, v2, IsRequest, ReqProtocolNames};
use pezkuwi_node_subsystem::{
messages::{ChainApiMessage, RuntimeApiMessage},
overseer, ActivatedLeaf, ActiveLeavesUpdate,
};
use pezkuwi_node_subsystem_util::{
availability_chunks::availability_chunk_index,
runtime::{get_occupied_cores, RuntimeInfo},
};
use pezkuwi_primitives::{CandidateHash, CoreIndex, Hash, OccupiedCore, SessionIndex};
use super::{FatalError, Metrics, Result, LOG_TARGET};
#[cfg(test)]
mod tests;
/// Cache for session information.
mod session_cache;
use session_cache::SessionCache;
/// A task fetching a particular chunk.
mod fetch_task;
use fetch_task::{FetchTask, FetchTaskConfig, FromFetchTask};
/// Requester takes care of requesting erasure chunks from backing groups and stores them in the
/// av store.
///
/// It implements a stream that needs to be advanced for it making progress.
pub struct Requester {
	/// Candidates we need to fetch our chunk for.
	///
	/// We keep those around as long as a candidate is pending availability on some leaf, so we
	/// won't fetch chunks multiple times.
	///
	/// We remove them on failure, so we get retries on the next block still pending availability.
	fetches: HashMap<CandidateHash, FetchTask>,
	/// Localized information about sessions we are currently interested in.
	session_cache: SessionCache,
	/// Sender to be cloned for `FetchTask`s.
	///
	/// Tasks feed their messages and conclusions back through clones of this sender.
	tx: mpsc::Sender<FromFetchTask>,
	/// Receive messages from `FetchTask`.
	///
	/// Drained by the `Stream` implementation below.
	rx: mpsc::Receiver<FromFetchTask>,
	/// Prometheus Metrics
	metrics: Metrics,
	/// Mapping of the req-response protocols to the full protocol names.
	req_protocol_names: ReqProtocolNames,
}
#[overseer::contextbounds(AvailabilityDistribution, prefix = self::overseer)]
impl Requester {
	/// How many ancestors of the leaf should we consider along with it.
	pub(crate) const LEAF_ANCESTRY_LEN_WITHIN_SESSION: usize = 3;

	/// Create a new `Requester`.
	///
	/// You must feed it with `ActiveLeavesUpdate` via `update_fetching_heads` and make it progress
	/// by advancing the stream.
	pub fn new(req_protocol_names: ReqProtocolNames, metrics: Metrics) -> Self {
		let (tx, rx) = mpsc::channel(1);
		Requester {
			fetches: HashMap::new(),
			session_cache: SessionCache::new(),
			tx,
			rx,
			metrics,
			req_protocol_names,
		}
	}

	/// Update heads that need availability distribution.
	///
	/// For all active heads we will be fetching our chunks for availability distribution.
	pub async fn update_fetching_heads<Context>(
		&mut self,
		ctx: &mut Context,
		runtime: &mut RuntimeInfo,
		update: ActiveLeavesUpdate,
	) -> Result<()> {
		gum::trace!(target: LOG_TARGET, ?update, "Update fetching heads");
		let ActiveLeavesUpdate { activated, deactivated } = update;
		if let Some(leaf) = activated {
			// Order important! We need to handle activated, prior to deactivated, otherwise we
			// might cancel still needed jobs.
			self.start_requesting_chunks(ctx, runtime, leaf).await?;
		}
		self.stop_requesting_chunks(deactivated.into_iter());
		Ok(())
	}

	/// Start requesting chunks for newly imported head.
	///
	/// This will also request [`SESSION_ANCESTRY_LEN`] leaf ancestors from the same session
	/// and start requesting chunks for them too.
	async fn start_requesting_chunks<Context>(
		&mut self,
		ctx: &mut Context,
		runtime: &mut RuntimeInfo,
		new_head: ActivatedLeaf,
	) -> Result<()> {
		let sender = &mut ctx.sender().clone();
		let ActivatedLeaf { hash: leaf, .. } = new_head;
		// Session index of the leaf plus those ancestors that are still in the same session.
		let (leaf_session_index, ancestors_in_session) = get_block_ancestors_in_same_session(
			sender,
			runtime,
			leaf,
			Self::LEAF_ANCESTRY_LEN_WITHIN_SESSION,
		)
		.await?;
		// Also spawn or bump tasks for candidates in ancestry in the same session.
		for hash in std::iter::once(leaf).chain(ancestors_in_session) {
			let cores = get_occupied_cores(sender, hash).await?;
			gum::trace!(
				target: LOG_TARGET,
				occupied_cores = ?cores,
				"Query occupied core"
			);
			// Important:
			// We mark the whole ancestry as live in the **leaf** hash, so we don't need to track
			// any tasks separately.
			//
			// The next time the subsystem receives leaf update, some of spawned task will be bumped
			// to be live in fresh relay parent, while some might get dropped due to the current
			// leaf being deactivated.
			self.add_cores(ctx, runtime, leaf, leaf_session_index, cores).await?;
		}
		Ok(())
	}

	/// Stop requesting chunks for obsolete heads.
	fn stop_requesting_chunks(&mut self, obsolete_leaves: impl Iterator<Item = Hash>) {
		let obsolete_leaves: HashSet<_> = obsolete_leaves.collect();
		// Keep only tasks that are still live on at least one non-obsolete leaf.
		self.fetches.retain(|_, task| {
			task.remove_leaves(&obsolete_leaves);
			task.is_live()
		})
	}

	/// Add candidates corresponding for a particular relay parent.
	///
	/// Starting requests where necessary.
	///
	/// Note: The passed in `leaf` is not the same as `CandidateDescriptor::relay_parent` in the
	/// given cores. The latter is the `relay_parent` this candidate considers its parent, while the
	/// passed in leaf might be some later block where the candidate is still pending availability.
	async fn add_cores<Context>(
		&mut self,
		context: &mut Context,
		runtime: &mut RuntimeInfo,
		leaf: Hash,
		leaf_session_index: SessionIndex,
		cores: impl IntoIterator<Item = (CoreIndex, OccupiedCore)>,
	) -> Result<()> {
		for (core_index, core) in cores {
			if let Some(e) = self.fetches.get_mut(&core.candidate_hash) {
				// Just book keeping - we are already requesting that chunk:
				e.add_leaf(leaf);
			} else {
				let tx = self.tx.clone();
				let metrics = self.metrics.clone();
				let session_info = self
					.session_cache
					.get_session_info(
						context,
						runtime,
						// We use leaf here, the relay_parent must be in the same session as
						// the leaf. This is guaranteed by runtime which ensures that cores are
						// cleared at session boundaries. At the same time, only leaves are
						// guaranteed to be fetchable by the state trie.
						leaf,
						leaf_session_index,
					)
					.await
					.map_err(|err| {
						gum::warn!(
							target: LOG_TARGET,
							error = ?err,
							"Failed to spawn a fetch task"
						);
						err
					})?;
				if let Some(session_info) = session_info {
					// Number of validators in all groups combined.
					let n_validators =
						session_info.validator_groups.iter().fold(0usize, |mut acc, group| {
							acc = acc.saturating_add(group.len());
							acc
						});
					// Which chunk index this node is responsible for fetching on this core.
					let chunk_index = availability_chunk_index(
						session_info.node_features.as_ref(),
						n_validators,
						core_index,
						session_info.our_index,
					)?;
					let task_cfg = FetchTaskConfig::new(
						leaf,
						&core,
						tx,
						metrics,
						session_info,
						chunk_index,
						self.req_protocol_names.get_name(v1::ChunkFetchingRequest::PROTOCOL),
						self.req_protocol_names.get_name(v2::ChunkFetchingRequest::PROTOCOL),
					);
					self.fetches
						.insert(core.candidate_hash, FetchTask::start(task_cfg, context).await?);
				}
			}
		}
		Ok(())
	}
}
impl Stream for Requester {
	type Item = overseer::AvailabilityDistributionOutgoingMessages;
	/// Drain messages from the fetch tasks, handling bookkeeping variants internally.
	///
	/// Only `FromFetchTask::Message` items are surfaced to the caller; `Concluded` and `Failed`
	/// are consumed here and polling continues.
	fn poll_next(mut self: Pin<&mut Self>, ctx: &mut Context) -> Poll<Option<Self::Item>> {
		loop {
			match Pin::new(&mut self.rx).poll_next(ctx) {
				Poll::Ready(Some(FromFetchTask::Message(m))) => return Poll::Ready(Some(m)),
				Poll::Ready(Some(FromFetchTask::Concluded(Some(bad_boys)))) => {
					// Demote unresponsive/misbehaving validators in the session cache.
					self.session_cache.report_bad_log(bad_boys);
					continue;
				},
				Poll::Ready(Some(FromFetchTask::Concluded(None))) => continue,
				Poll::Ready(Some(FromFetchTask::Failed(candidate_hash))) => {
					// Make sure we retry on next block still pending availability.
					self.fetches.remove(&candidate_hash);
				},
				Poll::Ready(None) => return Poll::Ready(None),
				Poll::Pending => return Poll::Pending,
			}
		}
	}
}
/// Requests up to `limit` ancestor hashes of relay parent in the same session.
///
/// Also returns session index of the `head`.
async fn get_block_ancestors_in_same_session<Sender>(
	sender: &mut Sender,
	runtime: &mut RuntimeInfo,
	head: Hash,
	limit: usize,
) -> Result<(SessionIndex, Vec<Hash>)>
where
	Sender:
		overseer::SubsystemSender<RuntimeApiMessage> + overseer::SubsystemSender<ChainApiMessage>,
{
	// The order is parent, grandparent, ...
	//
	// `limit + 1` since a session index for the last element in ancestry
	// is obtained through its parent. It always gets truncated because
	// `session_ancestry_len` can only be incremented `ancestors.len() - 1` times.
	let mut ancestors = get_block_ancestors(sender, head, limit + 1).await?;
	let mut ancestors_iter = ancestors.iter();
	// `head` is the child of the first block in `ancestors`, request its session index.
	let head_session_index = match ancestors_iter.next() {
		Some(parent) => runtime.get_session_index_for_child(sender, *parent).await?,
		None => {
			// No first element, i.e. empty.
			return Ok((0, ancestors));
		},
	};
	let mut session_ancestry_len = 0;
	// The first parent is skipped.
	for parent in ancestors_iter {
		// Parent is the i-th ancestor, request session index for its child -- (i-1)th element.
		let session_index = runtime.get_session_index_for_child(sender, *parent).await?;
		if session_index == head_session_index {
			// The (i-1)-th ancestor is confirmed to be in `head`'s session - keep it.
			session_ancestry_len += 1;
		} else {
			break;
		}
	}
	// Drop the rest, retaining only ancestors known to share `head`'s session.
	ancestors.truncate(session_ancestry_len);
	Ok((head_session_index, ancestors))
}
/// Request up to `limit` ancestor hashes of relay parent from the Chain API.
///
/// A dropped response channel or a Chain API error is turned into the corresponding
/// fatal error of this subsystem.
async fn get_block_ancestors<Sender>(
	sender: &mut Sender,
	relay_parent: Hash,
	limit: usize,
) -> Result<Vec<Hash>>
where
	Sender: overseer::SubsystemSender<ChainApiMessage>,
{
	let (tx, rx) = oneshot::channel();
	let request = ChainApiMessage::Ancestors {
		hash: relay_parent,
		k: limit,
		response_channel: tx,
	};
	sender.send_message(request).await;
	// First `?`: the Chain API dropped our channel; second: it answered with an error.
	let response = rx.await.map_err(FatalError::ChainApiSenderDropped)?;
	let ancestors = response.map_err(FatalError::ChainApi)?;
	Ok(ancestors)
}
@@ -0,0 +1,221 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Pezkuwi.
// Pezkuwi is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Pezkuwi is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
use std::collections::HashSet;
use rand::{seq::SliceRandom, thread_rng};
use schnellru::{ByLength, LruMap};
use pezkuwi_node_subsystem::overseer;
use pezkuwi_node_subsystem_util::{request_node_features, runtime::RuntimeInfo};
use pezkuwi_primitives::{
AuthorityDiscoveryId, GroupIndex, Hash, NodeFeatures, SessionIndex, ValidatorIndex,
};
use crate::{
error::{Error, Result},
LOG_TARGET,
};
/// Caching of session info as needed by availability chunk distribution.
///
/// It should be ensured that a cached session stays live in the cache as long as we might need it.
pub struct SessionCache {
	/// Look up cached sessions by `SessionIndex`.
	///
	/// Note: Performance of fetching is really secondary here, but we need to ensure we are going
	/// to get any existing cache entry, before fetching new information, as we should not mess up
	/// the order of validators in `SessionInfo::validator_groups` (it gets re-ordered when bad
	/// validators are reported, see `report_bad`).
	session_info_cache: LruMap<SessionIndex, SessionInfo>,
}
/// Localized session information, tailored for the needs of availability distribution.
#[derive(Clone)]
pub struct SessionInfo {
	/// The index of this session.
	pub session_index: SessionIndex,
	/// Validator groups of the current session.
	///
	/// Each group's order is randomized. This way we achieve load balancing when requesting
	/// chunks, as the validators in a group will be tried in that randomized order. Each node
	/// should arrive at a different order, therefore we distribute the load on individual
	/// validators.
	pub validator_groups: Vec<Vec<AuthorityDiscoveryId>>,
	/// Information about ourselves:
	///
	/// Our validator index in this session.
	pub our_index: ValidatorIndex,
	/// Remember to which group we belong, so we won't start fetching chunks for candidates with
	/// our group being responsible. (We should have that chunk already.)
	///
	/// `None`, if we are not in fact part of any group.
	pub our_group: Option<GroupIndex>,
	/// Node features.
	pub node_features: NodeFeatures,
}
/// Report of bad validators.
///
/// Fetching tasks will report back validators that did not respond as expected, so we can re-order
/// them (see `SessionCache::report_bad`).
pub struct BadValidators {
	/// The session index that was used.
	pub session_index: SessionIndex,
	/// The group, the not properly responding validators belong to.
	pub group_index: GroupIndex,
	/// The list of bad validators.
	pub bad_validators: Vec<AuthorityDiscoveryId>,
}
#[overseer::contextbounds(AvailabilityDistribution, prefix = self::overseer)]
impl SessionCache {
	/// Create a new `SessionCache`.
	pub fn new() -> Self {
		SessionCache {
			// We need to cache the current and the last session the most:
			session_info_cache: LruMap::new(ByLength::new(2)),
		}
	}

	/// Tries to retrieve `SessionInfo`.
	///
	/// If this node is not a validator, the function will return `None`. On a cache miss, the
	/// information is first queried from the runtime and inserted into the cache.
	pub async fn get_session_info<'a, Context>(
		&'a mut self,
		ctx: &mut Context,
		runtime: &mut RuntimeInfo,
		parent: Hash,
		session_index: SessionIndex,
	) -> Result<Option<&'a SessionInfo>> {
		gum::trace!(target: LOG_TARGET, session_index, "Calling `get_session_info`");
		if self.session_info_cache.get(&session_index).is_none() {
			if let Some(info) =
				Self::query_info_from_runtime(ctx, runtime, parent, session_index).await?
			{
				gum::trace!(target: LOG_TARGET, session_index, "Storing session info in lru!");
				self.session_info_cache.insert(session_index, info);
			} else {
				// Not a validator in that session - nothing to cache.
				return Ok(None);
			}
		}
		// Look up again and reborrow immutably; the entry is present at this point.
		Ok(self.session_info_cache.get(&session_index).map(|i| &*i))
	}

	/// Variant of `report_bad` that never fails, but just logs errors.
	///
	/// Not being able to report bad validators is not fatal, so we should not shutdown the
	/// subsystem on this.
	pub fn report_bad_log(&mut self, report: BadValidators) {
		if let Err(err) = self.report_bad(report) {
			gum::warn!(
				target: LOG_TARGET,
				err = ?err,
				"Reporting bad validators failed with error"
			);
		}
	}

	/// Make sure we try unresponsive or misbehaving validators last.
	///
	/// We assume validators in a group are tried in reverse order, so the reported bad validators
	/// will be put at the beginning of the group.
	pub fn report_bad(&mut self, report: BadValidators) -> Result<()> {
		// Collected up front for the error message, before borrowing the cache mutably.
		let available_sessions = self.session_info_cache.iter().map(|(k, _)| *k).collect();
		let session = self.session_info_cache.get(&report.session_index).ok_or(
			Error::NoSuchCachedSession {
				available_sessions,
				missing_session: report.session_index,
			},
		)?;
		let group = session.validator_groups.get_mut(report.group_index.0 as usize).expect(
			"A bad validator report must contain a valid group for the reported session. qed.",
		);
		let bad_set = report.bad_validators.iter().collect::<HashSet<_>>();
		// Get rid of bad boys:
		group.retain(|v| !bad_set.contains(v));
		// We are trying validators in reverse order, so bad ones should be first:
		let mut new_group = report.bad_validators;
		new_group.append(group);
		*group = new_group;
		Ok(())
	}

	/// Query needed information from runtime.
	///
	/// We need to pass in a relay parent for our call to `request_session_info`, although we
	/// should not actually need it: presumably it is only used for internal caching based on
	/// relay parents, which we don't rely on here. It should not do any harm though.
	///
	/// Returns: `None` if not a validator.
	async fn query_info_from_runtime<Context>(
		ctx: &mut Context,
		runtime: &mut RuntimeInfo,
		relay_parent: Hash,
		session_index: SessionIndex,
	) -> Result<Option<SessionInfo>> {
		let info = runtime
			.get_session_info_by_index(ctx.sender(), relay_parent, session_index)
			.await?;
		// Node features for this session; consumed by the requester for chunk index mapping.
		let node_features = request_node_features(relay_parent, session_index, ctx.sender())
			.await
			.await?
			.map_err(Error::FailedNodeFeatures)?;
		let discovery_keys = info.session_info.discovery_keys.clone();
		let mut validator_groups = info.session_info.validator_groups.clone();
		if let Some(our_index) = info.validator_info.our_index {
			// Get our group index:
			let our_group = info.validator_info.our_group;
			// Shuffle validators in groups:
			let mut rng = thread_rng();
			for g in validator_groups.iter_mut() {
				g.shuffle(&mut rng)
			}
			// Look up `AuthorityDiscoveryId`s right away:
			let validator_groups: Vec<Vec<_>> = validator_groups
				.into_iter()
				.map(|group| {
					group
						.into_iter()
						.map(|index| {
							discovery_keys.get(index.0 as usize)
								.expect("There should be a discovery key for each validator of each validator group. qed.")
								.clone()
						})
						.collect()
				})
				.collect();
			let info = SessionInfo {
				validator_groups,
				our_index,
				session_index,
				our_group,
				node_features,
			};
			return Ok(Some(info));
		}
		// We are not a validator in this session.
		return Ok(None);
	}
}
@@ -0,0 +1,322 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Pezkuwi.
// Pezkuwi is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Pezkuwi is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
use futures::FutureExt;
use std::future::Future;
use pezkuwi_node_network_protocol::request_response::ReqProtocolNames;
use pezkuwi_node_primitives::{BlockData, ErasureChunk, PoV};
use pezkuwi_node_subsystem_util::runtime::RuntimeInfo;
use pezkuwi_primitives::{
BlockNumber, ChunkIndex, CoreState, ExecutorParams, GroupIndex, Hash, Id as ParaId,
ScheduledCore, SessionIndex, SessionInfo,
};
use sp_core::{testing::TaskExecutor, traits::SpawnNamed};
use pezkuwi_node_subsystem::{
messages::{
AllMessages, AvailabilityDistributionMessage, AvailabilityStoreMessage, ChainApiMessage,
NetworkBridgeTxMessage, RuntimeApiMessage, RuntimeApiRequest,
},
ActiveLeavesUpdate, SpawnGlue,
};
use pezkuwi_node_subsystem_test_helpers::{
make_subsystem_context,
mock::{make_ferdie_keystore, new_leaf},
TestSubsystemContext, TestSubsystemContextHandle,
};
use crate::tests::{
mock::{get_valid_chunk_data, make_session_info, OccupiedCoreBuilder},
node_features_with_mapping_enabled,
};
use super::Requester;
/// Produce a deterministic erasure chunk from a fixed dummy PoV.
///
/// The PoV is erasure-coded across 10 validators and the chunk at index 0 is
/// returned.
fn get_erasure_chunk() -> ErasureChunk {
	let dummy_pov = PoV { block_data: BlockData(vec![45, 46, 47]) };
	let (_root, chunk) = get_valid_chunk_data(dummy_pov, 10, ChunkIndex(0));
	chunk
}
#[derive(Clone)]
struct TestState {
	/// Simulated relay chain heads. For each block except genesis
	/// there exists a single corresponding candidate, handled in [`spawn_virtual_overseer`].
	pub relay_chain: Vec<Hash>,
	/// Session info returned for every `SessionInfo` runtime request.
	pub session_info: SessionInfo,
	// Defines a way to compute a session index for the block with
	// a given number. Returns 1 for all blocks by default.
	pub session_index_for_block: fn(BlockNumber) -> SessionIndex,
}
impl TestState {
	/// Construct the default test state: ten relay chain heads with
	/// repeated-byte hashes and a constant session index of 1 for all blocks.
	fn new() -> Self {
		let heads = (0u8..10).map(Hash::repeat_byte).collect::<Vec<_>>();
		Self {
			relay_chain: heads,
			session_info: make_session_info(),
			session_index_for_block: |_| 1,
		}
	}
}
/// Spawn a background task playing the overseer's role for the tests.
///
/// It answers every message the subsystem under test sends until the channel
/// closes: outgoing network requests are swallowed, availability-store queries
/// are served with a dummy chunk, and runtime/chain API requests are answered
/// from `test_state`. Any other message panics the task.
fn spawn_virtual_overseer(
	pool: TaskExecutor,
	test_state: TestState,
	mut ctx_handle: TestSubsystemContextHandle<AvailabilityDistributionMessage>,
) {
	pool.spawn(
		"virtual-overseer",
		None,
		async move {
			loop {
				let msg = ctx_handle.try_recv().await;
				// `None` means the subsystem side hung up - stop serving.
				if msg.is_none() {
					break;
				}
				match msg.unwrap() {
					// Outgoing network requests are simply ignored.
					AllMessages::NetworkBridgeTx(NetworkBridgeTxMessage::SendRequests(..)) => {},
					AllMessages::AvailabilityStore(AvailabilityStoreMessage::QueryChunk(
						..,
						tx,
					)) => {
						// Pretend the store has a chunk for any candidate.
						let chunk = get_erasure_chunk();
						tx.send(Some(chunk)).expect("Receiver is expected to be alive");
					},
					AllMessages::AvailabilityStore(AvailabilityStoreMessage::StoreChunk {
						tx,
						..
					}) => {
						// Silently accept it.
						tx.send(Ok(())).expect("Receiver is expected to be alive");
					},
					AllMessages::RuntimeApi(RuntimeApiMessage::Request(hash, req)) => {
						match req {
							RuntimeApiRequest::SessionIndexForChild(tx) => {
								// Block number is the position of `hash` in the
								// simulated relay chain.
								let chain = &test_state.relay_chain;
								let block_number = chain
									.iter()
									.position(|h| *h == hash)
									.expect("Invalid session index request");
								// Compute session index.
								let session_index_for_block = test_state.session_index_for_block;
								// The request is about the *child* of `hash`,
								// hence the `+ 1`.
								tx.send(Ok(session_index_for_block(block_number as u32 + 1)))
									.expect("Receiver should still be alive");
							},
							RuntimeApiRequest::SessionInfo(_, tx) => {
								tx.send(Ok(Some(test_state.session_info.clone())))
									.expect("Receiver should be alive.");
							},
							RuntimeApiRequest::SessionExecutorParams(_, tx) => {
								tx.send(Ok(Some(ExecutorParams::default())))
									.expect("Receiver should be alive.");
							},
							RuntimeApiRequest::NodeFeatures(_, tx) => {
								tx.send(Ok(node_features_with_mapping_enabled()))
									.expect("Receiver should be alive.");
							},
							RuntimeApiRequest::AvailabilityCores(tx) => {
								let para_id = ParaId::from(1_u32);
								let maybe_block_position =
									test_state.relay_chain.iter().position(|h| *h == hash);
								// Genesis gets a scheduled core, every later block
								// an occupied one; unknown hashes get no cores.
								let cores = match maybe_block_position {
									Some(block_num) => {
										let core = if block_num == 0 {
											CoreState::Scheduled(ScheduledCore {
												para_id,
												collator: None,
											})
										} else {
											CoreState::Occupied(
												OccupiedCoreBuilder {
													group_responsible: GroupIndex(1),
													para_id,
													relay_parent: hash,
													n_validators: 10,
													chunk_index: ChunkIndex(0),
												}
												.build()
												.0,
											)
										};
										vec![core]
									},
									None => Vec::new(),
								};
								tx.send(Ok(cores)).expect("Receiver should be alive.")
							},
							_ => {
								panic!("Unexpected runtime request: {:?}", req);
							},
						}
					},
					AllMessages::ChainApi(ChainApiMessage::Ancestors {
						hash,
						k,
						response_channel,
					}) => {
						// Return up to `k` ancestors of `hash`, newest first;
						// unknown hashes yield an empty ancestry.
						let chain = &test_state.relay_chain;
						let maybe_block_position = chain.iter().position(|h| *h == hash);
						let ancestors = maybe_block_position
							.map(|idx| chain[..idx].iter().rev().take(k).copied().collect())
							.unwrap_or_default();
						response_channel
							.send(Ok(ancestors))
							.expect("Receiver is expected to be alive");
					},
					msg => panic!("Unexpected overseer message: {:?}", msg),
				}
			}
		}
		.boxed(),
	);
}
/// Run `test_fx` against a subsystem context that is serviced by the
/// virtual overseer spawned in the background.
fn test_harness<T: Future<Output = ()>>(
	test_state: TestState,
	test_fx: impl FnOnce(
		TestSubsystemContext<AvailabilityDistributionMessage, SpawnGlue<TaskExecutor>>,
	) -> T,
) {
	// Executor shared by the mock overseer and the subsystem context.
	let executor = TaskExecutor::new();
	let (subsystem_ctx, overseer_handle) = make_subsystem_context(executor.clone());
	// The virtual overseer answers subsystem requests in the background.
	spawn_virtual_overseer(executor, test_state, overseer_handle);
	// Drive the test future to completion on the current thread.
	futures::executor::block_on(test_fx(subsystem_ctx));
}
#[test]
fn check_ancestry_lookup_in_same_session() {
	let test_state = TestState::new();
	let mut requester =
		Requester::new(ReqProtocolNames::new(&Hash::repeat_byte(0xff), None), Default::default());
	let keystore = make_ferdie_keystore();
	let mut runtime = RuntimeInfo::new(Some(keystore));
	test_harness(test_state.clone(), |mut ctx| async move {
		let chain = &test_state.relay_chain;
		// Activate block 1: exactly one fetch task should be spawned.
		let block_number = 1;
		let update = ActiveLeavesUpdate {
			activated: Some(new_leaf(chain[block_number], block_number as u32)),
			deactivated: Vec::new().into(),
		};
		requester
			.update_fetching_heads(&mut ctx, &mut runtime, update)
			.await
			.expect("Leaf processing failed");
		let fetch_tasks = &requester.fetches;
		assert_eq!(fetch_tasks.len(), 1);
		let block_1_candidate =
			*fetch_tasks.keys().next().expect("A task is checked to be present; qed");
		// Activate block 2: a second task appears and the first one stays
		// alive, since block 1 is within block 2's ancestry.
		let block_number = 2;
		let update = ActiveLeavesUpdate {
			activated: Some(new_leaf(chain[block_number], block_number as u32)),
			deactivated: Vec::new().into(),
		};
		requester
			.update_fetching_heads(&mut ctx, &mut runtime, update)
			.await
			.expect("Leaf processing failed");
		let fetch_tasks = &requester.fetches;
		assert_eq!(fetch_tasks.len(), 2);
		let task = fetch_tasks.get(&block_1_candidate).expect("Leaf hasn't been deactivated yet");
		// The task should be live in both blocks 1 and 2.
		assert_eq!(task.live_in.len(), 2);
		let block_2_candidate = *fetch_tasks
			.keys()
			.find(|hash| **hash != block_1_candidate)
			.expect("Two tasks are present, the first one corresponds to block 1 candidate; qed");
		// Deactivate both blocks but keep the second task as a
		// part of ancestry.
		let block_number = 2 + Requester::LEAF_ANCESTRY_LEN_WITHIN_SESSION;
		let update = ActiveLeavesUpdate {
			activated: Some(new_leaf(chain[block_number], block_number as u32)),
			deactivated: vec![chain[1], chain[2]].into(),
		};
		requester
			.update_fetching_heads(&mut ctx, &mut runtime, update)
			.await
			.expect("Leaf processing failed");
		let fetch_tasks = &requester.fetches;
		// The leaf + K its ancestors.
		assert_eq!(fetch_tasks.len(), Requester::LEAF_ANCESTRY_LEN_WITHIN_SESSION + 1);
		let block_2_task = fetch_tasks
			.get(&block_2_candidate)
			.expect("Expected to be live as a part of ancestry");
		// Block 2's candidate is only live via the new leaf's ancestry now.
		assert_eq!(block_2_task.live_in.len(), 1);
	});
}
#[test]
fn check_ancestry_lookup_in_different_sessions() {
	let mut test_state = TestState::new();
	let mut requester =
		Requester::new(ReqProtocolNames::new(&Hash::repeat_byte(0xff), None), Default::default());
	let keystore = make_ferdie_keystore();
	let mut runtime = RuntimeInfo::new(Some(keystore));
	// Blocks 0..=3 belong to session 1, everything later to session 2, so a
	// session boundary sits between blocks 3 and 4.
	test_state.session_index_for_block = |block_number| match block_number {
		0..=3 => 1,
		_ => 2,
	};
	test_harness(test_state.clone(), |mut ctx| async move {
		let chain = &test_state.relay_chain;
		// Leaf at block 3: ancestry lookup stays within session 1.
		let block_number = 3;
		let update = ActiveLeavesUpdate {
			activated: Some(new_leaf(chain[block_number], block_number as u32)),
			deactivated: Vec::new().into(),
		};
		requester
			.update_fetching_heads(&mut ctx, &mut runtime, update)
			.await
			.expect("Leaf processing failed");
		let fetch_tasks = &requester.fetches;
		assert_eq!(fetch_tasks.len(), 3.min(Requester::LEAF_ANCESTRY_LEN_WITHIN_SESSION + 1));
		// Leaf at block 4 is in session 2: ancestors from session 1 must not
		// be fetched, leaving only the leaf's own task.
		let block_number = 4;
		let update = ActiveLeavesUpdate {
			activated: Some(new_leaf(chain[block_number], block_number as u32)),
			deactivated: vec![chain[1], chain[2], chain[3]].into(),
		};
		requester
			.update_fetching_heads(&mut ctx, &mut runtime, update)
			.await
			.expect("Leaf processing failed");
		let fetch_tasks = &requester.fetches;
		assert_eq!(fetch_tasks.len(), 1);
		// Leaf at block 5 shares session 2 with block 4, so block 4 may be
		// kept as ancestry again.
		let block_number = 5;
		let update = ActiveLeavesUpdate {
			activated: Some(new_leaf(chain[block_number], block_number as u32)),
			deactivated: vec![chain[4]].into(),
		};
		requester
			.update_fetching_heads(&mut ctx, &mut runtime, update)
			.await
			.expect("Leaf processing failed");
		let fetch_tasks = &requester.fetches;
		assert_eq!(fetch_tasks.len(), 2.min(Requester::LEAF_ANCESTRY_LEN_WITHIN_SESSION + 1));
	});
}
@@ -0,0 +1,296 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Pezkuwi.
// Pezkuwi is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Pezkuwi is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
//! Answer requests for availability chunks.
use std::sync::Arc;
use futures::{channel::oneshot, select, FutureExt};
use codec::{Decode, Encode};
use fatality::Nested;
use pezkuwi_node_network_protocol::{
request_response::{v1, v2, IncomingRequest, IncomingRequestReceiver, IsRequest},
UnifiedReputationChange as Rep,
};
use pezkuwi_node_primitives::{AvailableData, ErasureChunk};
use pezkuwi_node_subsystem::{messages::AvailabilityStoreMessage, SubsystemSender};
use pezkuwi_primitives::{CandidateHash, ValidatorIndex};
use crate::{
error::{JfyiError, Result},
metrics::{Metrics, FAILED, NOT_FOUND, SUCCEEDED},
LOG_TARGET,
};
/// Reputation cost applied to peers whose incoming request fails to decode.
const COST_INVALID_REQUEST: Rep = Rep::CostMajor("Received message could not be decoded.");
/// Receiver task to be forked as a separate task to handle PoV requests.
/// Receiver task to be forked as a separate task to handle PoV requests.
///
/// Loops over incoming requests until a fatal error (e.g. the request stream
/// ending) terminates the task. Decode failures are logged and the loop
/// continues.
pub async fn run_pov_receiver<Sender>(
	mut sender: Sender,
	mut receiver: IncomingRequestReceiver<v1::PoVFetchingRequest>,
	metrics: Metrics,
) where
	Sender: SubsystemSender<AvailabilityStoreMessage>,
{
	loop {
		let incoming = receiver.recv(|| vec![COST_INVALID_REQUEST]).await.into_nested();
		match incoming {
			// A well-formed request - serve it.
			Ok(Ok(msg)) => answer_pov_request_log(&mut sender, msg, &metrics).await,
			// Decoding failed - non-fatal, keep listening.
			Ok(Err(jfyi)) => {
				gum::debug!(target: LOG_TARGET, error = ?jfyi, "Error decoding incoming PoV request.");
			},
			// Fatal error - shut the receiver down.
			Err(fatal) => {
				gum::debug!(
					target: LOG_TARGET,
					error = ?fatal,
					"Shutting down POV receiver."
				);
				return
			},
		}
	}
}
/// Receiver task to be forked as a separate task to handle chunk requests.
///
/// Listens on both the v1 and the v2 chunk-fetching protocols; the two share
/// the same request payload and differ only in the response type, which is why
/// a per-version response constructor is passed down to the common handler.
pub async fn run_chunk_receivers<Sender>(
	mut sender: Sender,
	mut receiver_v1: IncomingRequestReceiver<v1::ChunkFetchingRequest>,
	mut receiver_v2: IncomingRequestReceiver<v2::ChunkFetchingRequest>,
	metrics: Metrics,
) where
	Sender: SubsystemSender<AvailabilityStoreMessage>,
{
	// Response constructors for each protocol version.
	let make_resp_v1 = |chunk: Option<ErasureChunk>| match chunk {
		None => v1::ChunkFetchingResponse::NoSuchChunk,
		Some(chunk) => v1::ChunkFetchingResponse::Chunk(chunk.into()),
	};
	let make_resp_v2 = |chunk: Option<ErasureChunk>| match chunk {
		None => v2::ChunkFetchingResponse::NoSuchChunk,
		Some(chunk) => v2::ChunkFetchingResponse::Chunk(chunk.into()),
	};
	loop {
		select! {
			// v1 request: answer with a v1-shaped response.
			res = receiver_v1.recv(|| vec![COST_INVALID_REQUEST]).fuse() => match res.into_nested() {
				Ok(Ok(msg)) => {
					answer_chunk_request_log(&mut sender, msg, make_resp_v1, &metrics).await;
				},
				// Fatal errors terminate the whole task.
				Err(fatal) => {
					gum::debug!(
						target: LOG_TARGET,
						error = ?fatal,
						"Shutting down chunk receiver."
					);
					return
				},
				// Decode failures are non-fatal; keep serving.
				Ok(Err(jfyi)) => {
					gum::debug!(
						target: LOG_TARGET,
						error = ?jfyi,
						"Error decoding incoming chunk request."
					);
				}
			},
			// v2 request: payload converts into the v1 shape (`msg.into()`),
			// but the response is built with the v2 constructor.
			res = receiver_v2.recv(|| vec![COST_INVALID_REQUEST]).fuse() => match res.into_nested() {
				Ok(Ok(msg)) => {
					answer_chunk_request_log(&mut sender, msg.into(), make_resp_v2, &metrics).await;
				},
				Err(fatal) => {
					gum::debug!(
						target: LOG_TARGET,
						error = ?fatal,
						"Shutting down chunk receiver."
					);
					return
				},
				Ok(Err(jfyi)) => {
					gum::debug!(
						target: LOG_TARGET,
						error = ?jfyi,
						"Error decoding incoming chunk request."
					);
				}
			}
		}
	}
}
/// Variant of `answer_pov_request` that does Prometheus metric and logging on errors.
///
/// Any errors of `answer_pov_request` will simply be logged.
/// Variant of `answer_pov_request` that does Prometheus metric and logging on errors.
///
/// Any errors of `answer_pov_request` will simply be logged.
pub async fn answer_pov_request_log<Sender>(
	sender: &mut Sender,
	req: IncomingRequest<v1::PoVFetchingRequest>,
	metrics: &Metrics,
) where
	Sender: SubsystemSender<AvailabilityStoreMessage>,
{
	match answer_pov_request(sender, req).await {
		// Record whether the PoV was actually available.
		Ok(served) => {
			let label = if served { SUCCEEDED } else { NOT_FOUND };
			metrics.on_served_pov(label)
		},
		Err(err) => {
			gum::warn!(
				target: LOG_TARGET,
				err= ?err,
				"Serving PoV failed with error"
			);
			metrics.on_served_pov(FAILED);
		},
	}
}
/// Variant of `answer_chunk_request` that does Prometheus metric and logging on errors.
///
/// Any errors of `answer_request` will simply be logged.
/// Variant of `answer_chunk_request` that does Prometheus metric and logging on errors.
///
/// Any errors of `answer_chunk_request` will simply be logged.
pub async fn answer_chunk_request_log<Sender, Req, MakeResp>(
	sender: &mut Sender,
	req: IncomingRequest<Req>,
	make_response: MakeResp,
	metrics: &Metrics,
) where
	Req: IsRequest + Decode + Encode + Into<v1::ChunkFetchingRequest>,
	Req::Response: Encode,
	Sender: SubsystemSender<AvailabilityStoreMessage>,
	MakeResp: Fn(Option<ErasureChunk>) -> Req::Response,
{
	match answer_chunk_request(sender, req, make_response).await {
		// Record whether the chunk was actually available.
		Ok(served) => {
			let label = if served { SUCCEEDED } else { NOT_FOUND };
			metrics.on_served_chunk(label)
		},
		Err(err) => {
			gum::warn!(
				target: LOG_TARGET,
				err= ?err,
				"Serving chunk failed with error"
			);
			metrics.on_served_chunk(FAILED);
		},
	}
}
/// Answer an incoming PoV fetch request by querying the av store.
///
/// Returns: `Ok(true)` if chunk was found and served.
/// Answer an incoming PoV fetch request by querying the av store.
///
/// Returns: `Ok(true)` if the PoV was found and served.
pub async fn answer_pov_request<Sender>(
	sender: &mut Sender,
	req: IncomingRequest<v1::PoVFetchingRequest>,
) -> Result<bool>
where
	Sender: SubsystemSender<AvailabilityStoreMessage>,
{
	let available = query_available_data(sender, req.payload.candidate_hash).await?;
	let found = available.is_some();
	let response = if let Some(data) = available {
		// Avoid cloning the PoV when we hold the only reference to it.
		let pov = Arc::try_unwrap(data.pov).unwrap_or_else(|shared| (&*shared).clone());
		v1::PoVFetchingResponse::PoV(pov)
	} else {
		v1::PoVFetchingResponse::NoSuchPoV
	};
	req.send_response(response).map_err(|_| JfyiError::SendResponse)?;
	Ok(found)
}
/// Answer an incoming chunk request by querying the av store.
///
/// Returns: `Ok(true)` if chunk was found and served.
/// Answer an incoming chunk request by querying the av store.
///
/// Returns: `Ok(true)` if chunk was found and served.
pub async fn answer_chunk_request<Sender, Req, MakeResp>(
	sender: &mut Sender,
	req: IncomingRequest<Req>,
	make_response: MakeResp,
) -> Result<bool>
where
	Sender: SubsystemSender<AvailabilityStoreMessage>,
	Req: IsRequest + Decode + Encode + Into<v1::ChunkFetchingRequest>,
	Req::Response: Encode,
	MakeResp: Fn(Option<ErasureChunk>) -> Req::Response,
{
	// V1 and V2 requests share the same payload, so decoding into either works.
	// Only the responses differ, hence the `MakeResp` generic.
	let payload: v1::ChunkFetchingRequest = req.payload.into();
	let chunk = query_chunk(sender, payload.candidate_hash, payload.index).await?;
	let found = chunk.is_some();
	gum::trace!(
		target: LOG_TARGET,
		hash = ?payload.candidate_hash,
		index = ?payload.index,
		peer = ?req.peer,
		has_data = ?chunk.is_some(),
		"Serving chunk",
	);
	req.pending_response
		.send_response(make_response(chunk))
		.map_err(|_| JfyiError::SendResponse)?;
	Ok(found)
}
/// Query chunk from the availability store.
async fn query_chunk<Sender>(
sender: &mut Sender,
candidate_hash: CandidateHash,
validator_index: ValidatorIndex,
) -> std::result::Result<Option<ErasureChunk>, JfyiError>
where
Sender: SubsystemSender<AvailabilityStoreMessage>,
{
let (tx, rx) = oneshot::channel();
sender
.send_message(
AvailabilityStoreMessage::QueryChunk(candidate_hash, validator_index, tx).into(),
)
.await;
let result = rx.await.map_err(|e| {
gum::trace!(
target: LOG_TARGET,
?validator_index,
?candidate_hash,
error = ?e,
"Error retrieving chunk",
);
JfyiError::QueryChunkResponseChannel(e)
})?;
Ok(result)
}
/// Query PoV from the availability store.
async fn query_available_data<Sender>(
sender: &mut Sender,
candidate_hash: CandidateHash,
) -> Result<Option<AvailableData>>
where
Sender: SubsystemSender<AvailabilityStoreMessage>,
{
let (tx, rx) = oneshot::channel();
sender
.send_message(AvailabilityStoreMessage::QueryAvailableData(candidate_hash, tx).into())
.await;
let result = rx.await.map_err(JfyiError::QueryAvailableDataResponseChannel)?;
Ok(result)
}
@@ -0,0 +1,166 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Pezkuwi.
// Pezkuwi is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Pezkuwi is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
//! Helper functions and tools to generate mock data useful for testing this subsystem.
use std::sync::Arc;
use sp_keyring::Sr25519Keyring;
use pezkuwi_erasure_coding::{branches, obtain_chunks_v1 as obtain_chunks};
use pezkuwi_node_primitives::{AvailableData, BlockData, ErasureChunk, PoV, Proof};
use pezkuwi_primitives::{
CandidateCommitments, CandidateHash, ChunkIndex, CommittedCandidateReceiptV2, GroupIndex, Hash,
HeadData, Id as ParaId, IndexedVec, OccupiedCore, PersistedValidationData, SessionInfo,
ValidatorIndex,
};
use pezkuwi_primitives_test_helpers::{
dummy_collator, dummy_collator_signature, dummy_hash, dummy_validation_code,
CandidateDescriptor, CommittedCandidateReceipt,
};
/// Create dummy session info with two validator groups.
pub fn make_session_info() -> SessionInfo {
let validators = vec![
Sr25519Keyring::Ferdie, // <- this node, role: validator
Sr25519Keyring::Alice,
Sr25519Keyring::Bob,
Sr25519Keyring::Charlie,
Sr25519Keyring::Dave,
Sr25519Keyring::Eve,
Sr25519Keyring::One,
];
let validator_groups: IndexedVec<GroupIndex, Vec<ValidatorIndex>> =
[vec![5, 0, 3], vec![1, 6, 2, 4]]
.iter()
.map(|g| g.into_iter().map(|v| ValidatorIndex(*v)).collect())
.collect();
SessionInfo {
discovery_keys: validators.iter().map(|k| k.public().into()).collect(),
// Not used:
n_cores: validator_groups.len() as u32,
validator_groups,
// Not used values:
validators: validators.iter().map(|k| k.public().into()).collect(),
assignment_keys: Vec::new(),
zeroth_delay_tranche_width: 0,
relay_vrf_modulo_samples: 0,
n_delay_tranches: 0,
no_show_slots: 0,
needed_approvals: 0,
active_validator_indices: Vec::new(),
dispute_period: 6,
random_seed: [0u8; 32],
}
}
/// Builder for constructing occupied cores.
///
/// Takes all the values we care about and fills the rest with dummy values on `build`.
pub struct OccupiedCoreBuilder {
	/// Validator group responsible for this core.
	pub group_responsible: GroupIndex,
	/// Para occupying the core.
	pub para_id: ParaId,
	/// Relay parent used for the candidate's descriptor.
	pub relay_parent: Hash,
	/// Number of validators the PoV is erasure-coded across.
	pub n_validators: usize,
	/// Index of the erasure chunk to generate alongside the core.
	pub chunk_index: ChunkIndex,
}
impl OccupiedCoreBuilder {
	/// Build the occupied core together with the matching
	/// `(candidate hash, erasure chunk)` pair for it.
	pub fn build(self) -> (OccupiedCore, (CandidateHash, ErasureChunk)) {
		// Fixed dummy PoV; the chunk is derived from it.
		let pov = PoV { block_data: BlockData(vec![45, 46, 47]) };
		let pov_hash = pov.hash();
		let (erasure_root, chunk) =
			get_valid_chunk_data(pov.clone(), self.n_validators, self.chunk_index);
		let candidate_receipt = TestCandidateBuilder {
			para_id: self.para_id,
			pov_hash,
			relay_parent: self.relay_parent,
			erasure_root,
			..Default::default()
		}
		.build();
		let candidate_hash = candidate_receipt.hash();
		let core = OccupiedCore {
			next_up_on_available: None,
			occupied_since: 0,
			time_out_at: 0,
			next_up_on_time_out: None,
			availability: Default::default(),
			group_responsible: self.group_responsible,
			candidate_hash,
			candidate_descriptor: candidate_receipt.descriptor.clone(),
		};
		(core, (candidate_hash, chunk))
	}
}
/// Builder for dummy committed candidate receipts; any descriptor field not
/// set here is filled with a dummy value by `build`.
#[derive(Default)]
pub struct TestCandidateBuilder {
	para_id: ParaId,
	head_data: HeadData,
	pov_hash: Hash,
	relay_parent: Hash,
	erasure_root: Hash,
}
impl TestCandidateBuilder {
	/// Turn the builder into a `CommittedCandidateReceiptV2`, filling the
	/// remaining descriptor and commitment fields with dummy data.
	pub fn build(self) -> CommittedCandidateReceiptV2 {
		CommittedCandidateReceipt {
			descriptor: CandidateDescriptor {
				para_id: self.para_id,
				pov_hash: self.pov_hash,
				relay_parent: self.relay_parent,
				erasure_root: self.erasure_root,
				collator: dummy_collator(),
				persisted_validation_data_hash: dummy_hash(),
				signature: dummy_collator_signature(),
				para_head: dummy_hash(),
				validation_code_hash: dummy_validation_code().hash(),
			},
			commitments: CandidateCommitments { head_data: self.head_data, ..Default::default() },
		}
		// Convert the legacy receipt into the v2 representation.
		.into()
	}
}
// Get chunk for index 0
pub fn get_valid_chunk_data(
pov: PoV,
n_validators: usize,
chunk_index: ChunkIndex,
) -> (Hash, ErasureChunk) {
let persisted = PersistedValidationData {
parent_head: HeadData(vec![7, 8, 9]),
relay_parent_number: Default::default(),
max_pov_size: 1024,
relay_parent_storage_root: Default::default(),
};
let available_data = AvailableData { validation_data: persisted, pov: Arc::new(pov) };
let chunks = obtain_chunks(n_validators, &available_data).unwrap();
let branches = branches(chunks.as_ref());
let root = branches.root();
let chunk = branches
.enumerate()
.map(|(index, (proof, chunk))| ErasureChunk {
chunk: chunk.to_vec(),
index: ChunkIndex(index as _),
proof: Proof::try_from(proof).unwrap(),
})
.nth(chunk_index.0 as usize)
.expect("There really should be enough chunks.");
(root, chunk)
}
@@ -0,0 +1,201 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Pezkuwi.
// Pezkuwi is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Pezkuwi is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
use std::collections::HashSet;
use futures::{executor, future, Future};
use rstest::rstest;
use pezkuwi_node_network_protocol::request_response::{
IncomingRequest, Protocol, ReqProtocolNames,
};
use pezkuwi_primitives::{node_features, Block, CoreState, Hash, NodeFeatures};
use sp_keystore::KeystorePtr;
use super::*;
mod state;
/// State for test harnesses.
use state::{TestHarness, TestState};
/// Mock data useful for testing.
pub(crate) mod mock;
/// Run a test body against a freshly constructed availability-distribution subsystem.
///
/// Sets up incoming request receivers for the PoV and chunk (v1/v2) protocols,
/// runs the subsystem alongside the test future and returns the subsystem's
/// exit result once both futures complete.
fn test_harness<T: Future<Output = ()>>(
	keystore: KeystorePtr,
	req_protocol_names: ReqProtocolNames,
	test_fx: impl FnOnce(TestHarness) -> T,
) -> std::result::Result<(), FatalError> {
	sp_tracing::init_for_tests();
	let pool = sp_core::testing::TaskExecutor::new();
	let (context, virtual_overseer) =
		pezkuwi_node_subsystem_test_helpers::make_subsystem_context(pool.clone());
	// Incoming request channels the subsystem will listen on. The PoV config
	// is unused by the tests, hence the `_` binding.
	let (pov_req_receiver, _pov_req_cfg) = IncomingRequest::get_config_receiver::<
		Block,
		sc_network::NetworkWorker<Block, Hash>,
	>(&req_protocol_names);
	let (chunk_req_v1_receiver, chunk_req_v1_cfg) = IncomingRequest::get_config_receiver::<
		Block,
		sc_network::NetworkWorker<Block, Hash>,
	>(&req_protocol_names);
	let (chunk_req_v2_receiver, chunk_req_v2_cfg) = IncomingRequest::get_config_receiver::<
		Block,
		sc_network::NetworkWorker<Block, Hash>,
	>(&req_protocol_names);
	let subsystem = AvailabilityDistributionSubsystem::new(
		keystore,
		IncomingRequestReceivers { pov_req_receiver, chunk_req_v1_receiver, chunk_req_v2_receiver },
		req_protocol_names,
		Default::default(),
	);
	let subsystem = subsystem.run(context);
	let test_fut =
		test_fx(TestHarness { virtual_overseer, chunk_req_v1_cfg, chunk_req_v2_cfg, pool });
	futures::pin_mut!(test_fut);
	futures::pin_mut!(subsystem);
	// `.1` is the subsystem's result; the test future's output is `()`.
	executor::block_on(future::join(test_fut, subsystem)).1
}
/// Build a `NodeFeatures` bitfield with only the availability chunk mapping
/// feature enabled.
pub fn node_features_with_mapping_enabled() -> NodeFeatures {
	let mapping_bit = node_features::FeatureIndex::AvailabilityChunkMapping as u8 as usize;
	let mut features = NodeFeatures::new();
	// Grow the bitfield so the mapping bit exists, then flip it on.
	features.resize(mapping_bit + 1, false);
	features.set(mapping_bit, true);
	features
}
/// Simple basic check, whether the subsystem works as expected.
///
/// Exceptional cases are tested as unit tests in `fetch_task`.
#[rstest]
#[case(NodeFeatures::EMPTY, Protocol::ChunkFetchingV1)]
#[case(NodeFeatures::EMPTY, Protocol::ChunkFetchingV2)]
#[case(node_features_with_mapping_enabled(), Protocol::ChunkFetchingV1)]
#[case(node_features_with_mapping_enabled(), Protocol::ChunkFetchingV2)]
fn check_basic(#[case] node_features: NodeFeatures, #[case] chunk_resp_protocol: Protocol) {
	let req_protocol_names = ReqProtocolNames::new(&Hash::repeat_byte(0xff), None);
	let state =
		TestState::new(node_features.clone(), req_protocol_names.clone(), chunk_resp_protocol);
	if node_features == node_features_with_mapping_enabled() &&
		chunk_resp_protocol == Protocol::ChunkFetchingV1
	{
		// For this specific case, chunk fetching is not possible, because the ValidatorIndex is not
		// equal to the ChunkIndex and the peer does not send back the actual ChunkIndex.
		let _ = test_harness(state.keystore.clone(), req_protocol_names, move |harness| {
			state.run_assert_timeout(harness)
		});
	} else {
		// All other combinations should succeed.
		test_harness(state.keystore.clone(), req_protocol_names, move |harness| state.run(harness))
			.unwrap();
	}
}
/// Check whether requester tries all validators in group.
#[rstest]
#[case(NodeFeatures::EMPTY, Protocol::ChunkFetchingV1)]
#[case(NodeFeatures::EMPTY, Protocol::ChunkFetchingV2)]
#[case(node_features_with_mapping_enabled(), Protocol::ChunkFetchingV1)]
#[case(node_features_with_mapping_enabled(), Protocol::ChunkFetchingV2)]
fn check_fetch_tries_all(
	#[case] node_features: NodeFeatures,
	#[case] chunk_resp_protocol: Protocol,
) {
	let req_protocol_names = ReqProtocolNames::new(&Hash::repeat_byte(0xff), None);
	let mut state =
		TestState::new(node_features.clone(), req_protocol_names.clone(), chunk_resp_protocol);
	// Prepend three failed (empty) responses per chunk - the requester has to
	// work through them before reaching the valid chunk at the end.
	for (_, v) in state.chunks.iter_mut() {
		// 4 validators in group, so this should still succeed:
		v.push(None);
		v.push(None);
		v.push(None);
	}
	if node_features == node_features_with_mapping_enabled() &&
		chunk_resp_protocol == Protocol::ChunkFetchingV1
	{
		// For this specific case, chunk fetching is not possible, because the ValidatorIndex is not
		// equal to the ChunkIndex and the peer does not send back the actual ChunkIndex.
		let _ = test_harness(state.keystore.clone(), req_protocol_names, move |harness| {
			state.run_assert_timeout(harness)
		});
	} else {
		// All other combinations should succeed despite the failures.
		test_harness(state.keystore.clone(), req_protocol_names, move |harness| state.run(harness))
			.unwrap();
	}
}
/// Check whether requester tries all validators in group
///
/// Check that requester will retry the fetch on error on the next block still pending
/// availability.
#[rstest]
#[case(NodeFeatures::EMPTY, Protocol::ChunkFetchingV1)]
#[case(NodeFeatures::EMPTY, Protocol::ChunkFetchingV2)]
#[case(node_features_with_mapping_enabled(), Protocol::ChunkFetchingV1)]
#[case(node_features_with_mapping_enabled(), Protocol::ChunkFetchingV2)]
fn check_fetch_retry(#[case] node_features: NodeFeatures, #[case] chunk_resp_protocol: Protocol) {
	let req_protocol_names = ReqProtocolNames::new(&Hash::repeat_byte(0xff), None);
	let mut state =
		TestState::new(node_features.clone(), req_protocol_names.clone(), chunk_resp_protocol);
	// Make block 2 keep the same cores occupied as block 1, so fetches that
	// fail on block 1 can be retried on block 2.
	state
		.cores
		.insert(state.relay_chain[2], state.cores.get(&state.relay_chain[1]).unwrap().clone());
	// We only care about the first three blocks.
	// 1. scheduled
	// 2. occupied
	// 3. still occupied
	state.relay_chain.truncate(3);
	// Get rid of unused valid chunks:
	let valid_candidate_hashes: HashSet<_> = state
		.cores
		.get(&state.relay_chain[1])
		.iter()
		.flat_map(|v| v.iter())
		.filter_map(|c| match c {
			CoreState::Occupied(core) => Some(core.candidate_hash),
			_ => None,
		})
		.collect();
	state.valid_chunks.retain(|(ch, _)| valid_candidate_hashes.contains(ch));
	// Prepend five failed responses per chunk to force retries across blocks.
	for (_, v) in state.chunks.iter_mut() {
		// This should still succeed as cores are still pending availability on next block.
		v.push(None);
		v.push(None);
		v.push(None);
		v.push(None);
		v.push(None);
	}
	if node_features == node_features_with_mapping_enabled() &&
		chunk_resp_protocol == Protocol::ChunkFetchingV1
	{
		// For this specific case, chunk fetching is not possible, because the ValidatorIndex is not
		// equal to the ChunkIndex and the peer does not send back the actual ChunkIndex.
		let _ = test_harness(state.keystore.clone(), req_protocol_names, move |harness| {
			state.run_assert_timeout(harness)
		});
	} else {
		// All other combinations should eventually succeed via retries.
		test_harness(state.keystore.clone(), req_protocol_names, move |harness| state.run(harness))
			.unwrap();
	}
}
@@ -0,0 +1,450 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Pezkuwi.
// Pezkuwi is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Pezkuwi is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
use std::{
collections::{HashMap, HashSet},
time::Duration,
};
use network::{request_responses::OutgoingResponse, ProtocolName, RequestFailure};
use pezkuwi_node_subsystem_test_helpers::TestSubsystemContextHandle;
use pezkuwi_node_subsystem_util::{availability_chunks::availability_chunk_index, TimeoutExt};
use futures::{
channel::{mpsc, oneshot},
FutureExt, SinkExt, StreamExt,
};
use futures_timer::Delay;
use sc_network as network;
use sc_network::{config as netconfig, config::RequestResponseConfig, IfDisconnected};
use sp_core::{testing::TaskExecutor, traits::SpawnNamed};
use sp_keystore::KeystorePtr;
use pezkuwi_node_network_protocol::request_response::{
v1, v2, IncomingRequest, OutgoingRequest, Protocol, ReqProtocolNames, Requests,
};
use pezkuwi_node_primitives::ErasureChunk;
use pezkuwi_node_subsystem::{
messages::{
AllMessages, AvailabilityDistributionMessage, AvailabilityStoreMessage, ChainApiMessage,
NetworkBridgeTxMessage, RuntimeApiMessage, RuntimeApiRequest,
},
ActiveLeavesUpdate, FromOrchestra, OverseerSignal,
};
use pezkuwi_node_subsystem_test_helpers as test_helpers;
use pezkuwi_primitives::{
CandidateHash, ChunkIndex, CoreIndex, CoreState, ExecutorParams, GroupIndex, Hash,
Id as ParaId, NodeFeatures, ScheduledCore, SessionInfo, ValidatorIndex,
};
use test_helpers::mock::{make_ferdie_keystore, new_leaf};
use super::mock::{make_session_info, OccupiedCoreBuilder};
use crate::LOG_TARGET;
type VirtualOverseer = pezkuwi_node_subsystem_test_helpers::TestSubsystemContextHandle<
AvailabilityDistributionMessage,
>;
/// Handles handed to a test body by `test_harness`.
pub struct TestHarness {
	/// Handle for exchanging messages with the subsystem under test.
	pub virtual_overseer: VirtualOverseer,
	/// Request/response config for the v1 chunk-fetching protocol.
	pub chunk_req_v1_cfg: RequestResponseConfig,
	/// Request/response config for the v2 chunk-fetching protocol.
	pub chunk_req_v2_cfg: RequestResponseConfig,
	/// Executor for spawning auxiliary tasks from the test body.
	pub pool: TaskExecutor,
}
/// `TestState` for mocking execution of this subsystem.
///
/// The `Default` instance provides data, which makes the system succeed by providing a couple of
/// valid occupied cores. You can tune the data before calling `TestState::run`. E.g. modify some
/// chunks to be invalid, the test will then still pass if you remove that chunk from
/// `valid_chunks`.
#[derive(Clone)]
pub struct TestState {
	/// Simulated relay chain heads:
	pub relay_chain: Vec<Hash>,
	/// Whenever the subsystem tries to fetch an erasure chunk one item of the given vec will be
	/// popped. So you can experiment with serving invalid chunks or no chunks on request and see
	/// whether the subsystem still succeeds with its goal.
	pub chunks: HashMap<(CandidateHash, ValidatorIndex), Vec<Option<ErasureChunk>>>,
	/// All chunks that are valid and should be accepted.
	pub valid_chunks: HashSet<(CandidateHash, ValidatorIndex)>,
	/// Session info for the mocked runtime.
	pub session_info: SessionInfo,
	/// Cores per relay chain block.
	pub cores: HashMap<Hash, Vec<CoreState>>,
	/// Keystore of the node under test (built via `make_ferdie_keystore`).
	pub keystore: KeystorePtr,
	/// Node features answered to `NodeFeatures` runtime requests.
	pub node_features: NodeFeatures,
	/// Which chunk-fetching protocol version responses are served with.
	pub chunk_response_protocol: Protocol,
	/// Protocol names used when constructing requests and receivers.
	pub req_protocol_names: ReqProtocolNames,
	/// Chunk index computed for our validator (index 0) on core 1.
	pub our_chunk_index: ChunkIndex,
}
impl TestState {
	/// Initialize a default test state.
	///
	/// Builds a simulated relay chain of 9 blocks: the first block has two scheduled cores and
	/// each subsequent block has those cores occupied by candidates of two test parachains.
	/// Chunks are only registered for the second backing group's candidates, since our own
	/// group's chunks will never be fetched.
	pub fn new(
		node_features: NodeFeatures,
		req_protocol_names: ReqProtocolNames,
		chunk_response_protocol: Protocol,
	) -> Self {
		let relay_chain: Vec<_> = (1u8..10).map(Hash::repeat_byte).collect();
		let chain_a = ParaId::from(1);
		let chain_b = ParaId::from(2);
		let chain_ids = vec![chain_a, chain_b];
		let keystore = make_ferdie_keystore();
		let session_info = make_session_info();
		// The chunk index our validator (index 0 on core 1) is responsible for, given the
		// active node features.
		let our_chunk_index = availability_chunk_index(
			&node_features,
			session_info.validators.len(),
			CoreIndex(1),
			ValidatorIndex(0),
		)
		.unwrap();
		let (cores, chunks) = {
			let mut cores = HashMap::new();
			let mut chunks = HashMap::new();
			cores.insert(
				relay_chain[0],
				vec![
					CoreState::Scheduled(ScheduledCore { para_id: chain_ids[0], collator: None }),
					CoreState::Scheduled(ScheduledCore { para_id: chain_ids[1], collator: None }),
				],
			);
			// Pair every block with its child, so each child gets the occupied cores.
			let heads = {
				let mut advanced = relay_chain.iter();
				advanced.next();
				relay_chain.iter().zip(advanced)
			};
			for (relay_parent, relay_child) in heads {
				let (p_cores, p_chunks): (Vec<_>, Vec<_>) = chain_ids
					.iter()
					.enumerate()
					.map(|(i, para_id)| {
						let (core, chunk) = OccupiedCoreBuilder {
							group_responsible: GroupIndex(i as _),
							para_id: *para_id,
							relay_parent: *relay_parent,
							n_validators: session_info.validators.len(),
							chunk_index: our_chunk_index,
						}
						.build();
						(CoreState::Occupied(core), chunk)
					})
					.unzip();
				cores.insert(*relay_child, p_cores);
				// Skip chunks for our own group (won't get fetched):
				let mut chunks_other_groups = p_chunks.into_iter();
				chunks_other_groups.next();
				for (candidate, chunk) in chunks_other_groups {
					chunks.insert((candidate, ValidatorIndex(0)), vec![Some(chunk)]);
				}
			}
			(cores, chunks)
		};
		Self {
			relay_chain,
			// Iterate keys directly instead of cloning the whole map (keys AND values)
			// just to collect the keys.
			valid_chunks: chunks.keys().cloned().collect(),
			chunks,
			session_info,
			cores,
			keystore,
			node_features,
			chunk_response_protocol,
			req_protocol_names,
			our_chunk_index,
		}
	}
	/// Run, but fail after some timeout.
	pub async fn run(self, harness: TestHarness) {
		// Make sure test won't run forever.
		let f = self.run_inner(harness).timeout(Duration::from_secs(5));
		assert!(f.await.is_some(), "Test ran into timeout");
	}
	/// Run, and assert an expected timeout.
	pub async fn run_assert_timeout(self, harness: TestHarness) {
		// Make sure test won't run forever.
		let f = self.run_inner(harness).timeout(Duration::from_secs(5));
		assert!(f.await.is_none(), "Test should have run into timeout");
	}
	/// Run tests with the given mock values in `TestState`.
	///
	/// This will simply advance through the simulated chain and examines whether the subsystem
	/// behaves as expected: It will succeed if all valid chunks of other backing groups get stored
	/// and no other.
	///
	/// We try to be as agnostic about details as possible, how the subsystem achieves those goals
	/// should not be a matter to this test suite.
	async fn run_inner(mut self, mut harness: TestHarness) {
		// We skip genesis here (in reality ActiveLeavesUpdate can also skip a block):
		let updates = {
			let mut advanced = self.relay_chain.iter();
			advanced.next();
			self.relay_chain
				.iter()
				.zip(advanced)
				.map(|(old, new)| ActiveLeavesUpdate {
					activated: Some(new_leaf(*new, 1)),
					deactivated: vec![*old].into(),
				})
				.collect::<Vec<_>>()
		};
		// We should be storing all valid chunks during execution:
		//
		// Test will fail if this does not happen until timeout.
		let mut remaining_stores = self.valid_chunks.len();
		let TestSubsystemContextHandle { tx, mut rx, .. } = harness.virtual_overseer;
		// Spawning necessary as incoming queue can only hold a single item, we don't want to dead
		// lock ;-)
		let update_tx = tx.clone();
		harness.pool.spawn(
			"sending-active-leaves-updates",
			None,
			async move {
				for update in updates {
					overseer_signal(update_tx.clone(), OverseerSignal::ActiveLeaves(update)).await;
					// We need to give the subsystem a little time to do its job, otherwise it will
					// cancel jobs as obsolete:
					Delay::new(Duration::from_millis(100)).await;
				}
			}
			.boxed(),
		);
		// Answer the subsystem's messages until all expected chunks have been stored.
		while remaining_stores > 0 {
			gum::trace!(target: LOG_TARGET, remaining_stores, "Stores left to go");
			let msg = overseer_recv(&mut rx).await;
			match msg {
				AllMessages::NetworkBridgeTx(NetworkBridgeTxMessage::SendRequests(
					reqs,
					IfDisconnected::ImmediateError,
				)) => {
					for req in reqs {
						// Forward requests:
						match self.chunk_response_protocol {
							Protocol::ChunkFetchingV1 => {
								let in_req = to_incoming_req_v1(
									&harness.pool,
									req,
									self.req_protocol_names.get_name(Protocol::ChunkFetchingV1),
								);
								harness
									.chunk_req_v1_cfg
									.inbound_queue
									.as_mut()
									.unwrap()
									.send(in_req.into_raw())
									.await
									.unwrap();
							},
							Protocol::ChunkFetchingV2 => {
								let in_req = to_incoming_req_v2(
									&harness.pool,
									req,
									self.req_protocol_names.get_name(Protocol::ChunkFetchingV2),
								);
								harness
									.chunk_req_v2_cfg
									.inbound_queue
									.as_mut()
									.unwrap()
									.send(in_req.into_raw())
									.await
									.unwrap();
							},
							_ => panic!("Unexpected protocol"),
						}
					}
				},
				AllMessages::AvailabilityStore(AvailabilityStoreMessage::QueryChunk(
					candidate_hash,
					validator_index,
					tx,
				)) => {
					// Serve (and consume) the next mocked chunk for this candidate, if any.
					let chunk = self
						.chunks
						.get_mut(&(candidate_hash, validator_index))
						.and_then(Vec::pop)
						.flatten();
					tx.send(chunk).expect("Receiver is expected to be alive");
				},
				AllMessages::AvailabilityStore(AvailabilityStoreMessage::StoreChunk {
					candidate_hash,
					chunk,
					validator_index,
					tx,
					..
				}) => {
					assert!(
						self.valid_chunks.contains(&(candidate_hash, validator_index)),
						"Only valid chunks should ever get stored."
					);
					assert_eq!(self.our_chunk_index, chunk.index);
					tx.send(Ok(())).expect("Receiver is expected to be alive");
					gum::trace!(target: LOG_TARGET, "'Stored' fetched chunk.");
					remaining_stores -= 1;
				},
				AllMessages::RuntimeApi(RuntimeApiMessage::Request(hash, req)) => {
					match req {
						RuntimeApiRequest::SessionIndexForChild(tx) => {
							// Always session index 1 for now:
							tx.send(Ok(1)).expect("Receiver should still be alive");
						},
						RuntimeApiRequest::SessionInfo(_, tx) => {
							tx.send(Ok(Some(self.session_info.clone())))
								.expect("Receiver should be alive.");
						},
						RuntimeApiRequest::SessionExecutorParams(_, tx) => {
							tx.send(Ok(Some(ExecutorParams::default())))
								.expect("Receiver should be alive.");
						},
						RuntimeApiRequest::AvailabilityCores(tx) => {
							gum::trace!(target: LOG_TARGET, cores= ?self.cores[&hash], hash = ?hash, "Sending out cores for hash");
							tx.send(Ok(self.cores[&hash].clone()))
								.expect("Receiver should still be alive");
						},
						RuntimeApiRequest::NodeFeatures(_, tx) => {
							tx.send(Ok(self.node_features.clone()))
								.expect("Receiver should still be alive");
						},
						_ => {
							panic!("Unexpected runtime request: {:?}", req);
						},
					}
				},
				AllMessages::ChainApi(ChainApiMessage::Ancestors { hash, k, response_channel }) => {
					// Answer with up to `k` ancestors of `hash` from the simulated chain,
					// newest first; unknown hashes get an empty answer.
					let chain = &self.relay_chain;
					let maybe_block_position = chain.iter().position(|h| *h == hash);
					let ancestors = maybe_block_position
						.map(|idx| chain[..idx].iter().rev().take(k).copied().collect())
						.unwrap_or_default();
					response_channel.send(Ok(ancestors)).expect("Receiver is expected to be alive");
				},
				_ => {
					panic!("Received unexpected message")
				},
			}
		}
		overseer_signal(tx, OverseerSignal::Conclude).await;
	}
}
/// Deliver an overseer signal to the subsystem under test.
///
/// Panics if the subsystem's incoming channel has been dropped.
async fn overseer_signal(
	mut tx: mpsc::Sender<FromOrchestra<AvailabilityDistributionMessage>>,
	msg: impl Into<OverseerSignal>,
) {
	let signal = msg.into();
	gum::trace!(target: LOG_TARGET, msg = ?signal, "sending message");
	let wrapped = FromOrchestra::Signal(signal);
	tx.send(wrapped).await.expect("Test subsystem no longer live");
}
/// Wait for the next message the subsystem sends to the (mocked) overseer.
///
/// Panics if the subsystem's outgoing channel has been closed.
async fn overseer_recv(rx: &mut mpsc::UnboundedReceiver<AllMessages>) -> AllMessages {
	gum::trace!(target: LOG_TARGET, "waiting for message ...");
	match rx.next().await {
		Some(msg) => msg,
		None => panic!("Test subsystem no longer live"),
	}
}
/// Convert an outgoing chunk-fetching request into an incoming v1 request.
///
/// The v1 payload is taken from the request's fallback (which is asserted to be
/// `ChunkFetchingV1`); the response is forwarded back via a spawned task.
/// Panics on any other request variant.
fn to_incoming_req_v1(
	executor: &TaskExecutor,
	outgoing: Requests,
	protocol_name: ProtocolName,
) -> IncomingRequest<v1::ChunkFetchingRequest> {
	if let Requests::ChunkFetching(OutgoingRequest {
		pending_response,
		fallback_request: Some((fallback_request, fallback_protocol)),
		..
	}) = outgoing
	{
		assert_eq!(fallback_protocol, Protocol::ChunkFetchingV1);
		let response_tx = spawn_message_forwarding(executor, protocol_name, pending_response);
		// The requesting peer's identity is irrelevant to these tests:
		let peer = network::PeerId::random().into();
		IncomingRequest::new(peer, fallback_request, response_tx)
	} else {
		panic!("Unexpected request!")
	}
}
/// Convert an outgoing chunk-fetching request into an incoming v2 request.
///
/// Uses the request's own (v2) payload, while still asserting that the declared
/// fallback protocol is `ChunkFetchingV1`; the response is forwarded back via a
/// spawned task. Panics on any other request variant.
fn to_incoming_req_v2(
	executor: &TaskExecutor,
	outgoing: Requests,
	protocol_name: ProtocolName,
) -> IncomingRequest<v2::ChunkFetchingRequest> {
	if let Requests::ChunkFetching(OutgoingRequest {
		payload,
		pending_response,
		fallback_request: Some((_, fallback_protocol)),
		..
	}) = outgoing
	{
		assert_eq!(fallback_protocol, Protocol::ChunkFetchingV1);
		let response_tx = spawn_message_forwarding(executor, protocol_name, pending_response);
		// The requesting peer's identity is irrelevant to these tests:
		let peer = network::PeerId::random().into();
		IncomingRequest::new(peer, payload, response_tx)
	} else {
		panic!("Unexpected request!")
	}
}
/// Spawn a task that bridges an `OutgoingResponse` back to the requester.
///
/// Returns the sender the request handler should answer on; the spawned task awaits
/// that answer and forwards its `result` to `pending_response`, mapping any error to
/// `RequestFailure::Refused` and attaching the protocol name on success.
fn spawn_message_forwarding(
	executor: &TaskExecutor,
	protocol_name: ProtocolName,
	pending_response: oneshot::Sender<Result<(Vec<u8>, ProtocolName), RequestFailure>>,
) -> oneshot::Sender<OutgoingResponse> {
	let (response_tx, response_rx): (
		oneshot::Sender<netconfig::OutgoingResponse>,
		oneshot::Receiver<_>,
	) = oneshot::channel();
	let forward = async move {
		let outcome = response_rx.await.expect("Unexpected canceled request").result;
		let forwarded = match outcome {
			Ok(bytes) => Ok((bytes, protocol_name)),
			Err(_) => Err(RequestFailure::Refused),
		};
		pending_response.send(forwarded).expect("Sending response is expected to work");
	};
	executor.spawn("message-forwarding", None, forward.boxed());
	response_tx
}