mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-23 14:21:11 +00:00
Request based PoV distribution (#2640)
* Indentation fix. * Prepare request-response for PoV fetching. * Drop old PoV distribution. * WIP: Fetch PoV directly from backing. * Backing compiles. * Runtime access and connection management for PoV distribution. * Get rid of seemingly dead code. * Implement PoV fetching. Backing does not yet use it. * Don't send `ConnectToValidators` for empty list. * Even better - no need to check over and over again. * PoV fetching implemented. + Typechecks + Should work Missing: - Guide - Tests - Do fallback fetching in case fetching from seconding validator fails. * Check PoV hash upon reception. * Implement retry of PoV fetching in backing. * Avoid pointless validation spawning. * Add jaeger span to pov requesting. * Add back tracing. * Review remarks. * Whitespace. * Whitespace again. * Cleanup + fix tests. * Log to log target in overseer. * Fix more tests. * Don't fail if group cannot be found. * Simple test for PoV fetcher. * Handle missing group membership better. * Add test for retry functionality. * Fix flaky test. * Spaces again. * Guide updates. * Spaces.
This commit is contained in:
@@ -17,20 +17,26 @@
|
||||
|
||||
//! Error handling related code and Error/Result definitions.
|
||||
|
||||
use polkadot_node_network_protocol::request_response::request::RequestError;
|
||||
use thiserror::Error;
|
||||
|
||||
use futures::channel::oneshot;
|
||||
|
||||
use polkadot_node_subsystem_util::Error as UtilError;
|
||||
use polkadot_primitives::v1::SessionIndex;
|
||||
use polkadot_primitives::v1::{CompressedPoVError, SessionIndex};
|
||||
use polkadot_subsystem::{errors::RuntimeApiError, SubsystemError};
|
||||
|
||||
use crate::LOG_TARGET;
|
||||
|
||||
/// Errors of this subsystem.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum Error {
|
||||
#[error("Response channel to obtain QueryChunk failed")]
|
||||
#[error("Response channel to obtain chunk failed")]
|
||||
QueryChunkResponseChannel(#[source] oneshot::Canceled),
|
||||
|
||||
#[error("Response channel to obtain available data failed")]
|
||||
QueryAvailableDataResponseChannel(#[source] oneshot::Canceled),
|
||||
|
||||
#[error("Receive channel closed")]
|
||||
IncomingMessageChannel(#[source] SubsystemError),
|
||||
|
||||
@@ -53,24 +59,43 @@ pub enum Error {
|
||||
/// Sending response failed.
|
||||
#[error("Sending a request's response failed.")]
|
||||
SendResponse,
|
||||
}
|
||||
|
||||
/// Error that we should handle gracefully by logging it.
|
||||
#[derive(Debug)]
|
||||
pub enum NonFatalError {
|
||||
/// Some request to utility functions failed.
|
||||
/// This can be either `RuntimeRequestCanceled` or `RuntimeApiError`.
|
||||
#[error("Utility request failed")]
|
||||
UtilRequest(UtilError),
|
||||
|
||||
/// Runtime API subsystem is down, which means we're shutting down.
|
||||
#[error("Runtime request canceled")]
|
||||
RuntimeRequestCanceled(oneshot::Canceled),
|
||||
|
||||
/// Some request to the runtime failed.
|
||||
/// For example if we prune a block we're requesting info about.
|
||||
#[error("Runtime API error")]
|
||||
RuntimeRequest(RuntimeApiError),
|
||||
|
||||
/// We tried fetching a session info which was not available.
|
||||
#[error("There was no session with the given index")]
|
||||
NoSuchSession(SessionIndex),
|
||||
|
||||
/// Decompressing PoV failed.
|
||||
#[error("PoV could not be decompressed")]
|
||||
PoVDecompression(CompressedPoVError),
|
||||
|
||||
/// Fetching PoV failed with `RequestError`.
|
||||
#[error("FetchPoV request error")]
|
||||
FetchPoV(#[source] RequestError),
|
||||
|
||||
/// Fetching PoV failed as the received PoV did not match the expected hash.
|
||||
#[error("Fetched PoV does not match expected hash")]
|
||||
UnexpectedPoV,
|
||||
|
||||
#[error("Remote responded with `NoSuchPoV`")]
|
||||
NoSuchPoV,
|
||||
|
||||
/// No validator with the index could be found in current session.
|
||||
#[error("Given validator index could not be found")]
|
||||
InvalidValidatorIndex,
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -87,9 +112,20 @@ pub(crate) async fn recv_runtime<V>(
|
||||
oneshot::Receiver<std::result::Result<V, RuntimeApiError>>,
|
||||
UtilError,
|
||||
>,
|
||||
) -> std::result::Result<V, NonFatalError> {
|
||||
r.map_err(NonFatalError::UtilRequest)?
|
||||
) -> std::result::Result<V, Error> {
|
||||
r.map_err(Error::UtilRequest)?
|
||||
.await
|
||||
.map_err(NonFatalError::RuntimeRequestCanceled)?
|
||||
.map_err(NonFatalError::RuntimeRequest)
|
||||
.map_err(Error::RuntimeRequestCanceled)?
|
||||
.map_err(Error::RuntimeRequest)
|
||||
}
|
||||
|
||||
|
||||
/// Utility for eating top level errors and log them.
|
||||
///
|
||||
/// We basically always want to try and continue on error. This utility function is meant to
|
||||
/// consume top-level errors by simply logging them
|
||||
pub fn log_error(result: Result<()>, ctx: &'static str) {
|
||||
if let Err(error) = result {
|
||||
tracing::warn!(target: LOG_TARGET, error = ?error, ctx);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,15 +26,23 @@ use polkadot_subsystem::{
|
||||
/// Error and [`Result`] type for this subsystem.
|
||||
mod error;
|
||||
pub use error::Error;
|
||||
use error::Result;
|
||||
use error::{Result, log_error};
|
||||
|
||||
/// Runtime requests.
|
||||
mod runtime;
|
||||
use runtime::Runtime;
|
||||
|
||||
/// `Requester` taking care of requesting chunks for candidates pending availability.
|
||||
mod requester;
|
||||
use requester::Requester;
|
||||
|
||||
/// Handing requests for PoVs during backing.
|
||||
mod pov_requester;
|
||||
use pov_requester::PoVRequester;
|
||||
|
||||
/// Responding to erasure chunk requests:
|
||||
mod responder;
|
||||
use responder::answer_request_log;
|
||||
use responder::{answer_chunk_request_log, answer_pov_request_log};
|
||||
|
||||
/// Cache for session information.
|
||||
mod session_cache;
|
||||
@@ -52,6 +60,8 @@ const LOG_TARGET: &'static str = "parachain::availability-distribution";
|
||||
pub struct AvailabilityDistributionSubsystem {
|
||||
/// Pointer to a keystore, which is required for determining this nodes validator index.
|
||||
keystore: SyncCryptoStorePtr,
|
||||
/// Easy and efficient runtime access for this subsystem.
|
||||
runtime: Runtime,
|
||||
/// Prometheus metrics.
|
||||
metrics: Metrics,
|
||||
}
|
||||
@@ -74,17 +84,20 @@ where
|
||||
}
|
||||
|
||||
impl AvailabilityDistributionSubsystem {
|
||||
|
||||
/// Create a new instance of the availability distribution.
|
||||
pub fn new(keystore: SyncCryptoStorePtr, metrics: Metrics) -> Self {
|
||||
Self { keystore, metrics }
|
||||
let runtime = Runtime::new(keystore.clone());
|
||||
Self { keystore, runtime, metrics }
|
||||
}
|
||||
|
||||
/// Start processing work as passed on from the Overseer.
|
||||
async fn run<Context>(self, mut ctx: Context) -> Result<()>
|
||||
async fn run<Context>(mut self, mut ctx: Context) -> Result<()>
|
||||
where
|
||||
Context: SubsystemContext<Message = AvailabilityDistributionMessage> + Sync + Send,
|
||||
{
|
||||
let mut requester = Requester::new(self.keystore.clone(), self.metrics.clone()).fuse();
|
||||
let mut pov_requester = PoVRequester::new();
|
||||
loop {
|
||||
let action = {
|
||||
let mut subsystem_next = ctx.recv().fuse();
|
||||
@@ -107,14 +120,14 @@ impl AvailabilityDistributionSubsystem {
|
||||
};
|
||||
match message {
|
||||
FromOverseer::Signal(OverseerSignal::ActiveLeaves(update)) => {
|
||||
// Update the relay chain heads we are fetching our pieces for:
|
||||
if let Some(e) = requester
|
||||
.get_mut()
|
||||
.update_fetching_heads(&mut ctx, update)
|
||||
.await?
|
||||
{
|
||||
tracing::debug!(target: LOG_TARGET, "Error processing ActiveLeavesUpdate: {:?}", e);
|
||||
}
|
||||
log_error(
|
||||
pov_requester.update_connected_validators(&mut ctx, &mut self.runtime, &update).await,
|
||||
"PoVRequester::update_connected_validators"
|
||||
);
|
||||
log_error(
|
||||
requester.get_mut().update_fetching_heads(&mut ctx, update).await,
|
||||
"Error in Requester::update_fetching_heads"
|
||||
);
|
||||
}
|
||||
FromOverseer::Signal(OverseerSignal::BlockFinalized(..)) => {}
|
||||
FromOverseer::Signal(OverseerSignal::Conclude) => {
|
||||
@@ -123,7 +136,34 @@ impl AvailabilityDistributionSubsystem {
|
||||
FromOverseer::Communication {
|
||||
msg: AvailabilityDistributionMessage::ChunkFetchingRequest(req),
|
||||
} => {
|
||||
answer_request_log(&mut ctx, req, &self.metrics).await
|
||||
answer_chunk_request_log(&mut ctx, req, &self.metrics).await
|
||||
}
|
||||
FromOverseer::Communication {
|
||||
msg: AvailabilityDistributionMessage::PoVFetchingRequest(req),
|
||||
} => {
|
||||
answer_pov_request_log(&mut ctx, req, &self.metrics).await
|
||||
}
|
||||
FromOverseer::Communication {
|
||||
msg: AvailabilityDistributionMessage::FetchPoV {
|
||||
relay_parent,
|
||||
from_validator,
|
||||
candidate_hash,
|
||||
pov_hash,
|
||||
tx,
|
||||
},
|
||||
} => {
|
||||
log_error(
|
||||
pov_requester.fetch_pov(
|
||||
&mut ctx,
|
||||
&mut self.runtime,
|
||||
relay_parent,
|
||||
from_validator,
|
||||
candidate_hash,
|
||||
pov_hash,
|
||||
tx,
|
||||
).await,
|
||||
"PoVRequester::fetch_pov"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,7 +24,7 @@ pub const SUCCEEDED: &'static str = "succeeded";
|
||||
/// Label for fail counters.
|
||||
pub const FAILED: &'static str = "failed";
|
||||
|
||||
/// Label for chunks that could not be served, because they were not available.
|
||||
/// Label for chunks/PoVs that could not be served, because they were not available.
|
||||
pub const NOT_FOUND: &'static str = "not-found";
|
||||
|
||||
/// Availability Distribution metrics.
|
||||
@@ -47,6 +47,12 @@ struct MetricsInner {
|
||||
/// to a chunk request. This includes `NoSuchChunk` responses.
|
||||
served_chunks: CounterVec<U64>,
|
||||
|
||||
/// Number of PoVs served.
|
||||
///
|
||||
/// Note: Right now, `Succeeded` gets incremented whenever we were able to successfully respond
|
||||
/// to a PoV request. This includes `NoSuchPoV` responses.
|
||||
served_povs: CounterVec<U64>,
|
||||
|
||||
/// Number of times our first set of validators did not provide the needed chunk and we had to
|
||||
/// query further validators.
|
||||
retries: Counter<U64>,
|
||||
@@ -66,12 +72,19 @@ impl Metrics {
|
||||
}
|
||||
|
||||
/// Increment counter on served chunks.
|
||||
pub fn on_served(&self, label: &'static str) {
|
||||
pub fn on_served_chunk(&self, label: &'static str) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.served_chunks.with_label_values(&[label]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// Increment counter on served PoVs.
|
||||
pub fn on_served_pov(&self, label: &'static str) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.served_povs.with_label_values(&[label]).inc()
|
||||
}
|
||||
}
|
||||
|
||||
/// Increment retry counter.
|
||||
pub fn on_retry(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
@@ -103,6 +116,16 @@ impl metrics::Metrics for Metrics {
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
served_povs: prometheus::register(
|
||||
CounterVec::new(
|
||||
Opts::new(
|
||||
"parachain_served_povs_total",
|
||||
"Total number of povs served by this backer.",
|
||||
),
|
||||
&["success"]
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
retries: prometheus::register(
|
||||
Counter::new(
|
||||
"parachain_fetch_retries_total",
|
||||
|
||||
@@ -0,0 +1,333 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! PoV requester takes care of requesting PoVs from validators of a backing group.
|
||||
|
||||
use futures::{FutureExt, channel::{mpsc, oneshot}, future::BoxFuture};
|
||||
use lru::LruCache;
|
||||
|
||||
use polkadot_subsystem::jaeger;
|
||||
use polkadot_node_network_protocol::{
|
||||
PeerId, peer_set::PeerSet,
|
||||
request_response::{OutgoingRequest, Recipient, request::{RequestError, Requests},
|
||||
v1::{PoVFetchingRequest, PoVFetchingResponse}}
|
||||
};
|
||||
use polkadot_primitives::v1::{
|
||||
AuthorityDiscoveryId, CandidateHash, Hash, PoV, SessionIndex, ValidatorIndex
|
||||
};
|
||||
use polkadot_subsystem::{
|
||||
ActiveLeavesUpdate, SubsystemContext, ActivatedLeaf,
|
||||
messages::{AllMessages, NetworkBridgeMessage, IfDisconnected}
|
||||
};
|
||||
|
||||
use crate::{error::{Error, log_error}, runtime::{Runtime, ValidatorInfo}};
|
||||
|
||||
/// Number of sessions we want to keep in the LRU.
|
||||
const NUM_SESSIONS: usize = 2;
|
||||
|
||||
pub struct PoVRequester {
|
||||
/// We only ever care about being connected to validators of at most two sessions.
|
||||
///
|
||||
/// So we keep an LRU for managing connection requests of size 2.
|
||||
/// Cache will contain `None` if we are not a validator in that session.
|
||||
connected_validators: LruCache<SessionIndex, Option<mpsc::Receiver<(AuthorityDiscoveryId, PeerId)>>>,
|
||||
}
|
||||
|
||||
impl PoVRequester {
|
||||
/// Create a new requester for PoVs.
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
connected_validators: LruCache::new(NUM_SESSIONS),
|
||||
}
|
||||
}
|
||||
|
||||
/// Make sure we are connected to the right set of validators.
|
||||
///
|
||||
/// On every `ActiveLeavesUpdate`, we check whether we are connected properly to our current
|
||||
/// validator group.
|
||||
pub async fn update_connected_validators<Context>(
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
runtime: &mut Runtime,
|
||||
update: &ActiveLeavesUpdate,
|
||||
) -> super::Result<()>
|
||||
where
|
||||
Context: SubsystemContext,
|
||||
{
|
||||
let activated = update.activated.iter().map(|ActivatedLeaf { hash: h, .. }| h);
|
||||
let activated_sessions =
|
||||
get_activated_sessions(ctx, runtime, activated).await?;
|
||||
|
||||
for (parent, session_index) in activated_sessions {
|
||||
if self.connected_validators.contains(&session_index) {
|
||||
continue
|
||||
}
|
||||
let rx = connect_to_relevant_validators(ctx, runtime, parent, session_index).await?;
|
||||
self.connected_validators.put(session_index, rx);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Start background worker for taking care of fetching the requested `PoV` from the network.
|
||||
pub async fn fetch_pov<Context>(
|
||||
&self,
|
||||
ctx: &mut Context,
|
||||
runtime: &mut Runtime,
|
||||
parent: Hash,
|
||||
from_validator: ValidatorIndex,
|
||||
candidate_hash: CandidateHash,
|
||||
pov_hash: Hash,
|
||||
tx: oneshot::Sender<PoV>
|
||||
) -> super::Result<()>
|
||||
where
|
||||
Context: SubsystemContext,
|
||||
{
|
||||
let info = &runtime.get_session_info(ctx, parent).await?.session_info;
|
||||
let authority_id = info.discovery_keys.get(from_validator.0 as usize)
|
||||
.ok_or(Error::InvalidValidatorIndex)?
|
||||
.clone();
|
||||
let (req, pending_response) = OutgoingRequest::new(
|
||||
Recipient::Authority(authority_id),
|
||||
PoVFetchingRequest {
|
||||
candidate_hash,
|
||||
},
|
||||
);
|
||||
let full_req = Requests::PoVFetching(req);
|
||||
|
||||
ctx.send_message(
|
||||
AllMessages::NetworkBridge(
|
||||
NetworkBridgeMessage::SendRequests(
|
||||
vec![full_req],
|
||||
// We are supposed to be connected to validators of our group via `PeerSet`,
|
||||
// but at session boundaries that is kind of racy, in case a connection takes
|
||||
// longer to get established, so we try to connect in any case.
|
||||
IfDisconnected::TryConnect
|
||||
)
|
||||
)).await;
|
||||
|
||||
let span = jaeger::Span::new(candidate_hash, "fetch-pov")
|
||||
.with_validator_index(from_validator);
|
||||
ctx.spawn("pov-fetcher", fetch_pov_job(pov_hash, pending_response.boxed(), span, tx).boxed())
|
||||
.await
|
||||
.map_err(|e| Error::SpawnTask(e))
|
||||
}
|
||||
}
|
||||
|
||||
/// Future to be spawned for taking care of handling reception and sending of PoV.
|
||||
async fn fetch_pov_job(
|
||||
pov_hash: Hash,
|
||||
pending_response: BoxFuture<'static, Result<PoVFetchingResponse, RequestError>>,
|
||||
span: jaeger::Span,
|
||||
tx: oneshot::Sender<PoV>,
|
||||
) {
|
||||
log_error(
|
||||
do_fetch_pov(pov_hash, pending_response, span, tx).await,
|
||||
"fetch_pov_job",
|
||||
)
|
||||
}
|
||||
|
||||
/// Do the actual work of waiting for the response.
|
||||
async fn do_fetch_pov(
|
||||
pov_hash: Hash,
|
||||
pending_response: BoxFuture<'static, Result<PoVFetchingResponse, RequestError>>,
|
||||
_span: jaeger::Span,
|
||||
tx: oneshot::Sender<PoV>,
|
||||
)
|
||||
-> super::Result<()>
|
||||
{
|
||||
let response = pending_response.await.map_err(Error::FetchPoV)?;
|
||||
let pov = match response {
|
||||
PoVFetchingResponse::PoV(compressed) => {
|
||||
compressed.decompress().map_err(Error::PoVDecompression)?
|
||||
}
|
||||
PoVFetchingResponse::NoSuchPoV => {
|
||||
return Err(Error::NoSuchPoV)
|
||||
}
|
||||
};
|
||||
if pov.hash() == pov_hash {
|
||||
tx.send(pov).map_err(|_| Error::SendResponse)
|
||||
} else {
|
||||
Err(Error::UnexpectedPoV)
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the session indeces for the given relay chain parents.
|
||||
async fn get_activated_sessions<Context>(ctx: &mut Context, runtime: &mut Runtime, new_heads: impl Iterator<Item = &Hash>)
|
||||
-> super::Result<impl Iterator<Item = (Hash, SessionIndex)>>
|
||||
where
|
||||
Context: SubsystemContext,
|
||||
{
|
||||
let mut sessions = Vec::new();
|
||||
for parent in new_heads {
|
||||
sessions.push((*parent, runtime.get_session_index(ctx, *parent).await?));
|
||||
}
|
||||
Ok(sessions.into_iter())
|
||||
}
|
||||
|
||||
/// Connect to validators of our validator group.
|
||||
async fn connect_to_relevant_validators<Context>(
|
||||
ctx: &mut Context,
|
||||
runtime: &mut Runtime,
|
||||
parent: Hash,
|
||||
session: SessionIndex
|
||||
)
|
||||
-> super::Result<Option<mpsc::Receiver<(AuthorityDiscoveryId, PeerId)>>>
|
||||
where
|
||||
Context: SubsystemContext,
|
||||
{
|
||||
if let Some(validator_ids) = determine_relevant_validators(ctx, runtime, parent, session).await? {
|
||||
// We don't actually care about `PeerId`s, just keeping receiver so we stay connected:
|
||||
let (tx, rx) = mpsc::channel(0);
|
||||
ctx.send_message(AllMessages::NetworkBridge(NetworkBridgeMessage::ConnectToValidators {
|
||||
validator_ids, peer_set: PeerSet::Validation, connected: tx
|
||||
})).await;
|
||||
Ok(Some(rx))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the validators in our validator group.
|
||||
///
|
||||
/// Return: `None` if not a validator.
|
||||
async fn determine_relevant_validators<Context>(
|
||||
ctx: &mut Context,
|
||||
runtime: &mut Runtime,
|
||||
parent: Hash,
|
||||
session: SessionIndex,
|
||||
)
|
||||
-> super::Result<Option<Vec<AuthorityDiscoveryId>>>
|
||||
where
|
||||
Context: SubsystemContext,
|
||||
{
|
||||
let info = runtime.get_session_info_by_index(ctx, parent, session).await?;
|
||||
if let ValidatorInfo {
|
||||
our_index: Some(our_index),
|
||||
our_group: Some(our_group)
|
||||
} = &info.validator_info {
|
||||
|
||||
let indeces = info.session_info.validator_groups.get(our_group.0 as usize)
|
||||
.expect("Our group got retrieved from that session info, it must exist. qed.")
|
||||
.clone();
|
||||
Ok(Some(
|
||||
indeces.into_iter()
|
||||
.filter(|i| *i != *our_index)
|
||||
.map(|i| info.session_info.discovery_keys[i.0 as usize].clone())
|
||||
.collect()
|
||||
))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use assert_matches::assert_matches;
|
||||
use futures::{executor, future};
|
||||
|
||||
use parity_scale_codec::Encode;
|
||||
use sp_core::testing::TaskExecutor;
|
||||
|
||||
use polkadot_primitives::v1::{BlockData, CandidateHash, CompressedPoV, Hash, ValidatorIndex};
|
||||
use polkadot_subsystem_testhelpers as test_helpers;
|
||||
use polkadot_subsystem::messages::{AvailabilityDistributionMessage, RuntimeApiMessage, RuntimeApiRequest};
|
||||
|
||||
use super::*;
|
||||
use crate::LOG_TARGET;
|
||||
use crate::tests::mock::{make_session_info, make_ferdie_keystore};
|
||||
|
||||
#[test]
|
||||
fn rejects_invalid_pov() {
|
||||
sp_tracing::try_init_simple();
|
||||
let pov = PoV {
|
||||
block_data: BlockData(vec![1,2,3,4,5,6]),
|
||||
};
|
||||
test_run(Hash::default(), pov);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn accepts_valid_pov() {
|
||||
sp_tracing::try_init_simple();
|
||||
let pov = PoV {
|
||||
block_data: BlockData(vec![1,2,3,4,5,6]),
|
||||
};
|
||||
test_run(pov.hash(), pov);
|
||||
}
|
||||
|
||||
fn test_run(pov_hash: Hash, pov: PoV) {
|
||||
let requester = PoVRequester::new();
|
||||
let pool = TaskExecutor::new();
|
||||
let (mut context, mut virtual_overseer) =
|
||||
test_helpers::make_subsystem_context::<AvailabilityDistributionMessage, TaskExecutor>(pool.clone());
|
||||
let keystore = make_ferdie_keystore();
|
||||
let mut runtime = crate::runtime::Runtime::new(keystore);
|
||||
|
||||
let (tx, rx) = oneshot::channel();
|
||||
let testee = async {
|
||||
requester.fetch_pov(
|
||||
&mut context,
|
||||
&mut runtime,
|
||||
Hash::default(),
|
||||
ValidatorIndex(0),
|
||||
CandidateHash::default(),
|
||||
pov_hash,
|
||||
tx,
|
||||
).await.expect("Should succeed");
|
||||
};
|
||||
|
||||
let tester = async move {
|
||||
loop {
|
||||
match virtual_overseer.recv().await {
|
||||
AllMessages::RuntimeApi(
|
||||
RuntimeApiMessage::Request(
|
||||
_,
|
||||
RuntimeApiRequest::SessionIndexForChild(tx)
|
||||
)
|
||||
) => {
|
||||
tx.send(Ok(0)).unwrap();
|
||||
}
|
||||
AllMessages::RuntimeApi(
|
||||
RuntimeApiMessage::Request(
|
||||
_,
|
||||
RuntimeApiRequest::SessionInfo(_, tx)
|
||||
)
|
||||
) => {
|
||||
tx.send(Ok(Some(make_session_info()))).unwrap();
|
||||
}
|
||||
AllMessages::NetworkBridge(NetworkBridgeMessage::SendRequests(mut reqs, _)) => {
|
||||
let req = assert_matches!(
|
||||
reqs.pop(),
|
||||
Some(Requests::PoVFetching(outgoing)) => {outgoing}
|
||||
);
|
||||
req.pending_response.send(Ok(PoVFetchingResponse::PoV(
|
||||
CompressedPoV::compress(&pov).unwrap()).encode()
|
||||
)).unwrap();
|
||||
break
|
||||
},
|
||||
msg => tracing::debug!(target: LOG_TARGET, msg = ?msg, "Received msg"),
|
||||
}
|
||||
}
|
||||
if pov.hash() == pov_hash {
|
||||
assert_eq!(rx.await, Ok(pov));
|
||||
} else {
|
||||
assert_eq!(rx.await, Err(oneshot::Canceled));
|
||||
}
|
||||
};
|
||||
futures::pin_mut!(testee);
|
||||
futures::pin_mut!(tester);
|
||||
executor::block_on(future::join(testee, tester));
|
||||
}
|
||||
}
|
||||
@@ -138,7 +138,7 @@ impl FetchTaskConfig {
|
||||
let live_in = vec![leaf].into_iter().collect();
|
||||
|
||||
// Don't run tasks for our backing group:
|
||||
if session_info.our_group == core.group_responsible {
|
||||
if session_info.our_group == Some(core.group_responsible) {
|
||||
return FetchTaskConfig {
|
||||
live_in,
|
||||
prepared_running: None,
|
||||
|
||||
@@ -39,7 +39,7 @@ use polkadot_subsystem::{
|
||||
};
|
||||
|
||||
use super::{error::recv_runtime, session_cache::SessionCache, LOG_TARGET, Metrics};
|
||||
use crate::error::NonFatalError;
|
||||
use crate::error::Error;
|
||||
|
||||
/// A task fetching a particular chunk.
|
||||
mod fetch_task;
|
||||
@@ -96,7 +96,7 @@ impl Requester {
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
update: ActiveLeavesUpdate,
|
||||
) -> super::Result<Option<NonFatalError>>
|
||||
) -> super::Result<()>
|
||||
where
|
||||
Context: SubsystemContext,
|
||||
{
|
||||
@@ -111,9 +111,9 @@ impl Requester {
|
||||
} = update;
|
||||
// Order important! We need to handle activated, prior to deactivated, otherwise we might
|
||||
// cancel still needed jobs.
|
||||
let err = self.start_requesting_chunks(ctx, activated.into_iter()).await?;
|
||||
self.start_requesting_chunks(ctx, activated.into_iter()).await?;
|
||||
self.stop_requesting_chunks(deactivated.into_iter());
|
||||
Ok(err)
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Start requesting chunks for newly imported heads.
|
||||
@@ -121,25 +121,20 @@ impl Requester {
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
new_heads: impl Iterator<Item = ActivatedLeaf>,
|
||||
) -> super::Result<Option<NonFatalError>>
|
||||
) -> super::Result<()>
|
||||
where
|
||||
Context: SubsystemContext,
|
||||
{
|
||||
for ActivatedLeaf { hash: leaf, .. } in new_heads {
|
||||
let cores = match query_occupied_cores(ctx, leaf).await {
|
||||
Err(err) => return Ok(Some(err)),
|
||||
Ok(cores) => cores,
|
||||
};
|
||||
let cores = query_occupied_cores(ctx, leaf).await?;
|
||||
tracing::trace!(
|
||||
target: LOG_TARGET,
|
||||
occupied_cores = ?cores,
|
||||
"Query occupied core"
|
||||
);
|
||||
if let Some(err) = self.add_cores(ctx, leaf, cores).await? {
|
||||
return Ok(Some(err));
|
||||
}
|
||||
self.add_cores(ctx, leaf, cores).await?;
|
||||
}
|
||||
Ok(None)
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Stop requesting chunks for obsolete heads.
|
||||
@@ -164,7 +159,7 @@ impl Requester {
|
||||
ctx: &mut Context,
|
||||
leaf: Hash,
|
||||
cores: impl IntoIterator<Item = OccupiedCore>,
|
||||
) -> super::Result<Option<NonFatalError>>
|
||||
) -> super::Result<()>
|
||||
where
|
||||
Context: SubsystemContext,
|
||||
{
|
||||
@@ -179,7 +174,7 @@ impl Requester {
|
||||
let tx = self.tx.clone();
|
||||
let metrics = self.metrics.clone();
|
||||
|
||||
let task_cfg = match self
|
||||
let task_cfg = self
|
||||
.session_cache
|
||||
.with_session_info(
|
||||
ctx,
|
||||
@@ -189,11 +184,7 @@ impl Requester {
|
||||
leaf,
|
||||
|info| FetchTaskConfig::new(leaf, &core, tx, metrics, info),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Err(err) => return Ok(Some(err)),
|
||||
Ok(task_cfg) => task_cfg,
|
||||
};
|
||||
.await?;
|
||||
|
||||
if let Some(task_cfg) = task_cfg {
|
||||
e.insert(FetchTask::start(task_cfg, ctx).await?);
|
||||
@@ -202,7 +193,7 @@ impl Requester {
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -237,7 +228,7 @@ impl Stream for Requester {
|
||||
async fn query_occupied_cores<Context>(
|
||||
ctx: &mut Context,
|
||||
relay_parent: Hash,
|
||||
) -> Result<Vec<OccupiedCore>, NonFatalError>
|
||||
) -> Result<Vec<OccupiedCore>, Error>
|
||||
where
|
||||
Context: SubsystemContext,
|
||||
{
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
use futures::channel::oneshot;
|
||||
|
||||
use polkadot_node_network_protocol::request_response::{request::IncomingRequest, v1};
|
||||
use polkadot_primitives::v1::{CandidateHash, ErasureChunk, ValidatorIndex};
|
||||
use polkadot_primitives::v1::{AvailableData, CandidateHash, CompressedPoV, ErasureChunk, ValidatorIndex};
|
||||
use polkadot_subsystem::{
|
||||
messages::{AllMessages, AvailabilityStoreMessage},
|
||||
SubsystemContext, jaeger,
|
||||
@@ -28,10 +28,36 @@ use polkadot_subsystem::{
|
||||
use crate::error::{Error, Result};
|
||||
use crate::{LOG_TARGET, metrics::{Metrics, SUCCEEDED, FAILED, NOT_FOUND}};
|
||||
|
||||
/// Variant of `answer_request` that does Prometheus metric and logging on errors.
|
||||
/// Variant of `answer_pov_request` that does Prometheus metric and logging on errors.
|
||||
///
|
||||
/// Any errors of `answer_pov_request` will simply be logged.
|
||||
pub async fn answer_pov_request_log<Context>(
|
||||
ctx: &mut Context,
|
||||
req: IncomingRequest<v1::PoVFetchingRequest>,
|
||||
metrics: &Metrics,
|
||||
)
|
||||
where
|
||||
Context: SubsystemContext,
|
||||
{
|
||||
let res = answer_pov_request(ctx, req).await;
|
||||
match res {
|
||||
Ok(result) =>
|
||||
metrics.on_served_pov(if result {SUCCEEDED} else {NOT_FOUND}),
|
||||
Err(err) => {
|
||||
tracing::warn!(
|
||||
target: LOG_TARGET,
|
||||
err= ?err,
|
||||
"Serving PoV failed with error"
|
||||
);
|
||||
metrics.on_served_pov(FAILED);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Variant of `answer_chunk_request` that does Prometheus metric and logging on errors.
|
||||
///
|
||||
/// Any errors of `answer_request` will simply be logged.
|
||||
pub async fn answer_request_log<Context>(
|
||||
pub async fn answer_chunk_request_log<Context>(
|
||||
ctx: &mut Context,
|
||||
req: IncomingRequest<v1::ChunkFetchingRequest>,
|
||||
metrics: &Metrics,
|
||||
@@ -39,33 +65,71 @@ pub async fn answer_request_log<Context>(
|
||||
where
|
||||
Context: SubsystemContext,
|
||||
{
|
||||
let res = answer_request(ctx, req).await;
|
||||
let res = answer_chunk_request(ctx, req).await;
|
||||
match res {
|
||||
Ok(result) =>
|
||||
metrics.on_served(if result {SUCCEEDED} else {NOT_FOUND}),
|
||||
metrics.on_served_chunk(if result {SUCCEEDED} else {NOT_FOUND}),
|
||||
Err(err) => {
|
||||
tracing::warn!(
|
||||
target: LOG_TARGET,
|
||||
err= ?err,
|
||||
"Serving chunk failed with error"
|
||||
);
|
||||
metrics.on_served(FAILED);
|
||||
metrics.on_served_chunk(FAILED);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Answer an incoming PoV fetch request by querying the av store.
|
||||
///
|
||||
/// Returns: Ok(true) if chunk was found and served.
|
||||
pub async fn answer_pov_request<Context>(
|
||||
ctx: &mut Context,
|
||||
req: IncomingRequest<v1::PoVFetchingRequest>,
|
||||
) -> Result<bool>
|
||||
where
|
||||
Context: SubsystemContext,
|
||||
{
|
||||
let _span = jaeger::Span::new(req.payload.candidate_hash, "answer-pov-request");
|
||||
|
||||
let av_data = query_available_data(ctx, req.payload.candidate_hash).await?;
|
||||
|
||||
let result = av_data.is_some();
|
||||
|
||||
let response = match av_data {
|
||||
None => v1::PoVFetchingResponse::NoSuchPoV,
|
||||
Some(av_data) => {
|
||||
let pov = match CompressedPoV::compress(&av_data.pov) {
|
||||
Ok(pov) => pov,
|
||||
Err(error) => {
|
||||
tracing::error!(
|
||||
target: LOG_TARGET,
|
||||
error = ?error,
|
||||
"Failed to create `CompressedPov`",
|
||||
);
|
||||
// this should really not happen, let this request time out:
|
||||
return Err(Error::PoVDecompression(error))
|
||||
}
|
||||
};
|
||||
v1::PoVFetchingResponse::PoV(pov)
|
||||
}
|
||||
};
|
||||
|
||||
req.send_response(response).map_err(|_| Error::SendResponse)?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Answer an incoming chunk request by querying the av store.
|
||||
///
|
||||
/// Returns: Ok(true) if chunk was found and served.
|
||||
pub async fn answer_request<Context>(
|
||||
pub async fn answer_chunk_request<Context>(
|
||||
ctx: &mut Context,
|
||||
req: IncomingRequest<v1::ChunkFetchingRequest>,
|
||||
) -> Result<bool>
|
||||
where
|
||||
Context: SubsystemContext,
|
||||
{
|
||||
let span = jaeger::Span::new(req.payload.candidate_hash, "answer-request")
|
||||
.with_stage(jaeger::Stage::AvailabilityDistribution);
|
||||
let span = jaeger::Span::new(req.payload.candidate_hash, "answer-chunk-request");
|
||||
|
||||
let _child_span = span.child("answer-chunk-request")
|
||||
.with_chunk_index(req.payload.index.0);
|
||||
@@ -119,3 +183,21 @@ where
|
||||
Error::QueryChunkResponseChannel(e)
|
||||
})
|
||||
}
|
||||
|
||||
/// Query PoV from the availability store.
|
||||
#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))]
|
||||
async fn query_available_data<Context>(
|
||||
ctx: &mut Context,
|
||||
candidate_hash: CandidateHash,
|
||||
) -> Result<Option<AvailableData>>
|
||||
where
|
||||
Context: SubsystemContext,
|
||||
{
|
||||
let (tx, rx) = oneshot::channel();
|
||||
ctx.send_message(AllMessages::AvailabilityStore(
|
||||
AvailabilityStoreMessage::QueryAvailableData(candidate_hash, tx),
|
||||
))
|
||||
.await;
|
||||
|
||||
rx.await.map_err(|e| Error::QueryAvailableDataResponseChannel(e))
|
||||
}
|
||||
|
||||
@@ -0,0 +1,197 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Convenient interface to the runtime.
|
||||
|
||||
use lru::LruCache;
|
||||
|
||||
use sp_application_crypto::AppKey;
|
||||
use sp_core::crypto::Public;
|
||||
use sp_keystore::{CryptoStore, SyncCryptoStorePtr};
|
||||
|
||||
use polkadot_node_subsystem_util::{
|
||||
request_session_index_for_child_ctx, request_session_info_ctx,
|
||||
};
|
||||
use polkadot_primitives::v1::{GroupIndex, Hash, SessionIndex, SessionInfo, ValidatorId, ValidatorIndex};
|
||||
use polkadot_subsystem::SubsystemContext;
|
||||
|
||||
use super::{
|
||||
error::recv_runtime,
|
||||
Error,
|
||||
};
|
||||
|
||||
/// Caching of session info as needed by availability distribution.
|
||||
///
|
||||
/// It should be ensured that a cached session stays live in the cache as long as we might need it.
|
||||
pub struct Runtime {
|
||||
/// Get the session index for a given relay parent.
|
||||
///
|
||||
/// We query this up to a 100 times per block, so caching it here without roundtrips over the
|
||||
/// overseer seems sensible.
|
||||
session_index_cache: LruCache<Hash, SessionIndex>,
|
||||
|
||||
/// Look up cached sessions by SessionIndex.
|
||||
session_info_cache: LruCache<SessionIndex, ExtendedSessionInfo>,
|
||||
|
||||
/// Key store for determining whether we are a validator and what `ValidatorIndex` we have.
|
||||
keystore: SyncCryptoStorePtr,
|
||||
}
|
||||
|
||||
/// SessionInfo with additional useful data for validator nodes.
|
||||
pub struct ExtendedSessionInfo {
|
||||
/// Actual session info as fetched from the runtime.
|
||||
pub session_info: SessionInfo,
|
||||
/// Contains useful information about ourselves, in case this node is a validator.
|
||||
pub validator_info: ValidatorInfo,
|
||||
}
|
||||
|
||||
/// Information about ourself, in case we are an `Authority`.
|
||||
///
|
||||
/// This data is derived from the `SessionInfo` and our key as found in the keystore.
|
||||
pub struct ValidatorInfo {
|
||||
/// The index this very validator has in `SessionInfo` vectors, if any.
|
||||
pub our_index: Option<ValidatorIndex>,
|
||||
/// The group we belong to, if any.
|
||||
pub our_group: Option<GroupIndex>,
|
||||
}
|
||||
|
||||
impl Runtime {
|
||||
/// Create a new `Runtime` for convenient runtime fetches.
|
||||
pub fn new(keystore: SyncCryptoStorePtr) -> Self {
|
||||
Self {
|
||||
// 5 relatively conservative, 1 to 2 should suffice:
|
||||
session_index_cache: LruCache::new(5),
|
||||
// We need to cache the current and the last session the most:
|
||||
session_info_cache: LruCache::new(2),
|
||||
keystore,
|
||||
}
|
||||
}
|
||||
|
||||
/// Retrieve the current session index.
|
||||
pub async fn get_session_index<Context>(
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
parent: Hash,
|
||||
) -> Result<SessionIndex, Error>
|
||||
where
|
||||
Context: SubsystemContext,
|
||||
{
|
||||
match self.session_index_cache.get(&parent) {
|
||||
Some(index) => Ok(*index),
|
||||
None => {
|
||||
let index =
|
||||
recv_runtime(request_session_index_for_child_ctx(parent, ctx).await)
|
||||
.await?;
|
||||
self.session_index_cache.put(parent, index);
|
||||
Ok(index)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get `ExtendedSessionInfo` by relay parent hash.
|
||||
pub async fn get_session_info<'a, Context>(
|
||||
&'a mut self,
|
||||
ctx: &mut Context,
|
||||
parent: Hash,
|
||||
) -> Result<&'a ExtendedSessionInfo, Error>
|
||||
where
|
||||
Context: SubsystemContext,
|
||||
{
|
||||
let session_index = self.get_session_index(ctx, parent).await?;
|
||||
|
||||
self.get_session_info_by_index(ctx, parent, session_index).await
|
||||
}
|
||||
|
||||
/// Get `ExtendedSessionInfo` by session index.
|
||||
///
|
||||
/// `request_session_info_ctx` still requires the parent to be passed in, so we take the parent
|
||||
/// in addition to the `SessionIndex`.
|
||||
pub async fn get_session_info_by_index<'a, Context>(
|
||||
&'a mut self,
|
||||
ctx: &mut Context,
|
||||
parent: Hash,
|
||||
session_index: SessionIndex,
|
||||
) -> Result<&'a ExtendedSessionInfo, Error>
|
||||
where
|
||||
Context: SubsystemContext,
|
||||
{
|
||||
if !self.session_info_cache.contains(&session_index) {
|
||||
let session_info =
|
||||
recv_runtime(request_session_info_ctx(parent, session_index, ctx).await)
|
||||
.await?
|
||||
.ok_or(Error::NoSuchSession(session_index))?;
|
||||
let validator_info = self.get_validator_info(&session_info).await?;
|
||||
|
||||
let full_info = ExtendedSessionInfo {
|
||||
session_info,
|
||||
validator_info,
|
||||
};
|
||||
|
||||
self.session_info_cache.put(session_index, full_info);
|
||||
}
|
||||
Ok(
|
||||
self.session_info_cache.get(&session_index)
|
||||
.expect("We just put the value there. qed.")
|
||||
)
|
||||
}
|
||||
|
||||
/// Build `ValidatorInfo` for the current session.
|
||||
///
|
||||
///
|
||||
/// Returns: `None` if not a validator.
|
||||
async fn get_validator_info(
|
||||
&self,
|
||||
session_info: &SessionInfo,
|
||||
) -> Result<ValidatorInfo, Error>
|
||||
{
|
||||
if let Some(our_index) = self.get_our_index(&session_info.validators).await {
|
||||
// Get our group index:
|
||||
let our_group = session_info.validator_groups
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find_map(|(i, g)| {
|
||||
g.iter().find_map(|v| {
|
||||
if *v == our_index {
|
||||
Some(GroupIndex(i as u32))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
}
|
||||
);
|
||||
let info = ValidatorInfo {
|
||||
our_index: Some(our_index),
|
||||
our_group,
|
||||
};
|
||||
return Ok(info)
|
||||
}
|
||||
return Ok(ValidatorInfo { our_index: None, our_group: None })
|
||||
}
|
||||
|
||||
/// Get our `ValidatorIndex`.
|
||||
///
|
||||
/// Returns: None if we are not a validator.
|
||||
async fn get_our_index(&self, validators: &[ValidatorId]) -> Option<ValidatorIndex> {
|
||||
for (i, v) in validators.iter().enumerate() {
|
||||
if CryptoStore::has_keys(&*self.keystore, &[(v.to_raw_vec(), ValidatorId::ID)])
|
||||
.await
|
||||
{
|
||||
return Some(ValidatorIndex(i as u32));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
@@ -33,8 +33,7 @@ use polkadot_primitives::v1::{
|
||||
use polkadot_subsystem::SubsystemContext;
|
||||
|
||||
use super::{
|
||||
error::{recv_runtime, NonFatalError},
|
||||
Error,
|
||||
error::{recv_runtime, Error},
|
||||
LOG_TARGET,
|
||||
};
|
||||
|
||||
@@ -82,7 +81,9 @@ pub struct SessionInfo {
|
||||
|
||||
/// Remember to which group we belong, so we won't start fetching chunks for candidates with
|
||||
/// our group being responsible. (We should have that chunk already.)
|
||||
pub our_group: GroupIndex,
|
||||
///
|
||||
/// `None`, if we are not in fact part of any group.
|
||||
pub our_group: Option<GroupIndex>,
|
||||
}
|
||||
|
||||
/// Report of bad validators.
|
||||
@@ -122,7 +123,7 @@ impl SessionCache {
|
||||
ctx: &mut Context,
|
||||
parent: Hash,
|
||||
with_info: F,
|
||||
) -> Result<Option<R>, NonFatalError>
|
||||
) -> Result<Option<R>, Error>
|
||||
where
|
||||
Context: SubsystemContext,
|
||||
F: FnOnce(&SessionInfo) -> R,
|
||||
@@ -219,7 +220,7 @@ impl SessionCache {
|
||||
ctx: &mut Context,
|
||||
parent: Hash,
|
||||
session_index: SessionIndex,
|
||||
) -> Result<Option<SessionInfo>, NonFatalError>
|
||||
) -> Result<Option<SessionInfo>, Error>
|
||||
where
|
||||
Context: SubsystemContext,
|
||||
{
|
||||
@@ -230,7 +231,7 @@ impl SessionCache {
|
||||
..
|
||||
} = recv_runtime(request_session_info_ctx(parent, session_index, ctx).await)
|
||||
.await?
|
||||
.ok_or(NonFatalError::NoSuchSession(session_index))?;
|
||||
.ok_or(Error::NoSuchSession(session_index))?;
|
||||
|
||||
if let Some(our_index) = self.get_our_index(validators).await {
|
||||
// Get our group index:
|
||||
@@ -245,8 +246,8 @@ impl SessionCache {
|
||||
None
|
||||
}
|
||||
})
|
||||
})
|
||||
.expect("Every validator should be in a validator group. qed.");
|
||||
}
|
||||
);
|
||||
|
||||
// Shuffle validators in groups:
|
||||
let mut rng = thread_rng();
|
||||
@@ -274,9 +275,9 @@ impl SessionCache {
|
||||
session_index,
|
||||
our_group,
|
||||
};
|
||||
return Ok(Some(info));
|
||||
return Ok(Some(info))
|
||||
}
|
||||
return Ok(None);
|
||||
return Ok(None)
|
||||
}
|
||||
|
||||
/// Get our `ValidatorIndex`.
|
||||
|
||||
@@ -19,14 +19,29 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use sc_keystore::LocalKeystore;
|
||||
use sp_keyring::Sr25519Keyring;
|
||||
use sp_application_crypto::AppKey;
|
||||
|
||||
use polkadot_erasure_coding::{branches, obtain_chunks_v1 as obtain_chunks};
|
||||
use polkadot_primitives::v1::{AvailableData, BlockData, CandidateCommitments, CandidateDescriptor,
|
||||
CandidateHash, CommittedCandidateReceipt, ErasureChunk, GroupIndex, Hash, HeadData, Id
|
||||
as ParaId, OccupiedCore, PersistedValidationData, PoV, SessionInfo,
|
||||
ValidatorIndex
|
||||
use polkadot_primitives::v1::{
|
||||
AvailableData, BlockData, CandidateCommitments, CandidateDescriptor, CandidateHash,
|
||||
CommittedCandidateReceipt, ErasureChunk, GroupIndex, Hash, HeadData, Id as ParaId,
|
||||
OccupiedCore, PersistedValidationData, PoV, SessionInfo, ValidatorId, ValidatorIndex
|
||||
};
|
||||
use sp_keystore::{SyncCryptoStore, SyncCryptoStorePtr};
|
||||
|
||||
/// Get mock keystore with `Ferdie` key.
|
||||
pub fn make_ferdie_keystore() -> SyncCryptoStorePtr {
|
||||
let keystore: SyncCryptoStorePtr = Arc::new(LocalKeystore::in_memory());
|
||||
SyncCryptoStore::sr25519_generate_new(
|
||||
&*keystore,
|
||||
ValidatorId::ID,
|
||||
Some(&Sr25519Keyring::Ferdie.to_seed()),
|
||||
)
|
||||
.expect("Insert key into keystore");
|
||||
keystore
|
||||
}
|
||||
|
||||
/// Create dummy session info with two validator groups.
|
||||
pub fn make_session_info() -> SessionInfo {
|
||||
|
||||
@@ -23,10 +23,7 @@ use smallvec::smallvec;
|
||||
use futures::{FutureExt, channel::oneshot, SinkExt, channel::mpsc, StreamExt};
|
||||
use futures_timer::Delay;
|
||||
|
||||
use sc_keystore::LocalKeystore;
|
||||
use sp_application_crypto::AppKey;
|
||||
use sp_keystore::{SyncCryptoStore, SyncCryptoStorePtr};
|
||||
use sp_keyring::Sr25519Keyring;
|
||||
use sp_keystore::SyncCryptoStorePtr;
|
||||
use sp_core::{traits::SpawnNamed, testing::TaskExecutor};
|
||||
use sc_network as network;
|
||||
use sc_network::IfDisconnected;
|
||||
@@ -39,7 +36,7 @@ use polkadot_subsystem::{ActiveLeavesUpdate, FromOverseer, OverseerSignal, Activ
|
||||
}
|
||||
};
|
||||
use polkadot_primitives::v1::{CandidateHash, CoreState, ErasureChunk, GroupIndex, Hash, Id
|
||||
as ParaId, ScheduledCore, SessionInfo, ValidatorId,
|
||||
as ParaId, ScheduledCore, SessionInfo,
|
||||
ValidatorIndex
|
||||
};
|
||||
use polkadot_node_network_protocol::{jaeger,
|
||||
@@ -48,7 +45,7 @@ use polkadot_node_network_protocol::{jaeger,
|
||||
use polkadot_subsystem_testhelpers as test_helpers;
|
||||
use test_helpers::SingleItemSink;
|
||||
|
||||
use super::mock::{make_session_info, OccupiedCoreBuilder, };
|
||||
use super::mock::{make_session_info, OccupiedCoreBuilder, make_ferdie_keystore};
|
||||
use crate::LOG_TARGET;
|
||||
|
||||
pub struct TestHarness {
|
||||
@@ -83,17 +80,10 @@ impl Default for TestState {
|
||||
|
||||
let chain_ids = vec![chain_a, chain_b];
|
||||
|
||||
let keystore: SyncCryptoStorePtr = Arc::new(LocalKeystore::in_memory());
|
||||
let keystore = make_ferdie_keystore();
|
||||
|
||||
let session_info = make_session_info();
|
||||
|
||||
SyncCryptoStore::sr25519_generate_new(
|
||||
&*keystore,
|
||||
ValidatorId::ID,
|
||||
Some(&Sr25519Keyring::Ferdie.to_seed()),
|
||||
)
|
||||
.expect("Insert key into keystore");
|
||||
|
||||
let (cores, chunks) = {
|
||||
let mut cores = HashMap::new();
|
||||
let mut chunks = HashMap::new();
|
||||
@@ -163,6 +153,9 @@ impl TestState {
|
||||
/// This will simply advance through the simulated chain and examines whether the subsystem
|
||||
/// behaves as expected: It will succeed if all valid chunks of other backing groups get stored
|
||||
/// and no other.
|
||||
///
|
||||
/// We try to be as agnostic about details as possible, how the subsystem achieves those goals
|
||||
/// should not be a matter to this test suite.
|
||||
async fn run_inner(self, executor: TaskExecutor, virtual_overseer: TestSubsystemContextHandle<AvailabilityDistributionMessage>) {
|
||||
// We skip genesis here (in reality ActiveLeavesUpdate can also skip a block:
|
||||
let updates = {
|
||||
@@ -258,15 +251,12 @@ impl TestState {
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
panic!("Unexpected message received: {:?}", msg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
async fn overseer_signal(
|
||||
mut tx: SingleItemSink<FromOverseer<AvailabilityDistributionMessage>>,
|
||||
msg: impl Into<OverseerSignal>,
|
||||
|
||||
@@ -696,7 +696,6 @@ mod tests {
|
||||
use polkadot_subsystem::messages::{
|
||||
ApprovalDistributionMessage,
|
||||
BitfieldDistributionMessage,
|
||||
PoVDistributionMessage,
|
||||
StatementDistributionMessage
|
||||
};
|
||||
use polkadot_node_subsystem_test_helpers::{
|
||||
@@ -897,13 +896,6 @@ mod tests {
|
||||
) if e == event.focus().expect("could not focus message")
|
||||
);
|
||||
|
||||
assert_matches!(
|
||||
virtual_overseer.recv().await,
|
||||
AllMessages::PoVDistribution(
|
||||
PoVDistributionMessage::NetworkBridgeUpdateV1(e)
|
||||
) if e == event.focus().expect("could not focus message")
|
||||
);
|
||||
|
||||
assert_matches!(
|
||||
virtual_overseer.recv().await,
|
||||
AllMessages::ApprovalDistribution(
|
||||
@@ -1166,13 +1158,12 @@ mod tests {
|
||||
).await;
|
||||
}
|
||||
|
||||
let pov_distribution_message = protocol_v1::PoVDistributionMessage::Awaiting(
|
||||
[0; 32].into(),
|
||||
vec![[1; 32].into()],
|
||||
let approval_distribution_message = protocol_v1::ApprovalDistributionMessage::Approvals(
|
||||
Vec::new()
|
||||
);
|
||||
|
||||
let message = protocol_v1::ValidationProtocol::PoVDistribution(
|
||||
pov_distribution_message.clone(),
|
||||
let message = protocol_v1::ValidationProtocol::ApprovalDistribution(
|
||||
approval_distribution_message.clone(),
|
||||
);
|
||||
|
||||
network_handle.peer_message(
|
||||
@@ -1183,18 +1174,18 @@ mod tests {
|
||||
|
||||
network_handle.disconnect_peer(peer.clone(), PeerSet::Validation).await;
|
||||
|
||||
// PoV distribution message comes first, and the message is only sent to that subsystem.
|
||||
// Approval distribution message comes first, and the message is only sent to that subsystem.
|
||||
// then a disconnection event arises that is sent to all validation networking subsystems.
|
||||
|
||||
assert_matches!(
|
||||
virtual_overseer.recv().await,
|
||||
AllMessages::PoVDistribution(
|
||||
PoVDistributionMessage::NetworkBridgeUpdateV1(
|
||||
AllMessages::ApprovalDistribution(
|
||||
ApprovalDistributionMessage::NetworkBridgeUpdateV1(
|
||||
NetworkBridgeEvent::PeerMessage(p, m)
|
||||
)
|
||||
) => {
|
||||
assert_eq!(p, peer);
|
||||
assert_eq!(m, pov_distribution_message);
|
||||
assert_eq!(m, approval_distribution_message);
|
||||
}
|
||||
);
|
||||
|
||||
@@ -1563,13 +1554,12 @@ mod tests {
|
||||
// send a validation protocol message.
|
||||
|
||||
{
|
||||
let pov_distribution_message = protocol_v1::PoVDistributionMessage::Awaiting(
|
||||
[0; 32].into(),
|
||||
vec![[1; 32].into()],
|
||||
let approval_distribution_message = protocol_v1::ApprovalDistributionMessage::Approvals(
|
||||
Vec::new()
|
||||
);
|
||||
|
||||
let message = protocol_v1::ValidationProtocol::PoVDistribution(
|
||||
pov_distribution_message.clone(),
|
||||
let message = protocol_v1::ValidationProtocol::ApprovalDistribution(
|
||||
approval_distribution_message.clone(),
|
||||
);
|
||||
|
||||
virtual_overseer.send(FromOverseer::Communication {
|
||||
@@ -1624,7 +1614,7 @@ mod tests {
|
||||
fn spread_event_to_subsystems_is_up_to_date() {
|
||||
// Number of subsystems expected to be interested in a network event,
|
||||
// and hence the network event broadcasted to.
|
||||
const EXPECTED_COUNT: usize = 4;
|
||||
const EXPECTED_COUNT: usize = 3;
|
||||
|
||||
let mut cnt = 0_usize;
|
||||
for msg in AllMessages::dispatch_iter(NetworkBridgeEvent::PeerDisconnected(PeerId::random())) {
|
||||
@@ -1640,7 +1630,6 @@ mod tests {
|
||||
AllMessages::BitfieldDistribution(_) => { cnt += 1; }
|
||||
AllMessages::BitfieldSigning(_) => unreachable!("Not interested in network events"),
|
||||
AllMessages::Provisioner(_) => unreachable!("Not interested in network events"),
|
||||
AllMessages::PoVDistribution(_) => { cnt += 1; }
|
||||
AllMessages::RuntimeApi(_) => unreachable!("Not interested in network events"),
|
||||
AllMessages::AvailabilityStore(_) => unreachable!("Not interested in network events"),
|
||||
AllMessages::NetworkBridge(_) => unreachable!("Not interested in network events"),
|
||||
|
||||
@@ -141,6 +141,11 @@ fn multiplex_single(
|
||||
decode_with_peer::<v1::CollationFetchingRequest>(peer, payload)?,
|
||||
pending_response,
|
||||
)),
|
||||
Protocol::PoVFetching => From::from(IncomingRequest::new(
|
||||
peer,
|
||||
decode_with_peer::<v1::PoVFetchingRequest>(peer, payload)?,
|
||||
pending_response,
|
||||
)),
|
||||
Protocol::AvailableDataFetching => From::from(IncomingRequest::new(
|
||||
peer,
|
||||
decode_with_peer::<v1::AvailableDataFetchingRequest>(peer, payload)?,
|
||||
|
||||
@@ -237,14 +237,14 @@ impl Network for Arc<NetworkService<Block, Hash>> {
|
||||
Recipient::Peer(peer_id) => Some(peer_id),
|
||||
Recipient::Authority(authority) =>
|
||||
authority_discovery
|
||||
.get_addresses_by_authority_id(authority)
|
||||
.await
|
||||
.and_then(|addrs| {
|
||||
addrs
|
||||
.into_iter()
|
||||
.find_map(|addr| peer_id_from_multiaddr(&addr))
|
||||
}),
|
||||
};
|
||||
.get_addresses_by_authority_id(authority)
|
||||
.await
|
||||
.and_then(|addrs| {
|
||||
addrs
|
||||
.into_iter()
|
||||
.find_map(|addr| peer_id_from_multiaddr(&addr))
|
||||
}),
|
||||
};
|
||||
|
||||
let peer_id = match peer_id {
|
||||
None => {
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
[package]
|
||||
name = "polkadot-pov-distribution"
|
||||
version = "0.1.0"
|
||||
authors = ["Parity Technologies <admin@parity.io>"]
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
futures = "0.3.12"
|
||||
thiserror = "1.0.23"
|
||||
tracing = "0.1.25"
|
||||
|
||||
polkadot-primitives = { path = "../../../primitives" }
|
||||
polkadot-subsystem = { package = "polkadot-node-subsystem", path = "../../subsystem" }
|
||||
polkadot-node-subsystem-util = { path = "../../subsystem-util" }
|
||||
polkadot-node-network-protocol = { path = "../../network/protocol" }
|
||||
|
||||
[dev-dependencies]
|
||||
assert_matches = "1.4.0"
|
||||
env_logger = "0.8.1"
|
||||
log = "0.4.13"
|
||||
|
||||
sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" }
|
||||
sp-keyring = { git = "https://github.com/paritytech/substrate", branch = "master" }
|
||||
|
||||
polkadot-node-subsystem-test-helpers = { path = "../../subsystem-test-helpers" }
|
||||
@@ -1,33 +0,0 @@
|
||||
// Copyright 2020 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! The `Error` and `Result` types used by the subsystem.
|
||||
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum Error {
|
||||
#[error(transparent)]
|
||||
Subsystem(#[from] polkadot_subsystem::SubsystemError),
|
||||
#[error(transparent)]
|
||||
OneshotRecv(#[from] futures::channel::oneshot::Canceled),
|
||||
#[error(transparent)]
|
||||
Runtime(#[from] polkadot_subsystem::errors::RuntimeApiError),
|
||||
#[error(transparent)]
|
||||
Util(#[from] polkadot_node_subsystem_util::Error),
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -1,996 +0,0 @@
|
||||
// Copyright 2020 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! PoV Distribution Subsystem of Polkadot.
|
||||
//!
|
||||
//! This is a gossip implementation of code that is responsible for distributing PoVs
|
||||
//! among validators.
|
||||
|
||||
#![deny(unused_crate_dependencies)]
|
||||
#![warn(missing_docs)]
|
||||
|
||||
use polkadot_primitives::v1::{CandidateDescriptor, CompressedPoV, CoreIndex, CoreState, Hash, Id as ParaId, PoV, ValidatorId};
|
||||
use polkadot_subsystem::{
|
||||
ActiveLeavesUpdate, OverseerSignal, SubsystemContext, SubsystemResult, SubsystemError, Subsystem,
|
||||
FromOverseer, SpawnedSubsystem,
|
||||
jaeger,
|
||||
messages::{
|
||||
PoVDistributionMessage, AllMessages, NetworkBridgeMessage, NetworkBridgeEvent,
|
||||
},
|
||||
};
|
||||
use polkadot_node_subsystem_util::{
|
||||
validator_discovery,
|
||||
request_validators_ctx,
|
||||
request_validator_groups_ctx,
|
||||
request_availability_cores_ctx,
|
||||
metrics::{self, prometheus},
|
||||
};
|
||||
use polkadot_node_network_protocol::{
|
||||
peer_set::PeerSet, v1 as protocol_v1, PeerId, OurView, UnifiedReputationChange as Rep,
|
||||
};
|
||||
|
||||
use futures::prelude::*;
|
||||
use futures::channel::oneshot;
|
||||
|
||||
use std::collections::{hash_map::{Entry, HashMap}, HashSet};
|
||||
use std::sync::Arc;
|
||||
|
||||
mod error;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
const COST_APPARENT_FLOOD: Rep = Rep::CostMajor("Peer appears to be flooding us with PoV requests");
|
||||
const COST_UNEXPECTED_POV: Rep = Rep::CostMajor("Peer sent us an unexpected PoV");
|
||||
const COST_AWAITED_NOT_IN_VIEW: Rep
|
||||
= Rep::CostMinor("Peer claims to be awaiting something outside of its view");
|
||||
|
||||
const BENEFIT_FRESH_POV: Rep = Rep::BenefitMinorFirst("Peer supplied us with an awaited PoV");
|
||||
const BENEFIT_LATE_POV: Rep = Rep::BenefitMinor("Peer supplied us with an awaited PoV, \
|
||||
but was not the first to do so");
|
||||
|
||||
const LOG_TARGET: &str = "parachain::pov-distribution";
|
||||
|
||||
/// The PoV Distribution Subsystem.
|
||||
pub struct PoVDistribution {
|
||||
// Prometheus metrics
|
||||
metrics: Metrics,
|
||||
}
|
||||
|
||||
impl<C> Subsystem<C> for PoVDistribution
|
||||
where C: SubsystemContext<Message = PoVDistributionMessage>
|
||||
{
|
||||
fn start(self, ctx: C) -> SpawnedSubsystem {
|
||||
// Swallow error because failure is fatal to the node and we log with more precision
|
||||
// within `run`.
|
||||
let future = self.run(ctx)
|
||||
.map_err(|e| SubsystemError::with_origin("pov-distribution", e))
|
||||
.boxed();
|
||||
SpawnedSubsystem {
|
||||
name: "pov-distribution-subsystem",
|
||||
future,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
struct State {
|
||||
/// A state of things going on on a per-relay-parent basis.
|
||||
relay_parent_state: HashMap<Hash, BlockBasedState>,
|
||||
|
||||
/// Info on peers.
|
||||
peer_state: HashMap<PeerId, PeerState>,
|
||||
|
||||
/// Our own view.
|
||||
our_view: OurView,
|
||||
|
||||
/// Connect to relevant groups of validators at different relay parents.
|
||||
connection_requests: validator_discovery::ConnectionRequests,
|
||||
|
||||
/// Metrics.
|
||||
metrics: Metrics,
|
||||
}
|
||||
|
||||
struct BlockBasedState {
|
||||
known: HashMap<Hash, (Arc<PoV>, CompressedPoV)>,
|
||||
|
||||
/// All the PoVs we are or were fetching, coupled with channels expecting the data.
|
||||
///
|
||||
/// This may be an empty list, which indicates that we were once awaiting this PoV but have
|
||||
/// received it already.
|
||||
fetching: HashMap<Hash, Vec<oneshot::Sender<Arc<PoV>>>>,
|
||||
|
||||
n_validators: usize,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
struct PeerState {
|
||||
/// A set of awaited PoV-hashes for each relay-parent in the peer's view.
|
||||
awaited: HashMap<Hash, HashSet<Hash>>,
|
||||
}
|
||||
|
||||
fn awaiting_message(relay_parent: Hash, awaiting: Vec<Hash>)
|
||||
-> protocol_v1::ValidationProtocol
|
||||
{
|
||||
protocol_v1::ValidationProtocol::PoVDistribution(
|
||||
protocol_v1::PoVDistributionMessage::Awaiting(relay_parent, awaiting)
|
||||
)
|
||||
}
|
||||
|
||||
fn send_pov_message(
|
||||
relay_parent: Hash,
|
||||
pov_hash: Hash,
|
||||
pov: &CompressedPoV,
|
||||
) -> protocol_v1::ValidationProtocol {
|
||||
protocol_v1::ValidationProtocol::PoVDistribution(
|
||||
protocol_v1::PoVDistributionMessage::SendPoV(relay_parent, pov_hash, pov.clone())
|
||||
)
|
||||
}
|
||||
|
||||
/// Handles the signal. If successful, returns `true` if the subsystem should conclude,
|
||||
/// `false` otherwise.
|
||||
#[tracing::instrument(level = "trace", skip(ctx, state), fields(subsystem = LOG_TARGET))]
|
||||
async fn handle_signal(
|
||||
state: &mut State,
|
||||
ctx: &mut impl SubsystemContext<Message = PoVDistributionMessage>,
|
||||
signal: OverseerSignal,
|
||||
) -> SubsystemResult<bool> {
|
||||
match signal {
|
||||
OverseerSignal::Conclude => Ok(true),
|
||||
OverseerSignal::ActiveLeaves(ActiveLeavesUpdate { activated, deactivated }) => {
|
||||
let _timer = state.metrics.time_handle_signal();
|
||||
|
||||
for activated in activated {
|
||||
let _span = activated.span.child("pov-dist")
|
||||
.with_stage(jaeger::Stage::PoVDistribution);
|
||||
|
||||
let relay_parent = activated.hash;
|
||||
match request_validators_ctx(relay_parent, ctx).await {
|
||||
Ok(vals_rx) => {
|
||||
let n_validators = match vals_rx.await? {
|
||||
Ok(v) => v.len(),
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
target: LOG_TARGET,
|
||||
err = ?e,
|
||||
"Error fetching validators from runtime API for active leaf",
|
||||
);
|
||||
|
||||
// Not adding bookkeeping here might make us behave funny, but we
|
||||
// shouldn't take down the node on spurious runtime API errors.
|
||||
//
|
||||
// and this is "behave funny" as in be bad at our job, but not in any
|
||||
// slashable or security-related way.
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
state.relay_parent_state.insert(relay_parent, BlockBasedState {
|
||||
known: HashMap::new(),
|
||||
fetching: HashMap::new(),
|
||||
n_validators,
|
||||
});
|
||||
}
|
||||
Err(e) => {
|
||||
// continue here also as above.
|
||||
tracing::warn!(
|
||||
target: LOG_TARGET,
|
||||
err = ?e,
|
||||
"Error fetching validators from runtime API for active leaf",
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for relay_parent in deactivated {
|
||||
state.connection_requests.remove_all(&relay_parent);
|
||||
state.relay_parent_state.remove(&relay_parent);
|
||||
}
|
||||
|
||||
Ok(false)
|
||||
}
|
||||
OverseerSignal::BlockFinalized(..) => Ok(false),
|
||||
}
|
||||
}
|
||||
|
||||
/// Notify peers that we are awaiting a given PoV hash.
|
||||
///
|
||||
/// This only notifies peers who have the relay parent in their view.
|
||||
#[tracing::instrument(level = "trace", skip(peers, ctx), fields(subsystem = LOG_TARGET))]
|
||||
async fn notify_all_we_are_awaiting(
|
||||
peers: &mut HashMap<PeerId, PeerState>,
|
||||
ctx: &mut impl SubsystemContext<Message = PoVDistributionMessage>,
|
||||
relay_parent: Hash,
|
||||
pov_hash: Hash,
|
||||
) {
|
||||
// We use `awaited` as a proxy for which heads are in the peer's view.
|
||||
let peers_to_send: Vec<_> = peers.iter()
|
||||
.filter_map(|(peer, state)| if state.awaited.contains_key(&relay_parent) {
|
||||
Some(peer.clone())
|
||||
} else {
|
||||
None
|
||||
})
|
||||
.collect();
|
||||
|
||||
if peers_to_send.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
let payload = awaiting_message(relay_parent, vec![pov_hash]);
|
||||
|
||||
tracing::trace!(
|
||||
target: LOG_TARGET,
|
||||
peers = ?peers_to_send,
|
||||
?relay_parent,
|
||||
?pov_hash,
|
||||
"Sending awaiting message",
|
||||
);
|
||||
ctx.send_message(AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage(
|
||||
peers_to_send,
|
||||
payload,
|
||||
))).await;
|
||||
}
|
||||
|
||||
/// Notify one peer about everything we're awaiting at a given relay-parent.
|
||||
#[tracing::instrument(level = "trace", skip(ctx, relay_parent_state), fields(subsystem = LOG_TARGET))]
|
||||
async fn notify_one_we_are_awaiting_many(
|
||||
peer: &PeerId,
|
||||
ctx: &mut impl SubsystemContext<Message = PoVDistributionMessage>,
|
||||
relay_parent_state: &HashMap<Hash, BlockBasedState>,
|
||||
relay_parent: Hash,
|
||||
) {
|
||||
let awaiting_hashes = relay_parent_state.get(&relay_parent).into_iter().flat_map(|s| {
|
||||
// Send the peer everything we are fetching at this relay-parent
|
||||
s.fetching.iter()
|
||||
.filter(|(_, senders)| !senders.is_empty()) // that has not been completed already.
|
||||
.map(|(pov_hash, _)| *pov_hash)
|
||||
}).collect::<Vec<_>>();
|
||||
|
||||
if awaiting_hashes.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
tracing::trace!(
|
||||
target: LOG_TARGET,
|
||||
?peer,
|
||||
?relay_parent,
|
||||
?awaiting_hashes,
|
||||
"Sending awaiting message",
|
||||
);
|
||||
let payload = awaiting_message(relay_parent, awaiting_hashes);
|
||||
|
||||
ctx.send_message(AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage(
|
||||
vec![peer.clone()],
|
||||
payload,
|
||||
))).await;
|
||||
}
|
||||
|
||||
/// Distribute a PoV to peers who are awaiting it.
|
||||
#[tracing::instrument(level = "trace", skip(peers, ctx, metrics, pov), fields(subsystem = LOG_TARGET))]
|
||||
async fn distribute_to_awaiting(
|
||||
peers: &mut HashMap<PeerId, PeerState>,
|
||||
ctx: &mut impl SubsystemContext<Message = PoVDistributionMessage>,
|
||||
metrics: &Metrics,
|
||||
relay_parent: Hash,
|
||||
pov_hash: Hash,
|
||||
pov: &CompressedPoV,
|
||||
) {
|
||||
// Send to all peers who are awaiting the PoV and have that relay-parent in their view.
|
||||
//
|
||||
// Also removes it from their awaiting set.
|
||||
let peers_to_send: Vec<_> = peers.iter_mut()
|
||||
.filter_map(|(peer, state)| state.awaited.get_mut(&relay_parent).and_then(|awaited| {
|
||||
if awaited.remove(&pov_hash) {
|
||||
Some(peer.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}))
|
||||
.collect();
|
||||
|
||||
tracing::trace!(
|
||||
target: LOG_TARGET,
|
||||
peers = ?peers_to_send,
|
||||
?relay_parent,
|
||||
?pov_hash,
|
||||
"Sending PoV message",
|
||||
);
|
||||
if peers_to_send.is_empty() { return; }
|
||||
|
||||
let payload = send_pov_message(relay_parent, pov_hash, pov);
|
||||
|
||||
ctx.send_message(AllMessages::NetworkBridge(NetworkBridgeMessage::SendValidationMessage(
|
||||
peers_to_send,
|
||||
payload,
|
||||
))).await;
|
||||
|
||||
metrics.on_pov_distributed();
|
||||
}
|
||||
|
||||
/// Connect to relevant validators in case we are not already.
|
||||
async fn connect_to_relevant_validators(
|
||||
connection_requests: &mut validator_discovery::ConnectionRequests,
|
||||
ctx: &mut impl SubsystemContext<Message = PoVDistributionMessage>,
|
||||
relay_parent: Hash,
|
||||
descriptor: &CandidateDescriptor,
|
||||
) {
|
||||
let para_id = descriptor.para_id;
|
||||
if let Ok(Some(relevant_validators)) =
|
||||
determine_relevant_validators(ctx, relay_parent, para_id).await
|
||||
{
|
||||
// We only need one connection request per (relay_parent, para_id)
|
||||
// so here we take this shortcut to avoid calling `connect_to_validators`
|
||||
// more than once.
|
||||
if !connection_requests.contains_request(&relay_parent, para_id) {
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
validators=?relevant_validators,
|
||||
?relay_parent,
|
||||
"connecting to validators"
|
||||
);
|
||||
match validator_discovery::connect_to_validators(
|
||||
ctx,
|
||||
relay_parent,
|
||||
relevant_validators,
|
||||
PeerSet::Validation,
|
||||
).await {
|
||||
Ok(new_connection_request) => {
|
||||
connection_requests.put(relay_parent, para_id, new_connection_request);
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
"Failed to create a validator connection request {:?}",
|
||||
e,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the Id of the Core that is assigned to the para being collated on if any
|
||||
/// and the total number of cores.
|
||||
async fn determine_core(
|
||||
ctx: &mut impl SubsystemContext<Message = PoVDistributionMessage>,
|
||||
para_id: ParaId,
|
||||
relay_parent: Hash,
|
||||
) -> error::Result<Option<(CoreIndex, usize)>> {
|
||||
let cores = request_availability_cores_ctx(relay_parent, ctx).await?.await??;
|
||||
|
||||
for (idx, core) in cores.iter().enumerate() {
|
||||
if let CoreState::Scheduled(occupied) = core {
|
||||
if occupied.para_id == para_id {
|
||||
return Ok(Some(((idx as u32).into(), cores.len())));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
/// Figure out a group of validators assigned to a given `ParaId`.
|
||||
async fn determine_validators_for_core(
|
||||
ctx: &mut impl SubsystemContext<Message = PoVDistributionMessage>,
|
||||
core_index: CoreIndex,
|
||||
num_cores: usize,
|
||||
relay_parent: Hash,
|
||||
) -> error::Result<Option<Vec<ValidatorId>>> {
|
||||
let groups = request_validator_groups_ctx(relay_parent, ctx).await?.await??;
|
||||
|
||||
let group_index = groups.1.group_for_core(core_index, num_cores);
|
||||
|
||||
let connect_to_validators = match groups.0.get(group_index.0 as usize) {
|
||||
Some(group) => group.clone(),
|
||||
None => return Ok(None),
|
||||
};
|
||||
|
||||
let validators = request_validators_ctx(relay_parent, ctx).await?.await??;
|
||||
|
||||
let validators = connect_to_validators
|
||||
.into_iter()
|
||||
.map(|idx| validators[idx.0 as usize].clone())
|
||||
.collect();
|
||||
|
||||
Ok(Some(validators))
|
||||
}
|
||||
|
||||
async fn determine_relevant_validators(
|
||||
ctx: &mut impl SubsystemContext<Message = PoVDistributionMessage>,
|
||||
relay_parent: Hash,
|
||||
para_id: ParaId,
|
||||
) -> error::Result<Option<Vec<ValidatorId>>> {
|
||||
// Determine which core the para_id is assigned to.
|
||||
let (core, num_cores) = match determine_core(ctx, para_id, relay_parent).await? {
|
||||
Some(core) => core,
|
||||
None => {
|
||||
tracing::warn!(
|
||||
target: LOG_TARGET,
|
||||
"Looks like no core is assigned to {:?} at {:?}",
|
||||
para_id,
|
||||
relay_parent,
|
||||
);
|
||||
|
||||
return Ok(None);
|
||||
}
|
||||
};
|
||||
|
||||
determine_validators_for_core(ctx, core, num_cores, relay_parent).await
|
||||
}
|
||||
|
||||
/// Handles a `FetchPoV` message.
|
||||
#[tracing::instrument(level = "trace", skip(ctx, state, response_sender), fields(subsystem = LOG_TARGET))]
|
||||
async fn handle_fetch(
|
||||
state: &mut State,
|
||||
ctx: &mut impl SubsystemContext<Message = PoVDistributionMessage>,
|
||||
relay_parent: Hash,
|
||||
descriptor: CandidateDescriptor,
|
||||
response_sender: oneshot::Sender<Arc<PoV>>,
|
||||
) {
|
||||
let _timer = state.metrics.time_handle_fetch();
|
||||
|
||||
let relay_parent_state = match state.relay_parent_state.get_mut(&relay_parent) {
|
||||
Some(s) => s,
|
||||
None => return,
|
||||
};
|
||||
|
||||
if let Some((pov, _)) = relay_parent_state.known.get(&descriptor.pov_hash) {
|
||||
let _ = response_sender.send(pov.clone());
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
match relay_parent_state.fetching.entry(descriptor.pov_hash) {
|
||||
Entry::Occupied(mut e) => {
|
||||
// we are already awaiting this PoV if there is an entry.
|
||||
e.get_mut().push(response_sender);
|
||||
return;
|
||||
}
|
||||
Entry::Vacant(e) => {
|
||||
connect_to_relevant_validators(&mut state.connection_requests, ctx, relay_parent, &descriptor).await;
|
||||
e.insert(vec![response_sender]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if relay_parent_state.fetching.len() > 2 * relay_parent_state.n_validators {
|
||||
tracing::warn!(
|
||||
target = LOG_TARGET,
|
||||
relay_parent_state.fetching.len = relay_parent_state.fetching.len(),
|
||||
"other subsystems have requested PoV distribution to fetch more PoVs than reasonably expected",
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// Issue an `Awaiting` message to all peers with this in their view.
|
||||
notify_all_we_are_awaiting(
|
||||
&mut state.peer_state,
|
||||
ctx,
|
||||
relay_parent,
|
||||
descriptor.pov_hash
|
||||
).await
|
||||
}
|
||||
|
||||
/// Handles a `DistributePoV` message.
|
||||
#[tracing::instrument(level = "trace", skip(ctx, state, pov), fields(subsystem = LOG_TARGET))]
|
||||
async fn handle_distribute(
|
||||
state: &mut State,
|
||||
ctx: &mut impl SubsystemContext<Message = PoVDistributionMessage>,
|
||||
relay_parent: Hash,
|
||||
descriptor: CandidateDescriptor,
|
||||
pov: Arc<PoV>,
|
||||
) {
|
||||
let _timer = state.metrics.time_handle_distribute();
|
||||
|
||||
let relay_parent_state = match state.relay_parent_state.get_mut(&relay_parent) {
|
||||
Some(s) => s,
|
||||
None => return,
|
||||
};
|
||||
|
||||
connect_to_relevant_validators(&mut state.connection_requests, ctx, relay_parent, &descriptor).await;
|
||||
|
||||
if let Some(our_awaited) = relay_parent_state.fetching.get_mut(&descriptor.pov_hash) {
|
||||
// Drain all the senders, but keep the entry in the map around intentionally.
|
||||
//
|
||||
// It signals that we were at one point awaiting this, so we will be able to tell
|
||||
// why peers are sending it to us.
|
||||
for response_sender in our_awaited.drain(..) {
|
||||
let _ = response_sender.send(pov.clone());
|
||||
}
|
||||
}
|
||||
|
||||
let encoded_pov = match CompressedPoV::compress(&*pov) {
|
||||
Ok(pov) => pov,
|
||||
Err(error) => {
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
error = ?error,
|
||||
"Failed to create `CompressedPov`."
|
||||
);
|
||||
return
|
||||
}
|
||||
};
|
||||
|
||||
distribute_to_awaiting(
|
||||
&mut state.peer_state,
|
||||
ctx,
|
||||
&state.metrics,
|
||||
relay_parent,
|
||||
descriptor.pov_hash,
|
||||
&encoded_pov,
|
||||
).await;
|
||||
|
||||
relay_parent_state.known.insert(descriptor.pov_hash, (pov, encoded_pov));
|
||||
}
|
||||
|
||||
/// Report a reputation change for a peer.
|
||||
#[tracing::instrument(level = "trace", skip(ctx), fields(subsystem = LOG_TARGET))]
|
||||
async fn report_peer(
|
||||
ctx: &mut impl SubsystemContext<Message = PoVDistributionMessage>,
|
||||
peer: PeerId,
|
||||
rep: Rep,
|
||||
) {
|
||||
ctx.send_message(AllMessages::NetworkBridge(NetworkBridgeMessage::ReportPeer(peer, rep))).await
|
||||
}
|
||||
|
||||
/// Handle a notification from a peer that they are awaiting some PoVs.
|
||||
#[tracing::instrument(level = "trace", skip(ctx, state), fields(subsystem = LOG_TARGET))]
|
||||
async fn handle_awaiting(
|
||||
state: &mut State,
|
||||
ctx: &mut impl SubsystemContext<Message = PoVDistributionMessage>,
|
||||
peer: PeerId,
|
||||
relay_parent: Hash,
|
||||
pov_hashes: Vec<Hash>,
|
||||
) {
|
||||
if !state.our_view.contains(&relay_parent) {
|
||||
tracing::trace!(
|
||||
target: LOG_TARGET,
|
||||
?peer,
|
||||
?relay_parent,
|
||||
?pov_hashes,
|
||||
"Received awaiting message for unknown block",
|
||||
);
|
||||
report_peer(ctx, peer, COST_AWAITED_NOT_IN_VIEW).await;
|
||||
return;
|
||||
}
|
||||
tracing::trace!(
|
||||
target: LOG_TARGET,
|
||||
?peer,
|
||||
?relay_parent,
|
||||
?pov_hashes,
|
||||
"Received awaiting message",
|
||||
);
|
||||
|
||||
let relay_parent_state = match state.relay_parent_state.get_mut(&relay_parent) {
|
||||
None => {
|
||||
tracing::warn!(
|
||||
target: LOG_TARGET,
|
||||
?peer,
|
||||
?relay_parent,
|
||||
"PoV Distribution relay parent state out-of-sync with our view"
|
||||
);
|
||||
return;
|
||||
}
|
||||
Some(s) => s,
|
||||
};
|
||||
|
||||
let peer_awaiting = match
|
||||
state.peer_state.get_mut(&peer).and_then(|s| s.awaited.get_mut(&relay_parent))
|
||||
{
|
||||
None => {
|
||||
report_peer(ctx, peer, COST_AWAITED_NOT_IN_VIEW).await;
|
||||
return;
|
||||
}
|
||||
Some(a) => a,
|
||||
};
|
||||
|
||||
let will_be_awaited = peer_awaiting.len() + pov_hashes.len();
|
||||
if will_be_awaited <= 2 * relay_parent_state.n_validators {
|
||||
for pov_hash in pov_hashes {
|
||||
// For all requested PoV hashes, if we have it, we complete the request immediately.
|
||||
// Otherwise, we note that the peer is awaiting the PoV.
|
||||
if let Some((_, ref pov)) = relay_parent_state.known.get(&pov_hash) {
|
||||
tracing::trace!(
|
||||
target: LOG_TARGET,
|
||||
?peer,
|
||||
?relay_parent,
|
||||
?pov_hash,
|
||||
"Sending awaited PoV message",
|
||||
);
|
||||
let payload = send_pov_message(relay_parent, pov_hash, pov);
|
||||
|
||||
ctx.send_message(AllMessages::NetworkBridge(
|
||||
NetworkBridgeMessage::SendValidationMessage(vec![peer.clone()], payload)
|
||||
)).await;
|
||||
} else {
|
||||
peer_awaiting.insert(pov_hash);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
?peer,
|
||||
?relay_parent,
|
||||
"Too many PoV requests",
|
||||
);
|
||||
report_peer(ctx, peer, COST_APPARENT_FLOOD).await;
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle an incoming PoV from our peer. Reports them if unexpected, rewards them if not.
|
||||
///
|
||||
/// Completes any requests awaiting that PoV.
|
||||
#[tracing::instrument(level = "trace", skip(ctx, state, encoded_pov), fields(subsystem = LOG_TARGET))]
|
||||
async fn handle_incoming_pov(
|
||||
state: &mut State,
|
||||
ctx: &mut impl SubsystemContext<Message = PoVDistributionMessage>,
|
||||
peer: PeerId,
|
||||
relay_parent: Hash,
|
||||
pov_hash: Hash,
|
||||
encoded_pov: CompressedPoV,
|
||||
) {
|
||||
let relay_parent_state = match state.relay_parent_state.get_mut(&relay_parent) {
|
||||
None => {
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
?peer,
|
||||
?relay_parent,
|
||||
?pov_hash,
|
||||
"Unexpected PoV",
|
||||
);
|
||||
report_peer(ctx, peer, COST_UNEXPECTED_POV).await;
|
||||
return;
|
||||
},
|
||||
Some(r) => r,
|
||||
};
|
||||
|
||||
let pov = match encoded_pov.decompress() {
|
||||
Ok(pov) => pov,
|
||||
Err(error) => {
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
error = ?error,
|
||||
"Could not extract PoV",
|
||||
);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let pov = {
|
||||
// Do validity checks and complete all senders awaiting this PoV.
|
||||
let fetching = match relay_parent_state.fetching.get_mut(&pov_hash) {
|
||||
None => {
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
?peer,
|
||||
?relay_parent,
|
||||
?pov_hash,
|
||||
"Unexpected PoV",
|
||||
);
|
||||
report_peer(ctx, peer, COST_UNEXPECTED_POV).await;
|
||||
return;
|
||||
}
|
||||
Some(f) => f,
|
||||
};
|
||||
|
||||
let hash = pov.hash();
|
||||
if hash != pov_hash {
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
?peer,
|
||||
?relay_parent,
|
||||
?pov_hash,
|
||||
?hash,
|
||||
"Mismatched PoV",
|
||||
);
|
||||
report_peer(ctx, peer, COST_UNEXPECTED_POV).await;
|
||||
return;
|
||||
}
|
||||
|
||||
let pov = Arc::new(pov);
|
||||
|
||||
if fetching.is_empty() {
|
||||
// fetching is empty whenever we were awaiting something and
|
||||
// it was completed afterwards.
|
||||
report_peer(ctx, peer.clone(), BENEFIT_LATE_POV).await;
|
||||
} else {
|
||||
// fetching is non-empty when the peer just provided us with data we needed.
|
||||
report_peer(ctx, peer.clone(), BENEFIT_FRESH_POV).await;
|
||||
}
|
||||
|
||||
for response_sender in fetching.drain(..) {
|
||||
let _ = response_sender.send(pov.clone());
|
||||
}
|
||||
|
||||
pov
|
||||
};
|
||||
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
?peer,
|
||||
?relay_parent,
|
||||
?pov_hash,
|
||||
"Received PoV",
|
||||
);
|
||||
// make sure we don't consider this peer as awaiting that PoV anymore.
|
||||
if let Some(peer_state) = state.peer_state.get_mut(&peer) {
|
||||
peer_state.awaited.remove(&pov_hash);
|
||||
}
|
||||
|
||||
// distribute the PoV to all other peers who are awaiting it.
|
||||
distribute_to_awaiting(
|
||||
&mut state.peer_state,
|
||||
ctx,
|
||||
&state.metrics,
|
||||
relay_parent,
|
||||
pov_hash,
|
||||
&encoded_pov,
|
||||
).await;
|
||||
|
||||
relay_parent_state.known.insert(pov_hash, (pov, encoded_pov));
|
||||
}
|
||||
|
||||
/// Handles a newly or already connected validator in the context of some relay leaf.
|
||||
fn handle_validator_connected(state: &mut State, peer_id: PeerId) {
|
||||
state.peer_state.entry(peer_id).or_default();
|
||||
}
|
||||
|
||||
/// Handles a network bridge update.
|
||||
#[tracing::instrument(level = "trace", skip(ctx, state), fields(subsystem = LOG_TARGET))]
|
||||
async fn handle_network_update(
|
||||
state: &mut State,
|
||||
ctx: &mut impl SubsystemContext<Message = PoVDistributionMessage>,
|
||||
update: NetworkBridgeEvent<protocol_v1::PoVDistributionMessage>,
|
||||
) {
|
||||
let _timer = state.metrics.time_handle_network_update();
|
||||
|
||||
match update {
|
||||
NetworkBridgeEvent::PeerConnected(peer, role) => {
|
||||
tracing::trace!(
|
||||
target: LOG_TARGET,
|
||||
?peer,
|
||||
?role,
|
||||
"Peer connected",
|
||||
);
|
||||
handle_validator_connected(state, peer);
|
||||
}
|
||||
NetworkBridgeEvent::PeerDisconnected(peer) => {
|
||||
tracing::trace!(
|
||||
target: LOG_TARGET,
|
||||
?peer,
|
||||
"Peer disconnected",
|
||||
);
|
||||
state.peer_state.remove(&peer);
|
||||
}
|
||||
NetworkBridgeEvent::PeerViewChange(peer_id, view) => {
|
||||
tracing::trace!(
|
||||
target: LOG_TARGET,
|
||||
?peer_id,
|
||||
?view,
|
||||
"Peer view change",
|
||||
);
|
||||
if let Some(peer_state) = state.peer_state.get_mut(&peer_id) {
|
||||
// prune anything not in the new view.
|
||||
peer_state.awaited.retain(|relay_parent, _| view.contains(&relay_parent));
|
||||
|
||||
// introduce things from the new view.
|
||||
for relay_parent in view.iter() {
|
||||
if let Entry::Vacant(entry) = peer_state.awaited.entry(*relay_parent) {
|
||||
entry.insert(HashSet::new());
|
||||
|
||||
// Notify the peer about everything we're awaiting at the new relay-parent.
|
||||
notify_one_we_are_awaiting_many(
|
||||
&peer_id,
|
||||
ctx,
|
||||
&state.relay_parent_state,
|
||||
*relay_parent,
|
||||
).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
NetworkBridgeEvent::PeerMessage(peer, message) => {
|
||||
match message {
|
||||
protocol_v1::PoVDistributionMessage::Awaiting(relay_parent, pov_hashes)
|
||||
=> handle_awaiting(
|
||||
state,
|
||||
ctx,
|
||||
peer,
|
||||
relay_parent,
|
||||
pov_hashes,
|
||||
).await,
|
||||
protocol_v1::PoVDistributionMessage::SendPoV(relay_parent, pov_hash, pov)
|
||||
=> handle_incoming_pov(
|
||||
state,
|
||||
ctx,
|
||||
peer,
|
||||
relay_parent,
|
||||
pov_hash,
|
||||
pov,
|
||||
).await,
|
||||
}
|
||||
}
|
||||
NetworkBridgeEvent::OurViewChange(view) => {
|
||||
tracing::trace!(
|
||||
target: LOG_TARGET,
|
||||
"Own view change",
|
||||
);
|
||||
state.our_view = view;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PoVDistribution {
|
||||
/// Create a new instance of `PovDistribution`.
|
||||
pub fn new(metrics: Metrics) -> Self {
|
||||
Self { metrics }
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip(self, ctx), fields(subsystem = LOG_TARGET))]
|
||||
async fn run(
|
||||
self,
|
||||
ctx: impl SubsystemContext<Message = PoVDistributionMessage>,
|
||||
) -> SubsystemResult<()> {
|
||||
self.run_with_state(ctx, State::default()).await
|
||||
}
|
||||
|
||||
async fn run_with_state(
|
||||
self,
|
||||
mut ctx: impl SubsystemContext<Message = PoVDistributionMessage>,
|
||||
mut state: State,
|
||||
) -> SubsystemResult<()> {
|
||||
state.metrics = self.metrics;
|
||||
|
||||
loop {
|
||||
// `select_biased` is used since receiving connection notifications and
|
||||
// peer view update messages may be racy and we want connection notifications
|
||||
// first.
|
||||
futures::select_biased! {
|
||||
v = state.connection_requests.next().fuse() => handle_validator_connected(&mut state, v.peer_id),
|
||||
v = ctx.recv().fuse() => {
|
||||
match v? {
|
||||
FromOverseer::Signal(signal) => if handle_signal(
|
||||
&mut state,
|
||||
&mut ctx,
|
||||
signal,
|
||||
).await? {
|
||||
return Ok(());
|
||||
}
|
||||
FromOverseer::Communication { msg } => match msg {
|
||||
PoVDistributionMessage::FetchPoV(relay_parent, descriptor, response_sender) =>
|
||||
handle_fetch(
|
||||
&mut state,
|
||||
&mut ctx,
|
||||
relay_parent,
|
||||
descriptor,
|
||||
response_sender,
|
||||
).await,
|
||||
PoVDistributionMessage::DistributePoV(relay_parent, descriptor, pov) =>
|
||||
handle_distribute(
|
||||
&mut state,
|
||||
&mut ctx,
|
||||
relay_parent,
|
||||
descriptor,
|
||||
pov,
|
||||
).await,
|
||||
PoVDistributionMessage::NetworkBridgeUpdateV1(event) =>
|
||||
handle_network_update(
|
||||
&mut state,
|
||||
&mut ctx,
|
||||
event,
|
||||
).await,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#[derive(Clone)]
|
||||
struct MetricsInner {
|
||||
povs_distributed: prometheus::Counter<prometheus::U64>,
|
||||
handle_signal: prometheus::Histogram,
|
||||
handle_fetch: prometheus::Histogram,
|
||||
handle_distribute: prometheus::Histogram,
|
||||
handle_network_update: prometheus::Histogram,
|
||||
}
|
||||
|
||||
/// Availability Distribution metrics.
|
||||
#[derive(Default, Clone)]
|
||||
pub struct Metrics(Option<MetricsInner>);
|
||||
|
||||
impl Metrics {
|
||||
fn on_pov_distributed(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.povs_distributed.inc();
|
||||
}
|
||||
}
|
||||
|
||||
/// Provide a timer for `handle_signal` which observes on drop.
|
||||
fn time_handle_signal(&self) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| metrics.handle_signal.start_timer())
|
||||
}
|
||||
|
||||
/// Provide a timer for `handle_fetch` which observes on drop.
|
||||
fn time_handle_fetch(&self) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| metrics.handle_fetch.start_timer())
|
||||
}
|
||||
|
||||
/// Provide a timer for `handle_distribute` which observes on drop.
|
||||
fn time_handle_distribute(&self) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| metrics.handle_distribute.start_timer())
|
||||
}
|
||||
|
||||
/// Provide a timer for `handle_network_update` which observes on drop.
|
||||
fn time_handle_network_update(&self) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| metrics.handle_network_update.start_timer())
|
||||
}
|
||||
}
|
||||
|
||||
impl metrics::Metrics for Metrics {
|
||||
fn try_register(registry: &prometheus::Registry) -> std::result::Result<Self, prometheus::PrometheusError> {
|
||||
let metrics = MetricsInner {
|
||||
povs_distributed: prometheus::register(
|
||||
prometheus::Counter::new(
|
||||
"parachain_povs_distributed_total",
|
||||
"Number of PoVs distributed to other peers."
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
handle_signal: prometheus::register(
|
||||
prometheus::Histogram::with_opts(
|
||||
prometheus::HistogramOpts::new(
|
||||
"parachain_pov_distribution_handle_signal",
|
||||
"Time spent within `pov_distribution::handle_signal`",
|
||||
)
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
handle_fetch: prometheus::register(
|
||||
prometheus::Histogram::with_opts(
|
||||
prometheus::HistogramOpts::new(
|
||||
"parachain_pov_distribution_handle_fetch",
|
||||
"Time spent within `pov_distribution::handle_fetch`",
|
||||
)
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
handle_distribute: prometheus::register(
|
||||
prometheus::Histogram::with_opts(
|
||||
prometheus::HistogramOpts::new(
|
||||
"parachain_pov_distribution_handle_distribute",
|
||||
"Time spent within `pov_distribution::handle_distribute`",
|
||||
)
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
handle_network_update: prometheus::register(
|
||||
prometheus::Histogram::with_opts(
|
||||
prometheus::HistogramOpts::new(
|
||||
"parachain_pov_distribution_handle_network_update",
|
||||
"Time spent within `pov_distribution::handle_network_update`",
|
||||
)
|
||||
)?,
|
||||
registry,
|
||||
)?,
|
||||
};
|
||||
Ok(Metrics(Some(metrics)))
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -13,3 +13,4 @@ parity-scale-codec = { version = "2.0.0", default-features = false, features = [
|
||||
sc-network = { git = "https://github.com/paritytech/substrate", branch = "master" }
|
||||
strum = { version = "0.20", features = ["derive"] }
|
||||
futures = "0.3.12"
|
||||
thiserror = "1.0.23"
|
||||
|
||||
@@ -289,7 +289,7 @@ pub mod v1 {
|
||||
use std::convert::TryFrom;
|
||||
|
||||
use polkadot_primitives::v1::{
|
||||
CandidateIndex, CollatorId, CompressedPoV, Hash, Id as ParaId, SignedAvailabilityBitfield,
|
||||
CandidateIndex, CollatorId, Hash, Id as ParaId, SignedAvailabilityBitfield,
|
||||
CollatorSignature,
|
||||
};
|
||||
use polkadot_node_primitives::{
|
||||
@@ -305,19 +305,6 @@ pub mod v1 {
|
||||
Bitfield(Hash, SignedAvailabilityBitfield),
|
||||
}
|
||||
|
||||
/// Network messages used by the PoV distribution subsystem.
|
||||
#[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)]
|
||||
pub enum PoVDistributionMessage {
|
||||
/// Notification that we are awaiting the given PoVs (by hash) against a
|
||||
/// specific relay-parent hash.
|
||||
#[codec(index = 0)]
|
||||
Awaiting(Hash, Vec<Hash>),
|
||||
/// Notification of an awaited PoV, in a given relay-parent context.
|
||||
/// (relay_parent, pov_hash, compressed_pov)
|
||||
#[codec(index = 1)]
|
||||
SendPoV(Hash, Hash, CompressedPoV),
|
||||
}
|
||||
|
||||
/// Network messages used by the statement distribution subsystem.
|
||||
#[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)]
|
||||
pub enum StatementDistributionMessage {
|
||||
@@ -361,9 +348,6 @@ pub mod v1 {
|
||||
/// Bitfield distribution messages
|
||||
#[codec(index = 1)]
|
||||
BitfieldDistribution(BitfieldDistributionMessage),
|
||||
/// PoV Distribution messages
|
||||
#[codec(index = 2)]
|
||||
PoVDistribution(PoVDistributionMessage),
|
||||
/// Statement distribution messages
|
||||
#[codec(index = 3)]
|
||||
StatementDistribution(StatementDistributionMessage),
|
||||
@@ -373,7 +357,6 @@ pub mod v1 {
|
||||
}
|
||||
|
||||
impl_try_from!(ValidationProtocol, BitfieldDistribution, BitfieldDistributionMessage);
|
||||
impl_try_from!(ValidationProtocol, PoVDistribution, PoVDistributionMessage);
|
||||
impl_try_from!(ValidationProtocol, StatementDistribution, StatementDistributionMessage);
|
||||
impl_try_from!(ValidationProtocol, ApprovalDistribution, ApprovalDistributionMessage);
|
||||
|
||||
|
||||
@@ -60,6 +60,8 @@ pub enum Protocol {
|
||||
ChunkFetching,
|
||||
/// Protocol for fetching collations from collators.
|
||||
CollationFetching,
|
||||
/// Protocol for fetching seconded PoVs from validators of the same group.
|
||||
PoVFetching,
|
||||
/// Protocol for fetching available data.
|
||||
AvailableDataFetching,
|
||||
}
|
||||
@@ -107,11 +109,18 @@ impl Protocol {
|
||||
request_timeout: DEFAULT_REQUEST_TIMEOUT_CONNECTED,
|
||||
inbound_queue: Some(tx),
|
||||
},
|
||||
Protocol::PoVFetching => RequestResponseConfig {
|
||||
name: p_name,
|
||||
max_request_size: 1_000,
|
||||
max_response_size: MAX_COMPRESSED_POV_SIZE as u64,
|
||||
request_timeout: DEFAULT_REQUEST_TIMEOUT_CONNECTED,
|
||||
inbound_queue: Some(tx),
|
||||
},
|
||||
Protocol::AvailableDataFetching => RequestResponseConfig {
|
||||
name: p_name,
|
||||
max_request_size: 1_000,
|
||||
// Available data size is dominated by the PoV size.
|
||||
max_response_size: 30_000_000,
|
||||
max_response_size: MAX_COMPRESSED_POV_SIZE as u64,
|
||||
request_timeout: DEFAULT_REQUEST_TIMEOUT,
|
||||
inbound_queue: Some(tx),
|
||||
},
|
||||
@@ -130,6 +139,8 @@ impl Protocol {
|
||||
Protocol::ChunkFetching => 100,
|
||||
// 10 seems reasonable, considering group sizes of max 10 validators.
|
||||
Protocol::CollationFetching => 10,
|
||||
// 10 seems reasonable, considering group sizes of max 10 validators.
|
||||
Protocol::PoVFetching => 10,
|
||||
// Validators are constantly self-selecting to request available data which may lead
|
||||
// to constant load and occasional burstiness.
|
||||
Protocol::AvailableDataFetching => 100,
|
||||
@@ -146,6 +157,7 @@ impl Protocol {
|
||||
match self {
|
||||
Protocol::ChunkFetching => "/polkadot/req_chunk/1",
|
||||
Protocol::CollationFetching => "/polkadot/req_collation/1",
|
||||
Protocol::PoVFetching => "/polkadot/req_pov/1",
|
||||
Protocol::AvailableDataFetching => "/polkadot/req_available_data/1",
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
use futures::channel::oneshot;
|
||||
use futures::prelude::Future;
|
||||
|
||||
use thiserror::Error;
|
||||
use parity_scale_codec::{Decode, Encode, Error as DecodingError};
|
||||
use sc_network as network;
|
||||
use sc_network::config as netconfig;
|
||||
@@ -42,6 +43,8 @@ pub enum Requests {
|
||||
ChunkFetching(OutgoingRequest<v1::ChunkFetchingRequest>),
|
||||
/// Fetch a collation from a collator which previously announced it.
|
||||
CollationFetching(OutgoingRequest<v1::CollationFetchingRequest>),
|
||||
/// Fetch a PoV from a validator which previously sent out a seconded statement.
|
||||
PoVFetching(OutgoingRequest<v1::PoVFetchingRequest>),
|
||||
/// Request full available data from a node.
|
||||
AvailableDataFetching(OutgoingRequest<v1::AvailableDataFetchingRequest>),
|
||||
}
|
||||
@@ -52,6 +55,7 @@ impl Requests {
|
||||
match self {
|
||||
Self::ChunkFetching(_) => Protocol::ChunkFetching,
|
||||
Self::CollationFetching(_) => Protocol::CollationFetching,
|
||||
Self::PoVFetching(_) => Protocol::PoVFetching,
|
||||
Self::AvailableDataFetching(_) => Protocol::AvailableDataFetching,
|
||||
}
|
||||
}
|
||||
@@ -67,6 +71,7 @@ impl Requests {
|
||||
match self {
|
||||
Self::ChunkFetching(r) => r.encode_request(),
|
||||
Self::CollationFetching(r) => r.encode_request(),
|
||||
Self::PoVFetching(r) => r.encode_request(),
|
||||
Self::AvailableDataFetching(r) => r.encode_request(),
|
||||
}
|
||||
}
|
||||
@@ -96,16 +101,19 @@ pub struct OutgoingRequest<Req> {
|
||||
}
|
||||
|
||||
/// Any error that can occur when sending a request.
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Error)]
|
||||
pub enum RequestError {
|
||||
/// Response could not be decoded.
|
||||
InvalidResponse(DecodingError),
|
||||
#[error("Response could not be decoded")]
|
||||
InvalidResponse(#[source] DecodingError),
|
||||
|
||||
/// Some error in substrate/libp2p happened.
|
||||
NetworkError(network::RequestFailure),
|
||||
#[error("Some network error occurred")]
|
||||
NetworkError(#[source] network::RequestFailure),
|
||||
|
||||
/// Response got canceled by networking.
|
||||
Canceled(oneshot::Canceled),
|
||||
#[error("Response channel got canceled")]
|
||||
Canceled(#[source] oneshot::Canceled),
|
||||
}
|
||||
|
||||
/// Responses received for an `OutgoingRequest`.
|
||||
|
||||
@@ -114,6 +114,29 @@ impl IsRequest for CollationFetchingRequest {
|
||||
const PROTOCOL: Protocol = Protocol::CollationFetching;
|
||||
}
|
||||
|
||||
/// Request the advertised collation at that relay-parent.
|
||||
#[derive(Debug, Clone, Encode, Decode)]
|
||||
pub struct PoVFetchingRequest {
|
||||
/// Candidate we want a PoV for.
|
||||
pub candidate_hash: CandidateHash,
|
||||
}
|
||||
|
||||
/// Responses to `PoVFetchingRequest`.
|
||||
#[derive(Debug, Clone, Encode, Decode)]
|
||||
pub enum PoVFetchingResponse {
|
||||
/// Deliver requested PoV.
|
||||
#[codec(index = 0)]
|
||||
PoV(CompressedPoV),
|
||||
/// PoV was not found in store.
|
||||
#[codec(index = 1)]
|
||||
NoSuchPoV,
|
||||
}
|
||||
|
||||
impl IsRequest for PoVFetchingRequest {
|
||||
type Response = PoVFetchingResponse;
|
||||
const PROTOCOL: Protocol = Protocol::PoVFetching;
|
||||
}
|
||||
|
||||
/// Request the entire available data for a candidate.
|
||||
#[derive(Debug, Clone, Encode, Decode)]
|
||||
pub struct AvailableDataFetchingRequest {
|
||||
|
||||
Reference in New Issue
Block a user