refactor+feat: allow subsystems to send only declared messages, generate graphviz (#5314)

Closes #3774
Closes #3826
Bernhard Schuster
2022-05-12 17:39:05 +02:00
committed by GitHub
parent 26340b9054
commit 511891dcce
102 changed files with 3853 additions and 2514 deletions
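The theme running through the hunks below: blanket `SubsystemContext`/`SubsystemSender` bounds are replaced by bounds derived from each subsystem's declared message set (via the `#[overseer::subsystem(...)]` and `#[overseer::contextbounds(...)]` attributes), so a subsystem can only send the messages it declares, and the overseer macro can additionally emit a graphviz graph of the message flow. A minimal standalone sketch of the per-message sender-bound pattern follows; the trait and message types are simplified stand-ins, not the actual `polkadot-node-subsystem` API:

```rust
/// Simplified stand-in for the per-message sender trait that the overseer
/// macros generate bounds against: a sender implements it once per message
/// type the subsystem is declared to emit.
trait SubsystemSender<M> {
    fn send_message(&mut self, msg: M);
}

// Stand-in message types a subsystem might declare as outgoing.
struct ChainApiMessage(&'static str);
struct RuntimeApiMessage(&'static str);

/// A function that talks to two other subsystems states that explicitly in
/// its bounds; sending any undeclared message type fails to compile.
fn query_ancestry<Sender>(sender: &mut Sender)
where
    Sender: SubsystemSender<ChainApiMessage> + SubsystemSender<RuntimeApiMessage>,
{
    // Fully qualified calls disambiguate between the two trait impls;
    // `&mut *sender` reborrows so the reference is not moved by the first call.
    SubsystemSender::<ChainApiMessage>::send_message(&mut *sender, ChainApiMessage("Ancestors"));
    SubsystemSender::<RuntimeApiMessage>::send_message(
        &mut *sender,
        RuntimeApiMessage("SessionIndexForChild"),
    );
}

/// A sender wired up for both declared message types.
struct TestSender;
impl SubsystemSender<ChainApiMessage> for TestSender {
    fn send_message(&mut self, msg: ChainApiMessage) {
        println!("chain api: {}", msg.0);
    }
}
impl SubsystemSender<RuntimeApiMessage> for TestSender {
    fn send_message(&mut self, msg: RuntimeApiMessage) {
        println!("runtime api: {}", msg.0);
    }
}

fn main() {
    query_ancestry(&mut TestSender);
}
```

Because each message type is a separate trait bound, an undeclared send is a compile-time error rather than a runtime routing concern.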
@@ -21,7 +21,7 @@ use sp_keystore::SyncCryptoStorePtr;
use polkadot_node_network_protocol::request_response::{v1, IncomingRequestReceiver};
use polkadot_node_subsystem::{
messages::AvailabilityDistributionMessage, overseer, FromOverseer, OverseerSignal,
- SpawnedSubsystem, SubsystemContext, SubsystemError,
+ SpawnedSubsystem, SubsystemError,
};
/// Error and [`Result`] type for this subsystem.
@@ -68,11 +68,8 @@ pub struct IncomingRequestReceivers {
pub chunk_req_receiver: IncomingRequestReceiver<v1::ChunkFetchingRequest>,
}
- impl<Context> overseer::Subsystem<Context, SubsystemError> for AvailabilityDistributionSubsystem
- where
- Context: SubsystemContext<Message = AvailabilityDistributionMessage>,
- Context: overseer::SubsystemContext<Message = AvailabilityDistributionMessage>,
- {
+ #[overseer::subsystem(AvailabilityDistribution, error=SubsystemError, prefix=self::overseer)]
+ impl<Context> AvailabilityDistributionSubsystem {
fn start(self, ctx: Context) -> SpawnedSubsystem {
let future = self
.run(ctx)
@@ -83,6 +80,7 @@ where
}
}
+ #[overseer::contextbounds(AvailabilityDistribution, prefix = self::overseer)]
impl AvailabilityDistributionSubsystem {
/// Create a new instance of the availability distribution.
pub fn new(
@@ -95,11 +93,7 @@ impl AvailabilityDistributionSubsystem {
}
/// Start processing work as passed on from the Overseer.
- async fn run<Context>(self, mut ctx: Context) -> std::result::Result<(), FatalError>
- where
- Context: SubsystemContext<Message = AvailabilityDistributionMessage>,
- Context: overseer::SubsystemContext<Message = AvailabilityDistributionMessage>,
- {
+ async fn run<Context>(self, mut ctx: Context) -> std::result::Result<(), FatalError> {
let Self { mut runtime, recvs, metrics } = self;
let IncomingRequestReceivers { pov_req_receiver, chunk_req_receiver } = recvs;
@@ -27,7 +27,7 @@ use polkadot_node_primitives::PoV;
use polkadot_node_subsystem::{
jaeger,
messages::{IfDisconnected, NetworkBridgeMessage},
- SubsystemContext,
+ overseer,
};
use polkadot_node_subsystem_util::runtime::RuntimeInfo;
use polkadot_primitives::v2::{AuthorityDiscoveryId, CandidateHash, Hash, ValidatorIndex};
@@ -39,6 +39,7 @@ use crate::{
};
/// Start background worker for taking care of fetching the requested `PoV` from the network.
+ #[overseer::contextbounds(AvailabilityDistribution, prefix = self::overseer)]
pub async fn fetch_pov<Context>(
ctx: &mut Context,
runtime: &mut RuntimeInfo,
@@ -48,10 +49,7 @@ pub async fn fetch_pov<Context>(
pov_hash: Hash,
tx: oneshot::Sender<PoV>,
metrics: Metrics,
- ) -> Result<()>
- where
- Context: SubsystemContext,
- {
+ ) -> Result<()> {
let info = &runtime.get_session_info(ctx.sender(), parent).await?.session_info;
let authority_id = info
.discovery_keys
@@ -30,8 +30,8 @@ use polkadot_node_network_protocol::request_response::{
use polkadot_node_primitives::ErasureChunk;
use polkadot_node_subsystem::{
jaeger,
- messages::{AllMessages, AvailabilityStoreMessage, IfDisconnected, NetworkBridgeMessage},
- SubsystemContext,
+ messages::{AvailabilityStoreMessage, IfDisconnected, NetworkBridgeMessage},
+ overseer,
};
use polkadot_primitives::v2::{
AuthorityDiscoveryId, BlakeTwo256, CandidateHash, GroupIndex, Hash, HashT, OccupiedCore,
@@ -84,7 +84,7 @@ enum FetchedState {
/// Messages sent from `FetchTask`s to be handled/forwarded.
pub enum FromFetchTask {
/// Message to other subsystem.
- Message(AllMessages),
+ Message(overseer::AvailabilityDistributionOutgoingMessages),
/// Concluded with result.
///
@@ -171,14 +171,12 @@ impl FetchTaskConfig {
}
}
+ #[overseer::contextbounds(AvailabilityDistribution, prefix = self::overseer)]
impl FetchTask {
/// Start fetching a chunk.
///
/// A task handling the fetching of the configured chunk will be spawned.
- pub async fn start<Context>(config: FetchTaskConfig, ctx: &mut Context) -> Result<Self>
- where
- Context: SubsystemContext,
- {
+ pub async fn start<Context>(config: FetchTaskConfig, ctx: &mut Context) -> Result<Self> {
let FetchTaskConfig { prepared_running, live_in } = config;
if let Some(running) = prepared_running {
@@ -333,9 +331,10 @@ impl RunningTask {
let requests = Requests::ChunkFetchingV1(full_request);
self.sender
- .send(FromFetchTask::Message(AllMessages::NetworkBridge(
- NetworkBridgeMessage::SendRequests(vec![requests], IfDisconnected::ImmediateError),
- )))
+ .send(FromFetchTask::Message(
+ NetworkBridgeMessage::SendRequests(vec![requests], IfDisconnected::ImmediateError)
+ .into(),
+ ))
.await
.map_err(|_| TaskError::ShuttingDown)?;
@@ -413,13 +412,14 @@ impl RunningTask {
let (tx, rx) = oneshot::channel();
let r = self
.sender
- .send(FromFetchTask::Message(AllMessages::AvailabilityStore(
+ .send(FromFetchTask::Message(
AvailabilityStoreMessage::StoreChunk {
candidate_hash: self.request.candidate_hash,
chunk,
tx,
- },
- )))
+ }
+ .into(),
+ ))
.await;
if let Err(err) = r {
gum::error!(target: LOG_TARGET, err= ?err, "Storing erasure chunk failed, system shutting down?");
@@ -227,7 +227,11 @@ impl TestRun {
/// Returns true, if after processing of the given message it would be OK for the stream to
/// end.
- async fn handle_message(&self, msg: AllMessages) -> bool {
+ async fn handle_message(
+ &self,
+ msg: overseer::AvailabilityDistributionOutgoingMessages,
+ ) -> bool {
+ let msg = AllMessages::from(msg);
match msg {
AllMessages::NetworkBridge(NetworkBridgeMessage::SendRequests(
reqs,
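The test-side change above shows the other half of the pattern: tasks now emit the narrow, generated `AvailabilityDistributionOutgoingMessages` enum, and the harness widens it with `AllMessages::from(msg)` before matching. A rough sketch of the conversions involved, with hand-written stand-ins for what the overseer macros generate:

```rust
// Simplified stand-ins; the real enums are generated by the overseer macros.
enum NetworkBridgeMessage { SendRequests }
enum AvailabilityStoreMessage { StoreChunk }

/// Narrow, per-subsystem outgoing set: only the declared messages.
enum AvailabilityDistributionOutgoingMessages {
    NetworkBridge(NetworkBridgeMessage),
    AvailabilityStore(AvailabilityStoreMessage),
}

/// Global message set, as matched on by the test harness.
enum AllMessages {
    NetworkBridge(NetworkBridgeMessage),
    AvailabilityStore(AvailabilityStoreMessage),
}

// `.into()` on a concrete message lifts it into the narrow enum ...
impl From<NetworkBridgeMessage> for AvailabilityDistributionOutgoingMessages {
    fn from(msg: NetworkBridgeMessage) -> Self {
        Self::NetworkBridge(msg)
    }
}

// ... and the narrow enum widens into `AllMessages` where a global view is needed.
impl From<AvailabilityDistributionOutgoingMessages> for AllMessages {
    fn from(msg: AvailabilityDistributionOutgoingMessages) -> Self {
        match msg {
            AvailabilityDistributionOutgoingMessages::NetworkBridge(m) => Self::NetworkBridge(m),
            AvailabilityDistributionOutgoingMessages::AvailabilityStore(m) =>
                Self::AvailabilityStore(m),
        }
    }
}

fn main() {
    let narrow: AvailabilityDistributionOutgoingMessages =
        NetworkBridgeMessage::SendRequests.into();
    // Widen for the harness's match, mirroring `AllMessages::from(msg)` above.
    match AllMessages::from(narrow) {
        AllMessages::NetworkBridge(_) => println!("routed to network bridge"),
        AllMessages::AvailabilityStore(_) => println!("routed to availability store"),
    }
}
```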
@@ -33,8 +33,8 @@ use futures::{
};
use polkadot_node_subsystem::{
- messages::{AllMessages, ChainApiMessage},
- ActivatedLeaf, ActiveLeavesUpdate, LeafStatus, SubsystemContext,
+ messages::{ChainApiMessage, RuntimeApiMessage},
+ overseer, ActivatedLeaf, ActiveLeavesUpdate, LeafStatus,
};
use polkadot_node_subsystem_util::runtime::{get_occupied_cores, RuntimeInfo};
use polkadot_primitives::v2::{CandidateHash, Hash, OccupiedCore, SessionIndex};
@@ -78,6 +78,7 @@ pub struct Requester {
metrics: Metrics,
}
+ #[overseer::contextbounds(AvailabilityDistribution, prefix = self::overseer)]
impl Requester {
/// How many ancestors of the leaf should we consider along with it.
pub(crate) const LEAF_ANCESTRY_LEN_WITHIN_SESSION: usize = 3;
@@ -99,10 +100,7 @@ impl Requester {
ctx: &mut Context,
runtime: &mut RuntimeInfo,
update: ActiveLeavesUpdate,
- ) -> Result<()>
- where
- Context: SubsystemContext,
- {
+ ) -> Result<()> {
gum::trace!(target: LOG_TARGET, ?update, "Update fetching heads");
let ActiveLeavesUpdate { activated, deactivated } = update;
// Stale leaves happen after a reversion - we don't want to re-run availability there.
@@ -125,13 +123,11 @@ impl Requester {
ctx: &mut Context,
runtime: &mut RuntimeInfo,
new_head: ActivatedLeaf,
- ) -> Result<()>
- where
- Context: SubsystemContext,
- {
+ ) -> Result<()> {
+ let sender = &mut ctx.sender().clone();
let ActivatedLeaf { hash: leaf, .. } = new_head;
let (leaf_session_index, ancestors_in_session) = get_block_ancestors_in_same_session(
- ctx,
+ sender,
runtime,
leaf,
Self::LEAF_ANCESTRY_LEN_WITHIN_SESSION,
@@ -139,7 +135,7 @@ impl Requester {
.await?;
// Also spawn or bump tasks for candidates in ancestry in the same session.
for hash in std::iter::once(leaf).chain(ancestors_in_session) {
- let cores = get_occupied_cores(ctx, hash).await?;
+ let cores = get_occupied_cores(sender, hash).await?;
gum::trace!(
target: LOG_TARGET,
occupied_cores = ?cores,
@@ -177,15 +173,12 @@ impl Requester {
/// passed in leaf might be some later block where the candidate is still pending availability.
async fn add_cores<Context>(
&mut self,
- ctx: &mut Context,
+ context: &mut Context,
runtime: &mut RuntimeInfo,
leaf: Hash,
leaf_session_index: SessionIndex,
cores: impl IntoIterator<Item = OccupiedCore>,
- ) -> Result<()>
- where
- Context: SubsystemContext,
- {
+ ) -> Result<()> {
for core in cores {
match self.fetches.entry(core.candidate_hash) {
Entry::Occupied(mut e) =>
@@ -200,7 +193,7 @@ impl Requester {
let task_cfg = self
.session_cache
.with_session_info(
- ctx,
+ context,
runtime,
// We use leaf here, the relay_parent must be in the same session as the
// leaf. This is guaranteed by runtime which ensures that cores are cleared
@@ -221,7 +214,7 @@ impl Requester {
});
if let Ok(Some(task_cfg)) = task_cfg {
- e.insert(FetchTask::start(task_cfg, ctx).await?);
+ e.insert(FetchTask::start(task_cfg, context).await?);
}
// Not a validator, nothing to do.
},
@@ -232,9 +225,9 @@ impl Requester {
}
impl Stream for Requester {
- type Item = AllMessages;
+ type Item = overseer::AvailabilityDistributionOutgoingMessages;
- fn poll_next(mut self: Pin<&mut Self>, ctx: &mut Context) -> Poll<Option<AllMessages>> {
+ fn poll_next(mut self: Pin<&mut Self>, ctx: &mut Context) -> Poll<Option<Self::Item>> {
loop {
match Pin::new(&mut self.rx).poll_next(ctx) {
Poll::Ready(Some(FromFetchTask::Message(m))) => return Poll::Ready(Some(m)),
@@ -257,26 +250,27 @@ impl Stream for Requester {
/// Requests up to `limit` ancestor hashes of relay parent in the same session.
///
/// Also returns session index of the `head`.
- async fn get_block_ancestors_in_same_session<Context>(
- ctx: &mut Context,
+ async fn get_block_ancestors_in_same_session<Sender>(
+ sender: &mut Sender,
runtime: &mut RuntimeInfo,
head: Hash,
limit: usize,
) -> Result<(SessionIndex, Vec<Hash>)>
where
- Context: SubsystemContext,
+ Sender:
+ overseer::SubsystemSender<RuntimeApiMessage> + overseer::SubsystemSender<ChainApiMessage>,
{
// The order is parent, grandparent, ...
//
// `limit + 1` since a session index for the last element in ancestry
// is obtained through its parent. It always gets truncated because
// `session_ancestry_len` can only be incremented `ancestors.len() - 1` times.
- let mut ancestors = get_block_ancestors(ctx, head, limit + 1).await?;
+ let mut ancestors = get_block_ancestors(sender, head, limit + 1).await?;
let mut ancestors_iter = ancestors.iter();
// `head` is the child of the first block in `ancestors`, request its session index.
let head_session_index = match ancestors_iter.next() {
- Some(parent) => runtime.get_session_index_for_child(ctx.sender(), *parent).await?,
+ Some(parent) => runtime.get_session_index_for_child(sender, *parent).await?,
None => {
// No first element, i.e. empty.
return Ok((0, ancestors))
@@ -287,7 +281,7 @@ where
// The first parent is skipped.
for parent in ancestors_iter {
// Parent is the i-th ancestor, request session index for its child -- (i-1)th element.
- let session_index = runtime.get_session_index_for_child(ctx.sender(), *parent).await?;
+ let session_index = runtime.get_session_index_for_child(sender, *parent).await?;
if session_index == head_session_index {
session_ancestry_len += 1;
} else {
@@ -302,21 +296,22 @@ where
}
/// Request up to `limit` ancestor hashes of relay parent from the Chain API.
- async fn get_block_ancestors<Context>(
- ctx: &mut Context,
+ async fn get_block_ancestors<Sender>(
+ sender: &mut Sender,
relay_parent: Hash,
limit: usize,
) -> Result<Vec<Hash>>
where
- Context: SubsystemContext,
+ Sender: overseer::SubsystemSender<ChainApiMessage>,
{
let (tx, rx) = oneshot::channel();
- ctx.send_message(ChainApiMessage::Ancestors {
- hash: relay_parent,
- k: limit,
- response_channel: tx,
- })
- .await;
+ sender
+ .send_message(ChainApiMessage::Ancestors {
+ hash: relay_parent,
+ k: limit,
+ response_channel: tx,
+ })
+ .await;
let ancestors = rx
.await
@@ -19,7 +19,7 @@ use std::collections::HashSet;
use lru::LruCache;
use rand::{seq::SliceRandom, thread_rng};
- use polkadot_node_subsystem::SubsystemContext;
+ use polkadot_node_subsystem::overseer;
use polkadot_node_subsystem_util::runtime::RuntimeInfo;
use polkadot_primitives::v2::{
AuthorityDiscoveryId, GroupIndex, Hash, SessionIndex, ValidatorIndex,
@@ -79,6 +79,7 @@ pub struct BadValidators {
pub bad_validators: Vec<AuthorityDiscoveryId>,
}
+ #[overseer::contextbounds(AvailabilityDistribution, prefix = self::overseer)]
impl SessionCache {
/// Create a new `SessionCache`.
pub fn new() -> Self {
@@ -103,7 +104,6 @@ impl SessionCache {
with_info: F,
) -> Result<Option<R>>
where
- Context: SubsystemContext,
F: FnOnce(&SessionInfo) -> R,
{
if let Some(o_info) = self.session_info_cache.get(&session_index) {
@@ -178,10 +178,7 @@ impl SessionCache {
runtime: &mut RuntimeInfo,
relay_parent: Hash,
session_index: SessionIndex,
- ) -> Result<Option<SessionInfo>>
- where
- Context: SubsystemContext,
- {
+ ) -> Result<Option<SessionInfo>> {
let info = runtime
.get_session_info_by_index(ctx.sender(), relay_parent, session_index)
.await?;
@@ -43,7 +43,7 @@ pub async fn run_pov_receiver<Sender>(
mut receiver: IncomingRequestReceiver<v1::PoVFetchingRequest>,
metrics: Metrics,
) where
- Sender: SubsystemSender,
+ Sender: SubsystemSender<AvailabilityStoreMessage>,
{
loop {
match receiver.recv(|| vec![COST_INVALID_REQUEST]).await.into_nested() {
@@ -71,7 +71,7 @@ pub async fn run_chunk_receiver<Sender>(
mut receiver: IncomingRequestReceiver<v1::ChunkFetchingRequest>,
metrics: Metrics,
) where
- Sender: SubsystemSender,
+ Sender: SubsystemSender<AvailabilityStoreMessage>,
{
loop {
match receiver.recv(|| vec![COST_INVALID_REQUEST]).await.into_nested() {
@@ -105,7 +105,7 @@ pub async fn answer_pov_request_log<Sender>(
req: IncomingRequest<v1::PoVFetchingRequest>,
metrics: &Metrics,
) where
- Sender: SubsystemSender,
+ Sender: SubsystemSender<AvailabilityStoreMessage>,
{
let res = answer_pov_request(sender, req).await;
match res {
@@ -130,7 +130,7 @@ pub async fn answer_chunk_request_log<Sender>(
metrics: &Metrics,
) -> ()
where
- Sender: SubsystemSender,
+ Sender: SubsystemSender<AvailabilityStoreMessage>,
{
let res = answer_chunk_request(sender, req).await;
match res {
@@ -154,7 +154,7 @@ pub async fn answer_pov_request<Sender>(
req: IncomingRequest<v1::PoVFetchingRequest>,
) -> Result<bool>
where
- Sender: SubsystemSender,
+ Sender: SubsystemSender<AvailabilityStoreMessage>,
{
let _span = jaeger::Span::new(req.payload.candidate_hash, "answer-pov-request");
@@ -182,7 +182,7 @@ pub async fn answer_chunk_request<Sender>(
req: IncomingRequest<v1::ChunkFetchingRequest>,
) -> Result<bool>
where
- Sender: SubsystemSender,
+ Sender: SubsystemSender<AvailabilityStoreMessage>,
{
let span = jaeger::Span::new(req.payload.candidate_hash, "answer-chunk-request");
@@ -217,7 +217,7 @@ async fn query_chunk<Sender>(
validator_index: ValidatorIndex,
) -> std::result::Result<Option<ErasureChunk>, JfyiError>
where
- Sender: SubsystemSender,
+ Sender: SubsystemSender<AvailabilityStoreMessage>,
{
let (tx, rx) = oneshot::channel();
sender
@@ -245,7 +245,7 @@ async fn query_available_data<Sender>(
candidate_hash: CandidateHash,
) -> Result<Option<AvailableData>>
where
- Sender: SubsystemSender,
+ Sender: SubsystemSender<AvailabilityStoreMessage>,
{
let (tx, rx) = oneshot::channel();
sender