Split NetworkBridge and break cycles with Unbounded (#2736)

* overseer: pass messages directly between subsystems

* test that message is held on to

* Update node/overseer/src/lib.rs

Co-authored-by: Peter Goodspeed-Niklaus <coriolinus@users.noreply.github.com>

* give every subsystem an unbounded sender too

* remove metered_channel::name

1. we don't provide good names
2. these names are never used anywhere

* unused mut

* remove unnecessary &mut

* subsystem unbounded_send

* remove unused MaybeTimer

We have channel size metrics that serve the same purpose better now and the implementation of message timing was pretty ugly.

* remove comment

* split up senders and receivers

* update metrics

* fix tests

* fix test subsystem context

* use SubsystemSender in jobs system now

* refactor of awful jobs code

* expose public `run` on JobSubsystem

* update candidate backing to new jobs & use unbounded

* bitfield signing

* candidate-selection

* provisioner

* approval voting: send unbounded for assignment/approvals

* async not needed

* begin bridge split

* split up network tasks into background worker

* port over network bridge

* Update node/network/bridge/src/lib.rs

Co-authored-by: Andronik Ordian <write@reusable.software>

* rename ValidationWorkerNotifications

Co-authored-by: Peter Goodspeed-Niklaus <coriolinus@users.noreply.github.com>
Co-authored-by: Andronik Ordian <write@reusable.software>
This commit is contained in:
Robert Habermeier
2021-03-29 01:18:53 +02:00
committed by GitHub
parent 6f464a360f
commit 8ebbe19d10
16 changed files with 1191 additions and 1346 deletions
+34 -29
View File
@@ -25,14 +25,14 @@ use futures::{
prelude::*,
};
use polkadot_node_subsystem::{
errors::{ChainApiError, RuntimeApiError}, PerLeafSpan, jaeger,
errors::{ChainApiError, RuntimeApiError}, PerLeafSpan, SubsystemSender, jaeger,
messages::{
AllMessages, CandidateBackingMessage, ChainApiMessage, ProvisionableData, ProvisionerInherentData,
CandidateBackingMessage, ChainApiMessage, ProvisionableData, ProvisionerInherentData,
ProvisionerMessage,
},
};
use polkadot_node_subsystem_util::{
self as util, delegated_subsystem, FromJobCommand,
self as util, JobSubsystem, JobSender,
request_availability_cores, request_persisted_validation_data, JobTrait, metrics::{self, prometheus},
};
use polkadot_primitives::v1::{
@@ -80,9 +80,9 @@ impl InherentAfter {
}
}
struct ProvisioningJob {
/// A per-relay-parent job for the provisioning subsystem.
pub struct ProvisioningJob {
relay_parent: Hash,
sender: mpsc::Sender<FromJobCommand>,
receiver: mpsc::Receiver<ProvisionerMessage>,
backed_candidates: Vec<CandidateReceipt>,
signed_bitfields: Vec<SignedAvailabilityBitfield>,
@@ -91,8 +91,10 @@ struct ProvisioningJob {
awaiting_inherent: Vec<oneshot::Sender<ProvisionerInherentData>>
}
/// Errors in the provisioner.
#[derive(Debug, Error)]
enum Error {
#[allow(missing_docs)]
pub enum Error {
#[error(transparent)]
Util(#[from] util::Error),
@@ -139,38 +141,35 @@ impl JobTrait for ProvisioningJob {
//
// this function is in charge of creating and executing the job's main loop
#[tracing::instrument(skip(span, _run_args, metrics, receiver, sender), fields(subsystem = LOG_TARGET))]
fn run(
fn run<S: SubsystemSender>(
relay_parent: Hash,
span: Arc<jaeger::Span>,
_run_args: Self::RunArgs,
metrics: Self::Metrics,
receiver: mpsc::Receiver<ProvisionerMessage>,
sender: mpsc::Sender<FromJobCommand>,
mut sender: JobSender<S>,
) -> Pin<Box<dyn Future<Output = Result<(), Self::Error>> + Send>> {
async move {
let job = ProvisioningJob::new(
relay_parent,
metrics,
sender,
receiver,
);
job.run_loop(PerLeafSpan::new(span, "provisioner")).await
job.run_loop(sender.subsystem_sender(), PerLeafSpan::new(span, "provisioner")).await
}
.boxed()
}
}
impl ProvisioningJob {
pub fn new(
fn new(
relay_parent: Hash,
metrics: Metrics,
sender: mpsc::Sender<FromJobCommand>,
receiver: mpsc::Receiver<ProvisionerMessage>,
) -> Self {
Self {
relay_parent,
sender,
receiver,
backed_candidates: Vec::new(),
signed_bitfields: Vec::new(),
@@ -180,7 +179,11 @@ impl ProvisioningJob {
}
}
async fn run_loop(mut self, span: PerLeafSpan) -> Result<(), Error> {
async fn run_loop(
mut self,
sender: &mut impl SubsystemSender,
span: PerLeafSpan,
) -> Result<(), Error> {
use ProvisionerMessage::{
ProvisionableData, RequestInherentData,
};
@@ -192,7 +195,7 @@ impl ProvisioningJob {
let _timer = self.metrics.time_request_inherent_data();
if self.inherent_after.is_ready() {
self.send_inherent_data(vec![return_sender]).await;
self.send_inherent_data(sender, vec![return_sender]).await;
} else {
self.awaiting_inherent.push(return_sender);
}
@@ -209,7 +212,7 @@ impl ProvisioningJob {
let _span = span.child("send-inherent-data");
let return_senders = std::mem::take(&mut self.awaiting_inherent);
if !return_senders.is_empty() {
self.send_inherent_data(return_senders).await;
self.send_inherent_data(sender, return_senders).await;
}
}
}
@@ -220,6 +223,7 @@ impl ProvisioningJob {
async fn send_inherent_data(
&mut self,
sender: &mut impl SubsystemSender,
return_senders: Vec<oneshot::Sender<ProvisionerInherentData>>,
) {
if let Err(err) = send_inherent_data(
@@ -227,7 +231,7 @@ impl ProvisioningJob {
&self.signed_bitfields,
&self.backed_candidates,
return_senders,
&mut self.sender,
sender,
)
.await
{
@@ -279,10 +283,10 @@ async fn send_inherent_data(
bitfields: &[SignedAvailabilityBitfield],
candidates: &[CandidateReceipt],
return_senders: Vec<oneshot::Sender<ProvisionerInherentData>>,
from_job: &mut mpsc::Sender<FromJobCommand>,
from_job: &mut impl SubsystemSender,
) -> Result<(), Error> {
let availability_cores = request_availability_cores(relay_parent, from_job)
.await?
.await
.await.map_err(|err| Error::CanceledAvailabilityCores(err))??;
let bitfields = select_availability_bitfields(&availability_cores, bitfields);
@@ -351,7 +355,7 @@ async fn select_candidates(
bitfields: &[SignedAvailabilityBitfield],
candidates: &[CandidateReceipt],
relay_parent: Hash,
sender: &mut mpsc::Sender<FromJobCommand>,
sender: &mut impl SubsystemSender,
) -> Result<Vec<BackedCandidate>, Error> {
let block_number = get_block_number_under_construction(relay_parent, sender).await?;
@@ -388,7 +392,7 @@ async fn select_candidates(
assumption,
sender,
)
.await?
.await
.await.map_err(|err| Error::CanceledPersistedValidationData(err))??
{
Some(v) => v,
@@ -418,11 +422,11 @@ async fn select_candidates(
// now get the backed candidates corresponding to these candidate receipts
let (tx, rx) = oneshot::channel();
sender.send(AllMessages::CandidateBacking(CandidateBackingMessage::GetBackedCandidates(
sender.send_message(CandidateBackingMessage::GetBackedCandidates(
relay_parent,
selected_candidates.clone(),
tx,
)).into()).await.map_err(|err| Error::GetBackedCandidatesSend(err))?;
).into()).await;
let mut candidates = rx.await.map_err(|err| Error::CanceledBackedCandidates(err))?;
// `selected_candidates` is generated in ascending order by core index, and `GetBackedCandidates`
@@ -470,16 +474,16 @@ async fn select_candidates(
#[tracing::instrument(level = "trace", skip(sender), fields(subsystem = LOG_TARGET))]
async fn get_block_number_under_construction(
relay_parent: Hash,
sender: &mut mpsc::Sender<FromJobCommand>,
sender: &mut impl SubsystemSender,
) -> Result<BlockNumber, Error> {
let (tx, rx) = oneshot::channel();
sender
.send(AllMessages::from(ChainApiMessage::BlockNumber(
.send_message(ChainApiMessage::BlockNumber(
relay_parent,
tx,
)).into())
.await
.map_err(|e| Error::ChainApiMessageSend(e))?;
).into())
.await;
match rx.await.map_err(|err| Error::CanceledBlockNumber(err))? {
Ok(Some(n)) => Ok(n + 1),
Ok(None) => Ok(0),
@@ -596,7 +600,8 @@ impl metrics::Metrics for Metrics {
}
delegated_subsystem!(ProvisioningJob((), Metrics) <- ProvisionerMessage as ProvisioningSubsystem);
/// The provisioning subsystem.
pub type ProvisioningSubsystem<Spawner> = JobSubsystem<ProvisioningJob, Spawner>;
#[cfg(test)]
mod tests;