Split NetworkBridge and break cycles with Unbounded (#2736)

* overseer: pass messages directly between subsystems

* test that message is held on to

* Update node/overseer/src/lib.rs

Co-authored-by: Peter Goodspeed-Niklaus <coriolinus@users.noreply.github.com>

* give every subsystem an unbounded sender too

* remove metered_channel::name

1. We don't provide good names for them.
2. These names are never used anywhere.

* unused mut

* remove unnecessary &mut

* subsystem unbounded_send

* remove unused MaybeTimer

We now have channel-size metrics that serve the same purpose better, and the implementation of message timing was pretty ugly.

* remove comment

* split up senders and receivers

* update metrics

* fix tests

* fix test subsystem context

* use SubsystemSender in jobs system now

* refactor of awful jobs code

* expose public `run` on JobSubsystem

* update candidate backing to new jobs & use unbounded

* bitfield signing

* candidate-selection

* provisioner

* approval voting: send unbounded for assignment/approvals

* async not needed

* begin bridge split

* split up network tasks into background worker

* port over network bridge

* Update node/network/bridge/src/lib.rs

Co-authored-by: Andronik Ordian <write@reusable.software>

* rename ValidationWorkerNotifications

Co-authored-by: Peter Goodspeed-Niklaus <coriolinus@users.noreply.github.com>
Co-authored-by: Andronik Ordian <write@reusable.software>
This commit is contained in:
Robert Habermeier
2021-03-29 01:18:53 +02:00
committed by GitHub
parent 6f464a360f
commit 8ebbe19d10
16 changed files with 1191 additions and 1346 deletions
+34 -29
View File
@@ -25,14 +25,14 @@ use futures::{
prelude::*,
};
use polkadot_node_subsystem::{
errors::{ChainApiError, RuntimeApiError}, PerLeafSpan, jaeger,
errors::{ChainApiError, RuntimeApiError}, PerLeafSpan, SubsystemSender, jaeger,
messages::{
AllMessages, CandidateBackingMessage, ChainApiMessage, ProvisionableData, ProvisionerInherentData,
CandidateBackingMessage, ChainApiMessage, ProvisionableData, ProvisionerInherentData,
ProvisionerMessage,
},
};
use polkadot_node_subsystem_util::{
self as util, delegated_subsystem, FromJobCommand,
self as util, JobSubsystem, JobSender,
request_availability_cores, request_persisted_validation_data, JobTrait, metrics::{self, prometheus},
};
use polkadot_primitives::v1::{
@@ -80,9 +80,9 @@ impl InherentAfter {
}
}
struct ProvisioningJob {
/// A per-relay-parent job for the provisioning subsystem.
pub struct ProvisioningJob {
relay_parent: Hash,
sender: mpsc::Sender<FromJobCommand>,
receiver: mpsc::Receiver<ProvisionerMessage>,
backed_candidates: Vec<CandidateReceipt>,
signed_bitfields: Vec<SignedAvailabilityBitfield>,
@@ -91,8 +91,10 @@ struct ProvisioningJob {
awaiting_inherent: Vec<oneshot::Sender<ProvisionerInherentData>>
}
/// Errors in the provisioner.
#[derive(Debug, Error)]
enum Error {
#[allow(missing_docs)]
pub enum Error {
#[error(transparent)]
Util(#[from] util::Error),
@@ -139,38 +141,35 @@ impl JobTrait for ProvisioningJob {
//
// this function is in charge of creating and executing the job's main loop
#[tracing::instrument(skip(span, _run_args, metrics, receiver, sender), fields(subsystem = LOG_TARGET))]
fn run(
fn run<S: SubsystemSender>(
relay_parent: Hash,
span: Arc<jaeger::Span>,
_run_args: Self::RunArgs,
metrics: Self::Metrics,
receiver: mpsc::Receiver<ProvisionerMessage>,
sender: mpsc::Sender<FromJobCommand>,
mut sender: JobSender<S>,
) -> Pin<Box<dyn Future<Output = Result<(), Self::Error>> + Send>> {
async move {
let job = ProvisioningJob::new(
relay_parent,
metrics,
sender,
receiver,
);
job.run_loop(PerLeafSpan::new(span, "provisioner")).await
job.run_loop(sender.subsystem_sender(), PerLeafSpan::new(span, "provisioner")).await
}
.boxed()
}
}
impl ProvisioningJob {
pub fn new(
fn new(
relay_parent: Hash,
metrics: Metrics,
sender: mpsc::Sender<FromJobCommand>,
receiver: mpsc::Receiver<ProvisionerMessage>,
) -> Self {
Self {
relay_parent,
sender,
receiver,
backed_candidates: Vec::new(),
signed_bitfields: Vec::new(),
@@ -180,7 +179,11 @@ impl ProvisioningJob {
}
}
async fn run_loop(mut self, span: PerLeafSpan) -> Result<(), Error> {
async fn run_loop(
mut self,
sender: &mut impl SubsystemSender,
span: PerLeafSpan,
) -> Result<(), Error> {
use ProvisionerMessage::{
ProvisionableData, RequestInherentData,
};
@@ -192,7 +195,7 @@ impl ProvisioningJob {
let _timer = self.metrics.time_request_inherent_data();
if self.inherent_after.is_ready() {
self.send_inherent_data(vec![return_sender]).await;
self.send_inherent_data(sender, vec![return_sender]).await;
} else {
self.awaiting_inherent.push(return_sender);
}
@@ -209,7 +212,7 @@ impl ProvisioningJob {
let _span = span.child("send-inherent-data");
let return_senders = std::mem::take(&mut self.awaiting_inherent);
if !return_senders.is_empty() {
self.send_inherent_data(return_senders).await;
self.send_inherent_data(sender, return_senders).await;
}
}
}
@@ -220,6 +223,7 @@ impl ProvisioningJob {
async fn send_inherent_data(
&mut self,
sender: &mut impl SubsystemSender,
return_senders: Vec<oneshot::Sender<ProvisionerInherentData>>,
) {
if let Err(err) = send_inherent_data(
@@ -227,7 +231,7 @@ impl ProvisioningJob {
&self.signed_bitfields,
&self.backed_candidates,
return_senders,
&mut self.sender,
sender,
)
.await
{
@@ -279,10 +283,10 @@ async fn send_inherent_data(
bitfields: &[SignedAvailabilityBitfield],
candidates: &[CandidateReceipt],
return_senders: Vec<oneshot::Sender<ProvisionerInherentData>>,
from_job: &mut mpsc::Sender<FromJobCommand>,
from_job: &mut impl SubsystemSender,
) -> Result<(), Error> {
let availability_cores = request_availability_cores(relay_parent, from_job)
.await?
.await
.await.map_err(|err| Error::CanceledAvailabilityCores(err))??;
let bitfields = select_availability_bitfields(&availability_cores, bitfields);
@@ -351,7 +355,7 @@ async fn select_candidates(
bitfields: &[SignedAvailabilityBitfield],
candidates: &[CandidateReceipt],
relay_parent: Hash,
sender: &mut mpsc::Sender<FromJobCommand>,
sender: &mut impl SubsystemSender,
) -> Result<Vec<BackedCandidate>, Error> {
let block_number = get_block_number_under_construction(relay_parent, sender).await?;
@@ -388,7 +392,7 @@ async fn select_candidates(
assumption,
sender,
)
.await?
.await
.await.map_err(|err| Error::CanceledPersistedValidationData(err))??
{
Some(v) => v,
@@ -418,11 +422,11 @@ async fn select_candidates(
// now get the backed candidates corresponding to these candidate receipts
let (tx, rx) = oneshot::channel();
sender.send(AllMessages::CandidateBacking(CandidateBackingMessage::GetBackedCandidates(
sender.send_message(CandidateBackingMessage::GetBackedCandidates(
relay_parent,
selected_candidates.clone(),
tx,
)).into()).await.map_err(|err| Error::GetBackedCandidatesSend(err))?;
).into()).await;
let mut candidates = rx.await.map_err(|err| Error::CanceledBackedCandidates(err))?;
// `selected_candidates` is generated in ascending order by core index, and `GetBackedCandidates`
@@ -470,16 +474,16 @@ async fn select_candidates(
#[tracing::instrument(level = "trace", skip(sender), fields(subsystem = LOG_TARGET))]
async fn get_block_number_under_construction(
relay_parent: Hash,
sender: &mut mpsc::Sender<FromJobCommand>,
sender: &mut impl SubsystemSender,
) -> Result<BlockNumber, Error> {
let (tx, rx) = oneshot::channel();
sender
.send(AllMessages::from(ChainApiMessage::BlockNumber(
.send_message(ChainApiMessage::BlockNumber(
relay_parent,
tx,
)).into())
.await
.map_err(|e| Error::ChainApiMessageSend(e))?;
).into())
.await;
match rx.await.map_err(|err| Error::CanceledBlockNumber(err))? {
Ok(Some(n)) => Ok(n + 1),
Ok(None) => Ok(0),
@@ -596,7 +600,8 @@ impl metrics::Metrics for Metrics {
}
delegated_subsystem!(ProvisioningJob((), Metrics) <- ProvisionerMessage as ProvisioningSubsystem);
/// The provisioning subsystem.
pub type ProvisioningSubsystem<Spawner> = JobSubsystem<ProvisioningJob, Spawner>;
#[cfg(test)]
mod tests;
+17 -34
View File
@@ -191,7 +191,6 @@ mod select_availability_bitfields {
}
mod select_candidates {
use futures_timer::Delay;
use super::super::*;
use super::{build_occupied_core, occupied_core, scheduled_core, default_bitvec};
use polkadot_node_subsystem::messages::{
@@ -201,6 +200,7 @@ mod select_candidates {
use polkadot_primitives::v1::{
BlockNumber, CandidateDescriptor, PersistedValidationData, CommittedCandidateReceipt, CandidateCommitments,
};
use polkadot_node_subsystem_test_helpers::TestSubsystemSender;
const BLOCK_UNDER_PRODUCTION: BlockNumber = 128;
@@ -208,12 +208,12 @@ mod select_candidates {
overseer_factory: OverseerFactory,
test_factory: TestFactory,
) where
OverseerFactory: FnOnce(mpsc::Receiver<FromJobCommand>) -> Overseer,
OverseerFactory: FnOnce(mpsc::UnboundedReceiver<AllMessages>) -> Overseer,
Overseer: Future<Output = ()>,
TestFactory: FnOnce(mpsc::Sender<FromJobCommand>) -> Test,
TestFactory: FnOnce(TestSubsystemSender) -> Test,
Test: Future<Output = ()>,
{
let (tx, rx) = mpsc::channel(64);
let (tx, rx) = polkadot_node_subsystem_test_helpers::sender_receiver();
let overseer = overseer_factory(rx);
let test = test_factory(tx);
@@ -298,24 +298,27 @@ mod select_candidates {
]
}
async fn mock_overseer(mut receiver: mpsc::Receiver<FromJobCommand>, expected: Vec<BackedCandidate>) {
async fn mock_overseer(
mut receiver: mpsc::UnboundedReceiver<AllMessages>,
expected: Vec<BackedCandidate>,
) {
use ChainApiMessage::BlockNumber;
use RuntimeApiMessage::Request;
while let Some(from_job) = receiver.next().await {
match from_job {
FromJobCommand::SendMessage(AllMessages::ChainApi(BlockNumber(_relay_parent, tx))) => {
AllMessages::ChainApi(BlockNumber(_relay_parent, tx)) => {
tx.send(Ok(Some(BLOCK_UNDER_PRODUCTION - 1))).unwrap()
}
FromJobCommand::SendMessage(AllMessages::RuntimeApi(Request(
AllMessages::RuntimeApi(Request(
_parent_hash,
PersistedValidationDataReq(_para_id, _assumption, tx),
))) => tx.send(Ok(Some(Default::default()))).unwrap(),
FromJobCommand::SendMessage(AllMessages::RuntimeApi(Request(_parent_hash, AvailabilityCores(tx)))) => {
)) => tx.send(Ok(Some(Default::default()))).unwrap(),
AllMessages::RuntimeApi(Request(_parent_hash, AvailabilityCores(tx))) => {
tx.send(Ok(mock_availability_cores())).unwrap()
}
FromJobCommand::SendMessage(
AllMessages::CandidateBacking(CandidateBackingMessage::GetBackedCandidates(_, _, sender))
AllMessages::CandidateBacking(
CandidateBackingMessage::GetBackedCandidates(_, _, sender)
) => {
let _ = sender.send(expected.clone());
}
@@ -324,29 +327,9 @@ mod select_candidates {
}
}
#[test]
fn handles_overseer_failure() {
let overseer = |rx: mpsc::Receiver<FromJobCommand>| async move {
// drop the receiver so it closes and the sender can't send, then just sleep long enough that
// this is almost certainly not the first of the two futures to complete
std::mem::drop(rx);
Delay::new(std::time::Duration::from_secs(1)).await;
};
let test = |mut tx: mpsc::Sender<FromJobCommand>| async move {
// wait so that the overseer can drop the rx before we attempt to send
Delay::new(std::time::Duration::from_millis(50)).await;
let result = select_candidates(&[], &[], &[], Default::default(), &mut tx).await;
println!("{:?}", result);
assert!(std::matches!(result, Err(Error::ChainApiMessageSend(_))));
};
test_harness(overseer, test);
}
#[test]
fn can_succeed() {
test_harness(|r| mock_overseer(r, Vec::new()), |mut tx: mpsc::Sender<FromJobCommand>| async move {
test_harness(|r| mock_overseer(r, Vec::new()), |mut tx: TestSubsystemSender| async move {
select_candidates(&[], &[], &[], Default::default(), &mut tx).await.unwrap();
})
}
@@ -411,7 +394,7 @@ mod select_candidates {
})
.collect();
test_harness(|r| mock_overseer(r, expected_backed), |mut tx: mpsc::Sender<FromJobCommand>| async move {
test_harness(|r| mock_overseer(r, expected_backed), |mut tx: TestSubsystemSender| async move {
let result =
select_candidates(&mock_cores, &[], &candidates, Default::default(), &mut tx)
.await.unwrap();
@@ -470,7 +453,7 @@ mod select_candidates {
})
.collect();
test_harness(|r| mock_overseer(r, expected_backed), |mut tx: mpsc::Sender<FromJobCommand>| async move {
test_harness(|r| mock_overseer(r, expected_backed), |mut tx: TestSubsystemSender| async move {
let result =
select_candidates(&mock_cores, &[], &candidates, Default::default(), &mut tx)
.await.unwrap();