implement bitfield signing subsystem (#1364)

* update guide to reduce confusion and TODOs

* work from previous bitfield signing effort

There were large merge issues with the old bitfield signing PR, so
we're just copying all the work from that onto this and restarting.

Much of the existing work will be discarded because we now have better
tools available, but that's fine.

* start rewriting bitfield signing in terms of the util module

* implement construct_availability_bitvec

It's not an ideal implementation--we can make it much more concurrent--
but at least it compiles.

* implement the unimplemented portions of bitfield signing

* get core availability concurrently, not sequentially

* use sp-std instead of std for a parachain item

* resolve type inference failure caused by multiple From impls

* handle bitfield signing subsystem & `AllMessages` variant in overseer

* fix more multi-From inference issues

* more concisely handle overflow

Co-authored-by: Andronik Ordian <write@reusable.software>

* Revert "resolve type inference failure caused by multiple From impls"

This reverts commit 7fc77805de5e5074a1b01037f8d4e3919e03e0e1.

* Revert "fix more multi-From inference issues"

This reverts commit f14ffe589e20d664d8a900ed62f68b6fb844a514.

* impl From<i32> for ParaId

* handle another instance of AllSubsystems

* improve consistency when returning existing options

Co-authored-by: Andronik Ordian <write@reusable.software>
This commit is contained in:
Peter Goodspeed-Niklaus
2020-07-23 16:05:48 +02:00
committed by GitHub
parent a1c704d446
commit ba4bfa4dd0
10 changed files with 411 additions and 6 deletions
+15
View File
@@ -0,0 +1,15 @@
[package]
name = "polkadot-node-bitfield-signing"
version = "0.1.0"
authors = ["Peter Goodspeed-Niklaus <peter.r.goodspeedniklaus@gmail.com>"]
edition = "2018"
[dependencies]
bitvec = "0.17.4"
derive_more = "0.99.9"
futures = "0.3.5"
log = "0.4.8"
polkadot-primitives = { path = "../../primitives" }
polkadot-node-subsystem = { path = "../subsystem" }
keystore = { package = "sc-keystore", git = "https://github.com/paritytech/substrate", branch = "master" }
wasm-timer = "0.2.4"
+290
View File
@@ -0,0 +1,290 @@
// Copyright 2020 Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! The bitfield signing subsystem produces `SignedAvailabilityBitfield`s once per block.
use bitvec::bitvec;
use futures::{
channel::{mpsc, oneshot},
prelude::*,
stream, Future,
};
use keystore::KeyStorePtr;
use polkadot_node_subsystem::{
messages::{
self, AllMessages, AvailabilityStoreMessage, BitfieldDistributionMessage,
BitfieldSigningMessage, CandidateBackingMessage, RuntimeApiMessage,
},
util::{self, JobManager, JobTrait, ToJobTrait, Validator},
};
use polkadot_primitives::v1::{AvailabilityBitfield, CoreOccupied, Hash};
use std::{convert::TryFrom, pin::Pin, time::Duration};
use wasm_timer::{Delay, Instant};
/// Delay between starting a bitfield signing job and its attempting to create a bitfield.
///
/// The job computes its wake-up instant from this value as soon as it starts, so any time
/// spent on setup before the wait counts towards the delay rather than being added to it.
const JOB_DELAY: Duration = Duration::from_millis(1500);
/// Each `BitfieldSigningJob` prepares a signed bitfield for a single relay parent.
///
/// This is a unit type: it holds no state of its own and serves as the `JobTrait` implementor.
pub struct BitfieldSigningJob;
/// Messages which a `BitfieldSigningJob` is prepared to receive.
pub enum ToJob {
/// A message addressed to the bitfield signing subsystem.
BitfieldSigning(BitfieldSigningMessage),
/// The value used as `ToJobTrait::STOP`; it is not associated with any relay parent.
Stop,
}
impl ToJobTrait for ToJob {
    /// The `Stop` variant serves as this message type's `STOP` constant.
    const STOP: Self = Self::Stop;

    /// Relay parent of the wrapped message, if it reports one.
    ///
    /// `Stop` carries no payload and therefore never has a relay parent.
    fn relay_parent(&self) -> Option<Hash> {
        match self {
            Self::Stop => None,
            Self::BitfieldSigning(message) => message.relay_parent(),
        }
    }
}
impl TryFrom<AllMessages> for ToJob {
    type Error = ();

    /// Accepts only `AllMessages::BitfieldSigning`; every other message is rejected with `Err(())`.
    fn try_from(msg: AllMessages) -> Result<Self, Self::Error> {
        if let AllMessages::BitfieldSigning(inner) = msg {
            Ok(Self::BitfieldSigning(inner))
        } else {
            Err(())
        }
    }
}
impl From<BitfieldSigningMessage> for ToJob {
fn from(bsm: BitfieldSigningMessage) -> ToJob {
ToJob::BitfieldSigning(bsm)
}
}
/// Messages which may be sent from a `BitfieldSigningJob`.
///
/// Each variant wraps the message type of one destination and converts to the
/// `AllMessages` variant of the same name.
pub enum FromJob {
/// A message for the availability store.
AvailabilityStore(AvailabilityStoreMessage),
/// A message for bitfield distribution.
BitfieldDistribution(BitfieldDistributionMessage),
/// A message for candidate backing.
CandidateBacking(CandidateBackingMessage),
/// A message for the runtime API.
RuntimeApi(RuntimeApiMessage),
}
impl From<FromJob> for AllMessages {
    /// Maps each outbound job message onto the `AllMessages` variant of the same name.
    fn from(from_job: FromJob) -> Self {
        match from_job {
            FromJob::RuntimeApi(inner) => Self::RuntimeApi(inner),
            FromJob::CandidateBacking(inner) => Self::CandidateBacking(inner),
            FromJob::BitfieldDistribution(inner) => Self::BitfieldDistribution(inner),
            FromJob::AvailabilityStore(inner) => Self::AvailabilityStore(inner),
        }
    }
}
impl TryFrom<AllMessages> for FromJob {
    type Error = ();

    /// Inverse of `From<FromJob> for AllMessages`: succeeds for the four message kinds a job
    /// may emit and yields `Err(())` for anything else.
    fn try_from(msg: AllMessages) -> Result<Self, Self::Error> {
        let converted = match msg {
            AllMessages::RuntimeApi(inner) => Self::RuntimeApi(inner),
            AllMessages::CandidateBacking(inner) => Self::CandidateBacking(inner),
            AllMessages::BitfieldDistribution(inner) => Self::BitfieldDistribution(inner),
            AllMessages::AvailabilityStore(inner) => Self::AvailabilityStore(inner),
            _ => return Err(()),
        };
        Ok(converted)
    }
}
/// Errors we may encounter in the course of executing the `BitfieldSigningSubsystem`.
///
/// Every variant derives a `From` conversion, so the `?` operator can lift each wrapped
/// error type into this enum.
#[derive(Debug, derive_more::From)]
pub enum Error {
/// error propagated from the utility subsystem
#[from]
Util(util::Error),
/// io error
#[from]
Io(std::io::Error),
/// a oneshot channel was canceled
#[from]
Oneshot(oneshot::Canceled),
/// an mpsc channel failed to send
#[from]
MpscSend(mpsc::SendError),
/// several errors collected into one
#[from]
Multiple(Vec<Error>),
}
// Determine the availability bit for a single core.
//
// Bundles every fallible step (two channel sends, two oneshot replies) behind one `Result`:
//
// - a core that is not `Some(CoreOccupied::Parachain)` yields `Ok(false)` without any messages
// - otherwise the runtime is asked for the candidate pending availability; `None` yields `Ok(false)`
// - otherwise the availability store is asked whether the candidate's PoV is available, and
//   its answer is returned
async fn get_core_availability(
    relay_parent: Hash,
    idx: usize,
    core: Option<CoreOccupied>,
    sender: &mpsc::Sender<FromJob>,
) -> Result<bool, Error> {
    use messages::{
        AvailabilityStoreMessage::QueryPoVAvailable,
        RuntimeApiRequest::CandidatePendingAvailability,
    };
    use FromJob::{AvailabilityStore, RuntimeApi};
    use RuntimeApiMessage::Request;

    // sending requires `&mut`, so work on a (cheap) clone of the shared sender
    let mut sender = sender.clone();

    // REVIEW: is it safe to ignore parathreads here, or do they also figure in the availability mapping?
    match core {
        Some(CoreOccupied::Parachain) => {}
        _ => return Ok(false),
    }

    // NOTE(review): `idx` is the core's position in the roster and is converted into the id
    // type `CandidatePendingAvailability` takes -- confirm that these two coincide.
    let (receipt_tx, receipt_rx) = oneshot::channel();
    let pending_request = RuntimeApi(Request(
        relay_parent,
        CandidatePendingAvailability(idx.into(), receipt_tx),
    ));
    sender.send(pending_request).await?;

    let pov_hash = match receipt_rx.await? {
        Some(receipt) => receipt.descriptor.pov_hash,
        None => return Ok(false),
    };

    let (available_tx, available_rx) = oneshot::channel();
    sender
        .send(AvailabilityStore(QueryPoVAvailable(pov_hash, available_tx)))
        .await?;

    Ok(available_rx.await?)
}
// Build the availability bitfield for `relay_parent`, with one bit per availability core.
//
// the way this function works is not intuitive:
//
// - get the scheduler roster so we have a list of cores, in order.
// - for each occupied core, fetch `candidate_pending_availability` from runtime
// - from there, we can get the `CandidateDescriptor`
// - from there, we can send an `AvailabilityStoreMessage::QueryPoVAvailable` and set the
//   indexed bit to 1 if it answers `true`
//
// All cores are queried concurrently. If any query fails, no bitfield is produced: every
// error encountered is returned together, ordered by core index, as `Error::Multiple`.
async fn construct_availability_bitfield(
    relay_parent: Hash,
    sender: &mut mpsc::Sender<FromJob>,
) -> Result<AvailabilityBitfield, Error> {
    use messages::RuntimeApiRequest::ValidatorGroups;
    use FromJob::RuntimeApi;
    use RuntimeApiMessage::Request;

    // request the validator groups so we can get the scheduler roster
    let (tx, rx) = oneshot::channel();
    sender
        .send(RuntimeApi(Request(relay_parent, ValidatorGroups(tx))))
        .await?;

    // we now need sender to be immutable so we can copy the reference into every per-core future
    let sender = &*sender;

    // wait for the scheduler roster
    let scheduler_roster = rx.await?;

    // Drive one query per (idx, core) pair concurrently. `join_all` hands the results back
    // in input order, so each result's position is its core index and no shared, locked
    // state is needed to assemble the output.
    let results = futures::future::join_all(
        scheduler_roster
            .availability_cores
            .into_iter()
            .enumerate()
            .map(move |(idx, core)| get_core_availability(relay_parent, idx, core, sender)),
    )
    .await;

    // prepare outputs: every bit starts at 0 and is only written for successful queries
    let mut out = bitvec!(bitvec::order::Lsb0, u8; 0; results.len());
    let mut errs = Vec::new();
    for (idx, result) in results.into_iter().enumerate() {
        match result {
            Ok(availability) => out.set(idx, availability),
            Err(err) => errs.push(err),
        }
    }

    if errs.is_empty() {
        Ok(out.into())
    } else {
        Err(errs.into())
    }
}
impl JobTrait for BitfieldSigningJob {
type ToJob = ToJob;
type FromJob = FromJob;
type Error = Error;
type RunArgs = KeyStorePtr;
const NAME: &'static str = "BitfieldSigningJob";
/// Run a job for the parent block indicated
fn run(
relay_parent: Hash,
keystore: Self::RunArgs,
_receiver: mpsc::Receiver<ToJob>,
mut sender: mpsc::Sender<FromJob>,
) -> Pin<Box<dyn Future<Output = Result<(), Self::Error>> + Send>> {
async move {
// figure out when to wait to
let wait_until = Instant::now() + JOB_DELAY;
// now do all the work we can before we need to wait for the availability store
// if we're not a validator, we can just succeed effortlessly
let validator = match Validator::new(relay_parent, keystore, sender.clone()).await {
Ok(validator) => validator,
Err(util::Error::NotAValidator) => return Ok(()),
Err(err) => return Err(Error::Util(err)),
};
// wait a bit before doing anything else
Delay::new_at(wait_until).await?;
let bitfield = construct_availability_bitfield(relay_parent, &mut sender).await?;
let signed_bitfield = validator.sign(bitfield);
// make an anonymous scope to contain some use statements to simplify creating the outbound message
{
use BitfieldDistributionMessage::DistributeBitfield;
use FromJob::BitfieldDistribution;
sender
.send(BitfieldDistribution(DistributeBitfield(
relay_parent,
signed_bitfield,
)))
.await
.map_err(Into::into)
}
}
.boxed()
}
}
/// BitfieldSigningSubsystem manages a number of bitfield signing jobs.
///
/// It is the generic `JobManager` specialized to run `BitfieldSigningJob`s; the spawner and
/// context types are left to the caller.
pub type BitfieldSigningSubsystem<Spawner, Context> =
JobManager<Spawner, Context, BitfieldSigningJob>;
@@ -147,6 +147,7 @@ fn main() {
candidate_selection: DummySubsystem,
statement_distribution: DummySubsystem,
availability_distribution: DummySubsystem,
bitfield_signing: DummySubsystem,
bitfield_distribution: DummySubsystem,
provisioner: DummySubsystem,
pov_distribution: DummySubsystem,
+28 -4
View File
@@ -77,7 +77,7 @@ use client::{BlockImportNotification, BlockchainEvents, FinalityNotification};
use polkadot_subsystem::messages::{
CandidateValidationMessage, CandidateBackingMessage,
CandidateSelectionMessage, StatementDistributionMessage,
AvailabilityDistributionMessage, BitfieldDistributionMessage,
AvailabilityDistributionMessage, BitfieldSigningMessage, BitfieldDistributionMessage,
ProvisionerMessage, PoVDistributionMessage, RuntimeApiMessage,
AvailabilityStoreMessage, NetworkBridgeMessage, AllMessages,
};
@@ -339,6 +339,9 @@ pub struct Overseer<S: SpawnNamed> {
/// An availability distribution subsystem.
availability_distribution_subsystem: OverseenSubsystem<AvailabilityDistributionMessage>,
/// A bitfield signing subsystem.
bitfield_signing_subsystem: OverseenSubsystem<BitfieldSigningMessage>,
/// A bitfield distribution subsystem.
bitfield_distribution_subsystem: OverseenSubsystem<BitfieldDistributionMessage>,
@@ -390,7 +393,7 @@ pub struct Overseer<S: SpawnNamed> {
///
/// [`Subsystem`]: trait.Subsystem.html
/// [`DummySubsystem`]: struct.DummySubsystem.html
pub struct AllSubsystems<CV, CB, CS, SD, AD, BD, P, PoVD, RA, AS, NB> {
pub struct AllSubsystems<CV, CB, CS, SD, AD, BS, BD, P, PoVD, RA, AS, NB> {
/// A candidate validation subsystem.
pub candidate_validation: CV,
/// A candidate backing subsystem.
@@ -401,6 +404,8 @@ pub struct AllSubsystems<CV, CB, CS, SD, AD, BD, P, PoVD, RA, AS, NB> {
pub statement_distribution: SD,
/// An availability distribution subsystem.
pub availability_distribution: AD,
/// A bitfield signing subsystem.
pub bitfield_signing: BS,
/// A bitfield distribution subsystem.
pub bitfield_distribution: BD,
/// A provisioner subsystem.
@@ -487,6 +492,7 @@ where
/// candidate_selection: DummySubsystem,
/// statement_distribution: DummySubsystem,
/// availability_distribution: DummySubsystem,
/// bitfield_signing: DummySubsystem,
/// bitfield_distribution: DummySubsystem,
/// provisioner: DummySubsystem,
/// pov_distribution: DummySubsystem,
@@ -513,9 +519,9 @@ where
/// #
/// # }); }
/// ```
pub fn new<CV, CB, CS, SD, AD, BD, P, PoVD, RA, AS, NB>(
pub fn new<CV, CB, CS, SD, AD, BS, BD, P, PoVD, RA, AS, NB>(
leaves: impl IntoIterator<Item = BlockInfo>,
all_subsystems: AllSubsystems<CV, CB, CS, SD, AD, BD, P, PoVD, RA, AS, NB>,
all_subsystems: AllSubsystems<CV, CB, CS, SD, AD, BS, BD, P, PoVD, RA, AS, NB>,
mut s: S,
) -> SubsystemResult<(Self, OverseerHandler)>
where
@@ -524,6 +530,7 @@ where
CS: Subsystem<OverseerSubsystemContext<CandidateSelectionMessage>> + Send,
SD: Subsystem<OverseerSubsystemContext<StatementDistributionMessage>> + Send,
AD: Subsystem<OverseerSubsystemContext<AvailabilityDistributionMessage>> + Send,
BS: Subsystem<OverseerSubsystemContext<BitfieldSigningMessage>> + Send,
BD: Subsystem<OverseerSubsystemContext<BitfieldDistributionMessage>> + Send,
P: Subsystem<OverseerSubsystemContext<ProvisionerMessage>> + Send,
PoVD: Subsystem<OverseerSubsystemContext<PoVDistributionMessage>> + Send,
@@ -575,6 +582,13 @@ where
all_subsystems.availability_distribution,
)?;
let bitfield_signing_subsystem = spawn(
&mut s,
&mut running_subsystems,
&mut running_subsystems_rx,
all_subsystems.bitfield_signing,
)?;
let bitfield_distribution_subsystem = spawn(
&mut s,
&mut running_subsystems,
@@ -630,6 +644,7 @@ where
candidate_selection_subsystem,
statement_distribution_subsystem,
availability_distribution_subsystem,
bitfield_signing_subsystem,
bitfield_distribution_subsystem,
provisioner_subsystem,
pov_distribution_subsystem,
@@ -871,6 +886,11 @@ where
let _ = s.tx.send(FromOverseer::Communication { msg }).await;
}
}
AllMessages::BitfieldSigning(msg) => {
if let Some(ref mut s) = self.bitfield_signing_subsystem.instance {
let _ = s.tx.send(FromOverseer::Communication{ msg }).await;
}
}
AllMessages::Provisioner(msg) => {
if let Some(ref mut s) = self.provisioner_subsystem.instance {
let _ = s.tx.send(FromOverseer::Communication { msg }).await;
@@ -1050,6 +1070,7 @@ mod tests {
candidate_selection: DummySubsystem,
statement_distribution: DummySubsystem,
availability_distribution: DummySubsystem,
bitfield_signing: DummySubsystem,
bitfield_distribution: DummySubsystem,
provisioner: DummySubsystem,
pov_distribution: DummySubsystem,
@@ -1112,6 +1133,7 @@ mod tests {
candidate_selection: DummySubsystem,
statement_distribution: DummySubsystem,
availability_distribution: DummySubsystem,
bitfield_signing: DummySubsystem,
bitfield_distribution: DummySubsystem,
provisioner: DummySubsystem,
pov_distribution: DummySubsystem,
@@ -1227,6 +1249,7 @@ mod tests {
candidate_selection: DummySubsystem,
statement_distribution: DummySubsystem,
availability_distribution: DummySubsystem,
bitfield_signing: DummySubsystem,
bitfield_distribution: DummySubsystem,
provisioner: DummySubsystem,
pov_distribution: DummySubsystem,
@@ -1323,6 +1346,7 @@ mod tests {
candidate_selection: DummySubsystem,
statement_distribution: DummySubsystem,
availability_distribution: DummySubsystem,
bitfield_signing: DummySubsystem,
bitfield_distribution: DummySubsystem,
provisioner: DummySubsystem,
pov_distribution: DummySubsystem,
+1
View File
@@ -302,6 +302,7 @@ fn real_overseer<S: SpawnNamed>(
candidate_selection: DummySubsystem,
statement_distribution: DummySubsystem,
availability_distribution: DummySubsystem,
bitfield_signing: DummySubsystem,
bitfield_distribution: DummySubsystem,
provisioner: DummySubsystem,
pov_distribution: DummySubsystem,
+26 -1
View File
@@ -26,7 +26,7 @@ use futures::channel::{mpsc, oneshot};
use polkadot_primitives::v1::{
BlockNumber, Hash,
CandidateReceipt, PoV, ErasureChunk, BackedCandidate, Id as ParaId,
CandidateReceipt, CommittedCandidateReceipt, PoV, ErasureChunk, BackedCandidate, Id as ParaId,
SignedAvailabilityBitfield, SigningContext, ValidatorId, ValidationCode, ValidatorIndex,
CoreAssignment, CoreOccupied, HeadData, CandidateDescriptor,
ValidatorSignature, OmittedValidationData,
@@ -219,12 +219,32 @@ impl BitfieldDistributionMessage {
}
}
/// Bitfield signing message.
///
/// Currently non-instantiable: the enum declares no variants, so no value of this type
/// can be constructed.
#[derive(Debug)]
pub enum BitfieldSigningMessage {}
impl BitfieldSigningMessage {
    /// If the current variant contains the relay parent hash, return it.
    ///
    /// The enum has no variants, so no value exists to call this on. The empty match
    /// reflects that, and makes the compiler demand an arm for every variant added later
    /// instead of silently answering `None` for it.
    pub fn relay_parent(&self) -> Option<Hash> {
        match *self {}
    }
}
/// Availability store subsystem message.
#[derive(Debug)]
pub enum AvailabilityStoreMessage {
/// Query a `PoV` from the AV store.
QueryPoV(Hash, oneshot::Sender<Option<PoV>>),
/// Query whether a `PoV` exists within the AV Store.
///
/// This is useful in cases like bitfield signing, when existence
/// matters, but we don't want to necessarily pass around multiple
/// megabytes of data to get a single bit of information.
QueryPoVAvailable(Hash, oneshot::Sender<bool>),
/// Query an `ErasureChunk` from the AV store.
QueryChunk(Hash, ValidatorIndex, oneshot::Sender<ErasureChunk>),
@@ -237,6 +257,7 @@ impl AvailabilityStoreMessage {
pub fn relay_parent(&self) -> Option<Hash> {
match self {
Self::QueryPoV(hash, _) => Some(*hash),
Self::QueryPoVAvailable(hash, _) => Some(*hash),
Self::QueryChunk(hash, _, _) => Some(*hash),
Self::StoreChunk(hash, _, _) => Some(*hash),
}
@@ -271,6 +292,8 @@ pub enum RuntimeApiRequest {
ValidationCode(ParaId, BlockNumber, Option<BlockNumber>, oneshot::Sender<ValidationCode>),
/// Get head data for a specific para.
HeadData(ParaId, oneshot::Sender<HeadData>),
/// Get the candidate pending availability for a particular parachain by parachain / core index
CandidatePendingAvailability(ParaId, oneshot::Sender<Option<CommittedCandidateReceipt>>),
}
/// A message to the Runtime API subsystem.
@@ -397,6 +420,8 @@ pub enum AllMessages {
AvailabilityDistribution(AvailabilityDistributionMessage),
/// Message for the bitfield distribution subsystem.
BitfieldDistribution(BitfieldDistributionMessage),
/// Message for the bitfield signing subsystem.
BitfieldSigning(BitfieldSigningMessage),
/// Message for the Provisioner subsystem.
Provisioner(ProvisionerMessage),
/// Message for the PoV Distribution subsystem.