mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-04-27 08:07:58 +00:00
5bc2b2779d
* skeleton for dispute-coordinator * add coordinator and participation message types * begin dispute-coordinator DB * functions for loading * implement strongly-typed DB transaction * add some tests for DB transaction * core logic for pruning * guide: update candidate-votes key for coordinator * update candidate-votes key * use big-endian encoding for session, and implement upper bound generator * finish implementing pruning * add a test for note_current_session * define state of the subsystem itself * barebones subsystem definition * control flow * more control flow * implement session-updating logic * trace * control flow for message handling * Update node/core/dispute-coordinator/src/lib.rs Co-authored-by: André Silva <123550+andresilva@users.noreply.github.com> * Update node/subsystem/src/messages.rs Co-authored-by: André Silva <123550+andresilva@users.noreply.github.com> * some more control flow * guide: remove overlay * more control flow * implement some DB getters * make progress on importing statements * add SignedDisputeStatement struct * move ApprovalVote to shared primitives * add a signing-payload API to explicit dispute statements * add signing-payload to CompactStatement * add relay-parent hash to seconded/valid dispute variatns * correct import * type-safe wrapper around dispute statements * use checked dispute statement in message type * extract rolling session window cache to subsystem-util * extract session window tests * approval-voting: use rolling session info cache * reduce dispute window to match runtime in practice * add byzantine_threshold and supermajority_threshold utilities to primitives * integrate rolling session window * Add PartialOrd to CandidateHash * add Ord to CandidateHash * implement active dispute update * add dispute messages to AllMessages * add dispute stubs to overseer * inform dispute participation to participate * implement issue_local_statement * implement `determine_undisputed_chain` * fix warnings * test harness for dispute coordinator tests * add more helpers to test harness * add some more helpers * some tests for dispute coordinator * ignore wrong validator indices * test finality voting rule constraint * add more tests * add variants to network bridge * fix test compilation * remove most dispute coordinator functionality as of #3222 we can do most of the work within the approval voting subsystem * Revert "remove most dispute coordinator functionality" This reverts commit 9cd615e8eb6ca0b382cbaff525d813e753d6004e. * Use thiserror Co-authored-by: Bernhard Schuster <bernhard@ahoi.io> * Update node/core/dispute-coordinator/src/lib.rs Co-authored-by: Bernhard Schuster <bernhard@ahoi.io> * extract tests to separate module * address nit * adjust run_iteration API Co-authored-by: André Silva <123550+andresilva@users.noreply.github.com> Co-authored-by: Bernhard Schuster <bernhard@ahoi.io>
867 lines
25 KiB
Rust
867 lines
25 KiB
Rust
// Copyright 2017-2020 Parity Technologies (UK) Ltd.
|
|
// This file is part of Polkadot.
|
|
|
|
// Polkadot is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
|
|
// Polkadot is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
//! Utility module for subsystems
|
|
//!
|
|
//! Many subsystems have common interests such as canceling a bunch of spawned jobs,
|
|
//! or determining what their validator ID is. These common interests are factored into
|
|
//! this module.
|
|
//!
|
|
//! This crate also reexports Prometheus metric types which are expected to be implemented by subsystems.
|
|
|
|
#![warn(missing_docs)]
|
|
|
|
use polkadot_node_subsystem::{
|
|
errors::RuntimeApiError,
|
|
messages::{AllMessages, RuntimeApiMessage, RuntimeApiRequest, RuntimeApiSender, BoundToRelayParent},
|
|
FromOverseer, SpawnedSubsystem, Subsystem, SubsystemContext, SubsystemError, SubsystemSender,
|
|
ActiveLeavesUpdate, OverseerSignal,
|
|
};
|
|
use polkadot_node_jaeger as jaeger;
|
|
use futures::{channel::{mpsc, oneshot}, prelude::*, select, stream::Stream};
|
|
use futures_timer::Delay;
|
|
use parity_scale_codec::Encode;
|
|
use pin_project::pin_project;
|
|
use polkadot_primitives::v1::{
|
|
CandidateEvent, CommittedCandidateReceipt, CoreState, EncodeAs, PersistedValidationData,
|
|
GroupRotationInfo, Hash, Id as ParaId, OccupiedCoreAssumption,
|
|
SessionIndex, Signed, SigningContext, ValidationCode, ValidatorId, ValidatorIndex, SessionInfo,
|
|
AuthorityDiscoveryId, GroupIndex,
|
|
};
|
|
use sp_core::{traits::SpawnNamed, Public};
|
|
use sp_application_crypto::AppKey;
|
|
use sp_keystore::{CryptoStore, SyncCryptoStorePtr, Error as KeystoreError};
|
|
use std::{
|
|
collections::{HashMap, hash_map::Entry}, convert::TryFrom, marker::Unpin, pin::Pin, task::{Poll, Context},
|
|
time::Duration, fmt, sync::Arc,
|
|
};
|
|
use streamunordered::{StreamUnordered, StreamYield};
|
|
use thiserror::Error;
|
|
|
|
pub use metered_channel as metered;
|
|
pub use polkadot_node_network_protocol::MIN_GOSSIP_PEERS;
|
|
|
|
/// Error classification.
|
|
pub use error_handling::{Fault, unwrap_non_fatal};
|
|
|
|
/// These reexports are required so that external crates can use the `delegated_subsystem` macro properly.
|
|
pub mod reexports {
|
|
pub use sp_core::traits::SpawnNamed;
|
|
pub use polkadot_node_subsystem::{
|
|
SpawnedSubsystem,
|
|
Subsystem,
|
|
SubsystemContext,
|
|
};
|
|
}
|
|
|
|
/// Convenient and efficient runtime info access.
|
|
pub mod runtime;
|
|
/// A rolling session window cache.
|
|
pub mod rolling_session_window;
|
|
|
|
mod error_handling;
|
|
|
|
#[cfg(test)]
|
|
mod tests;
|
|
|
|
/// Duration a job will wait after sending a stop signal before hard-aborting.
|
|
pub const JOB_GRACEFUL_STOP_DURATION: Duration = Duration::from_secs(1);
|
|
/// Capacity of channels to and from individual jobs
|
|
pub const JOB_CHANNEL_CAPACITY: usize = 64;
|
|
|
|
/// Utility errors
|
|
#[derive(Debug, Error)]
|
|
pub enum Error {
|
|
/// Attempted to send or receive on a oneshot channel which had been canceled
|
|
#[error(transparent)]
|
|
Oneshot(#[from] oneshot::Canceled),
|
|
/// Attempted to send on a MPSC channel which has been canceled
|
|
#[error(transparent)]
|
|
Mpsc(#[from] mpsc::SendError),
|
|
/// A subsystem error
|
|
#[error(transparent)]
|
|
Subsystem(#[from] SubsystemError),
|
|
/// An error in the Runtime API.
|
|
#[error(transparent)]
|
|
RuntimeApi(#[from] RuntimeApiError),
|
|
/// The type system wants this even though it doesn't make sense
|
|
#[error(transparent)]
|
|
Infallible(#[from] std::convert::Infallible),
|
|
/// Attempted to convert from an AllMessages to a FromJob, and failed.
|
|
#[error("AllMessage not relevant to Job")]
|
|
SenderConversion(String),
|
|
/// The local node is not a validator.
|
|
#[error("Node is not a validator")]
|
|
NotAValidator,
|
|
/// Already forwarding errors to another sender
|
|
#[error("AlreadyForwarding")]
|
|
AlreadyForwarding,
|
|
}
|
|
|
|
/// A type alias for Runtime API receivers.
|
|
pub type RuntimeApiReceiver<T> = oneshot::Receiver<Result<T, RuntimeApiError>>;
|
|
|
|
/// Request some data from the `RuntimeApi`.
|
|
pub async fn request_from_runtime<RequestBuilder, Response, Sender>(
|
|
parent: Hash,
|
|
sender: &mut Sender,
|
|
request_builder: RequestBuilder,
|
|
) -> RuntimeApiReceiver<Response>
|
|
where
|
|
RequestBuilder: FnOnce(RuntimeApiSender<Response>) -> RuntimeApiRequest,
|
|
Sender: SubsystemSender,
|
|
{
|
|
let (tx, rx) = oneshot::channel();
|
|
|
|
sender.send_message(RuntimeApiMessage::Request(parent, request_builder(tx)).into()).await;
|
|
|
|
rx
|
|
}
|
|
|
|
/// Construct specialized request functions for the runtime.
|
|
///
|
|
/// These would otherwise get pretty repetitive.
|
|
macro_rules! specialize_requests {
|
|
// expand return type name for documentation purposes
|
|
(fn $func_name:ident( $( $param_name:ident : $param_ty:ty ),* ) -> $return_ty:ty ; $request_variant:ident;) => {
|
|
specialize_requests!{
|
|
named stringify!($request_variant) ; fn $func_name( $( $param_name : $param_ty ),* ) -> $return_ty ; $request_variant;
|
|
}
|
|
};
|
|
|
|
// create a single specialized request function
|
|
(named $doc_name:expr ; fn $func_name:ident( $( $param_name:ident : $param_ty:ty ),* ) -> $return_ty:ty ; $request_variant:ident;) => {
|
|
#[doc = "Request `"]
|
|
#[doc = $doc_name]
|
|
#[doc = "` from the runtime"]
|
|
pub async fn $func_name(
|
|
parent: Hash,
|
|
$(
|
|
$param_name: $param_ty,
|
|
)*
|
|
sender: &mut impl SubsystemSender,
|
|
) -> RuntimeApiReceiver<$return_ty> {
|
|
request_from_runtime(parent, sender, |tx| RuntimeApiRequest::$request_variant(
|
|
$( $param_name, )* tx
|
|
)).await
|
|
}
|
|
};
|
|
|
|
// recursive decompose
|
|
(
|
|
fn $func_name:ident( $( $param_name:ident : $param_ty:ty ),* ) -> $return_ty:ty ; $request_variant:ident;
|
|
$(
|
|
fn $t_func_name:ident( $( $t_param_name:ident : $t_param_ty:ty ),* ) -> $t_return_ty:ty ; $t_request_variant:ident;
|
|
)+
|
|
) => {
|
|
specialize_requests!{
|
|
fn $func_name( $( $param_name : $param_ty ),* ) -> $return_ty ; $request_variant ;
|
|
}
|
|
specialize_requests!{
|
|
$(
|
|
fn $t_func_name( $( $t_param_name : $t_param_ty ),* ) -> $t_return_ty ; $t_request_variant ;
|
|
)+
|
|
}
|
|
};
|
|
}
|
|
|
|
specialize_requests! {
|
|
fn request_authorities() -> Vec<AuthorityDiscoveryId>; Authorities;
|
|
fn request_validators() -> Vec<ValidatorId>; Validators;
|
|
fn request_validator_groups() -> (Vec<Vec<ValidatorIndex>>, GroupRotationInfo); ValidatorGroups;
|
|
fn request_availability_cores() -> Vec<CoreState>; AvailabilityCores;
|
|
fn request_persisted_validation_data(para_id: ParaId, assumption: OccupiedCoreAssumption) -> Option<PersistedValidationData>; PersistedValidationData;
|
|
fn request_session_index_for_child() -> SessionIndex; SessionIndexForChild;
|
|
fn request_validation_code(para_id: ParaId, assumption: OccupiedCoreAssumption) -> Option<ValidationCode>; ValidationCode;
|
|
fn request_candidate_pending_availability(para_id: ParaId) -> Option<CommittedCandidateReceipt>; CandidatePendingAvailability;
|
|
fn request_candidate_events() -> Vec<CandidateEvent>; CandidateEvents;
|
|
fn request_session_info(index: SessionIndex) -> Option<SessionInfo>; SessionInfo;
|
|
}
|
|
|
|
/// From the given set of validators, find the first key we can sign with, if any.
|
|
pub async fn signing_key(validators: &[ValidatorId], keystore: &SyncCryptoStorePtr)
|
|
-> Option<ValidatorId>
|
|
{
|
|
signing_key_and_index(validators, keystore).await.map(|(k, _)| k)
|
|
}
|
|
|
|
/// From the given set of validators, find the first key we can sign with, if any, and return it
|
|
/// along with the validator index.
|
|
pub async fn signing_key_and_index(validators: &[ValidatorId], keystore: &SyncCryptoStorePtr)
|
|
-> Option<(ValidatorId, ValidatorIndex)>
|
|
{
|
|
for (i, v) in validators.iter().enumerate() {
|
|
if CryptoStore::has_keys(&**keystore, &[(v.to_raw_vec(), ValidatorId::ID)]).await {
|
|
return Some((v.clone(), ValidatorIndex(i as _)));
|
|
}
|
|
}
|
|
None
|
|
}
|
|
|
|
/// Find the validator group the given validator index belongs to.
|
|
pub fn find_validator_group(groups: &[Vec<ValidatorIndex>], index: ValidatorIndex)
|
|
-> Option<GroupIndex>
|
|
{
|
|
groups.iter().enumerate().find_map(|(i, g)| if g.contains(&index) {
|
|
Some(GroupIndex(i as _))
|
|
} else {
|
|
None
|
|
})
|
|
}
|
|
|
|
/// Chooses a random subset of sqrt(v.len()), but at least `min` elements.
|
|
pub fn choose_random_sqrt_subset<T>(mut v: Vec<T>, min: usize) -> Vec<T> {
|
|
use rand::seq::SliceRandom as _;
|
|
let mut rng = rand::thread_rng();
|
|
v.shuffle(&mut rng);
|
|
|
|
let len = max_of_min_and_sqrt_len(v.len(), min);
|
|
v.truncate(len);
|
|
v
|
|
}
|
|
|
|
/// Returns bool with a probability of `max(len.sqrt(), min) / len`
|
|
/// being true.
|
|
pub fn gen_ratio_sqrt_subset(len: usize, min: usize) -> bool {
|
|
use rand::Rng as _;
|
|
let mut rng = rand::thread_rng();
|
|
let threshold = max_of_min_and_sqrt_len(len, min);
|
|
let n = rng.gen_range(0..len);
|
|
n < threshold
|
|
}
|
|
|
|
fn max_of_min_and_sqrt_len(len: usize, min: usize) -> usize {
|
|
let len_sqrt = (len as f64).sqrt() as usize;
|
|
std::cmp::max(min, len_sqrt)
|
|
}
|
|
|
|
/// Local validator information
|
|
///
|
|
/// It can be created if the local node is a validator in the context of a particular
|
|
/// relay chain block.
|
|
#[derive(Debug)]
|
|
pub struct Validator {
|
|
signing_context: SigningContext,
|
|
key: ValidatorId,
|
|
index: ValidatorIndex,
|
|
}
|
|
|
|
impl Validator {
|
|
/// Get a struct representing this node's validator if this node is in fact a validator in the context of the given block.
|
|
pub async fn new(
|
|
parent: Hash,
|
|
keystore: SyncCryptoStorePtr,
|
|
sender: &mut impl SubsystemSender,
|
|
) -> Result<Self, Error> {
|
|
// Note: request_validators and request_session_index_for_child do not and cannot
|
|
// run concurrently: they both have a mutable handle to the same sender.
|
|
// However, each of them returns a oneshot::Receiver, and those are resolved concurrently.
|
|
let (validators, session_index) = futures::try_join!(
|
|
request_validators(parent, sender).await,
|
|
request_session_index_for_child(parent, sender).await,
|
|
)?;
|
|
|
|
let signing_context = SigningContext {
|
|
session_index: session_index?,
|
|
parent_hash: parent,
|
|
};
|
|
|
|
let validators = validators?;
|
|
|
|
Self::construct(&validators, signing_context, keystore).await
|
|
}
|
|
|
|
/// Construct a validator instance without performing runtime fetches.
|
|
///
|
|
/// This can be useful if external code also needs the same data.
|
|
pub async fn construct(
|
|
validators: &[ValidatorId],
|
|
signing_context: SigningContext,
|
|
keystore: SyncCryptoStorePtr,
|
|
) -> Result<Self, Error> {
|
|
let (key, index) = signing_key_and_index(validators, &keystore)
|
|
.await
|
|
.ok_or(Error::NotAValidator)?;
|
|
|
|
Ok(Validator {
|
|
signing_context,
|
|
key,
|
|
index,
|
|
})
|
|
}
|
|
|
|
/// Get this validator's id.
|
|
pub fn id(&self) -> ValidatorId {
|
|
self.key.clone()
|
|
}
|
|
|
|
/// Get this validator's local index.
|
|
pub fn index(&self) -> ValidatorIndex {
|
|
self.index
|
|
}
|
|
|
|
/// Get the current signing context.
|
|
pub fn signing_context(&self) -> &SigningContext {
|
|
&self.signing_context
|
|
}
|
|
|
|
/// Sign a payload with this validator
|
|
pub async fn sign<Payload: EncodeAs<RealPayload>, RealPayload: Encode>(
|
|
&self,
|
|
keystore: SyncCryptoStorePtr,
|
|
payload: Payload,
|
|
) -> Result<Option<Signed<Payload, RealPayload>>, KeystoreError> {
|
|
Signed::sign(&keystore, payload, &self.signing_context, self.index, &self.key).await
|
|
}
|
|
}
|
|
|
|
struct AbortOnDrop(future::AbortHandle);
|
|
|
|
impl Drop for AbortOnDrop {
|
|
fn drop(&mut self) {
|
|
self.0.abort();
|
|
}
|
|
}
|
|
|
|
/// A JobHandle manages a particular job for a subsystem.
|
|
struct JobHandle<ToJob> {
|
|
_abort_handle: AbortOnDrop,
|
|
to_job: mpsc::Sender<ToJob>,
|
|
}
|
|
|
|
impl<ToJob> JobHandle<ToJob> {
|
|
/// Send a message to the job.
|
|
async fn send_msg(&mut self, msg: ToJob) -> Result<(), Error> {
|
|
self.to_job.send(msg).await.map_err(Into::into)
|
|
}
|
|
}
|
|
|
|
/// This module reexports Prometheus types and defines the [`Metrics`] trait.
|
|
pub mod metrics {
|
|
/// Reexport Substrate Prometheus types.
|
|
pub use substrate_prometheus_endpoint as prometheus;
|
|
|
|
|
|
/// Subsystem- or job-specific Prometheus metrics.
|
|
///
|
|
/// Usually implemented as a wrapper for `Option<ActualMetrics>`
|
|
/// to ensure `Default` bounds or as a dummy type ().
|
|
/// Prometheus metrics internally hold an `Arc` reference, so cloning them is fine.
|
|
pub trait Metrics: Default + Clone {
|
|
/// Try to register metrics in the Prometheus registry.
|
|
fn try_register(registry: &prometheus::Registry) -> Result<Self, prometheus::PrometheusError>;
|
|
|
|
/// Convenience method to register metrics in the optional Promethius registry.
|
|
///
|
|
/// If no registry is provided, returns `Default::default()`. Otherwise, returns the same
|
|
/// thing that `try_register` does.
|
|
fn register(registry: Option<&prometheus::Registry>) -> Result<Self, prometheus::PrometheusError> {
|
|
match registry {
|
|
None => Ok(Self::default()),
|
|
Some(registry) => Self::try_register(registry),
|
|
}
|
|
}
|
|
}
|
|
|
|
// dummy impl
|
|
impl Metrics for () {
|
|
fn try_register(_registry: &prometheus::Registry) -> Result<(), prometheus::PrometheusError> {
|
|
Ok(())
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Commands from a job to the broader subsystem.
|
|
pub enum FromJobCommand {
|
|
/// Spawn a child task on the executor.
|
|
Spawn(&'static str, Pin<Box<dyn Future<Output = ()> + Send>>),
|
|
/// Spawn a blocking child task on the executor's dedicated thread pool.
|
|
SpawnBlocking(&'static str, Pin<Box<dyn Future<Output = ()> + Send>>),
|
|
}
|
|
|
|
/// A sender for messages from jobs, as well as commands to the overseer.
|
|
#[derive(Clone)]
|
|
pub struct JobSender<S> {
|
|
sender: S,
|
|
from_job: mpsc::Sender<FromJobCommand>,
|
|
}
|
|
|
|
impl<S: SubsystemSender> JobSender<S> {
|
|
/// Get access to the underlying subsystem sender.
|
|
pub fn subsystem_sender(&mut self) -> &mut S {
|
|
&mut self.sender
|
|
}
|
|
|
|
/// Send a direct message to some other `Subsystem`, routed based on message type.
|
|
pub async fn send_message(&mut self, msg: AllMessages) {
|
|
self.sender.send_message(msg).await
|
|
}
|
|
|
|
/// Send multiple direct messages to other `Subsystem`s, routed based on message type.
|
|
pub async fn send_messages<T>(&mut self, msgs: T)
|
|
where T: IntoIterator<Item = AllMessages> + Send, T::IntoIter: Send
|
|
{
|
|
self.sender.send_messages(msgs).await
|
|
}
|
|
|
|
|
|
/// Send a message onto the unbounded queue of some other `Subsystem`, routed based on message
|
|
/// type.
|
|
///
|
|
/// This function should be used only when there is some other bounding factor on the messages
|
|
/// sent with it. Otherwise, it risks a memory leak.
|
|
pub fn send_unbounded_message(&mut self, msg: AllMessages) {
|
|
self.sender.send_unbounded_message(msg)
|
|
}
|
|
|
|
/// Send a command to the subsystem, to be relayed onwards to the overseer.
|
|
pub async fn send_command(&mut self, msg: FromJobCommand) -> Result<(), mpsc::SendError> {
|
|
self.from_job.send(msg).await
|
|
}
|
|
}
|
|
|
|
#[async_trait::async_trait]
|
|
impl<S: SubsystemSender> SubsystemSender for JobSender<S> {
|
|
async fn send_message(&mut self, msg: AllMessages) {
|
|
self.sender.send_message(msg).await
|
|
}
|
|
|
|
async fn send_messages<T>(&mut self, msgs: T)
|
|
where T: IntoIterator<Item = AllMessages> + Send, T::IntoIter: Send
|
|
{
|
|
self.sender.send_messages(msgs).await
|
|
}
|
|
|
|
fn send_unbounded_message(&mut self, msg: AllMessages) {
|
|
self.sender.send_unbounded_message(msg)
|
|
}
|
|
}
|
|
|
|
impl fmt::Debug for FromJobCommand {
|
|
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
|
match self {
|
|
Self::Spawn(name, _) => write!(fmt, "FromJobCommand::Spawn({})", name),
|
|
Self::SpawnBlocking(name, _) => write!(fmt, "FromJobCommand::SpawnBlocking({})", name),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// This trait governs jobs.
|
|
///
|
|
/// Jobs are instantiated and killed automatically on appropriate overseer messages.
|
|
/// Other messages are passed along to and from the job via the overseer to other subsystems.
|
|
pub trait JobTrait: Unpin + Sized {
|
|
/// Message type used to send messages to the job.
|
|
type ToJob: 'static + BoundToRelayParent + Send;
|
|
/// Job runtime error.
|
|
type Error: 'static + std::error::Error + Send;
|
|
/// Extra arguments this job needs to run properly.
|
|
///
|
|
/// If no extra information is needed, it is perfectly acceptable to set it to `()`.
|
|
type RunArgs: 'static + Send;
|
|
/// Subsystem-specific Prometheus metrics.
|
|
///
|
|
/// Jobs spawned by one subsystem should share the same
|
|
/// instance of metrics (use `.clone()`).
|
|
/// The `delegate_subsystem!` macro should take care of this.
|
|
type Metrics: 'static + metrics::Metrics + Send;
|
|
|
|
/// Name of the job, i.e. `CandidateBackingJob`
|
|
const NAME: &'static str;
|
|
|
|
/// Run a job for the given relay `parent`.
|
|
///
|
|
/// The job should be ended when `receiver` returns `None`.
|
|
fn run<S: SubsystemSender>(
|
|
parent: Hash,
|
|
span: Arc<jaeger::Span>,
|
|
run_args: Self::RunArgs,
|
|
metrics: Self::Metrics,
|
|
receiver: mpsc::Receiver<Self::ToJob>,
|
|
sender: JobSender<S>,
|
|
) -> Pin<Box<dyn Future<Output = Result<(), Self::Error>> + Send>>;
|
|
}
|
|
|
|
/// Error which can be returned by the jobs manager
|
|
///
|
|
/// Wraps the utility error type and the job-specific error
|
|
#[derive(Debug, Error)]
|
|
pub enum JobsError<JobError: std::fmt::Debug + std::error::Error + 'static> {
|
|
/// utility error
|
|
#[error("Utility")]
|
|
Utility(#[source] Error),
|
|
/// internal job error
|
|
#[error("Internal")]
|
|
Job(#[source] JobError),
|
|
}
|
|
|
|
/// Jobs manager for a subsystem
|
|
///
|
|
/// - Spawns new jobs for a given relay-parent on demand.
|
|
/// - Closes old jobs for a given relay-parent on demand.
|
|
/// - Dispatches messages to the appropriate job for a given relay-parent.
|
|
/// - When dropped, aborts all remaining jobs.
|
|
/// - implements `Stream<Item=FromJobCommand>`, collecting all messages from subordinate jobs.
|
|
#[pin_project]
|
|
struct Jobs<Spawner, ToJob> {
|
|
spawner: Spawner,
|
|
running: HashMap<Hash, JobHandle<ToJob>>,
|
|
outgoing_msgs: StreamUnordered<mpsc::Receiver<FromJobCommand>>,
|
|
}
|
|
|
|
impl<Spawner: SpawnNamed, ToJob: Send + 'static> Jobs<Spawner, ToJob> {
|
|
/// Create a new Jobs manager which handles spawning appropriate jobs.
|
|
pub fn new(spawner: Spawner) -> Self {
|
|
Self {
|
|
spawner,
|
|
running: HashMap::new(),
|
|
outgoing_msgs: StreamUnordered::new(),
|
|
}
|
|
}
|
|
|
|
/// Spawn a new job for this `parent_hash`, with whatever args are appropriate.
|
|
fn spawn_job<Job, Sender>(
|
|
&mut self,
|
|
parent_hash: Hash,
|
|
span: Arc<jaeger::Span>,
|
|
run_args: Job::RunArgs,
|
|
metrics: Job::Metrics,
|
|
sender: Sender,
|
|
)
|
|
where Job: JobTrait<ToJob = ToJob>, Sender: SubsystemSender,
|
|
{
|
|
let (to_job_tx, to_job_rx) = mpsc::channel(JOB_CHANNEL_CAPACITY);
|
|
let (from_job_tx, from_job_rx) = mpsc::channel(JOB_CHANNEL_CAPACITY);
|
|
|
|
let (future, abort_handle) = future::abortable(async move {
|
|
if let Err(e) = Job::run(
|
|
parent_hash,
|
|
span,
|
|
run_args,
|
|
metrics,
|
|
to_job_rx,
|
|
JobSender {
|
|
sender,
|
|
from_job: from_job_tx,
|
|
},
|
|
).await {
|
|
tracing::error!(
|
|
job = Job::NAME,
|
|
parent_hash = %parent_hash,
|
|
err = ?e,
|
|
"job finished with an error",
|
|
);
|
|
|
|
return Err(e);
|
|
}
|
|
|
|
Ok(())
|
|
});
|
|
|
|
self.spawner.spawn(Job::NAME, future.map(drop).boxed());
|
|
self.outgoing_msgs.push(from_job_rx);
|
|
|
|
let handle = JobHandle {
|
|
_abort_handle: AbortOnDrop(abort_handle),
|
|
to_job: to_job_tx,
|
|
};
|
|
|
|
self.running.insert(parent_hash, handle);
|
|
}
|
|
|
|
/// Stop the job associated with this `parent_hash`.
|
|
pub async fn stop_job(&mut self, parent_hash: Hash) {
|
|
self.running.remove(&parent_hash);
|
|
}
|
|
|
|
/// Send a message to the appropriate job for this `parent_hash`.
|
|
async fn send_msg(&mut self, parent_hash: Hash, msg: ToJob) {
|
|
if let Entry::Occupied(mut job) = self.running.entry(parent_hash) {
|
|
if job.get_mut().send_msg(msg).await.is_err() {
|
|
job.remove();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<Spawner, ToJob> Stream for Jobs<Spawner, ToJob>
|
|
where
|
|
Spawner: SpawnNamed,
|
|
{
|
|
type Item = FromJobCommand;
|
|
|
|
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
|
|
loop {
|
|
match Pin::new(&mut self.outgoing_msgs).poll_next(cx) {
|
|
Poll::Pending => return Poll::Pending,
|
|
Poll::Ready(r) => match r.map(|v| v.0) {
|
|
Some(StreamYield::Item(msg)) => return Poll::Ready(Some(msg)),
|
|
// If a job is finished, rerun the loop
|
|
Some(StreamYield::Finished(_)) => continue,
|
|
// Don't end if there are no jobs running
|
|
None => return Poll::Pending,
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<Spawner, ToJob> stream::FusedStream for Jobs<Spawner, ToJob>
|
|
where
|
|
Spawner: SpawnNamed,
|
|
{
|
|
fn is_terminated(&self) -> bool {
|
|
false
|
|
}
|
|
}
|
|
|
|
/// Parameters to a job subsystem.
|
|
struct JobSubsystemParams<Spawner, RunArgs, Metrics> {
|
|
/// A spawner for sub-tasks.
|
|
spawner: Spawner,
|
|
/// Arguments to each job.
|
|
run_args: RunArgs,
|
|
/// Metrics for the subsystem.
|
|
metrics: Metrics,
|
|
}
|
|
|
|
/// A subsystem which wraps jobs.
|
|
///
|
|
/// Conceptually, this is very simple: it just loops forever.
|
|
///
|
|
/// - On incoming overseer messages, it starts or stops jobs as appropriate.
|
|
/// - On other incoming messages, if they can be converted into Job::ToJob and
|
|
/// include a hash, then they're forwarded to the appropriate individual job.
|
|
/// - On outgoing messages from the jobs, it forwards them to the overseer.
|
|
pub struct JobSubsystem<Job: JobTrait, Spawner> {
|
|
params: JobSubsystemParams<Spawner, Job::RunArgs, Job::Metrics>,
|
|
_marker: std::marker::PhantomData<Job>,
|
|
}
|
|
|
|
impl<Job: JobTrait, Spawner> JobSubsystem<Job, Spawner> {
|
|
/// Create a new `JobSubsystem`.
|
|
pub fn new(spawner: Spawner, run_args: Job::RunArgs, metrics: Job::Metrics) -> Self {
|
|
JobSubsystem {
|
|
params: JobSubsystemParams {
|
|
spawner,
|
|
run_args,
|
|
metrics,
|
|
},
|
|
_marker: std::marker::PhantomData,
|
|
}
|
|
}
|
|
|
|
/// Run the subsystem to completion.
|
|
pub async fn run<Context>(self, mut ctx: Context)
|
|
where
|
|
Spawner: SpawnNamed + Send + Clone + Unpin + 'static,
|
|
Context: SubsystemContext,
|
|
Job: 'static + JobTrait + Send,
|
|
Job::RunArgs: Clone + Sync,
|
|
Job::ToJob: From<<Context as SubsystemContext>::Message> + Sync,
|
|
Job::Metrics: Sync,
|
|
{
|
|
let JobSubsystem {
|
|
params: JobSubsystemParams {
|
|
spawner,
|
|
run_args,
|
|
metrics,
|
|
},
|
|
..
|
|
} = self;
|
|
|
|
let mut jobs = Jobs::new(spawner);
|
|
|
|
loop {
|
|
select! {
|
|
incoming = ctx.recv().fuse() => {
|
|
match incoming {
|
|
Ok(FromOverseer::Signal(OverseerSignal::ActiveLeaves(ActiveLeavesUpdate {
|
|
activated,
|
|
deactivated,
|
|
}))) => {
|
|
for activated in activated {
|
|
let sender: Context::Sender = ctx.sender().clone();
|
|
jobs.spawn_job::<Job, _>(
|
|
activated.hash,
|
|
activated.span,
|
|
run_args.clone(),
|
|
metrics.clone(),
|
|
sender,
|
|
)
|
|
}
|
|
|
|
for hash in deactivated {
|
|
jobs.stop_job(hash).await;
|
|
}
|
|
}
|
|
Ok(FromOverseer::Signal(OverseerSignal::Conclude)) => {
|
|
jobs.running.clear();
|
|
break;
|
|
}
|
|
Ok(FromOverseer::Signal(OverseerSignal::BlockFinalized(..))) => {}
|
|
Ok(FromOverseer::Communication { msg }) => {
|
|
if let Ok(to_job) = <Job::ToJob>::try_from(msg) {
|
|
jobs.send_msg(to_job.relay_parent(), to_job).await;
|
|
}
|
|
}
|
|
Err(err) => {
|
|
tracing::error!(
|
|
job = Job::NAME,
|
|
err = ?err,
|
|
"error receiving message from subsystem context for job",
|
|
);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
outgoing = jobs.next() => {
|
|
let res = match outgoing.expect("the Jobs stream never ends; qed") {
|
|
FromJobCommand::Spawn(name, task) => ctx.spawn(name, task).await,
|
|
FromJobCommand::SpawnBlocking(name, task)
|
|
=> ctx.spawn_blocking(name, task).await,
|
|
};
|
|
|
|
if let Err(e) = res {
|
|
tracing::warn!(err = ?e, "failed to handle command from job");
|
|
}
|
|
}
|
|
complete => break,
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<Context, Job, Spawner> Subsystem<Context> for JobSubsystem<Job, Spawner>
|
|
where
|
|
Spawner: SpawnNamed + Send + Clone + Unpin + 'static,
|
|
Context: SubsystemContext,
|
|
Job: 'static + JobTrait + Send,
|
|
Job::RunArgs: Clone + Sync,
|
|
Job::ToJob: From<<Context as SubsystemContext>::Message> + Sync,
|
|
Job::Metrics: Sync,
|
|
{
|
|
fn start(self, ctx: Context) -> SpawnedSubsystem {
|
|
let future = Box::pin(async move {
|
|
self.run(ctx).await;
|
|
Ok(())
|
|
});
|
|
|
|
SpawnedSubsystem {
|
|
name: Job::NAME.strip_suffix("Job").unwrap_or(Job::NAME),
|
|
future,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// A future that wraps another future with a `Delay` allowing for time-limited futures.
|
|
#[pin_project]
|
|
pub struct Timeout<F: Future> {
|
|
#[pin]
|
|
future: F,
|
|
#[pin]
|
|
delay: Delay,
|
|
}
|
|
|
|
/// Extends `Future` to allow time-limited futures.
|
|
pub trait TimeoutExt: Future {
|
|
/// Adds a timeout of `duration` to the given `Future`.
|
|
/// Returns a new `Future`.
|
|
fn timeout(self, duration: Duration) -> Timeout<Self>
|
|
where
|
|
Self: Sized,
|
|
{
|
|
Timeout {
|
|
future: self,
|
|
delay: Delay::new(duration),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<F: Future> TimeoutExt for F {}
|
|
|
|
impl<F: Future> Future for Timeout<F> {
|
|
type Output = Option<F::Output>;
|
|
|
|
fn poll(self: Pin<&mut Self>, ctx: &mut Context) -> Poll<Self::Output> {
|
|
let this = self.project();
|
|
|
|
if this.delay.poll(ctx).is_ready() {
|
|
return Poll::Ready(None);
|
|
}
|
|
|
|
if let Poll::Ready(output) = this.future.poll(ctx) {
|
|
return Poll::Ready(Some(output));
|
|
}
|
|
|
|
Poll::Pending
|
|
}
|
|
}
|
|
|
|
|
|
#[derive(Copy, Clone)]
|
|
enum MetronomeState {
|
|
Snooze,
|
|
SetAlarm,
|
|
}
|
|
|
|
/// Create a stream of ticks with a defined cycle duration.
|
|
pub struct Metronome {
|
|
delay: Delay,
|
|
period: Duration,
|
|
state: MetronomeState,
|
|
}
|
|
|
|
impl Metronome
|
|
{
|
|
/// Create a new metronome source with a defined cycle duration.
|
|
pub fn new(cycle: Duration) -> Self {
|
|
let period = cycle.into();
|
|
Self {
|
|
period,
|
|
delay: Delay::new(period),
|
|
state: MetronomeState::Snooze,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl futures::Stream for Metronome
|
|
{
|
|
type Item = ();
|
|
fn poll_next(
|
|
mut self: Pin<&mut Self>,
|
|
cx: &mut Context<'_>
|
|
) -> Poll<Option<Self::Item>> {
|
|
loop {
|
|
match self.state {
|
|
MetronomeState::SetAlarm => {
|
|
let val = self.period.clone();
|
|
self.delay.reset(val);
|
|
self.state = MetronomeState::Snooze;
|
|
}
|
|
MetronomeState::Snooze => {
|
|
if !Pin::new(&mut self.delay).poll(cx).is_ready() {
|
|
break
|
|
}
|
|
self.state = MetronomeState::SetAlarm;
|
|
return Poll::Ready(Some(()));
|
|
}
|
|
}
|
|
}
|
|
Poll::Pending
|
|
}
|
|
}
|