// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! The scheduler module for parachains and parathreads.
//!
//! This module is responsible for two main tasks:
//! - Partitioning validators into groups and assigning groups to parachains and parathreads
//! - Scheduling parachains and parathreads
//!
//! It aims to achieve these tasks with these goals in mind:
//! - It should be possible to know at least a block ahead-of-time, ideally more, which validators
//! are going to be assigned to which parachains.
//! - Parachains that have a candidate pending availability in this fork of the chain should not be
//! assigned.
//! - Validator assignments should not be gameable. Malicious cartels should not be able to
//! manipulate the scheduler to assign themselves as desired.
//! - High or close to optimal throughput of parachains and parathreads. Work among validator groups
//! should be balanced.
//!
//! The Scheduler manages resource allocation using the concept of "Availability Cores".
//! There will be one availability core for each parachain, and a fixed number of cores
//! used for multiplexing parathreads. Validators will be partitioned into groups, with the same
//! number of groups as availability cores. Validator groups will be assigned to different
//! availability cores over time.
use crate::{configuration, initializer::SessionChangeNotification, paras};
use frame_support::pallet_prelude::*;
use frame_system::pallet_prelude::BlockNumberFor;
pub use polkadot_core_primitives::v2::BlockNumber;
use primitives::{
CoreIndex, GroupIndex, GroupRotationInfo, Id as ParaId, ScheduledCore, ValidatorIndex,
};
use sp_runtime::traits::One;
use sp_std::{
collections::{btree_map::BTreeMap, vec_deque::VecDeque},
prelude::*,
};
pub mod common;
use common::{Assignment, AssignmentProvider, AssignmentProviderConfig};
pub use pallet::*;
#[cfg(test)]
mod tests;
/// Log target passed to the `log` macros emitted by this module.
const LOG_TARGET: &str = "runtime::parachains::scheduler";
pub mod migration;
#[frame_support::pallet]
pub mod pallet {
use super::*;
/// Storage version declared for this pallet through `#[pallet::storage_version]` below.
const STORAGE_VERSION: StorageVersion = StorageVersion::new(1);
/// The scheduler pallet type.
#[pallet::pallet]
#[pallet::without_storage_info]
#[pallet::storage_version(STORAGE_VERSION)]
pub struct Pallet(_);
/// Configuration of the scheduler pallet. It builds on the `frame_system`, `configuration` and
/// `paras` pallet configurations.
#[pallet::config]
pub trait Config: frame_system::Config + configuration::Config + paras::Config {
/// The provider of assignments. Among other things it is asked for the number of cores of a
/// session (`session_core_count`) when a new session starts.
type AssignmentProvider: AssignmentProvider>;
}
/// All the validator groups. One for each core. Indices are into `ActiveValidators` - not the
/// broader set of Polkadot validators, but instead just the subset used for parachains during
/// this session.
///
/// Bound: The number of cores is the sum of the numbers of parachains and parathread
/// multiplexers. Reasonably, 100-1000. The dominant factor is the number of validators: safe
/// upper bound at 10k.
#[pallet::storage]
#[pallet::getter(fn validator_groups)]
pub(crate) type ValidatorGroups = StorageValue<_, Vec>, ValueQuery>;
/// One entry for each availability core. An entry is `CoreOccupied::Free` if the core is not
/// currently occupied and `CoreOccupied::Paras` if a para occupies it.
/// The i'th parachain belongs to the i'th core, with the remaining cores all being
/// parathread-multiplexers.
///
/// Bounded by the maximum of either of these two values:
/// * The number of parachains and parathread multiplexers
/// * The number of validators divided by `configuration.max_validators_per_core`.
#[pallet::storage]
#[pallet::getter(fn availability_cores)]
pub(crate) type AvailabilityCores =
StorageValue<_, Vec>>, ValueQuery>;
/// Representation of a core in `AvailabilityCores`.
///
/// This is not to be confused with `CoreState` which is an enriched variant of this and exposed
/// to the node side. It also provides information about scheduled/upcoming assignments for
/// example and is computed on the fly in the `availability_cores` runtime call.
#[derive(Clone, Encode, Decode, TypeInfo, RuntimeDebug)]
#[cfg_attr(feature = "std", derive(PartialEq))]
pub enum CoreOccupied {
/// No candidate is waiting availability on this core right now (the core is not occupied).
Free,
/// A para is currently waiting for availability/inclusion on this core.
Paras(ParasEntry),
}
impl CoreOccupied {
/// Returns `true` if this is the `Free` variant, i.e. the core is not occupied.
pub fn is_free(&self) -> bool {
matches!(self, Self::Free)
}
}
/// Reasons a core might be freed.
#[derive(Clone, Copy)]
pub enum FreedReason {
/// The core's work concluded and the parablock assigned to it is considered available.
Concluded,
/// The core's work timed out.
TimedOut,
}
/// The block number where the session start occurred. Used to track how many group rotations
/// have occurred.
///
/// Note that in the context of parachains modules the session change is signaled during
/// the block and enacted at the end of the block (at the finalization stage, to be exact).
/// Thus for all intents and purposes the effect of the session change is observed at the
/// block following the session change, block number of which we save in this storage value.
#[pallet::storage]
#[pallet::getter(fn session_start_block)]
pub(crate) type SessionStartBlock = StorageValue<_, BlockNumberFor, ValueQuery>;
/// One entry for each availability core. The `VecDeque` represents the assignments to be
/// scheduled on that core. `None` is used to signal to not schedule the next para of the core
/// as there is one currently being scheduled. Not using `None` here would overwrite the
/// `CoreState` in the runtime API. The value contained here will not be valid after the end of
/// a block. Runtime APIs should be used to determine scheduled cores for the upcoming block.
#[pallet::storage]
#[pallet::getter(fn claimqueue)]
pub(crate) type ClaimQueue = StorageValue<
_,
BTreeMap>>>>,
ValueQuery,
>;
/// Assignments as tracked in the claim queue.
#[derive(Clone, Encode, Decode, TypeInfo, PartialEq, RuntimeDebug)]
pub struct ParasEntry {
/// The underlying `Assignment`
pub assignment: Assignment,
/// The number of times the entry has timed out in availability already.
pub availability_timeouts: u32,
/// The block height until this entry needs to be backed.
///
/// If missed the entry will be removed from the claim queue without ever having occupied
/// the core.
pub ttl: N,
}
impl ParasEntry {
/// Return `Id` from the underlying `Assignment`.
pub fn para_id(&self) -> ParaId {
self.assignment.para_id
}
/// Create a new `ParasEntry` for `assignment` with zero availability timeouts.
///
/// The value passed as `now` is stored unchanged as the entry's `ttl`, so callers must pass
/// the block height the entry should live until rather than a plain "current block".
pub fn new(assignment: Assignment, now: N) -> Self {
ParasEntry { assignment, availability_timeouts: 0, ttl: now }
}
}
/// How a core is mapped to a backing group and a `ParaId`
#[derive(Clone, Encode, Decode, PartialEq, TypeInfo)]
#[cfg_attr(feature = "std", derive(Debug))]
pub struct CoreAssignment {
/// The core that is assigned.
pub core: CoreIndex,
/// The para id and accompanying information needed to collate and back a parablock.
pub paras_entry: ParasEntry,
}
impl CoreAssignment {
/// Returns the [`ParaId`] of the assignment.
pub fn para_id(&self) -> ParaId {
self.paras_entry.para_id()
}
/// Consumes the assignment and returns its inner [`ParasEntry`].
pub fn to_paras_entry(self) -> ParasEntry {
self.paras_entry
}
}
/// Availability timeout status of a core.
pub(crate) struct AvailabilityTimeoutStatus {
/// Is the core already timed out?
///
/// If this is true the core will be freed at this block.
pub timed_out: bool,
/// When does this core timeout.
///
/// The block number the core times out. If `timed_out` is true, this will correspond to
/// now (current block number).
pub live_until: BlockNumber,
}
}
/// Position of a claim inside the claim queue of a core, as reported by `occupied`.
type PositionInClaimqueue = u32;
/// Timed-out cores as returned by `free_cores`: keyed by the freed core index, holding a clone of
/// the entry that occupied the core.
type TimedoutParas = BTreeMap>>;
/// Concluded cores as returned by `free_cores`: keyed by the freed core index, holding the id of
/// the para that occupied the core.
type ConcludedParas = BTreeMap;
impl Pallet {
/// Called by the initializer to initialize the scheduler pallet.
///
/// Currently a no-op: the block number argument is unused and zero weight is returned.
pub(crate) fn initializer_initialize(_now: BlockNumberFor) -> Weight {
Weight::zero()
}
/// Called by the initializer to finalize the scheduler pallet.
///
/// Currently a no-op.
pub(crate) fn initializer_finalize() {}
/// Called before the initializer notifies of a new session.
///
/// Runs two helpers, in this order: one for the claim queue items and one for the occupied
/// cores.
// NOTE(review): both helpers are defined outside this view; judging by their names they hand
// pending work back to the `AssignmentProvider` before the session changes - confirm, and
// confirm whether the call order matters to the provider.
pub(crate) fn pre_new_session() {
Self::push_claimqueue_items_to_assignment_provider();
Self::push_occupied_cores_to_assignment_provider();
}
/// Called by the initializer to note that a new session has started.
///
/// Using the validators and the new configuration from `notification`, this:
/// 1. computes the number of cores for the session,
/// 2. resizes `AvailabilityCores` to that number, filling any added slots with
///    `CoreOccupied::Free`,
/// 3. partitions the validators into one group per core and stores the result in
///    `ValidatorGroups` (an empty list if there are no cores or no validators),
/// 4. records the block following the current one as the start of the session.
pub(crate) fn initializer_on_new_session(
notification: &SessionChangeNotification>,
) {
let SessionChangeNotification { validators, new_config, .. } = notification;
let config = new_config;
// The core count is the larger of what the assignment provider asks for and the number of
// cores implied by `max_validators_per_core` (integer division; treated as 0 when the
// setting is `None` or zero).
let n_cores = core::cmp::max(
T::AssignmentProvider::session_core_count(),
match config.max_validators_per_core {
Some(x) if x != 0 => validators.len() as u32 / x,
_ => 0,
},
);
// Entries that already exist within the new length are left untouched by `resize`.
AvailabilityCores::::mutate(|cores| {
cores.resize(n_cores as _, CoreOccupied::Free);
});
// Partition the validators into `n_cores` groups. The `n_cores == 0` check also guards the
// divisions below against a zero divisor.
if n_cores == 0 || validators.is_empty() {
ValidatorGroups::::set(Vec::new());
} else {
// Every group gets `group_base_size` validators; the first `n_larger_groups` groups get
// one extra so that all validators are assigned.
let group_base_size = validators.len() / n_cores as usize;
let n_larger_groups = validators.len() % n_cores as usize;
// Groups contain indices into the validators from the session change notification,
// which are already shuffled.
let mut groups: Vec> = Vec::new();
// The larger groups come first and cover the lowest indices contiguously.
for i in 0..n_larger_groups {
let offset = (group_base_size + 1) * i;
groups.push(
(0..group_base_size + 1)
.map(|j| offset + j)
.map(|j| ValidatorIndex(j as _))
.collect(),
);
}
// The remaining groups continue right after the indices used by the larger groups.
for i in 0..(n_cores as usize - n_larger_groups) {
let offset = (n_larger_groups * (group_base_size + 1)) + (i * group_base_size);
groups.push(
(0..group_base_size)
.map(|j| offset + j)
.map(|j| ValidatorIndex(j as _))
.collect(),
);
}
ValidatorGroups::::set(groups);
}
// The session change takes effect in the next block, hence the `+ 1`.
// NOTE(review): the storage item written here is presumably `SessionStartBlock` - confirm.
let now = >::block_number() + One::one();
>::set(now);
}
/// Free unassigned cores. Provide a list of cores that should be considered newly-freed along
/// with the reason for them being freed. Returns a tuple of concluded and timedout paras.
///
/// `just_freed_cores` yields `(core index, reason)` pairs. Pairs whose index is not below the
/// current number of availability cores are ignored. Every remaining core is set to
/// `CoreOccupied::Free`; if it was occupied, its para id (reason `Concluded`) or a clone of
/// its entry (reason `TimedOut`) is recorded in the matching returned map under that core index.
fn free_cores(
just_freed_cores: impl IntoIterator,
) -> (ConcludedParas, TimedoutParas) {
let mut timedout_paras: BTreeMap>> =
BTreeMap::new();
let mut concluded_paras = BTreeMap::new();
AvailabilityCores::::mutate(|cores| {
let c_len = cores.len();
just_freed_cores
.into_iter()
// Skip out-of-range indices so the indexing below cannot panic.
.filter(|(freed_index, _)| (freed_index.0 as usize) < c_len)
.for_each(|(freed_index, freed_reason)| {
match &cores[freed_index.0 as usize] {
// Nothing to report for a core that was already free.
CoreOccupied::Free => {},
CoreOccupied::Paras(entry) => {
match freed_reason {
FreedReason::Concluded => {
concluded_paras.insert(freed_index, entry.para_id());
},
FreedReason::TimedOut => {
timedout_paras.insert(freed_index, entry.clone());
},
};
},
};
// The core is freed regardless of its previous state.
cores[freed_index.0 as usize] = CoreOccupied::Free;
})
});
(concluded_paras, timedout_paras)
}
/// Note that the given cores have become occupied. Update the claimqueue accordingly.
///
/// For every `(core index, para id)` pair in `now_occupied` the matching claim is removed from
/// the claim queue and the removed entry is stored as the occupant of that core. Pairs for
/// which the removal fails are logged at debug level and otherwise skipped. Afterwards expired
/// claims are dropped from the claim queue and the updated cores are written back to
/// `AvailabilityCores`.
///
/// Returns, for each core whose claim was removed successfully, the position the claim had in
/// the claim queue.
pub(crate) fn occupied(
now_occupied: BTreeMap,
) -> BTreeMap {
// Work on a local copy; it is written back to storage at the end of this function.
let mut availability_cores = AvailabilityCores::::get();
log::debug!(target: LOG_TARGET, "[occupied] now_occupied {:?}", now_occupied);
let pos_mapping: BTreeMap = now_occupied
.iter()
// `flat_map` over an `Option` keeps only the `Some` results.
.flat_map(|(core_idx, para_id)| {
match Self::remove_from_claimqueue(*core_idx, *para_id) {
Err(e) => {
log::debug!(
target: LOG_TARGET,
"[occupied] error on remove_from_claimqueue {}",
e
);
None
},
Ok((pos_in_claimqueue, pe)) => {
// is this correct?
// NOTE(review): this indexing panics if `core_idx` is not below
// `availability_cores.len()`; nothing in this function checks that. Confirm
// that `remove_from_claimqueue` can only succeed for in-range cores.
availability_cores[core_idx.0 as usize] = CoreOccupied::Paras(pe);
Some((*core_idx, pos_in_claimqueue))
},
}
})
.collect();
// Drop expired claims after processing now_occupied.
// NOTE(review): the helper reads `AvailabilityCores` from storage itself, and it runs before
// the updated copy is written back below, so it sees the pre-update cores - confirm this is
// intended.
Self::drop_expired_claims_from_claimqueue();
AvailabilityCores::::set(availability_cores);
pos_mapping
}
/// Iterates through every element in all claim queues and tries to add new assignments from the
/// `AssignmentProvider`. A claim is considered expired if its `ttl` field is lower than the
/// current block height.
fn drop_expired_claims_from_claimqueue() {
let now = >::block_number();
let availability_cores = AvailabilityCores::::get();
ClaimQueue::::mutate(|cq| {
for (idx, _) in (0u32..).zip(availability_cores) {
let core_idx = CoreIndex(idx);
if let Some(core_claimqueue) = cq.get_mut(&core_idx) {
let mut dropped_claims: Vec