638 lines
18 KiB
Rust
638 lines
18 KiB
Rust
// Copyright (C) Parity Technologies (UK) Ltd.
|
|
// This file is part of Pezkuwi.
|
|
|
|
// Pezkuwi is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
|
|
// Pezkuwi is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
//! The collation generation subsystem is the interface between pezkuwi and the collators.
|
|
//!
|
|
//! # Protocol
|
|
//!
|
|
//! On every `ActiveLeavesUpdate`:
|
|
//!
|
|
//! * If there is no collation generation config, ignore.
|
|
//! * Otherwise, for each `activated` head in the update:
|
|
//! * Determine if the para is scheduled on any core by fetching the claim queue from the
|
|
//! Runtime API.
|
|
//! * Use the Runtime API subsystem to fetch the full validation data.
|
|
//! * Invoke the `collator`, and use its outputs to produce a
|
|
//! [`pezkuwi_primitives::CandidateReceiptV2`].
|
|
//! * Dispatch a [`CollatorProtocolMessage::DistributeCollation`]`(receipt, pov)`.
|
|
|
|
#![deny(missing_docs)]
|
|
|
|
use codec::Encode;
|
|
use error::{Error, Result};
|
|
use futures::{channel::oneshot, future::FutureExt, select};
|
|
use pezkuwi_pez_node_primitives::{
|
|
AvailableData, Collation, CollationGenerationConfig, CollationSecondedSignal, PoV,
|
|
SubmitCollationParams,
|
|
};
|
|
use pezkuwi_node_subsystem::{
|
|
messages::{CollationGenerationMessage, CollatorProtocolMessage, RuntimeApiMessage},
|
|
overseer, ActiveLeavesUpdate, FromOrchestra, OverseerSignal, SpawnedSubsystem,
|
|
SubsystemContext, SubsystemError, SubsystemResult, SubsystemSender,
|
|
};
|
|
use pezkuwi_node_subsystem_util::{
|
|
request_claim_queue, request_persisted_validation_data, request_session_index_for_child,
|
|
request_validation_code_hash, request_validators, runtime::ClaimQueueSnapshot,
|
|
};
|
|
use pezkuwi_primitives::{
|
|
transpose_claim_queue, CandidateCommitments, CandidateDescriptorV2,
|
|
CommittedCandidateReceiptV2, CoreIndex, Hash, Id as ParaId, OccupiedCoreAssumption,
|
|
PersistedValidationData, SessionIndex, TransposedClaimQueue, ValidationCodeHash,
|
|
};
|
|
use schnellru::{ByLength, LruMap};
|
|
use std::{collections::HashSet, sync::Arc};
|
|
|
|
mod error;
|
|
|
|
#[cfg(test)]
|
|
mod tests;
|
|
|
|
mod metrics;
|
|
use self::metrics::Metrics;
|
|
|
|
/// Log target attached to every `gum` message emitted from this file.
const LOG_TARGET: &str = "teyrchain::collation-generation";
|
|
|
|
/// Collation Generation Subsystem
|
|
pub struct CollationGenerationSubsystem {
|
|
config: Option<Arc<CollationGenerationConfig>>,
|
|
session_info_cache: SessionInfoCache,
|
|
metrics: Metrics,
|
|
}
|
|
|
|
#[overseer::contextbounds(CollationGeneration, prefix = self::overseer)]
|
|
impl CollationGenerationSubsystem {
|
|
/// Create a new instance of the `CollationGenerationSubsystem`.
|
|
pub fn new(metrics: Metrics) -> Self {
|
|
Self { config: None, metrics, session_info_cache: SessionInfoCache::new() }
|
|
}
|
|
|
|
/// Run this subsystem
|
|
///
|
|
/// Conceptually, this is very simple: it just loops forever.
|
|
///
|
|
/// - On incoming overseer messages, it starts or stops jobs as appropriate.
|
|
/// - On other incoming messages, if they can be converted into `Job::ToJob` and include a hash,
|
|
/// then they're forwarded to the appropriate individual job.
|
|
/// - On outgoing messages from the jobs, it forwards them to the overseer.
|
|
///
|
|
/// If `err_tx` is not `None`, errors are forwarded onto that channel as they occur.
|
|
/// Otherwise, most are logged and then discarded.
|
|
async fn run<Context>(mut self, mut ctx: Context) {
|
|
loop {
|
|
select! {
|
|
incoming = ctx.recv().fuse() => {
|
|
if self.handle_incoming::<Context>(incoming, &mut ctx).await {
|
|
break;
|
|
}
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
// handle an incoming message. return true if we should break afterwards.
|
|
// note: this doesn't strictly need to be a separate function; it's more an administrative
|
|
// function so that we don't clutter the run loop. It could in principle be inlined directly
|
|
// into there. it should hopefully therefore be ok that it's an async function mutably borrowing
|
|
// self.
|
|
async fn handle_incoming<Context>(
|
|
&mut self,
|
|
incoming: SubsystemResult<FromOrchestra<<Context as SubsystemContext>::Message>>,
|
|
ctx: &mut Context,
|
|
) -> bool {
|
|
match incoming {
|
|
Ok(FromOrchestra::Signal(OverseerSignal::ActiveLeaves(ActiveLeavesUpdate {
|
|
activated,
|
|
..
|
|
}))) => {
|
|
if let Err(err) = self.handle_new_activation(activated.map(|v| v.hash), ctx).await {
|
|
gum::warn!(target: LOG_TARGET, err = ?err, "failed to handle new activation");
|
|
}
|
|
|
|
false
|
|
},
|
|
Ok(FromOrchestra::Signal(OverseerSignal::Conclude)) => true,
|
|
Ok(FromOrchestra::Communication {
|
|
msg: CollationGenerationMessage::Initialize(config),
|
|
}) => {
|
|
if self.config.is_some() {
|
|
gum::error!(target: LOG_TARGET, "double initialization");
|
|
} else {
|
|
self.config = Some(Arc::new(config));
|
|
}
|
|
false
|
|
},
|
|
Ok(FromOrchestra::Communication {
|
|
msg: CollationGenerationMessage::Reinitialize(config),
|
|
}) => {
|
|
self.config = Some(Arc::new(config));
|
|
false
|
|
},
|
|
Ok(FromOrchestra::Communication {
|
|
msg: CollationGenerationMessage::SubmitCollation(params),
|
|
}) => {
|
|
if let Err(err) = self.handle_submit_collation(params, ctx).await {
|
|
gum::error!(target: LOG_TARGET, ?err, "Failed to submit collation");
|
|
}
|
|
|
|
false
|
|
},
|
|
Ok(FromOrchestra::Signal(OverseerSignal::BlockFinalized(..))) => false,
|
|
Err(err) => {
|
|
gum::error!(
|
|
target: LOG_TARGET,
|
|
err = ?err,
|
|
"error receiving message from subsystem context: {:?}",
|
|
err
|
|
);
|
|
true
|
|
},
|
|
}
|
|
}
|
|
|
|
async fn handle_submit_collation<Context>(
|
|
&mut self,
|
|
params: SubmitCollationParams,
|
|
ctx: &mut Context,
|
|
) -> Result<()> {
|
|
let Some(config) = &self.config else {
|
|
return Err(Error::SubmittedBeforeInit);
|
|
};
|
|
let _timer = self.metrics.time_submit_collation();
|
|
|
|
let SubmitCollationParams {
|
|
relay_parent,
|
|
collation,
|
|
parent_head,
|
|
validation_code_hash,
|
|
result_sender,
|
|
core_index,
|
|
} = params;
|
|
|
|
let mut validation_data = match request_persisted_validation_data(
|
|
relay_parent,
|
|
config.para_id,
|
|
OccupiedCoreAssumption::TimedOut,
|
|
ctx.sender(),
|
|
)
|
|
.await
|
|
.await??
|
|
{
|
|
Some(v) => v,
|
|
None => {
|
|
gum::debug!(
|
|
target: LOG_TARGET,
|
|
relay_parent = ?relay_parent,
|
|
our_para = %config.para_id,
|
|
"No validation data for para - does it exist at this relay-parent?",
|
|
);
|
|
return Ok(());
|
|
},
|
|
};
|
|
|
|
// We need to swap the parent-head data, but all other fields here will be correct.
|
|
validation_data.parent_head = parent_head;
|
|
|
|
let claim_queue = request_claim_queue(relay_parent, ctx.sender()).await.await??;
|
|
|
|
let session_index =
|
|
request_session_index_for_child(relay_parent, ctx.sender()).await.await??;
|
|
|
|
let session_info =
|
|
self.session_info_cache.get(relay_parent, session_index, ctx.sender()).await?;
|
|
let collation = PreparedCollation {
|
|
collation,
|
|
relay_parent,
|
|
para_id: config.para_id,
|
|
validation_data,
|
|
validation_code_hash,
|
|
n_validators: session_info.n_validators,
|
|
core_index,
|
|
session_index,
|
|
};
|
|
|
|
construct_and_distribute_receipt(
|
|
collation,
|
|
ctx.sender(),
|
|
result_sender,
|
|
&mut self.metrics,
|
|
&transpose_claim_queue(claim_queue),
|
|
)
|
|
.await?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
async fn handle_new_activation<Context>(
|
|
&mut self,
|
|
maybe_activated: Option<Hash>,
|
|
ctx: &mut Context,
|
|
) -> Result<()> {
|
|
let Some(config) = &self.config else {
|
|
return Ok(());
|
|
};
|
|
|
|
let Some(relay_parent) = maybe_activated else { return Ok(()) };
|
|
|
|
// If there is no collation function provided, bail out early.
|
|
// Important: Lookahead collator and slot based collator do not use `CollatorFn`.
|
|
if config.collator.is_none() {
|
|
return Ok(());
|
|
}
|
|
|
|
let para_id = config.para_id;
|
|
|
|
let _timer = self.metrics.time_new_activation();
|
|
|
|
let session_index =
|
|
request_session_index_for_child(relay_parent, ctx.sender()).await.await??;
|
|
|
|
let session_info =
|
|
self.session_info_cache.get(relay_parent, session_index, ctx.sender()).await?;
|
|
let n_validators = session_info.n_validators;
|
|
|
|
let claim_queue =
|
|
ClaimQueueSnapshot::from(request_claim_queue(relay_parent, ctx.sender()).await.await??);
|
|
|
|
let assigned_cores = claim_queue
|
|
.iter_all_claims()
|
|
.filter_map(|(core_idx, para_ids)| {
|
|
para_ids.iter().any(|¶_id| para_id == config.para_id).then_some(*core_idx)
|
|
})
|
|
.collect::<Vec<_>>();
|
|
|
|
// Nothing to do if no core is assigned to us at any depth.
|
|
if assigned_cores.is_empty() {
|
|
return Ok(());
|
|
}
|
|
|
|
// We are being very optimistic here, but one of the cores could be pending availability
|
|
// for some more blocks, or even time out. We assume all cores are being freed.
|
|
|
|
let mut validation_data = match request_persisted_validation_data(
|
|
relay_parent,
|
|
para_id,
|
|
// Just use included assumption always. If there are no pending candidates it's a
|
|
// no-op.
|
|
OccupiedCoreAssumption::Included,
|
|
ctx.sender(),
|
|
)
|
|
.await
|
|
.await??
|
|
{
|
|
Some(v) => v,
|
|
None => {
|
|
gum::debug!(
|
|
target: LOG_TARGET,
|
|
relay_parent = ?relay_parent,
|
|
our_para = %para_id,
|
|
"validation data is not available",
|
|
);
|
|
return Ok(());
|
|
},
|
|
};
|
|
|
|
let validation_code_hash = match request_validation_code_hash(
|
|
relay_parent,
|
|
para_id,
|
|
// Just use included assumption always. If there are no pending candidates it's a
|
|
// no-op.
|
|
OccupiedCoreAssumption::Included,
|
|
ctx.sender(),
|
|
)
|
|
.await
|
|
.await??
|
|
{
|
|
Some(v) => v,
|
|
None => {
|
|
gum::debug!(
|
|
target: LOG_TARGET,
|
|
relay_parent = ?relay_parent,
|
|
our_para = %para_id,
|
|
"validation code hash is not found.",
|
|
);
|
|
return Ok(());
|
|
},
|
|
};
|
|
|
|
let task_config = config.clone();
|
|
let metrics = self.metrics.clone();
|
|
let mut task_sender = ctx.sender().clone();
|
|
|
|
ctx.spawn(
|
|
"chained-collation-builder",
|
|
Box::pin(async move {
|
|
let transposed_claim_queue = transpose_claim_queue(claim_queue.0.clone());
|
|
|
|
// Track used core indexes not to submit collations on the same core.
|
|
let mut used_cores = HashSet::new();
|
|
|
|
for i in 0..assigned_cores.len() {
|
|
// Get the collation.
|
|
let collator_fn = match task_config.collator.as_ref() {
|
|
Some(x) => x,
|
|
None => return,
|
|
};
|
|
|
|
let (collation, result_sender) =
|
|
match collator_fn(relay_parent, &validation_data).await {
|
|
Some(collation) => collation.into_inner(),
|
|
None => {
|
|
gum::debug!(
|
|
target: LOG_TARGET,
|
|
?para_id,
|
|
"collator returned no collation on collate",
|
|
);
|
|
return;
|
|
},
|
|
};
|
|
|
|
// Use the core_selector method from CandidateCommitments to extract
|
|
// CoreSelector and ClaimQueueOffset.
|
|
let mut commitments = CandidateCommitments::default();
|
|
commitments.upward_messages = collation.upward_messages.clone();
|
|
|
|
let ump_signals = match commitments.ump_signals() {
|
|
Ok(signals) => signals,
|
|
Err(err) => {
|
|
gum::debug!(
|
|
target: LOG_TARGET,
|
|
?para_id,
|
|
"error processing UMP signals: {}",
|
|
err
|
|
);
|
|
return;
|
|
},
|
|
};
|
|
|
|
let (cs_index, cq_offset) = ump_signals
|
|
.core_selector()
|
|
.map(|(cs_index, cq_offset)| (cs_index.0 as usize, cq_offset.0 as usize))
|
|
.unwrap_or((i, 0));
|
|
|
|
// Identify the cores to build collations on using the given claim queue offset.
|
|
let cores_to_build_on = claim_queue
|
|
.iter_claims_at_depth(cq_offset)
|
|
.filter_map(|(core_idx, para_id)| {
|
|
(para_id == task_config.para_id).then_some(core_idx)
|
|
})
|
|
.collect::<Vec<_>>();
|
|
|
|
if cores_to_build_on.is_empty() {
|
|
gum::debug!(
|
|
target: LOG_TARGET,
|
|
?para_id,
|
|
"no core is assigned to para at depth {}",
|
|
cq_offset,
|
|
);
|
|
return;
|
|
}
|
|
|
|
let descriptor_core_index =
|
|
cores_to_build_on[cs_index % cores_to_build_on.len()];
|
|
|
|
// Ensure the core index has not been used before.
|
|
if used_cores.contains(&descriptor_core_index.0) {
|
|
gum::warn!(
|
|
target: LOG_TARGET,
|
|
?para_id,
|
|
"teyrchain repeatedly selected the same core index: {}",
|
|
descriptor_core_index.0,
|
|
);
|
|
return;
|
|
}
|
|
|
|
used_cores.insert(descriptor_core_index.0);
|
|
gum::trace!(
|
|
target: LOG_TARGET,
|
|
?para_id,
|
|
"selected core index: {}",
|
|
descriptor_core_index.0,
|
|
);
|
|
|
|
// Distribute the collation.
|
|
let parent_head = collation.head_data.clone();
|
|
if let Err(err) = construct_and_distribute_receipt(
|
|
PreparedCollation {
|
|
collation,
|
|
para_id,
|
|
relay_parent,
|
|
validation_data: validation_data.clone(),
|
|
validation_code_hash,
|
|
n_validators,
|
|
core_index: descriptor_core_index,
|
|
session_index,
|
|
},
|
|
&mut task_sender,
|
|
result_sender,
|
|
&metrics,
|
|
&transposed_claim_queue,
|
|
)
|
|
.await
|
|
{
|
|
gum::error!(
|
|
target: LOG_TARGET,
|
|
"Failed to construct and distribute collation: {}",
|
|
err
|
|
);
|
|
return;
|
|
}
|
|
|
|
// Chain the collations. All else stays the same as we build the chained
|
|
// collation on same relay parent.
|
|
validation_data.parent_head = parent_head;
|
|
}
|
|
}),
|
|
)?;
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
#[overseer::subsystem(CollationGeneration, error=SubsystemError, prefix=self::overseer)]
|
|
impl<Context> CollationGenerationSubsystem {
|
|
fn start(self, ctx: Context) -> SpawnedSubsystem {
|
|
let future = async move {
|
|
self.run(ctx).await;
|
|
Ok(())
|
|
}
|
|
.boxed();
|
|
|
|
SpawnedSubsystem { name: "collation-generation-subsystem", future }
|
|
}
|
|
}
|
|
|
|
/// Data cached per session by `SessionInfoCache`.
#[derive(Clone)]
struct PerSessionInfo {
    /// Length of the validator list returned by `request_validators` for the session.
    n_validators: usize,
}
|
|
|
|
struct SessionInfoCache(LruMap<SessionIndex, PerSessionInfo>);
|
|
|
|
impl SessionInfoCache {
|
|
fn new() -> Self {
|
|
Self(LruMap::new(ByLength::new(2)))
|
|
}
|
|
|
|
async fn get<Sender: SubsystemSender<RuntimeApiMessage>>(
|
|
&mut self,
|
|
relay_parent: Hash,
|
|
session_index: SessionIndex,
|
|
sender: &mut Sender,
|
|
) -> Result<PerSessionInfo> {
|
|
if let Some(info) = self.0.get(&session_index) {
|
|
return Ok(info.clone());
|
|
}
|
|
|
|
let n_validators =
|
|
request_validators(relay_parent, &mut sender.clone()).await.await??.len();
|
|
|
|
let info = PerSessionInfo { n_validators };
|
|
self.0.insert(session_index, info);
|
|
Ok(self.0.get(&session_index).expect("Just inserted").clone())
|
|
}
|
|
}
|
|
|
|
struct PreparedCollation {
|
|
collation: Collation,
|
|
para_id: ParaId,
|
|
relay_parent: Hash,
|
|
validation_data: PersistedValidationData,
|
|
validation_code_hash: ValidationCodeHash,
|
|
n_validators: usize,
|
|
core_index: CoreIndex,
|
|
session_index: SessionIndex,
|
|
}
|
|
|
|
/// Takes a prepared collation, along with its context, and produces a candidate receipt
|
|
/// which is distributed to validators.
|
|
async fn construct_and_distribute_receipt(
|
|
collation: PreparedCollation,
|
|
sender: &mut impl overseer::CollationGenerationSenderTrait,
|
|
result_sender: Option<oneshot::Sender<CollationSecondedSignal>>,
|
|
metrics: &Metrics,
|
|
transposed_claim_queue: &TransposedClaimQueue,
|
|
) -> Result<()> {
|
|
let PreparedCollation {
|
|
collation,
|
|
para_id,
|
|
relay_parent,
|
|
validation_data,
|
|
validation_code_hash,
|
|
n_validators,
|
|
core_index,
|
|
session_index,
|
|
} = collation;
|
|
|
|
let persisted_validation_data_hash = validation_data.hash();
|
|
let parent_head_data = validation_data.parent_head.clone();
|
|
let parent_head_data_hash = validation_data.parent_head.hash();
|
|
|
|
// Apply compression to the block data.
|
|
let pov = {
|
|
let pov = collation.proof_of_validity.into_compressed();
|
|
let encoded_size = pov.encoded_size();
|
|
|
|
// As long as `POV_BOMB_LIMIT` is at least `max_pov_size`, this ensures
|
|
// that honest collators never produce a PoV which is uncompressed.
|
|
//
|
|
// As such, honest collators never produce an uncompressed PoV which starts with
|
|
// a compression magic number, which would lead validators to reject the collation.
|
|
if encoded_size > validation_data.max_pov_size as usize {
|
|
return Err(Error::POVSizeExceeded(encoded_size, validation_data.max_pov_size as usize));
|
|
}
|
|
|
|
pov
|
|
};
|
|
|
|
let pov_hash = pov.hash();
|
|
|
|
let erasure_root = erasure_root(n_validators, validation_data, pov.clone())?;
|
|
|
|
let commitments = CandidateCommitments {
|
|
upward_messages: collation.upward_messages,
|
|
horizontal_messages: collation.horizontal_messages,
|
|
new_validation_code: collation.new_validation_code,
|
|
head_data: collation.head_data,
|
|
processed_downward_messages: collation.processed_downward_messages,
|
|
hrmp_watermark: collation.hrmp_watermark,
|
|
};
|
|
|
|
let receipt = {
|
|
let ccr = CommittedCandidateReceiptV2 {
|
|
descriptor: CandidateDescriptorV2::new(
|
|
para_id,
|
|
relay_parent,
|
|
core_index,
|
|
session_index,
|
|
persisted_validation_data_hash,
|
|
pov_hash,
|
|
erasure_root,
|
|
commitments.head_data.hash(),
|
|
validation_code_hash,
|
|
),
|
|
commitments: commitments.clone(),
|
|
};
|
|
|
|
ccr.parse_ump_signals(&transposed_claim_queue)
|
|
.map_err(Error::CandidateReceiptCheck)?;
|
|
|
|
ccr.to_plain()
|
|
};
|
|
|
|
gum::debug!(
|
|
target: LOG_TARGET,
|
|
candidate_hash = ?receipt.hash(),
|
|
?pov_hash,
|
|
?relay_parent,
|
|
para_id = %para_id,
|
|
?core_index,
|
|
"Candidate generated",
|
|
);
|
|
gum::trace!(
|
|
target: LOG_TARGET,
|
|
?commitments,
|
|
candidate_hash = ?receipt.hash(),
|
|
"Candidate commitments",
|
|
);
|
|
|
|
metrics.on_collation_generated();
|
|
|
|
sender
|
|
.send_message(CollatorProtocolMessage::DistributeCollation {
|
|
candidate_receipt: receipt,
|
|
parent_head_data_hash,
|
|
pov,
|
|
parent_head_data,
|
|
result_sender,
|
|
core_index,
|
|
})
|
|
.await;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn erasure_root(
|
|
n_validators: usize,
|
|
persisted_validation: PersistedValidationData,
|
|
pov: PoV,
|
|
) -> Result<Hash> {
|
|
let available_data =
|
|
AvailableData { validation_data: persisted_validation, pov: Arc::new(pov) };
|
|
|
|
let chunks = pezkuwi_erasure_coding::obtain_chunks_v1(n_validators, &available_data)?;
|
|
Ok(pezkuwi_erasure_coding::branches(&chunks).root())
|
|
}
|