// Copyright (C) Parity Technologies (UK) Ltd. and Dijital Kurdistan Tech Institute
// This file is part of Pezkuwi.
// Pezkuwi is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Pezkuwi is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Pezkuwi. If not, see <https://www.gnu.org/licenses/>.
//! Validation host - is the primary interface for this crate. It allows the clients to enqueue
//! jobs for PVF execution or preparation.
//!
//! The validation host is represented by a future/task that runs an event-loop and by a handle,
//! [`ValidationHost`], that allows communication with that event-loop.
use crate::{
artifacts::{ArtifactId, ArtifactPathId, ArtifactState, Artifacts, ArtifactsCleanupConfig},
execute::{self, PendingExecutionRequest},
metrics::Metrics,
prepare, Priority, SecurityStatus, ValidationError, LOG_TARGET,
};
use always_assert::never;
use futures::{
channel::{mpsc, oneshot},
Future, FutureExt, SinkExt, StreamExt,
};
#[cfg(feature = "test-utils")]
use pezkuwi_node_core_pvf_common::ArtifactChecksum;
use pezkuwi_node_core_pvf_common::{
error::{PrecheckResult, PrepareError},
prepare::PrepareSuccess,
pvf::PvfPrepData,
};
use pezkuwi_node_subsystem::{
messages::PvfExecKind, ActiveLeavesUpdate, SubsystemError, SubsystemResult,
};
use pezkuwi_pez_node_primitives::PoV;
use pezkuwi_primitives::{Hash, PersistedValidationData};
use pezkuwi_teyrchain_primitives::primitives::ValidationResult;
use std::{
collections::HashMap,
path::PathBuf,
sync::Arc,
time::{Duration, SystemTime},
};
/// How long a failed preparation artifact has to wait before it may be retried.
///
/// The retry is lazy: it only happens if another request comes in after this cooldown has
/// passed. Test builds use a much shorter cooldown (200 ms instead of 15 minutes).
pub const PREPARE_FAILURE_COOLDOWN: Duration =
    if cfg!(test) { Duration::from_millis(200) } else { Duration::from_secs(15 * 60) };

/// The number of times a failed prepare job will be retried.
pub const NUM_PREPARE_RETRIES: u32 = 5;

/// File name of the binary that is spawned to prepare a PVF artifact.
pub const PREPARE_BINARY_NAME: &str = "pezkuwi-prepare-worker";

/// File name of the binary that is spawned to execute a PVF.
pub const EXECUTE_BINARY_NAME: &str = "pezkuwi-execute-worker";

/// Size of the incoming message queue of the host.
pub const HOST_MESSAGE_QUEUE_SIZE: usize = 10;
/// An alias to not spell the type for the oneshot sender for the PVF execution result.
pub(crate) type ResultSender = oneshot::Sender>;
/// Transmission end used for sending the PVF preparation result.
pub(crate) type PrecheckResultSender = oneshot::Sender;
/// A handle to the async process serving the validation host requests.
#[derive(Clone)]
pub struct ValidationHost {
to_host_tx: mpsc::Sender,
/// Available security features, detected by the host during startup.
pub security_status: SecurityStatus,
}
impl ValidationHost {
/// Precheck PVF with the given code, i.e. verify that it compiles within a reasonable time
/// limit. This will prepare the PVF. The result of preparation will be sent to the provided
/// result sender.
///
/// This is async to accommodate the possibility of back-pressure. In the vast majority of
/// situations this function should return immediately.
///
/// Returns an error if the request cannot be sent to the validation host, i.e. if it shut down.
pub async fn precheck_pvf(
&mut self,
pvf: PvfPrepData,
result_tx: PrecheckResultSender,
) -> Result<(), String> {
self.to_host_tx
.send(ToHost::PrecheckPvf { pvf, result_tx })
.await
.map_err(|_| "the inner loop hung up".to_string())
}
/// Execute PVF with the given code, execution timeout, parameters and priority.
/// The result of execution will be sent to the provided result sender.
///
/// This is async to accommodate the possibility of back-pressure. In the vast majority of
/// situations this function should return immediately.
///
/// Returns an error if the request cannot be sent to the validation host, i.e. if it shut down.
pub async fn execute_pvf(
&mut self,
pvf: PvfPrepData,
exec_timeout: Duration,
pvd: Arc,
pov: Arc,
priority: Priority,
exec_kind: PvfExecKind,
result_tx: ResultSender,
) -> Result<(), String> {
self.to_host_tx
.send(ToHost::ExecutePvf(ExecutePvfInputs {
pvf,
exec_timeout,
pvd,
pov,
priority,
exec_kind,
result_tx,
}))
.await
.map_err(|_| "the inner loop hung up".to_string())
}
/// Sends a signal to the validation host requesting to prepare a list of the given PVFs.
///
/// This is async to accommodate the possibility of back-pressure. In the vast majority of
/// situations this function should return immediately.
///
/// Returns an error if the request cannot be sent to the validation host, i.e. if it shut down.
pub async fn heads_up(&mut self, active_pvfs: Vec) -> Result<(), String> {
self.to_host_tx
.send(ToHost::HeadsUp { active_pvfs })
.await
.map_err(|_| "the inner loop hung up".to_string())
}
/// Sends a signal to the validation host requesting to update best block.
///
/// Returns an error if the request cannot be sent to the validation host, i.e. if it shut down.
pub async fn update_active_leaves(
&mut self,
update: ActiveLeavesUpdate,
ancestors: Vec,
) -> Result<(), String> {
self.to_host_tx
.send(ToHost::UpdateActiveLeaves { update, ancestors })
.await
.map_err(|_| "the inner loop hung up".to_string())
}
/// Replace the artifact checksum with a new one.
///
/// Only for test purposes to imitate a corruption of the artifact on disk.
#[cfg(feature = "test-utils")]
pub async fn replace_artifact_checksum(
&mut self,
checksum: ArtifactChecksum,
new_checksum: ArtifactChecksum,
) -> Result<(), String> {
self.to_host_tx
.send(ToHost::ReplaceArtifactChecksum { checksum, new_checksum })
.await
.map_err(|_| "the inner loop hung up".to_string())
}
}
enum ToHost {
PrecheckPvf {
pvf: PvfPrepData,
result_tx: PrecheckResultSender,
},
ExecutePvf(ExecutePvfInputs),
HeadsUp {
active_pvfs: Vec,
},
UpdateActiveLeaves {
update: ActiveLeavesUpdate,
ancestors: Vec,
},
#[cfg(feature = "test-utils")]
ReplaceArtifactChecksum {
checksum: ArtifactChecksum,
new_checksum: ArtifactChecksum,
},
}
struct ExecutePvfInputs {
pvf: PvfPrepData,
exec_timeout: Duration,
pvd: Arc,
pov: Arc,
priority: Priority,
exec_kind: PvfExecKind,
result_tx: ResultSender,
}
/// Configuration for the validation host.
#[derive(Debug)]
pub struct Config {
    /// The root directory where the prepared artifacts can be stored.
    pub cache_path: PathBuf,
    /// The version of the node. `None` can be passed to skip the version check (only for tests).
    // NOTE(review): the generic argument was missing in the reviewed text; `String` is assumed
    // here - confirm against the callers.
    pub node_version: Option<String>,
    /// Whether the node is attempting to run as a secure validator.
    pub secure_validator_mode: bool,
    /// The path to the program that can be used to spawn the prepare workers.
    pub prepare_worker_program_path: PathBuf,
    /// The time allotted for a prepare worker to spawn and report to the host.
    pub prepare_worker_spawn_timeout: Duration,
    /// The maximum number of workers that can be spawned in the prepare pool for tasks with the
    /// priority below critical.
    pub prepare_workers_soft_max_num: usize,
    /// The absolute number of workers that can be spawned in the prepare pool.
    pub prepare_workers_hard_max_num: usize,
    /// The path to the program that can be used to spawn the execute workers.
    pub execute_worker_program_path: PathBuf,
    /// The time allotted for an execute worker to spawn and report to the host.
    pub execute_worker_spawn_timeout: Duration,
    /// The maximum number of execute workers that can run at the same time.
    pub execute_workers_max_num: usize,
}

impl Config {
    /// Spawn timeout that [`Config::new`] assigns to both the prepare and the execute workers.
    const DEFAULT_WORKER_SPAWN_TIMEOUT: Duration = Duration::from_secs(3);

    /// Create a new instance of the configuration.
    ///
    /// Every argument is stored in the field of the same name. The two worker spawn timeouts
    /// are not configurable through this constructor and are both set to 3 seconds; since the
    /// fields are public they can be adjusted afterwards.
    ///
    /// Note the argument order: the execute worker settings come before the prepare worker
    /// limits.
    pub fn new(
        cache_path: PathBuf,
        node_version: Option<String>,
        secure_validator_mode: bool,
        prepare_worker_program_path: PathBuf,
        execute_worker_program_path: PathBuf,
        execute_workers_max_num: usize,
        prepare_workers_soft_max_num: usize,
        prepare_workers_hard_max_num: usize,
    ) -> Self {
        Self {
            cache_path,
            node_version,
            secure_validator_mode,
            prepare_worker_program_path,
            prepare_worker_spawn_timeout: Self::DEFAULT_WORKER_SPAWN_TIMEOUT,
            prepare_workers_soft_max_num,
            prepare_workers_hard_max_num,
            execute_worker_program_path,
            execute_worker_spawn_timeout: Self::DEFAULT_WORKER_SPAWN_TIMEOUT,
            execute_workers_max_num,
        }
    }
}
/// Start the validation host.
///
/// Returns a [handle][`ValidationHost`] to the started validation host and the future. The future
/// must be polled in order for validation host to function.
///
/// The future should not return normally but if it does then that indicates an unrecoverable error.
/// In that case all pending requests will be canceled, dropping the result senders and new ones
/// will be rejected.
pub async fn start(
config: Config,
metrics: Metrics,
) -> SubsystemResult<(ValidationHost, impl Future