feat: initialize Kurdistan SDK - independent fork of Polkadot SDK
This commit is contained in:
@@ -0,0 +1,474 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! PVF artifacts (final compiled code blobs).
|
||||
//!
|
||||
//! # Lifecycle of an artifact
|
||||
//!
|
||||
//! 1. During node start-up, we prune all the cached artifacts, if any.
|
||||
//!
|
||||
//! 2. In order to be executed, a PVF should be prepared first. This means that artifacts should
|
||||
//! have an [`ArtifactState::Prepared`] entry for that artifact in the table. If not, the
|
||||
//! preparation process kicks in. The execution request is stashed until after the preparation is
|
||||
//! done, and the artifact state in the host is set to [`ArtifactState::Preparing`]. Preparation
|
||||
//! goes through the preparation queue and the pool.
|
||||
//!
|
||||
//! 1. If the artifact is already being processed, we add another execution request to the
|
||||
//! existing preparation job, without starting a new one.
|
||||
//!
|
||||
//! 2. Note that if the state is [`ArtifactState::FailedToProcess`], we usually do not retry
|
||||
//! preparation, though we may under certain conditions.
|
||||
//!
|
||||
//! 3. The pool gets an available worker and instructs it to work on the given PVF. The worker
|
||||
//! starts compilation. When the worker finishes successfully, it writes the serialized artifact
|
||||
//! into a temporary file and notifies the host that it's done. The host atomically moves
|
||||
//! (renames) the temporary file to the destination filename of the artifact.
|
||||
//!
|
||||
//! 4. If the worker concluded successfully or returned an error, then the pool notifies the queue.
|
||||
//! In both cases, the queue reports to the host that the result is ready.
|
||||
//!
|
||||
//! 5. The host will react by changing the artifact state to either [`ArtifactState::Prepared`] or
|
||||
//! [`ArtifactState::FailedToProcess`] for the PVF in question. On success, the
|
||||
//! `last_time_needed` will be set to the current time. It will also dispatch the pending
|
||||
//! execution requests.
|
||||
//!
|
||||
//! 6. On success, the execution request will come through the execution queue and ultimately be
|
||||
//! processed by an execution worker. When this worker receives the request, it will read the
|
||||
//! requested artifact. If it doesn't exist it reports an internal error. A request for execution
|
||||
//! will bump the `last_time_needed` to the current time.
|
||||
//!
|
||||
//! 7. There is a separate process for pruning the prepared artifacts whose `last_time_needed` is
|
||||
//! older by a predefined parameter. This process is run very rarely (say, once a day). Once the
|
||||
//! artifact is expired it is removed from disk eagerly atomically.
|
||||
|
||||
use crate::{host::PrecheckResultSender, worker_interface::WORKER_DIR_PREFIX};
|
||||
use always_assert::always;
|
||||
use pezkuwi_node_core_pvf_common::{error::PrepareError, pvf::PvfPrepData, ArtifactChecksum};
|
||||
use pezkuwi_primitives::ExecutorParamsPrepHash;
|
||||
use pezkuwi_teyrchain_primitives::primitives::ValidationCodeHash;
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
fs,
|
||||
path::{Path, PathBuf},
|
||||
time::{Duration, SystemTime},
|
||||
};
|
||||
|
||||
/// The extension to use for cached artifacts.
|
||||
const ARTIFACT_EXTENSION: &str = "pvf";
|
||||
|
||||
/// The prefix that artifacts used to start with under the old naming scheme.
|
||||
const ARTIFACT_OLD_PREFIX: &str = "wasmtime_";
|
||||
|
||||
pub fn generate_artifact_path(cache_path: &Path) -> PathBuf {
|
||||
let file_name = {
|
||||
use array_bytes::Hex;
|
||||
use rand::RngCore;
|
||||
let mut bytes = [0u8; 64];
|
||||
rand::thread_rng().fill_bytes(&mut bytes);
|
||||
bytes.hex("0x")
|
||||
};
|
||||
let mut artifact_path = cache_path.join(file_name);
|
||||
artifact_path.set_extension(ARTIFACT_EXTENSION);
|
||||
artifact_path
|
||||
}
|
||||
|
||||
/// Identifier of an artifact. Encodes a code hash of the PVF and a hash of preparation-related
/// executor parameter set.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ArtifactId {
	/// Hash of the PVF validation code this artifact was compiled from.
	pub(crate) code_hash: ValidationCodeHash,
	/// Hash of the preparation-related subset of the executor parameters.
	pub(crate) executor_params_prep_hash: ExecutorParamsPrepHash,
}
|
||||
|
||||
impl ArtifactId {
|
||||
/// Creates a new artifact ID with the given hash.
|
||||
pub fn new(
|
||||
code_hash: ValidationCodeHash,
|
||||
executor_params_prep_hash: ExecutorParamsPrepHash,
|
||||
) -> Self {
|
||||
Self { code_hash, executor_params_prep_hash }
|
||||
}
|
||||
|
||||
/// Returns an artifact ID that corresponds to the PVF with given preparation-related
|
||||
/// executor parameters.
|
||||
pub fn from_pvf_prep_data(pvf: &PvfPrepData) -> Self {
|
||||
Self::new(pvf.code_hash(), pvf.executor_params().prep_hash())
|
||||
}
|
||||
}
|
||||
|
||||
/// A bundle of the artifact ID and the path.
///
/// Rationale for having this is two-fold:
///
/// - While we can derive the artifact path from the artifact id, it makes sense to carry it around
/// sometimes to avoid extra work.
/// - At the same time, carrying only the path limits the ability to log the artifact's identity.
#[derive(Debug, Clone)]
pub struct ArtifactPathId {
	/// The identifier of the artifact (code hash + preparation-related executor params hash).
	pub(crate) id: ArtifactId,
	/// The location of the compiled artifact on disk.
	pub(crate) path: PathBuf,
	/// The checksum of the compiled artifact.
	pub(crate) checksum: ArtifactChecksum,
}
|
||||
|
||||
impl ArtifactPathId {
|
||||
pub(crate) fn new(artifact_id: ArtifactId, path: &Path, checksum: ArtifactChecksum) -> Self {
|
||||
Self { id: artifact_id, path: path.to_owned(), checksum }
|
||||
}
|
||||
}
|
||||
|
||||
/// The state of a single artifact within the [`Artifacts`] table.
#[derive(Debug)]
pub enum ArtifactState {
	/// The artifact is ready to be used by the executor.
	///
	/// That means that the artifact should be accessible through the path obtained by the artifact
	/// id (unless, it was removed externally).
	Prepared {
		/// The checksum of the compiled artifact.
		checksum: ArtifactChecksum,
		/// The path of the compiled artifact.
		path: PathBuf,
		/// The time when the artifact was last needed.
		///
		/// This is updated when we get the heads up for this artifact or when we just discover
		/// this file.
		last_time_needed: SystemTime,
		/// Size of the artifact file in bytes.
		size: u64,
	},
	/// A task to prepare this artifact is scheduled.
	Preparing {
		/// List of result senders that are waiting for a response.
		waiting_for_response: Vec<PrecheckResultSender>,
		/// The number of times this artifact has failed to prepare.
		num_failures: u32,
	},
	/// The code couldn't be compiled due to an error. Such artifacts
	/// never reach the executor and stay in the host's memory.
	FailedToProcess {
		/// Keep track of the last time that processing this artifact failed.
		last_time_failed: SystemTime,
		/// The number of times this artifact has failed to prepare.
		num_failures: u32,
		/// The last error encountered for preparation.
		error: PrepareError,
	},
}
|
||||
|
||||
/// A container of all known artifact ids and their states.
pub struct Artifacts {
	// In-memory table mapping artifact ids to their current state; the single source of truth
	// for which artifacts exist and whether they are usable.
	inner: HashMap<ArtifactId, ArtifactState>,
}
|
||||
|
||||
/// Parameters we use to clean up artifacts.
/// After we hit the cache limit we remove the least recently used artifacts,
/// but only if they have been stale for more than the minimum stale time.
#[derive(Debug)]
pub struct ArtifactsCleanupConfig {
	// Max size in bytes. On reaching it the least recently used artifacts are deleted.
	cache_limit: u64,
	// Inactive time after which an artifact is allowed to be deleted.
	min_stale_time: Duration,
}
|
||||
|
||||
impl Default for ArtifactsCleanupConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
cache_limit: 10 * 1024 * 1024 * 1024, // 10 GiB
|
||||
min_stale_time: Duration::from_secs(24 * 60 * 60), // 24 hours
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
impl ArtifactsCleanupConfig {
	/// Test-only constructor allowing arbitrary cache limit and stale-time values.
	pub fn new(cache_limit: u64, min_stale_time: Duration) -> Self {
		Self { cache_limit, min_stale_time }
	}
}
|
||||
|
||||
impl Artifacts {
	/// Creates an empty in-memory artifact table (test-only).
	#[cfg(test)]
	pub(crate) fn empty() -> Self {
		Self { inner: HashMap::new() }
	}

	/// Number of artifacts currently tracked (test-only).
	#[cfg(test)]
	fn len(&self) -> usize {
		self.inner.len()
	}

	/// All currently known artifact ids (test-only).
	#[cfg(test)]
	fn artifact_ids(&self) -> Vec<ArtifactId> {
		self.inner.keys().cloned().collect()
	}

	/// Replaces `checksum` with `new_checksum` on every `Prepared` artifact that currently
	/// carries it. Only compiled for the `test-utils` feature.
	#[cfg(feature = "test-utils")]
	pub fn replace_artifact_checksum(
		&mut self,
		checksum: ArtifactChecksum,
		new_checksum: ArtifactChecksum,
	) {
		for artifact in self.inner.values_mut() {
			if let ArtifactState::Prepared { checksum: c, .. } = artifact {
				if *c == checksum {
					*c = new_checksum;
				}
			}
		}
	}

	/// Create an empty table and the cache directory on-disk if it doesn't exist.
	pub async fn new(cache_path: &Path) -> Self {
		// Make sure that the cache path directory and all its parents are created.
		let _ = tokio::fs::create_dir_all(cache_path).await;

		// Delete any leftover artifacts and worker dirs from previous runs. We don't delete the
		// entire cache directory in case the user made a mistake and set it to e.g. their home
		// directory. This is a best-effort to do clean-up, so ignore any errors.
		for entry in fs::read_dir(cache_path).into_iter().flatten().flatten() {
			let path = entry.path();
			// Skip entries whose names are not valid UTF-8; we can't match on those anyway.
			let Some(file_name) = path.file_name().and_then(|f| f.to_str()) else { continue };
			if path.is_dir() && file_name.starts_with(WORKER_DIR_PREFIX) {
				let _ = fs::remove_dir_all(path);
			} else if path.extension().map_or(false, |ext| ext == ARTIFACT_EXTENSION) ||
				file_name.starts_with(ARTIFACT_OLD_PREFIX)
			{
				// Covers both the current naming scheme (`*.pvf`) and the old one
				// (`wasmtime_*`).
				let _ = fs::remove_file(path);
			}
		}

		Self { inner: HashMap::new() }
	}

	/// Returns the state of the given artifact by its ID.
	pub fn artifact_state_mut(&mut self, artifact_id: &ArtifactId) -> Option<&mut ArtifactState> {
		self.inner.get_mut(artifact_id)
	}

	/// Inform the table about the artifact with the given ID. The state will be set to "preparing".
	///
	/// This function must be used only for brand-new artifacts and should never be used for
	/// replacing existing ones.
	pub fn insert_preparing(
		&mut self,
		artifact_id: ArtifactId,
		waiting_for_response: Vec<PrecheckResultSender>,
	) {
		// See the precondition.
		always!(self
			.inner
			.insert(artifact_id, ArtifactState::Preparing { waiting_for_response, num_failures: 0 })
			.is_none());
	}

	/// Insert an artifact with the given ID as "prepared".
	///
	/// This function should only be used to build the artifact table at startup with valid
	/// artifact caches.
	#[cfg(test)]
	pub(crate) fn insert_prepared(
		&mut self,
		artifact_id: ArtifactId,
		path: PathBuf,
		checksum: ArtifactChecksum,
		last_time_needed: SystemTime,
		size: u64,
	) {
		// See the precondition.
		always!(self
			.inner
			.insert(artifact_id, ArtifactState::Prepared { path, checksum, last_time_needed, size })
			.is_none());
	}

	/// Remove artifact by its id.
	///
	/// The entry is removed whatever its state, but the id/path pair is only returned when it
	/// was `Prepared` — other states have no on-disk file to clean up.
	pub fn remove(&mut self, artifact_id: ArtifactId) -> Option<(ArtifactId, PathBuf)> {
		self.inner.remove(&artifact_id).and_then(|state| match state {
			ArtifactState::Prepared { path, .. } => Some((artifact_id, path)),
			_ => None,
		})
	}

	/// Remove artifacts older than the given TTL when the total artifact size reaches the limit
	/// and return id and path of the removed ones.
	pub fn prune(&mut self, cleanup_config: &ArtifactsCleanupConfig) -> Vec<(ArtifactId, PathBuf)> {
		let mut to_remove = vec![];
		let now = SystemTime::now();

		let mut total_size = 0;
		let mut artifact_sizes = vec![];

		// Only `Prepared` artifacts occupy disk space and are therefore eligible for pruning.
		for (k, v) in self.inner.iter() {
			if let ArtifactState::Prepared { ref path, last_time_needed, size, .. } = *v {
				total_size += size;
				artifact_sizes.push((k.clone(), path.clone(), size, last_time_needed));
			}
		}
		// Sort most-recently-used first so that `pop()` below yields the least recently used
		// artifact.
		artifact_sizes
			.sort_by_key(|&(_, _, _, last_time_needed)| std::cmp::Reverse(last_time_needed));

		while total_size > cleanup_config.cache_limit {
			let Some((artifact_id, path, size, last_time_needed)) = artifact_sizes.pop() else {
				break;
			};

			// `duration_since` errors when `last_time_needed` is in the future (clock skew);
			// treat that as "used recently" to err on the side of keeping the artifact.
			let used_recently = now
				.duration_since(last_time_needed)
				.map(|stale_time| stale_time < cleanup_config.min_stale_time)
				.unwrap_or(true);
			if used_recently {
				// Every remaining candidate was used even more recently, so stop here.
				break;
			}

			self.inner.remove(&artifact_id);
			to_remove.push((artifact_id, path));
			total_size -= size;
		}

		to_remove
	}
}
|
||||
|
||||
#[cfg(test)]
mod tests {
	use crate::testing::artifact_id;

	use super::*;

	// Verifies that `Artifacts::new` removes leftover artifacts (`*.pvf`, `wasmtime_*`) and
	// worker dirs, while leaving unrelated files and directories intact.
	#[tokio::test]
	async fn cache_cleared_on_startup() {
		let tempdir = tempfile::tempdir().unwrap();
		let cache_path = tempdir.path();

		// These should be cleared.
		fs::write(cache_path.join("abcd.pvf"), "test").unwrap();
		fs::write(cache_path.join("wasmtime_..."), "test").unwrap();
		fs::create_dir(cache_path.join("worker-dir-prepare-test")).unwrap();

		// These should not be touched.
		fs::write(cache_path.join("abcd.pvfartifact"), "test").unwrap();
		fs::write(cache_path.join("pezkuwi_..."), "test").unwrap();
		fs::create_dir(cache_path.join("worker-prepare-test")).unwrap();

		let artifacts = Artifacts::new(cache_path).await;

		let entries: Vec<String> = fs::read_dir(&cache_path)
			.unwrap()
			.map(|entry| entry.unwrap().file_name().into_string().unwrap())
			.collect();
		// Only the three untouched entries remain, and the table starts out empty.
		assert_eq!(entries.len(), 3);
		assert!(entries.contains(&String::from("abcd.pvfartifact")));
		assert!(entries.contains(&String::from("pezkuwi_...")));
		assert!(entries.contains(&String::from("worker-prepare-test")));
		assert_eq!(artifacts.len(), 0);
	}

	// Three 1024-byte artifacts against a 1500-byte limit with a zero stale time: pruning must
	// evict the two least recently used artifacts (ids 2 and 3) and keep the freshest (id 1).
	#[tokio::test]
	async fn test_pruned_by_cache_size() {
		let mock_now = SystemTime::now();
		let tempdir = tempfile::tempdir().unwrap();
		let cache_path = tempdir.path();

		let path1 = generate_artifact_path(cache_path);
		let path2 = generate_artifact_path(cache_path);
		let path3 = generate_artifact_path(cache_path);
		let artifact_id1 = artifact_id(1);
		let artifact_id2 = artifact_id(2);
		let artifact_id3 = artifact_id(3);

		let mut artifacts = Artifacts::new(cache_path).await;
		let cleanup_config = ArtifactsCleanupConfig::new(1500, Duration::from_secs(0));

		// Insert artifacts with increasing staleness: id1 is the most recently used.
		artifacts.insert_prepared(
			artifact_id1.clone(),
			path1.clone(),
			Default::default(),
			mock_now - Duration::from_secs(5),
			1024,
		);
		artifacts.insert_prepared(
			artifact_id2.clone(),
			path2.clone(),
			Default::default(),
			mock_now - Duration::from_secs(10),
			1024,
		);
		artifacts.insert_prepared(
			artifact_id3.clone(),
			path3.clone(),
			Default::default(),
			mock_now - Duration::from_secs(15),
			1024,
		);

		let pruned = artifacts.prune(&cleanup_config);

		assert!(artifacts.artifact_ids().contains(&artifact_id1));
		assert!(!pruned.contains(&(artifact_id1, path1)));
		assert!(!artifacts.artifact_ids().contains(&artifact_id2));
		assert!(pruned.contains(&(artifact_id2, path2)));
		assert!(!artifacts.artifact_ids().contains(&artifact_id3));
		assert!(pruned.contains(&(artifact_id3, path3)));
	}

	// Same setup, but the 12-second minimum stale time protects id2 (only 10s stale): pruning
	// stops after evicting id3 even though the cache is still over the limit.
	#[tokio::test]
	async fn test_did_not_prune_by_cache_size_because_of_stale_time() {
		let mock_now = SystemTime::now();
		let tempdir = tempfile::tempdir().unwrap();
		let cache_path = tempdir.path();

		let path1 = generate_artifact_path(cache_path);
		let path2 = generate_artifact_path(cache_path);
		let path3 = generate_artifact_path(cache_path);
		let artifact_id1 = artifact_id(1);
		let artifact_id2 = artifact_id(2);
		let artifact_id3 = artifact_id(3);

		let mut artifacts = Artifacts::new(cache_path).await;
		let cleanup_config = ArtifactsCleanupConfig::new(1500, Duration::from_secs(12));

		artifacts.insert_prepared(
			artifact_id1.clone(),
			path1.clone(),
			Default::default(),
			mock_now - Duration::from_secs(5),
			1024,
		);
		artifacts.insert_prepared(
			artifact_id2.clone(),
			path2.clone(),
			Default::default(),
			mock_now - Duration::from_secs(10),
			1024,
		);
		artifacts.insert_prepared(
			artifact_id3.clone(),
			path3.clone(),
			Default::default(),
			mock_now - Duration::from_secs(15),
			1024,
		);

		let pruned = artifacts.prune(&cleanup_config);

		assert!(artifacts.artifact_ids().contains(&artifact_id1));
		assert!(!pruned.contains(&(artifact_id1, path1)));
		assert!(artifacts.artifact_ids().contains(&artifact_id2));
		assert!(!pruned.contains(&(artifact_id2, path2)));
		assert!(!artifacts.artifact_ids().contains(&artifact_id3));
		assert!(pruned.contains(&(artifact_id3, path3)));
	}
}
|
||||
@@ -0,0 +1,116 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use pezkuwi_node_core_pvf_common::error::{InternalValidationError, PrepareError};
|
||||
|
||||
/// An error raised during validation of the candidate.
#[derive(thiserror::Error, Debug, Clone)]
pub enum ValidationError {
	/// Deterministic preparation issue. In practice, most of the problems should be caught by
	/// prechecking, so this may be a sign of internal conditions.
	///
	/// In principle if preparation of the `WASM` fails, the current candidate cannot be the
	/// reason for that. So we can't say whether it is invalid or not. In addition, with
	/// pre-checking enabled only valid runtimes should ever get enacted, so we can be
	/// reasonably sure that this is some local problem on the current node. However, as this
	/// particular error *seems* to indicate a deterministic error, we raise a warning.
	#[error("candidate validation: {0}")]
	Preparation(PrepareError),
	/// The error was raised because the candidate is invalid. Should vote against.
	#[error("candidate validation: {0}")]
	Invalid(#[from] InvalidCandidate),
	/// Possibly transient issue that may resolve after retries. Should vote against when retries
	/// fail.
	#[error("candidate validation: {0}")]
	PossiblyInvalid(#[from] PossiblyInvalidError),
	/// Preparation or execution issue caused by an internal condition. Should not vote against.
	#[error("candidate validation: internal: {0}")]
	Internal(#[from] InternalValidationError),
	/// The execution deadline of allowed_ancestry_len + 1 has been reached. Jobs like backing have
	/// a limited time to execute. Once the deadline is reached, the current candidate cannot be
	/// backed, regardless of its validity.
	#[error("candidate validation: execution deadline has been reached.")]
	ExecutionDeadline,
}
|
||||
|
||||
/// A description of an error raised during executing a PVF that can be attributed to the
/// combination of the candidate [`pezkuwi_teyrchain_primitives::primitives::ValidationParams`]
/// and the PVF.
#[derive(thiserror::Error, Debug, Clone)]
pub enum InvalidCandidate {
	/// The candidate is reported to be invalid by the execution worker. The string contains the
	/// error message.
	#[error("invalid: worker reported: {0}")]
	WorkerReportedInvalid(String),
	/// PVF execution (compilation is not included) took more time than was allotted.
	#[error("invalid: hard timeout")]
	HardTimeout,
	/// Proof-of-validity failed to decompress correctly.
	#[error("invalid: PoV failed to decompress")]
	PoVDecompressionFailure,
}
|
||||
|
||||
/// Possibly transient issue that may resolve after retries.
#[derive(thiserror::Error, Debug, Clone)]
pub enum PossiblyInvalidError {
	/// The worker process (not the job) has died during validation of a candidate.
	///
	/// It's unlikely that this is caused by malicious code since workers spawn separate job
	/// processes, and those job processes are sandboxed. But, it is possible. We retry in this
	/// case, and if the error persists, we assume it's caused by the candidate and vote against.
	#[error("possibly invalid: ambiguous worker death")]
	AmbiguousWorkerDeath,
	/// The job process (not the worker) has died for one of the following reasons:
	///
	/// (a) A seccomp violation occurred, most likely due to an attempt by malicious code to
	/// execute arbitrary code. Note that there is no foolproof way to detect this if the operator
	/// has seccomp auditing disabled.
	///
	/// (b) The host machine ran out of free memory and the OOM killer started killing the
	/// processes, and in order to save the parent it will "sacrifice child" first.
	///
	/// (c) Some other reason, perhaps transient or perhaps caused by malicious code.
	///
	/// We cannot treat this as an internal error because malicious code may have caused this.
	#[error("possibly invalid: ambiguous job death: {0}")]
	AmbiguousJobDeath(String),
	/// An unexpected error occurred in the job process and we can't be sure whether the candidate
	/// is really invalid or some internal glitch occurred. Whenever we are unsure, we can never
	/// treat an error as internal as we would abstain from voting. This is bad because if the
	/// issue was due to the candidate, then all validators would abstain, stalling finality on the
	/// chain. So we will first retry the candidate, and if the issue persists we are forced to
	/// vote invalid.
	#[error("possibly invalid: job error: {0}")]
	JobError(String),
	/// Instantiation of the WASM module instance failed during an execution.
	/// Possibly related to local issues or dirty node update. May be retried with re-preparation.
	#[error("possibly invalid: runtime construction: {0}")]
	RuntimeConstruction(String),
	/// The artifact is corrupted, re-prepare the artifact and try again.
	#[error("possibly invalid: artifact is corrupted")]
	CorruptedArtifact,
}
|
||||
|
||||
impl From<PrepareError> for ValidationError {
|
||||
fn from(error: PrepareError) -> Self {
|
||||
// Here we need to classify the errors into two errors: deterministic and non-deterministic.
|
||||
// See [`PrepareError::is_deterministic`].
|
||||
if error.is_deterministic() {
|
||||
Self::Preparation(error)
|
||||
} else {
|
||||
Self::Internal(InternalValidationError::NonDeterministicPrepareError(error))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,26 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Execution part of the pipeline.
|
||||
//!
|
||||
//! The validation host [runs the queue][`start`] communicating with it by sending [`ToQueue`]
|
||||
//! messages. The queue will spawn workers in new processes. Those processes should jump to
|
||||
//! `pezkuwi_node_core_pvf_worker::execute_worker_entrypoint`.
|
||||
|
||||
mod queue;
|
||||
mod worker_interface;
|
||||
|
||||
pub use queue::{start, FromQueue, PendingExecutionRequest, ToQueue};
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,315 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Host interface to the execute worker.
|
||||
|
||||
use crate::{
|
||||
artifacts::ArtifactPathId,
|
||||
worker_interface::{
|
||||
clear_worker_dir_path, framed_recv, framed_send, spawn_with_program_path, IdleWorker,
|
||||
SpawnErr, WorkerDir, WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR,
|
||||
},
|
||||
LOG_TARGET,
|
||||
};
|
||||
use codec::{Decode, Encode};
|
||||
use futures::FutureExt;
|
||||
use futures_timer::Delay;
|
||||
use pezkuwi_node_core_pvf_common::{
|
||||
error::InternalValidationError,
|
||||
execute::{Handshake, WorkerError, WorkerResponse},
|
||||
worker_dir, ArtifactChecksum, SecurityStatus,
|
||||
};
|
||||
use pezkuwi_node_primitives::PoV;
|
||||
use pezkuwi_primitives::{ExecutorParams, PersistedValidationData};
|
||||
use std::{path::Path, sync::Arc, time::Duration};
|
||||
use tokio::{io, net::UnixStream};
|
||||
|
||||
/// Spawns a new worker with the given program path that acts as the worker and the spawn timeout.
|
||||
///
|
||||
/// Sends a handshake message to the worker as soon as it is spawned.
|
||||
pub async fn spawn(
|
||||
program_path: &Path,
|
||||
cache_path: &Path,
|
||||
executor_params: ExecutorParams,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<&str>,
|
||||
security_status: SecurityStatus,
|
||||
) -> Result<(IdleWorker, WorkerHandle), SpawnErr> {
|
||||
let mut extra_args = vec!["execute-worker"];
|
||||
if let Some(node_version) = node_version {
|
||||
extra_args.extend_from_slice(&["--node-impl-version", node_version]);
|
||||
}
|
||||
|
||||
let (mut idle_worker, worker_handle) = spawn_with_program_path(
|
||||
"execute",
|
||||
program_path,
|
||||
cache_path,
|
||||
&extra_args,
|
||||
spawn_timeout,
|
||||
security_status,
|
||||
)
|
||||
.await?;
|
||||
send_execute_handshake(&mut idle_worker.stream, Handshake { executor_params })
|
||||
.await
|
||||
.map_err(|error| {
|
||||
let err = SpawnErr::Handshake { err: error.to_string() };
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %idle_worker.pid,
|
||||
"failed to send a handshake to the spawned worker: {}",
|
||||
error
|
||||
);
|
||||
err
|
||||
})?;
|
||||
Ok((idle_worker, worker_handle))
|
||||
}
|
||||
|
||||
/// Outcome of PVF execution.
///
/// PVF execution completed and the result is returned. The worker is ready for
/// another job.
pub struct Response {
	/// The response (valid/invalid) from the worker.
	pub worker_response: WorkerResponse,
	/// Returning the idle worker token means the worker can be reused.
	pub idle_worker: IdleWorker,
}
|
||||
/// Errors for which the idle worker token is not returned, meaning the worker must be
/// terminated.
///
/// NOTE: Errors related to the preparation process are not expected to be encountered by the
/// execution workers.
#[derive(thiserror::Error, Debug)]
pub enum Error {
	/// The execution time exceeded the hard limit. The worker is terminated.
	#[error("The communication with the worker exceeded the hard limit")]
	HardTimeout,
	/// An I/O error happened during communication with the worker. This may mean that the worker
	/// process already died. The token is not returned in any case.
	#[error("An I/O error happened during communication with the worker: {0}")]
	CommunicationErr(#[from] io::Error),
	/// The worker reported an error (can be from itself or from the job). The worker should not be
	/// reused.
	#[error("The worker reported an error: {0}")]
	WorkerError(#[from] WorkerError),

	/// An internal error happened during the validation. Such an error is most likely related to
	/// some transient glitch.
	///
	/// Should only ever be used for errors independent of the candidate and PVF. Therefore it may
	/// be a problem with the worker, so we terminate it.
	#[error("An internal error occurred: {0}")]
	InternalError(#[from] InternalValidationError),
}
|
||||
|
||||
/// Given the idle token of a worker and parameters of work, communicates with the worker and
/// returns the outcome.
///
/// Sends the execute request over the worker's socket, then races the worker's response against a
/// lenient wall-clock timeout. The idle worker token is only handed back on success.
///
/// NOTE: Not returning the idle worker token in `Outcome` will trigger the child process being
/// killed, if it's still alive.
pub async fn start_work(
	worker: IdleWorker,
	artifact: ArtifactPathId,
	execution_timeout: Duration,
	pvd: Arc<PersistedValidationData>,
	pov: Arc<PoV>,
) -> Result<Response, Error> {
	let IdleWorker { mut stream, pid, worker_dir } = worker;

	gum::debug!(
		target: LOG_TARGET,
		worker_pid = %pid,
		?worker_dir,
		validation_code_hash = ?artifact.id.code_hash,
		"starting execute for {}",
		artifact.path.display(),
	);

	// Hard-links the artifact into the worker dir and guarantees the dir is cleaned up afterwards,
	// regardless of how the job below concludes.
	with_worker_dir_setup(worker_dir, pid, &artifact.path, |worker_dir| async move {
		send_request(&mut stream, pvd, pov, execution_timeout, artifact.checksum)
			.await
			.map_err(|error| {
				gum::warn!(
					target: LOG_TARGET,
					worker_pid = %pid,
					validation_code_hash = ?artifact.id.code_hash,
					"failed to send an execute request: {}",
					error,
				);
				Error::InternalError(InternalValidationError::HostCommunication(error.to_string()))
			})?;

		// We use a generous timeout here. This is in addition to the one in the child process, in
		// case the child stalls. We have a wall clock timeout here in the host, but a CPU timeout
		// in the child. We want to use CPU time because it varies less than wall clock time under
		// load, but the CPU resources of the child can only be measured from the parent after the
		// child process terminates.
		let timeout = execution_timeout * JOB_TIMEOUT_WALL_CLOCK_FACTOR;
		let worker_result = futures::select! {
			worker_result = recv_result(&mut stream).fuse() => {
				match worker_result {
					// Response bytes arrived; `handle_result` still enforces the CPU-time limit.
					Ok(result) =>
						handle_result(
							result,
							pid,
							execution_timeout,
						)
						.await,
					Err(error) => {
						gum::warn!(
							target: LOG_TARGET,
							worker_pid = %pid,
							validation_code_hash = ?artifact.id.code_hash,
							"failed to recv an execute result: {}",
							error,
						);

						return Err(Error::CommunicationErr(error))
					},
				}
			},
			// Lenient wall-clock timeout fired before any response: the child likely stalled.
			_ = Delay::new(timeout).fuse() => {
				gum::warn!(
					target: LOG_TARGET,
					worker_pid = %pid,
					validation_code_hash = ?artifact.id.code_hash,
					"execution worker exceeded lenient timeout for execution, child worker likely stalled",
				);
				return Err(Error::HardTimeout)
			},
		};

		match worker_result {
			// Only a successful response returns the idle token, allowing the worker to be reused.
			Ok(worker_response) => Ok(Response {
				worker_response,
				idle_worker: IdleWorker { stream, pid, worker_dir },
			}),
			Err(worker_error) => Err(worker_error.into()),
		}
	})
	.await
}
|
||||
|
||||
/// Handles the case where we successfully received response bytes on the host from the child.
|
||||
///
|
||||
/// Here we know the artifact exists, but is still located in a temporary file which will be cleared
|
||||
/// by [`with_worker_dir_setup`].
|
||||
async fn handle_result(
|
||||
worker_result: Result<WorkerResponse, WorkerError>,
|
||||
worker_pid: u32,
|
||||
execution_timeout: Duration,
|
||||
) -> Result<WorkerResponse, WorkerError> {
|
||||
if let Ok(WorkerResponse { duration, .. }) = worker_result {
|
||||
if duration > execution_timeout {
|
||||
// The job didn't complete within the timeout.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid,
|
||||
"execute job took {}ms cpu time, exceeded execution timeout {}ms.",
|
||||
duration.as_millis(),
|
||||
execution_timeout.as_millis(),
|
||||
);
|
||||
|
||||
// Return a timeout error.
|
||||
return Err(WorkerError::JobTimedOut);
|
||||
}
|
||||
}
|
||||
|
||||
worker_result
|
||||
}
|
||||
|
||||
/// Create a temporary file for an artifact in the worker cache, execute the given future/closure
|
||||
/// passing the file path in, and clean up the worker cache.
|
||||
///
|
||||
/// Failure to clean up the worker cache results in an error - leaving any files here could be a
|
||||
/// security issue, and we should shut down the worker. This should be very rare.
|
||||
async fn with_worker_dir_setup<F, Fut>(
|
||||
worker_dir: WorkerDir,
|
||||
pid: u32,
|
||||
artifact_path: &Path,
|
||||
f: F,
|
||||
) -> Result<Response, Error>
|
||||
where
|
||||
Fut: futures::Future<Output = Result<Response, Error>>,
|
||||
F: FnOnce(WorkerDir) -> Fut,
|
||||
{
|
||||
// Cheaply create a hard link to the artifact. The artifact is always at a known location in the
|
||||
// worker cache, and the child can't access any other artifacts or gain any information from the
|
||||
// original filename.
|
||||
let link_path = worker_dir::execute_artifact(worker_dir.path());
|
||||
if let Err(err) = tokio::fs::hard_link(artifact_path, link_path).await {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
?worker_dir,
|
||||
"failed to clear worker cache after the job: {}",
|
||||
err,
|
||||
);
|
||||
return Err(InternalValidationError::CouldNotCreateLink(format!("{:?}", err)).into());
|
||||
}
|
||||
|
||||
let worker_dir_path = worker_dir.path().to_owned();
|
||||
let result = f(worker_dir).await;
|
||||
|
||||
// Try to clear the worker dir.
|
||||
if let Err(err) = clear_worker_dir_path(&worker_dir_path) {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
?worker_dir_path,
|
||||
"failed to clear worker cache after the job: {:?}",
|
||||
err,
|
||||
);
|
||||
return Err(InternalValidationError::CouldNotClearWorkerDir {
|
||||
err: format!("{:?}", err),
|
||||
path: worker_dir_path.to_str().map(String::from),
|
||||
}
|
||||
.into());
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Sends a handshake with information specific to the execute worker.
|
||||
async fn send_execute_handshake(stream: &mut UnixStream, handshake: Handshake) -> io::Result<()> {
|
||||
framed_send(stream, &handshake.encode()).await
|
||||
}
|
||||
|
||||
async fn send_request(
|
||||
stream: &mut UnixStream,
|
||||
pvd: Arc<PersistedValidationData>,
|
||||
pov: Arc<PoV>,
|
||||
execution_timeout: Duration,
|
||||
artifact_checksum: ArtifactChecksum,
|
||||
) -> io::Result<()> {
|
||||
let request = pezkuwi_node_core_pvf_common::execute::ExecuteRequest {
|
||||
pvd: (*pvd).clone(),
|
||||
pov: (*pov).clone(),
|
||||
execution_timeout,
|
||||
artifact_checksum,
|
||||
};
|
||||
framed_send(stream, &request.encode()).await
|
||||
}
|
||||
|
||||
async fn recv_result(stream: &mut UnixStream) -> io::Result<Result<WorkerResponse, WorkerError>> {
|
||||
let result_bytes = framed_recv(stream).await?;
|
||||
Result::<WorkerResponse, WorkerError>::decode(&mut result_bytes.as_slice()).map_err(|e| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("execute pvf recv_result: decode error: {:?}", e),
|
||||
)
|
||||
})
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,157 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#![warn(missing_docs)]
|
||||
|
||||
//! The PVF validation host. Responsible for coordinating preparation and execution of PVFs.
|
||||
//!
|
||||
//! For more background, refer to the Implementer's Guide: [PVF
|
||||
//! Pre-checking](https://docs.pezkuwichain.io/sdk/book/pvf-prechecking.html), [Candidate
|
||||
//! Validation](https://docs.pezkuwichain.io/sdk/book/node/utility/candidate-validation.html)
|
||||
//! and [PVF Host and Workers](https://docs.pezkuwichain.io/sdk/book/node/utility/pvf-host-and-workers.html).
|
||||
//!
|
||||
//!
|
||||
//! # Entrypoint
|
||||
//!
|
||||
//! This crate provides a simple API. You first [`start`] the validation host, which gives you the
|
||||
//! [handle][`ValidationHost`] and the future you need to poll.
|
||||
//!
|
||||
//! Then using the handle the client can send three types of requests:
|
||||
//!
|
||||
//! (a) PVF pre-checking. This takes the `Pvf` code and tries to prepare it (verify and
|
||||
//! compile) in order to pre-check its validity.
|
||||
//!
|
||||
//! (b) PVF execution. This accepts the PVF
|
||||
//! [`params`][`pezkuwi_teyrchain_primitives::primitives::ValidationParams`] and the `Pvf`
|
||||
//! code, prepares (verifies and compiles) the code, and then executes PVF with the `params`.
|
||||
//!
|
||||
//! (c) Heads up. This request allows to signal that the given PVF may be needed soon and that it
|
||||
//! should be prepared for execution.
|
||||
//!
|
||||
//! The preparation results are cached for some time after they are either used or signaled in
//! heads up. All requests that depend on preparation of the same PVF are bundled together and
//! will be executed as soon as the artifact is prepared.
|
||||
//!
|
||||
//! # Priority
|
||||
//!
|
||||
//! PVF execution requests can specify the [priority][`Priority`] with which the given request
|
||||
//! should be handled. Different priority levels have different effects. This is discussed below.
|
||||
//!
|
||||
//! Preparation started by a heads up signal always starts with the background priority. If there
|
||||
//! is already a request for that PVF preparation under way the priority is inherited. If after
|
||||
//! heads up, a new PVF execution request comes in with a higher priority, then the original task's
|
||||
//! priority will be adjusted to match the new one if it's larger.
|
||||
//!
|
||||
//! Priority can never go down, only up.
|
||||
//!
|
||||
//! # Under the hood
|
||||
//!
|
||||
//! ## The flow
|
||||
//!
|
||||
//! Under the hood, the validation host is built using a bunch of communicating processes, not
|
||||
//! dissimilar to actors. Each of such "processes" is a future task that contains an event loop that
|
||||
//! processes incoming messages, potentially delegating sub-tasks to other "processes".
|
||||
//!
|
||||
//! Two of these processes are queues. The first one is for preparation jobs and the second one is
|
||||
//! for execution. Both of the queues are backed by separate pools of workers of different kind.
|
||||
//!
|
||||
//! Preparation workers handle preparation requests by prevalidating and instrumenting PVF wasm
|
||||
//! code, and then passing it into the compiler, to prepare the artifact.
|
||||
//!
|
||||
//! ## Artifacts
|
||||
//!
|
||||
//! An artifact is the final product of preparation. If the preparation succeeded, then the artifact
|
||||
//! will contain the compiled code usable for quick execution by a worker later on. If the
|
||||
//! preparation failed, then no artifact is created.
|
||||
//!
|
||||
//! The artifact is saved on disk and is also tracked by an in memory table. This in memory table
|
||||
//! doesn't contain the artifact contents though, only a flag for the state of the given artifact
|
||||
//! and some associated data. If the artifact failed to process, this also includes the error.
|
||||
//!
|
||||
//! A pruning task will run at a fixed interval of time. This task will remove all artifacts that
|
||||
//! weren't used or received a heads up signal for a while.
|
||||
//!
|
||||
//! ## Execution
|
||||
//!
|
||||
//! The execute workers will be fed by the requests from the execution queue, which is basically a
|
||||
//! combination of a path to the compiled artifact and the
|
||||
//! [`params`][`pezkuwi_teyrchain_primitives::primitives::ValidationParams`].
|
||||
|
||||
mod artifacts;
|
||||
mod error;
|
||||
mod execute;
|
||||
mod host;
|
||||
mod metrics;
|
||||
mod prepare;
|
||||
mod priority;
|
||||
#[cfg(target_os = "linux")]
|
||||
mod security;
|
||||
mod worker_interface;
|
||||
|
||||
#[cfg(feature = "test-utils")]
|
||||
pub mod testing;
|
||||
|
||||
pub use error::{InvalidCandidate, PossiblyInvalidError, ValidationError};
|
||||
pub use host::{
|
||||
start, Config, ValidationHost, EXECUTE_BINARY_NAME, HOST_MESSAGE_QUEUE_SIZE,
|
||||
PREPARE_BINARY_NAME,
|
||||
};
|
||||
pub use metrics::Metrics;
|
||||
pub use priority::Priority;
|
||||
pub use worker_interface::{framed_recv, framed_send, JOB_TIMEOUT_WALL_CLOCK_FACTOR};
|
||||
|
||||
// Re-export some common types.
|
||||
pub use pezkuwi_node_core_pvf_common::{
|
||||
error::{InternalValidationError, PrepareError},
|
||||
prepare::{PrepareJobKind, PrepareStats},
|
||||
pvf::PvfPrepData,
|
||||
SecurityStatus,
|
||||
};
|
||||
|
||||
use std::{path::Path, process::Command};
|
||||
|
||||
/// The log target for this crate, used by the `gum` logging macros throughout the PVF host.
pub const LOG_TARGET: &str = "teyrchain::pvf";
|
||||
|
||||
/// Utility to get the version of a worker, used for version checks.
///
/// Runs the binary at `worker_path` with `--version` and returns its trimmed stdout.
///
/// The worker's existence at the given path must be checked separately.
///
/// # Errors
///
/// Returns an error if the binary could not be executed, or if its version output is not valid
/// UTF-8. A worker binary is an external process, so malformed output must not crash the host.
pub fn get_worker_version(worker_path: &Path) -> std::io::Result<String> {
	let worker_version = Command::new(worker_path).args(["--version"]).output()?.stdout;
	let version = std::str::from_utf8(&worker_version).map_err(|e| {
		std::io::Error::new(
			std::io::ErrorKind::InvalidData,
			format!("worker version output is not valid UTF-8: {}", e),
		)
	})?;
	Ok(version.trim().to_string())
}
|
||||
|
||||
/// Trying to run securely and some mandatory errors occurred.
pub(crate) const SECURE_MODE_ERROR: &str =
	"🚨 Your system cannot securely run a validator. \
	\nRunning validation of malicious PVF code has a higher risk of compromising this machine.";

/// Some errors occurred when running insecurely, or some optional errors occurred when running
/// securely.
pub(crate) const SECURE_MODE_WARNING: &str = "🚨 Some security issues have been detected. \
	\nRunning validation of malicious PVF code has a higher risk of compromising this machine.";

/// Message to be printed only when running securely and mandatory errors occurred.
pub(crate) const IGNORE_SECURE_MODE_TIP: &str =
	"\nYou can ignore this error with the `--insecure-validator-i-know-what-i-do` \
	command line argument if you understand and accept the risks of running insecurely. \
	With this flag, security features are enabled on a best-effort basis, but not mandatory. \
	\nMore information: https://docs.pezkuwichain.io/infrastructure/running-a-validator/operational-tasks/general-management/#secure-your-validator";

/// Only Linux supports security features.
#[cfg(not(target_os = "linux"))]
pub(crate) const SECURE_LINUX_NOTE: &str = "\nSecure mode is enabled only for Linux \
	\nand a full secure mode is enabled only for Linux x86-64.";
|
||||
@@ -0,0 +1,436 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Prometheus metrics related to the validation host.
|
||||
|
||||
use pezkuwi_node_core_pvf_common::prepare::MemoryStats;
|
||||
use pezkuwi_node_metrics::metrics::{self, prometheus};
|
||||
use pezkuwi_node_subsystem::messages::PvfExecKind;
|
||||
|
||||
/// Validation host metrics.
///
/// The inner value is `None` when metrics were never registered (the derived `Default` yields
/// `None`), in which case every recording method is a no-op.
#[derive(Default, Clone)]
pub struct Metrics(Option<MetricsInner>);
|
||||
|
||||
impl Metrics {
|
||||
/// Returns a handle to submit prepare workers metrics.
|
||||
pub(crate) fn prepare_worker(&'_ self) -> WorkerRelatedMetrics<'_> {
|
||||
WorkerRelatedMetrics { metrics: self, flavor: WorkerFlavor::Prepare }
|
||||
}
|
||||
|
||||
/// Returns a handle to submit execute workers metrics.
|
||||
pub(crate) fn execute_worker(&'_ self) -> WorkerRelatedMetrics<'_> {
|
||||
WorkerRelatedMetrics { metrics: self, flavor: WorkerFlavor::Execute }
|
||||
}
|
||||
|
||||
/// When preparation pipeline had a new item enqueued.
|
||||
pub(crate) fn prepare_enqueued(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.prepare_enqueued.inc();
|
||||
}
|
||||
}
|
||||
|
||||
/// When preparation pipeline concluded working on an item.
|
||||
pub(crate) fn prepare_concluded(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.prepare_concluded.inc();
|
||||
}
|
||||
}
|
||||
|
||||
/// When execution pipeline had a new item enqueued.
|
||||
pub(crate) fn execute_enqueued(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.execute_enqueued.inc();
|
||||
}
|
||||
}
|
||||
|
||||
/// When execution pipeline finished executing a request.
|
||||
pub(crate) fn execute_finished(&self) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.execute_finished.inc();
|
||||
}
|
||||
}
|
||||
|
||||
/// Time between sending preparation request to a worker to having the response.
|
||||
pub(crate) fn time_preparation(
|
||||
&self,
|
||||
) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| metrics.preparation_time.start_timer())
|
||||
}
|
||||
|
||||
/// Time between sending execution request to a worker to having the response.
|
||||
pub(crate) fn time_execution(&self) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
|
||||
self.0.as_ref().map(|metrics| metrics.execution_time.start_timer())
|
||||
}
|
||||
|
||||
pub(crate) fn observe_execution_queued_time(&self, queued_for_millis: u32) {
|
||||
self.0.as_ref().map(|metrics| {
|
||||
metrics.execution_queued_time.observe(queued_for_millis as f64 / 1000 as f64)
|
||||
});
|
||||
}
|
||||
|
||||
/// Observe memory stats for preparation.
|
||||
#[allow(unused_variables)]
|
||||
pub(crate) fn observe_preparation_memory_metrics(&self, memory_stats: MemoryStats) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
#[cfg(target_os = "linux")]
|
||||
if let Some(max_rss) = memory_stats.max_rss {
|
||||
metrics.preparation_max_rss.observe(max_rss as f64);
|
||||
}
|
||||
|
||||
#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
|
||||
if let Some(tracker_stats) = memory_stats.memory_tracker_stats {
|
||||
// We convert these stats from B to KB to match the unit of `ru_maxrss` from
|
||||
// `getrusage`.
|
||||
let max_resident_kb = (tracker_stats.resident / 1024) as f64;
|
||||
let max_allocated_kb = (tracker_stats.allocated / 1024) as f64;
|
||||
|
||||
metrics.preparation_max_resident.observe(max_resident_kb);
|
||||
metrics.preparation_max_allocated.observe(max_allocated_kb);
|
||||
}
|
||||
|
||||
metrics
|
||||
.preparation_peak_tracked_allocation
|
||||
.observe((memory_stats.peak_tracked_alloc / 1024) as f64);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn observe_code_size(&self, code_size: usize) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.code_size.observe(code_size as f64);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn observe_pov_size(&self, pov_size: usize, compressed: bool) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics
|
||||
.pov_size
|
||||
.with_label_values(&[if compressed { "true" } else { "false" }])
|
||||
.observe(pov_size as f64);
|
||||
}
|
||||
}
|
||||
|
||||
/// When preparation pipeline concluded working on an item.
|
||||
pub(crate) fn on_execute_kind(&self, kind: PvfExecKind) {
|
||||
if let Some(metrics) = &self.0 {
|
||||
metrics.exec_kind_selected.with_label_values(&[kind.as_str()]).inc();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The actual Prometheus collectors backing [`Metrics`]; created by `try_register`.
#[derive(Clone)]
struct MetricsInner {
	// Worker lifecycle counters, labelled by worker flavor ("prepare"/"execute").
	worker_spawning: prometheus::CounterVec<prometheus::U64>,
	worker_spawned: prometheus::CounterVec<prometheus::U64>,
	worker_retired: prometheus::CounterVec<prometheus::U64>,
	// Pipeline throughput counters for the preparation and execution queues.
	prepare_enqueued: prometheus::Counter<prometheus::U64>,
	prepare_concluded: prometheus::Counter<prometheus::U64>,
	execute_enqueued: prometheus::Counter<prometheus::U64>,
	execute_finished: prometheus::Counter<prometheus::U64>,
	// Time spent preparing/executing PVFs, in seconds.
	preparation_time: prometheus::Histogram,
	execution_time: prometheus::Histogram,
	// Time an execution job spent in the queue before being assigned, in seconds.
	execution_queued_time: prometheus::Histogram,
	// ru_maxrss (maximum resident set size) observed for preparation, in kilobytes.
	#[cfg(target_os = "linux")]
	preparation_max_rss: prometheus::Histogram,
	// Max. allocated memory, tracked by Jemallocator, polling-based
	#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
	preparation_max_allocated: prometheus::Histogram,
	// Max. resident memory, tracked by Jemallocator, polling-based
	#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
	preparation_max_resident: prometheus::Histogram,
	// Peak allocation value, tracked by tracking-allocator
	preparation_peak_tracked_allocation: prometheus::Histogram,
	// PoV size, labelled by compressed ("true"/"false").
	pov_size: prometheus::HistogramVec,
	// Size of the decompressed WASM validation blob.
	code_size: prometheus::Histogram,
	// Count of selected execute kinds, labelled by priority.
	exec_kind_selected: prometheus::CounterVec<prometheus::U64>,
}
|
||||
|
||||
impl metrics::Metrics for Metrics {
	// Registers every collector against `registry`; the first registration failure
	// short-circuits via `?` and the whole `Metrics` stays disabled.
	fn try_register(registry: &prometheus::Registry) -> Result<Self, prometheus::PrometheusError> {
		let inner = MetricsInner {
			worker_spawning: prometheus::register(
				prometheus::CounterVec::new(
					prometheus::Opts::new(
						"pezkuwi_pvf_worker_spawning",
						"The total number of workers began to spawn",
					),
					&["flavor"],
				)?,
				registry,
			)?,
			worker_spawned: prometheus::register(
				prometheus::CounterVec::new(
					prometheus::Opts::new(
						"pezkuwi_pvf_worker_spawned",
						"The total number of workers spawned successfully",
					),
					&["flavor"],
				)?,
				registry,
			)?,
			worker_retired: prometheus::register(
				prometheus::CounterVec::new(
					prometheus::Opts::new(
						"pezkuwi_pvf_worker_retired",
						"The total number of workers retired, either killed by the host or died on duty",
					),
					&["flavor"],
				)?,
				registry,
			)?,
			prepare_enqueued: prometheus::register(
				prometheus::Counter::new(
					"pezkuwi_pvf_prepare_enqueued",
					"The total number of jobs enqueued into the preparation pipeline"
				)?,
				registry,
			)?,
			prepare_concluded: prometheus::register(
				prometheus::Counter::new(
					"pezkuwi_pvf_prepare_concluded",
					"The total number of jobs concluded in the preparation pipeline"
				)?,
				registry,
			)?,
			execute_enqueued: prometheus::register(
				prometheus::Counter::new(
					"pezkuwi_pvf_execute_enqueued",
					"The total number of jobs enqueued into the execution pipeline"
				)?,
				registry,
			)?,
			execute_finished: prometheus::register(
				prometheus::Counter::new(
					"pezkuwi_pvf_execute_finished",
					"The total number of jobs done in the execution pipeline"
				)?,
				registry,
			)?,
			preparation_time: prometheus::register(
				prometheus::Histogram::with_opts(
					prometheus::HistogramOpts::new(
						"pezkuwi_pvf_preparation_time",
						"Time spent in preparing PVF artifacts in seconds",
					)
					.buckets(vec![
						// This is synchronized with the `DEFAULT_PRECHECK_PREPARATION_TIMEOUT=60s`
						// and `DEFAULT_LENIENT_PREPARATION_TIMEOUT=360s` constants found in
						// node/core/candidate-validation/src/lib.rs
						0.1, 0.5, 1.0, 2.0, 3.0, 10.0, 20.0, 30.0, 60.0, 120.0, 240.0, 360.0,
						480.0,
					]),
				)?,
				registry,
			)?,
			execution_time: prometheus::register(
				prometheus::Histogram::with_opts(
					prometheus::HistogramOpts::new(
						"pezkuwi_pvf_execution_time",
						"Time spent in executing PVFs",
					).buckets(vec![
						// This is synchronized with `DEFAULT_APPROVAL_EXECUTION_TIMEOUT` and
						// `DEFAULT_BACKING_EXECUTION_TIMEOUT` constants in
						// node/core/candidate-validation/src/lib.rs
						0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 8.0,
						10.0, 12.0,
					]),
				)?,
				registry,
			)?,
			execution_queued_time: prometheus::register(
				prometheus::Histogram::with_opts(
					prometheus::HistogramOpts::new(
						"pezkuwi_pvf_execution_queued_time",
						"Time spent in queue waiting for PVFs execution job to be assigned",
					).buckets(vec![
						0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 12.0,
						24.0, 48.0,
					]),
				)?,
				registry,
			)?,
			#[cfg(target_os = "linux")]
			preparation_max_rss: prometheus::register(
				prometheus::Histogram::with_opts(
					prometheus::HistogramOpts::new(
						"pezkuwi_pvf_preparation_max_rss",
						"ru_maxrss (maximum resident set size) observed for preparation (in kilobytes)",
					).buckets(
						prometheus::exponential_buckets(8192.0, 2.0, 10)
							.expect("arguments are always valid; qed"),
					),
				)?,
				registry,
			)?,
			#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
			preparation_max_resident: prometheus::register(
				prometheus::Histogram::with_opts(
					prometheus::HistogramOpts::new(
						"pezkuwi_pvf_preparation_max_resident",
						"max resident memory observed for preparation (in kilobytes)",
					).buckets(
						prometheus::exponential_buckets(8192.0, 2.0, 10)
							.expect("arguments are always valid; qed"),
					),
				)?,
				registry,
			)?,
			#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
			preparation_max_allocated: prometheus::register(
				prometheus::Histogram::with_opts(
					prometheus::HistogramOpts::new(
						"pezkuwi_pvf_preparation_max_allocated",
						"max allocated memory observed for preparation (in kilobytes)",
					).buckets(
						prometheus::exponential_buckets(8192.0, 2.0, 10)
							.expect("arguments are always valid; qed"),
					),
				)?,
				registry,
			)?,
			preparation_peak_tracked_allocation: prometheus::register(
				prometheus::Histogram::with_opts(
					prometheus::HistogramOpts::new(
						"pezkuwi_pvf_preparation_peak_tracked_allocation",
						"peak allocation observed for preparation (in kilobytes)",
					).buckets(
						prometheus::exponential_buckets(8192.0, 2.0, 10)
							.expect("arguments are always valid; qed"),
					),
				)?,
				registry,
			)?,
			// The following metrics were moved here from the candidate validation subsystem.
			// Names are kept to avoid breaking dashboards and stuff.
			pov_size: prometheus::register(
				prometheus::HistogramVec::new(
					prometheus::HistogramOpts::new(
						"pezkuwi_teyrchain_candidate_validation_pov_size",
						"The compressed and decompressed size of the proof of validity of a candidate",
					)
					.buckets(
						prometheus::exponential_buckets(16384.0, 2.0, 10)
							.expect("arguments are always valid; qed"),
					),
					&["compressed"],
				)?,
				registry,
			)?,
			code_size: prometheus::register(
				prometheus::Histogram::with_opts(
					prometheus::HistogramOpts::new(
						"pezkuwi_teyrchain_candidate_validation_code_size",
						"The size of the decompressed WASM validation blob used for checking a candidate",
					)
					.buckets(
						prometheus::exponential_buckets(16384.0, 2.0, 10)
							.expect("arguments are always valid; qed"),
					),
				)?,
				registry,
			)?,
			exec_kind_selected: prometheus::register(
				prometheus::CounterVec::new(
					prometheus::Opts::new(
						"pezkuwi_pvf_exec_kind_selected",
						"The total number of selected execute kinds",
					),
					&["priority"],
				)?,
				registry,
			)?,
		};
		Ok(Metrics(Some(inner)))
	}
}
|
||||
|
||||
/// The two kinds of workers the host runs; used to label worker-related metrics.
enum WorkerFlavor {
	Prepare,
	Execute,
}

impl WorkerFlavor {
	/// The Prometheus label value corresponding to this worker kind.
	fn as_label(&self) -> &'static str {
		match self {
			Self::Prepare => "prepare",
			Self::Execute => "execute",
		}
	}
}
|
||||
|
||||
/// A handle for submitting metrics attributed to one worker flavor.
///
/// Obtained via [`Metrics::prepare_worker`] or [`Metrics::execute_worker`].
pub(crate) struct WorkerRelatedMetrics<'a> {
	// The shared metrics table; recording is a no-op if metrics were never registered.
	metrics: &'a Metrics,
	// Which worker kind these events are labelled with ("prepare"/"execute").
	flavor: WorkerFlavor,
}
|
||||
|
||||
impl<'a> WorkerRelatedMetrics<'a> {
|
||||
/// When the spawning of a worker started.
|
||||
pub(crate) fn on_begin_spawn(&self) {
|
||||
if let Some(metrics) = &self.metrics.0 {
|
||||
metrics.worker_spawning.with_label_values(&[self.flavor.as_label()]).inc();
|
||||
}
|
||||
}
|
||||
|
||||
/// When the worker successfully spawned.
|
||||
pub(crate) fn on_spawned(&self) {
|
||||
if let Some(metrics) = &self.metrics.0 {
|
||||
metrics.worker_spawned.with_label_values(&[self.flavor.as_label()]).inc();
|
||||
}
|
||||
}
|
||||
|
||||
/// When the worker was killed or died.
|
||||
pub(crate) fn on_retired(&self) {
|
||||
if let Some(metrics) = &self.metrics.0 {
|
||||
metrics.worker_retired.with_label_values(&[self.flavor.as_label()]).inc();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,30 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Preparation part of pipeline
|
||||
//!
|
||||
//! The validation host spins up two processes: the queue (by running [`start_queue`]) and the pool
|
||||
//! (by running [`start_pool`]).
|
||||
//!
|
||||
//! The pool will spawn workers in new processes, and those processes should pass control to
//! `pezkuwi_node_core_pvf_worker::prepare_worker_entrypoint`.
|
||||
|
||||
mod pool;
|
||||
mod queue;
|
||||
mod worker_interface;
|
||||
|
||||
pub use pool::start as start_pool;
|
||||
pub use queue::{start as start_queue, FromQueue, ToQueue};
|
||||
@@ -0,0 +1,520 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use super::worker_interface::{self, Outcome};
|
||||
use crate::{
|
||||
metrics::Metrics,
|
||||
worker_interface::{IdleWorker, WorkerHandle},
|
||||
LOG_TARGET,
|
||||
};
|
||||
use always_assert::never;
|
||||
use futures::{
|
||||
channel::mpsc, future::BoxFuture, stream::FuturesUnordered, Future, FutureExt, StreamExt,
|
||||
};
|
||||
use pezkuwi_node_core_pvf_common::{
|
||||
error::{PrepareError, PrepareResult},
|
||||
pvf::PvfPrepData,
|
||||
SecurityStatus,
|
||||
};
|
||||
use slotmap::HopSlotMap;
|
||||
use std::{
|
||||
fmt,
|
||||
path::{Path, PathBuf},
|
||||
task::Poll,
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
slotmap::new_key_type! { pub struct Worker; }
|
||||
|
||||
/// Messages that the pool handles.
#[derive(Debug, PartialEq, Eq)]
pub enum ToPool {
	/// Request a new worker to spawn.
	///
	/// This request won't fail in case the worker cannot be created. Instead, we consider
	/// the failures transient and we try to spawn a worker after a delay.
	///
	/// [`FromPool::Spawned`] will be returned as soon as the worker is spawned.
	///
	/// The client should anticipate a [`FromPool::Rip`] message, in case the spawned worker was
	/// stopped for some reason.
	Spawn,

	/// Kill the given worker. No-op if the given worker is not running.
	///
	/// [`FromPool::Rip`] won't be sent in this case. However, the client should be prepared to
	/// receive [`FromPool::Rip`] nonetheless, since the worker may have been ripped before
	/// this message is processed.
	Kill(Worker),

	/// Request the given worker to start working on the given code.
	///
	/// Once the job either succeeded or failed, a [`FromPool::Concluded`] message will be sent
	/// back. It's also possible that the worker dies before handling the message in which case
	/// [`FromPool::Rip`] will be sent back.
	///
	/// In either case, the worker is considered busy and no further `StartWork` messages should be
	/// sent until either `Concluded` or `Rip` message is received.
	StartWork { worker: Worker, pvf: PvfPrepData, cache_path: PathBuf },
}
|
||||
|
||||
/// A message sent from pool to its client.
#[derive(Debug)]
pub enum FromPool {
	/// The given worker was just spawned and is ready to be used.
	Spawned(Worker),

	/// The given worker either succeeded or failed the given job.
	Concluded {
		/// A key for retrieving the worker data from the pool.
		worker: Worker,
		/// Indicates whether the worker process was killed (and removed from the pool).
		rip: bool,
		/// [`Ok`] indicates that compiled artifact is successfully stored on disk.
		/// Otherwise, an [error](PrepareError) is supplied.
		result: PrepareResult,
	},

	/// The given worker ceased to exist.
	Rip(Worker),
}
|
||||
|
||||
/// Pool-side bookkeeping for a single spawned worker process.
struct WorkerData {
	/// Token needed to dispatch work to the worker. `None` while the worker is busy:
	/// the token is loaned out to the in-flight work task and put back on conclusion.
	idle: Option<IdleWorker>,
	/// Handle to the worker process; dropping it leads to killing the process
	/// (see `attempt_retire`).
	handle: WorkerHandle,
}

impl fmt::Debug for WorkerData {
	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
		write!(f, "WorkerData(pid={})", self.handle.id())
	}
}
|
||||
|
||||
/// Internal events produced by the asynchronous tasks the pool pushes onto [`Mux`].
enum PoolEvent {
	/// A worker process was successfully spawned.
	Spawn(IdleWorker, WorkerHandle),
	/// A work assignment for the given worker finished with the given outcome.
	StartWork(Worker, Outcome),
}

/// The set of in-flight pool tasks, polled as a single stream by the event loop.
type Mux = FuturesUnordered<BoxFuture<'static, PoolEvent>>;
|
||||
|
||||
/// The state of the prepare worker pool: session configuration, channel endpoints, and the
/// registry of spawned workers.
struct Pool {
	// Some variables related to the current session.
	program_path: PathBuf,
	cache_path: PathBuf,
	spawn_timeout: Duration,
	node_version: Option<String>,
	security_status: SecurityStatus,

	/// Requests from the pool's client (the queue).
	to_pool: mpsc::Receiver<ToPool>,
	/// Replies back to the client.
	from_pool: mpsc::UnboundedSender<FromPool>,
	/// Registry of all live workers, keyed by [`Worker`].
	spawned: HopSlotMap<Worker, WorkerData>,
	/// The in-flight spawn/work tasks.
	mux: Mux,

	metrics: Metrics,
}

/// A fatal error that warrants stopping the event loop of the pool.
struct Fatal;
|
||||
|
||||
/// The main event loop of the pool.
///
/// Terminates when a [`Fatal`] error occurs: either the incoming `to_pool` channel is closed
/// (the client hung up) or sending on `from_pool` fails.
async fn run(
	Pool {
		program_path,
		cache_path,
		spawn_timeout,
		node_version,
		security_status,
		to_pool,
		mut from_pool,
		mut spawned,
		mut mux,
		metrics,
	}: Pool,
) {
	macro_rules! break_if_fatal {
		($expr:expr) => {
			match $expr {
				Err(Fatal) => break,
				Ok(v) => v,
			}
		};
	}

	let mut to_pool = to_pool.fuse();

	loop {
		futures::select! {
			to_pool = to_pool.next() => {
				// `None` means the client closed the channel: treat it as fatal and shut down.
				let to_pool = break_if_fatal!(to_pool.ok_or(Fatal));
				handle_to_pool(
					&metrics,
					&program_path,
					&cache_path,
					spawn_timeout,
					node_version.clone(),
					security_status.clone(),
					&mut spawned,
					&mut mux,
					to_pool,
				)
			}
			ev = mux.select_next_some() => {
				break_if_fatal!(handle_mux(&metrics, &mut from_pool, &mut spawned, ev))
			}
		}

		// After each event, sweep the registry for idle workers whose processes died.
		break_if_fatal!(purge_dead(&metrics, &mut from_pool, &mut spawned).await);
	}
}
|
||||
|
||||
/// Removes workers whose processes have terminated while idle, notifying the client with
/// [`FromPool::Rip`] for each worker actually removed.
async fn purge_dead(
	metrics: &Metrics,
	from_pool: &mut mpsc::UnboundedSender<FromPool>,
	spawned: &mut HopSlotMap<Worker, WorkerData>,
) -> Result<(), Fatal> {
	let mut to_remove = vec![];
	for (worker, data) in spawned.iter_mut() {
		if data.idle.is_none() {
			// The idle token is missing, meaning this worker is now occupied: skip it. This is
			// because the worker process is observed by the work task and should it reach the
			// deadline or be terminated it will be handled by the corresponding mux event.
			continue;
		}

		if let Poll::Ready(()) = futures::poll!(&mut data.handle) {
			// a resolved future means that the worker has terminated. Weed it out.
			to_remove.push(worker);
		}
	}
	for w in to_remove {
		// `attempt_retire` may return `false` if the worker was removed concurrently by
		// another event; only report `Rip` for workers we actually removed here.
		if attempt_retire(metrics, spawned, w) {
			reply(from_pool, FromPool::Rip(w))?;
		}
	}
	Ok(())
}
|
||||
|
||||
/// Processes a single client request. Spawn requests and work assignments become futures pushed
/// onto `mux`; kill requests are handled synchronously.
fn handle_to_pool(
	metrics: &Metrics,
	program_path: &Path,
	cache_path: &Path,
	spawn_timeout: Duration,
	node_version: Option<String>,
	security_status: SecurityStatus,
	spawned: &mut HopSlotMap<Worker, WorkerData>,
	mux: &mut Mux,
	to_pool: ToPool,
) {
	match to_pool {
		ToPool::Spawn => {
			gum::debug!(target: LOG_TARGET, "spawning a new prepare worker");
			metrics.prepare_worker().on_begin_spawn();
			mux.push(
				spawn_worker_task(
					program_path.to_owned(),
					cache_path.to_owned(),
					spawn_timeout,
					node_version,
					security_status,
				)
				.boxed(),
			);
		},
		ToPool::StartWork { worker, pvf, cache_path } => {
			if let Some(data) = spawned.get_mut(worker) {
				if let Some(idle) = data.idle.take() {
					// Loan the idle token out for the duration of the job; it is put back by
					// `handle_concluded_no_rip` once the job concludes.
					let preparation_timer = metrics.time_preparation();
					mux.push(
						start_work_task(
							metrics.clone(),
							worker,
							idle,
							pvf,
							cache_path,
							preparation_timer,
						)
						.boxed(),
					);
				} else {
					// idle token is present after spawn and after a job is concluded;
					// the precondition for `StartWork` is it should be sent only if all previous
					// work items concluded;
					// thus idle token is Some;
					// qed.
					never!("unexpected absence of the idle token in prepare pool");
				}
			} else {
				// That's a relatively normal situation since the queue may send `start_work` and
				// before receiving it the pool would report that the worker died.
			}
		},
		ToPool::Kill(worker) => {
			gum::debug!(target: LOG_TARGET, ?worker, "killing prepare worker");
			// It may be absent if it were previously already removed by `purge_dead`.
			let _ = attempt_retire(metrics, spawned, worker);
		},
	}
}
|
||||
|
||||
/// Attempts to spawn a prepare worker process, retrying every 3 seconds until it succeeds.
///
/// Resolves to [`PoolEvent::Spawn`] once a worker is up; spawn failures are treated as
/// transient, so this future only ever completes successfully.
async fn spawn_worker_task(
	program_path: PathBuf,
	cache_path: PathBuf,
	spawn_timeout: Duration,
	node_version: Option<String>,
	security_status: SecurityStatus,
) -> PoolEvent {
	use futures_timer::Delay;

	loop {
		match worker_interface::spawn(
			&program_path,
			&cache_path,
			spawn_timeout,
			node_version.as_deref(),
			security_status.clone(),
		)
		.await
		{
			Ok((idle, handle)) => break PoolEvent::Spawn(idle, handle),
			Err(err) => {
				gum::warn!(target: LOG_TARGET, "failed to spawn a prepare worker: {:?}", err);

				// Assume that the failure is intermittent and retry after a delay.
				Delay::new(Duration::from_secs(3)).await;
			},
		}
	}
}
|
||||
|
||||
async fn start_work_task<Timer>(
|
||||
metrics: Metrics,
|
||||
worker: Worker,
|
||||
idle: IdleWorker,
|
||||
pvf: PvfPrepData,
|
||||
cache_path: PathBuf,
|
||||
_preparation_timer: Option<Timer>,
|
||||
) -> PoolEvent {
|
||||
let outcome = worker_interface::start_work(&metrics, idle, pvf, cache_path).await;
|
||||
PoolEvent::StartWork(worker, outcome)
|
||||
}
|
||||
|
||||
/// Handles a single internal pool event, updating the worker registry and notifying the client.
///
/// Outcomes indicating the worker itself is unusable (unreachable, timed out, out of memory,
/// job died, I/O error, failed to clear its dir) retire the worker and report `rip: true`;
/// host-side failures keep the worker alive via [`handle_concluded_no_rip`].
fn handle_mux(
	metrics: &Metrics,
	from_pool: &mut mpsc::UnboundedSender<FromPool>,
	spawned: &mut HopSlotMap<Worker, WorkerData>,
	event: PoolEvent,
) -> Result<(), Fatal> {
	match event {
		PoolEvent::Spawn(idle, handle) => {
			metrics.prepare_worker().on_spawned();

			// Register the new worker with its idle token available for work.
			let worker = spawned.insert(WorkerData { idle: Some(idle), handle });

			reply(from_pool, FromPool::Spawned(worker))?;

			Ok(())
		},
		PoolEvent::StartWork(worker, outcome) => {
			// If we receive an outcome that the worker is unreachable or that an error occurred on
			// the worker, we attempt to kill the worker process.
			match outcome {
				Outcome::Concluded { worker: idle, result } =>
					handle_concluded_no_rip(from_pool, spawned, worker, idle, result),
				// Return `Concluded`, but do not kill the worker since the error was on the host
				// side.
				Outcome::CreateTmpFileErr { worker: idle, err } => handle_concluded_no_rip(
					from_pool,
					spawned,
					worker,
					idle,
					Err(PrepareError::CreateTmpFile(err)),
				),
				// Return `Concluded`, but do not kill the worker since the error was on the host
				// side.
				Outcome::RenameTmpFile { worker: idle, err, src, dest } => handle_concluded_no_rip(
					from_pool,
					spawned,
					worker,
					idle,
					Err(PrepareError::RenameTmpFile { err, src, dest }),
				),
				// Could not clear worker cache. Kill the worker so other jobs can't see the data.
				Outcome::ClearWorkerDir { err } => {
					if attempt_retire(metrics, spawned, worker) {
						reply(
							from_pool,
							FromPool::Concluded {
								worker,
								rip: true,
								result: Err(PrepareError::ClearWorkerDir(err)),
							},
						)?;
					}

					Ok(())
				},
				// No job result to report: just notify the client that the worker is gone.
				Outcome::Unreachable => {
					if attempt_retire(metrics, spawned, worker) {
						reply(from_pool, FromPool::Rip(worker))?;
					}

					Ok(())
				},
				Outcome::IoErr(err) => {
					if attempt_retire(metrics, spawned, worker) {
						reply(
							from_pool,
							FromPool::Concluded {
								worker,
								rip: true,
								result: Err(PrepareError::IoErr(err)),
							},
						)?;
					}

					Ok(())
				},
				// The worker might still be usable, but we kill it just in case.
				Outcome::JobDied { err, job_pid } => {
					if attempt_retire(metrics, spawned, worker) {
						reply(
							from_pool,
							FromPool::Concluded {
								worker,
								rip: true,
								result: Err(PrepareError::JobDied { err, job_pid }),
							},
						)?;
					}

					Ok(())
				},
				Outcome::TimedOut => {
					if attempt_retire(metrics, spawned, worker) {
						reply(
							from_pool,
							FromPool::Concluded {
								worker,
								rip: true,
								result: Err(PrepareError::TimedOut),
							},
						)?;
					}

					Ok(())
				},
				Outcome::OutOfMemory => {
					if attempt_retire(metrics, spawned, worker) {
						reply(
							from_pool,
							FromPool::Concluded {
								worker,
								rip: true,
								result: Err(PrepareError::OutOfMemory),
							},
						)?;
					}

					Ok(())
				},
			}
		},
	}
}
|
||||
|
||||
fn reply(from_pool: &mut mpsc::UnboundedSender<FromPool>, m: FromPool) -> Result<(), Fatal> {
|
||||
from_pool.unbounded_send(m).map_err(|_| Fatal)
|
||||
}
|
||||
|
||||
/// Removes the given worker from the registry if it there. This will lead to dropping and hence
|
||||
/// to killing the worker process.
|
||||
///
|
||||
/// Returns `true` if the worker exists and was removed and the process was killed.
|
||||
///
|
||||
/// This function takes care about counting the retired workers metric.
|
||||
fn attempt_retire(
|
||||
metrics: &Metrics,
|
||||
spawned: &mut HopSlotMap<Worker, WorkerData>,
|
||||
worker: Worker,
|
||||
) -> bool {
|
||||
if spawned.remove(worker).is_some() {
|
||||
metrics.prepare_worker().on_retired();
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Handles the case where we received a response. There potentially was an error, but not the fault
|
||||
/// of the worker as far as we know, so the worker should not be killed.
|
||||
///
|
||||
/// This function tries to put the idle worker back into the pool and then replies with
|
||||
/// `FromPool::Concluded` with `rip: false`.
|
||||
fn handle_concluded_no_rip(
|
||||
from_pool: &mut mpsc::UnboundedSender<FromPool>,
|
||||
spawned: &mut HopSlotMap<Worker, WorkerData>,
|
||||
worker: Worker,
|
||||
idle: IdleWorker,
|
||||
result: PrepareResult,
|
||||
) -> Result<(), Fatal> {
|
||||
let data = match spawned.get_mut(worker) {
|
||||
None => {
|
||||
// Perhaps the worker was killed meanwhile and the result is no longer relevant. We
|
||||
// already send `Rip` when purging if we detect that the worker is dead.
|
||||
return Ok(());
|
||||
},
|
||||
Some(data) => data,
|
||||
};
|
||||
|
||||
// We just replace the idle worker that was loaned from this option during
|
||||
// the work starting.
|
||||
let old = data.idle.replace(idle);
|
||||
never!(
|
||||
old.is_some(),
|
||||
"old idle worker was taken out when starting work; we only replace it here; qed"
|
||||
);
|
||||
|
||||
reply(from_pool, FromPool::Concluded { worker, rip: false, result })?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Spins up the pool and returns the future that should be polled to make the pool functional.
|
||||
pub fn start(
|
||||
metrics: Metrics,
|
||||
program_path: PathBuf,
|
||||
cache_path: PathBuf,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<String>,
|
||||
security_status: SecurityStatus,
|
||||
) -> (mpsc::Sender<ToPool>, mpsc::UnboundedReceiver<FromPool>, impl Future<Output = ()>) {
|
||||
let (to_pool_tx, to_pool_rx) = mpsc::channel(10);
|
||||
let (from_pool_tx, from_pool_rx) = mpsc::unbounded();
|
||||
|
||||
let run = run(Pool {
|
||||
metrics,
|
||||
program_path,
|
||||
cache_path,
|
||||
spawn_timeout,
|
||||
node_version,
|
||||
security_status,
|
||||
to_pool: to_pool_rx,
|
||||
from_pool: from_pool_tx,
|
||||
spawned: HopSlotMap::with_capacity_and_key(20),
|
||||
mux: Mux::new(),
|
||||
});
|
||||
|
||||
(to_pool_tx, from_pool_rx, run)
|
||||
}
|
||||
@@ -0,0 +1,796 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! A queue that handles requests for PVF preparation.
|
||||
|
||||
use super::pool::{self, Worker};
|
||||
use crate::{artifacts::ArtifactId, metrics::Metrics, Priority, LOG_TARGET};
|
||||
use always_assert::{always, never};
|
||||
use futures::{channel::mpsc, stream::StreamExt as _, Future, SinkExt};
|
||||
use pezkuwi_node_core_pvf_common::{error::PrepareResult, pvf::PvfPrepData};
|
||||
use std::{
|
||||
collections::{HashMap, VecDeque},
|
||||
path::PathBuf,
|
||||
};
|
||||
|
||||
#[cfg(test)]
|
||||
use std::time::Duration;
|
||||
|
||||
/// A request to the preparation queue.
#[derive(Debug)]
pub enum ToQueue {
	/// This schedules preparation of the given PVF.
	///
	/// Note that it is incorrect to enqueue the same PVF again without first receiving the
	/// [`FromQueue`] response.
	Enqueue { priority: Priority, pvf: PvfPrepData },
}

/// A response from queue.
#[derive(Debug)]
pub struct FromQueue {
	/// Identifier of an artifact.
	pub(crate) artifact_id: ArtifactId,
	/// Outcome of the PVF processing. [`Ok`] indicates that compiled artifact
	/// is successfully stored on disk. Otherwise, an
	/// [error](pezkuwi_node_core_pvf_common::error::PrepareError) is supplied.
	pub(crate) result: PrepareResult,
}
|
||||
|
||||
/// Worker-count limits for the preparation pool.
#[derive(Default)]
struct Limits {
	/// The maximum number of workers this pool can ever host. This is expected to be a small
	/// number, e.g. within a dozen.
	hard_capacity: usize,

	/// The number of workers we aim to have. If there is a critical job and we are already
	/// at `soft_capacity`, we are allowed to grow up to `hard_capacity`. Thus this should be equal
	/// or smaller than `hard_capacity`.
	soft_capacity: usize,
}
|
||||
|
||||
impl Limits {
	/// Returns `true` if the queue is allowed to request one more worker.
	fn can_afford_one_more(&self, spawned_num: usize, critical: bool) -> bool {
		// Critical jobs may use the reserve between the soft and hard capacity.
		let cap = if critical { self.hard_capacity } else { self.soft_capacity };
		spawned_num < cap
	}

	/// Returns `true` if the pool currently hosts more workers than `soft_capacity`, in which
	/// case one of them should be culled once it becomes idle.
	fn should_cull(&mut self, spawned_num: usize) -> bool {
		spawned_num > self.soft_capacity
	}
}
|
||||
|
||||
slotmap::new_key_type! { pub struct Job; }

/// Queue-side bookkeeping for a single preparation job.
struct JobData {
	/// The priority of this job. Can be bumped.
	priority: Priority,
	/// The PVF to prepare.
	pvf: PvfPrepData,
	/// The worker the job is assigned to, if any.
	worker: Option<Worker>,
}
|
||||
|
||||
/// Queue-side bookkeeping for a single pool worker.
#[derive(Default)]
struct WorkerData {
	/// The job currently assigned to this worker, if any.
	job: Option<Job>,
}

impl WorkerData {
	/// A worker with no assigned job is idle and can accept new work.
	fn is_idle(&self) -> bool {
		self.job.is_none()
	}
}
|
||||
|
||||
/// A queue structured like this is prone to starving, however, we don't care that much since we
|
||||
/// expect there is going to be a limited number of critical jobs and we don't really care if
|
||||
/// background starve.
|
||||
#[derive(Default)]
|
||||
struct Unscheduled {
|
||||
normal: VecDeque<Job>,
|
||||
critical: VecDeque<Job>,
|
||||
}
|
||||
|
||||
impl Unscheduled {
|
||||
fn queue_mut(&mut self, prio: Priority) -> &mut VecDeque<Job> {
|
||||
match prio {
|
||||
Priority::Normal => &mut self.normal,
|
||||
Priority::Critical => &mut self.critical,
|
||||
}
|
||||
}
|
||||
|
||||
fn add(&mut self, prio: Priority, job: Job) {
|
||||
self.queue_mut(prio).push_back(job);
|
||||
}
|
||||
|
||||
fn readd(&mut self, prio: Priority, job: Job) {
|
||||
self.queue_mut(prio).push_front(job);
|
||||
}
|
||||
|
||||
fn is_empty(&self) -> bool {
|
||||
self.normal.is_empty() && self.critical.is_empty()
|
||||
}
|
||||
|
||||
fn next(&mut self) -> Option<Job> {
|
||||
let mut check = |prio: Priority| self.queue_mut(prio).pop_front();
|
||||
check(Priority::Critical).or_else(|| check(Priority::Normal))
|
||||
}
|
||||
}
|
||||
|
||||
/// The state of the preparation queue: channel endpoints towards the host and the pool,
/// capacity limits, and bookkeeping for jobs and workers.
struct Queue {
	metrics: Metrics,

	/// Requests from the host.
	to_queue_rx: mpsc::Receiver<ToQueue>,
	/// Replies back to the host.
	from_queue_tx: mpsc::UnboundedSender<FromQueue>,

	/// Commands to the worker pool.
	to_pool_tx: mpsc::Sender<pool::ToPool>,
	/// Notifications from the worker pool.
	from_pool_rx: mpsc::UnboundedReceiver<pool::FromPool>,

	/// Forwarded to the pool with each `StartWork` command.
	cache_path: PathBuf,
	/// Soft/hard caps on the number of workers.
	limits: Limits,

	/// All jobs known to the queue, scheduled or not.
	jobs: slotmap::SlotMap<Job, JobData>,

	/// A mapping from artifact id to a job.
	artifact_id_to_job: HashMap<ArtifactId, Job>,
	/// The registry of all workers.
	workers: slotmap::SparseSecondaryMap<Worker, WorkerData>,
	/// The number of workers requested to spawn but not yet spawned.
	spawn_inflight: usize,

	/// The jobs that are not yet scheduled. These are waiting until the next `poll` where they are
	/// processed all at once.
	unscheduled: Unscheduled,
}

/// A fatal error that warrants stopping the queue.
struct Fatal;
|
||||
|
||||
impl Queue {
	/// Creates a queue in its initial state: no jobs, no known workers, no spawns in flight.
	fn new(
		metrics: Metrics,
		soft_capacity: usize,
		hard_capacity: usize,
		cache_path: PathBuf,
		to_queue_rx: mpsc::Receiver<ToQueue>,
		from_queue_tx: mpsc::UnboundedSender<FromQueue>,
		to_pool_tx: mpsc::Sender<pool::ToPool>,
		from_pool_rx: mpsc::UnboundedReceiver<pool::FromPool>,
	) -> Self {
		Self {
			metrics,
			to_queue_rx,
			from_queue_tx,
			to_pool_tx,
			from_pool_rx,
			cache_path,
			spawn_inflight: 0,
			limits: Limits { hard_capacity, soft_capacity },
			jobs: slotmap::SlotMap::with_key(),
			unscheduled: Unscheduled::default(),
			artifact_id_to_job: HashMap::new(),
			workers: slotmap::SparseSecondaryMap::new(),
		}
	}

	/// The event loop of the queue. Runs until a [`Fatal`] error occurs, i.e. one of the
	/// channels is disconnected.
	async fn run(mut self) {
		macro_rules! break_if_fatal {
			($expr:expr) => {
				if let Err(Fatal) = $expr {
					break;
				}
			};
		}

		loop {
			// biased to make it behave deterministically for tests.
			futures::select_biased! {
				to_queue = self.to_queue_rx.select_next_some() =>
					break_if_fatal!(handle_to_queue(&mut self, to_queue).await),
				from_pool = self.from_pool_rx.select_next_some() =>
					break_if_fatal!(handle_from_pool(&mut self, from_pool).await),
			}
		}
	}
}
|
||||
|
||||
async fn handle_to_queue(queue: &mut Queue, to_queue: ToQueue) -> Result<(), Fatal> {
|
||||
match to_queue {
|
||||
ToQueue::Enqueue { priority, pvf } => {
|
||||
handle_enqueue(queue, priority, pvf).await?;
|
||||
},
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Registers a new preparation job: it is either assigned straight away to an idle worker, or
/// parked in the unscheduled backlog alongside a request for an extra worker.
async fn handle_enqueue(
	queue: &mut Queue,
	priority: Priority,
	pvf: PvfPrepData,
) -> Result<(), Fatal> {
	gum::debug!(
		target: LOG_TARGET,
		validation_code_hash = ?pvf.code_hash(),
		?priority,
		preparation_timeout = ?pvf.prep_timeout(),
		"PVF is enqueued for preparation.",
	);
	queue.metrics.prepare_enqueued();

	let artifact_id = ArtifactId::from_pvf_prep_data(&pvf);
	if never!(
		queue.artifact_id_to_job.contains_key(&artifact_id),
		"second Enqueue sent for a known artifact"
	) {
		// This function is called in response to a `Enqueue` message;
		// Precondition for `Enqueue` is that it is sent only once for a PVF;
		// Thus this should always be `false`;
		// qed.
		gum::warn!(
			target: LOG_TARGET,
			"duplicate `enqueue` command received for {:?}",
			artifact_id,
		);
		return Ok(());
	}

	let job = queue.jobs.insert(JobData { priority, pvf, worker: None });
	queue.artifact_id_to_job.insert(artifact_id, job);

	if let Some(available) = find_idle_worker(queue) {
		// This may seem not fair (w.r.t priority) on the first glance, but it should be. This is
		// because as soon as a worker finishes with the job it's immediately given the next one.
		assign(queue, available, job).await?;
	} else {
		spawn_extra_worker(queue, priority.is_critical()).await?;
		queue.unscheduled.add(priority, job);
	}

	Ok(())
}
|
||||
|
||||
/// Returns the key of some idle worker, if any is currently registered.
fn find_idle_worker(queue: &mut Queue) -> Option<Worker> {
	queue
		.workers
		.iter()
		.find(|(_, data)| data.is_idle())
		.map(|(worker, _)| worker)
}
|
||||
|
||||
async fn handle_from_pool(queue: &mut Queue, from_pool: pool::FromPool) -> Result<(), Fatal> {
|
||||
use pool::FromPool;
|
||||
match from_pool {
|
||||
FromPool::Spawned(worker) => handle_worker_spawned(queue, worker).await?,
|
||||
FromPool::Concluded { worker, rip, result } =>
|
||||
handle_worker_concluded(queue, worker, rip, result).await?,
|
||||
FromPool::Rip(worker) => handle_worker_rip(queue, worker).await?,
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_worker_spawned(queue: &mut Queue, worker: Worker) -> Result<(), Fatal> {
|
||||
queue.workers.insert(worker, WorkerData::default());
|
||||
queue.spawn_inflight -= 1;
|
||||
|
||||
if let Some(job) = queue.unscheduled.next() {
|
||||
assign(queue, worker, job).await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Processes the conclusion of a job: reports the result to the host and decides the fate of
/// the worker — replace it (if it was ripped), cull it (if over soft capacity), or hand it the
/// next unscheduled job.
async fn handle_worker_concluded(
	queue: &mut Queue,
	worker: Worker,
	rip: bool,
	result: PrepareResult,
) -> Result<(), Fatal> {
	queue.metrics.prepare_concluded();

	// Unwraps an `Option` guaranteed `Some` by the protocol invariants spelled out at each use
	// site; logs and bails out instead of panicking if an invariant is ever broken.
	macro_rules! never_none {
		($expr:expr) => {
			match $expr {
				Some(v) => v,
				None => {
					// Precondition of calling this is that the `$expr` is never none;
					// Assume the conditions holds, then this never is not hit;
					// qed.
					never!("never_none, {}", stringify!($expr));
					return Ok(());
				},
			}
		};
	}

	// Find out on which artifact was the worker working.

	// workers are registered upon spawn and removed in one of the following cases:
	//   1. received rip signal
	//   2. received concluded signal with rip=true;
	// concluded signal only comes from a spawned worker and only once;
	// rip signal is not sent after conclusion with rip=true;
	// the worker should be registered;
	// this can't be None;
	// qed.
	let worker_data = never_none!(queue.workers.get_mut(worker));

	// worker_data.job is set only by `assign` and removed only here for a worker;
	// concluded signal only comes for a worker that was previously assigned and only once;
	// the worker should have the job;
	// this can't be None;
	// qed.
	let job = never_none!(worker_data.job.take());

	// job_data is inserted upon enqueue and removed only here;
	// as was established above, this worker was previously `assign`ed to the job;
	// that implies that the job was enqueued;
	// conclude signal only comes once;
	// we are just to remove the job for the first and the only time;
	// this can't be None;
	// qed.
	let job_data = never_none!(queue.jobs.remove(job));
	let artifact_id = ArtifactId::from_pvf_prep_data(&job_data.pvf);

	queue.artifact_id_to_job.remove(&artifact_id);

	gum::debug!(
		target: LOG_TARGET,
		validation_code_hash = ?artifact_id.code_hash,
		?worker,
		?rip,
		"prepare worker concluded",
	);

	reply(&mut queue.from_queue_tx, FromQueue { artifact_id, result })?;

	// Figure out what to do with the worker.
	if rip {
		let worker_data = queue.workers.remove(worker);
		// worker should exist, it's asserted above;
		// qed.
		always!(worker_data.is_some());

		if !queue.unscheduled.is_empty() {
			// That is unconditionally not critical just to not accidentally fill up
			// the pool up to the hard cap.
			spawn_extra_worker(queue, false).await?;
		}
	} else if queue.limits.should_cull(queue.workers.len() + queue.spawn_inflight) {
		// We no longer need services of this worker. Kill it.
		queue.workers.remove(worker);
		send_pool(&mut queue.to_pool_tx, pool::ToPool::Kill(worker)).await?;
	} else {
		// see if there are more work available and schedule it.
		if let Some(job) = queue.unscheduled.next() {
			assign(queue, worker, job).await?;
		}
	}

	Ok(())
}
|
||||
|
||||
/// Handles the loss of a worker: re-queues its in-flight job (if any) at the front of the
/// backlog, and spawns a replacement worker if there is still work waiting.
async fn handle_worker_rip(queue: &mut Queue, worker: Worker) -> Result<(), Fatal> {
	gum::debug!(target: LOG_TARGET, ?worker, "prepare worker ripped");

	let worker_data = queue.workers.remove(worker);
	if let Some(WorkerData { job: Some(job), .. }) = worker_data {
		// This is an edge case where the worker ripped after we sent assignment but before it
		// was received by the pool.
		let priority = queue.jobs.get(job).map(|data| data.priority).unwrap_or_else(|| {
			// job is inserted upon enqueue and removed on concluded signal;
			// this is enclosed in the if statement that narrows the situation to before
			// conclusion;
			// that means that the job still exists and is known;
			// this path cannot be hit;
			// qed.
			never!("the job of the ripped worker must be known but it is not");
			Priority::Normal
		});
		queue.unscheduled.readd(priority, job);
	}

	// If there are still jobs left, spawn another worker to replace the ripped one (but only if it
	// was indeed removed). That is unconditionally not critical just to not accidentally fill up
	// the pool up to the hard cap.
	if worker_data.is_some() && !queue.unscheduled.is_empty() {
		spawn_extra_worker(queue, false).await?;
	}
	Ok(())
}
|
||||
|
||||
/// Spawns an extra worker if possible.
|
||||
async fn spawn_extra_worker(queue: &mut Queue, critical: bool) -> Result<(), Fatal> {
|
||||
if queue
|
||||
.limits
|
||||
.can_afford_one_more(queue.workers.len() + queue.spawn_inflight, critical)
|
||||
{
|
||||
queue.spawn_inflight += 1;
|
||||
send_pool(&mut queue.to_pool_tx, pool::ToPool::Spawn).await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Attaches the work to the given worker, telling the pool about the job.
async fn assign(queue: &mut Queue, worker: Worker, job: Job) -> Result<(), Fatal> {
	// Link the job and the worker in both directions for later bookkeeping.
	let job_data = &mut queue.jobs[job];
	job_data.worker = Some(worker);

	queue.workers[worker].job = Some(job);

	send_pool(
		&mut queue.to_pool_tx,
		pool::ToPool::StartWork {
			worker,
			pvf: job_data.pvf.clone(),
			cache_path: queue.cache_path.clone(),
		},
	)
	.await?;

	Ok(())
}
|
||||
|
||||
fn reply(from_queue_tx: &mut mpsc::UnboundedSender<FromQueue>, m: FromQueue) -> Result<(), Fatal> {
|
||||
from_queue_tx.unbounded_send(m).map_err(|_| {
|
||||
// The host has hung up and thus it's fatal and we should shutdown ourselves.
|
||||
Fatal
|
||||
})
|
||||
}
|
||||
|
||||
async fn send_pool(
|
||||
to_pool_tx: &mut mpsc::Sender<pool::ToPool>,
|
||||
m: pool::ToPool,
|
||||
) -> Result<(), Fatal> {
|
||||
to_pool_tx.send(m).await.map_err(|_| {
|
||||
// The pool has hung up and thus we are no longer are able to fulfill our duties. Shutdown.
|
||||
Fatal
|
||||
})
|
||||
}
|
||||
|
||||
/// Spins up the queue and returns the future that should be polled to make the queue functional.
|
||||
pub fn start(
|
||||
metrics: Metrics,
|
||||
soft_capacity: usize,
|
||||
hard_capacity: usize,
|
||||
cache_path: PathBuf,
|
||||
to_pool_tx: mpsc::Sender<pool::ToPool>,
|
||||
from_pool_rx: mpsc::UnboundedReceiver<pool::FromPool>,
|
||||
) -> (mpsc::Sender<ToQueue>, mpsc::UnboundedReceiver<FromQueue>, impl Future<Output = ()>) {
|
||||
let (to_queue_tx, to_queue_rx) = mpsc::channel(150);
|
||||
let (from_queue_tx, from_queue_rx) = mpsc::unbounded();
|
||||
|
||||
let run = Queue::new(
|
||||
metrics,
|
||||
soft_capacity,
|
||||
hard_capacity,
|
||||
cache_path,
|
||||
to_queue_rx,
|
||||
from_queue_tx,
|
||||
to_pool_tx,
|
||||
from_pool_rx,
|
||||
)
|
||||
.run();
|
||||
|
||||
(to_queue_tx, from_queue_rx, run)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
	use super::*;
	use crate::host::tests::TEST_PREPARATION_TIMEOUT;
	use assert_matches::assert_matches;
	use futures::{future::BoxFuture, FutureExt};
	use pezkuwi_node_core_pvf_common::{error::PrepareError, prepare::PrepareSuccess};
	use slotmap::SlotMap;
	use std::task::Poll;

	/// Creates a new PVF which artifact id can be uniquely identified by the given number.
	fn pvf(discriminator: u32) -> PvfPrepData {
		PvfPrepData::from_discriminator(discriminator)
	}

	/// Polls `fut` to completion while interleaving polls of the queue future `task`, so the
	/// queue makes progress. Panics if `fut` takes longer than a second or if the queue future
	/// itself completes (it is expected to run for the whole test).
	async fn run_until<R>(
		task: &mut (impl Future<Output = ()> + Unpin),
		mut fut: (impl Future<Output = R> + Unpin),
	) -> R {
		let start = std::time::Instant::now();
		let fut = &mut fut;
		loop {
			if start.elapsed() > std::time::Duration::from_secs(1) {
				// We expect that this will take only a couple of iterations and thus to take way
				// less than a second.
				panic!("timeout");
			}

			if let Poll::Ready(r) = futures::poll!(&mut *fut) {
				break r;
			}

			// The queue future concluding mid-test means it shut down unexpectedly.
			if futures::poll!(&mut *task).is_ready() {
				panic!()
			}
		}
	}

	/// Test harness around a queue instance: holds the queue future plus both ends of all its
	/// channels, allowing the test to impersonate both the host and the worker pool.
	struct Test {
		// Keeps the temporary cache directory alive for the duration of the test.
		_tempdir: tempfile::TempDir,
		run: BoxFuture<'static, ()>,
		workers: SlotMap<Worker, ()>,
		from_pool_tx: mpsc::UnboundedSender<pool::FromPool>,
		to_pool_rx: mpsc::Receiver<pool::ToPool>,
		to_queue_tx: mpsc::Sender<ToQueue>,
		from_queue_rx: mpsc::UnboundedReceiver<FromQueue>,
	}

	impl Test {
		fn new(soft_capacity: usize, hard_capacity: usize) -> Self {
			let tempdir = tempfile::tempdir().unwrap();

			let (to_pool_tx, to_pool_rx) = mpsc::channel(10);
			let (from_pool_tx, from_pool_rx) = mpsc::unbounded();

			let workers: SlotMap<Worker, ()> = SlotMap::with_key();

			let (to_queue_tx, from_queue_rx, run) = start(
				Metrics::default(),
				soft_capacity,
				hard_capacity,
				tempdir.path().to_owned().into(),
				to_pool_tx,
				from_pool_rx,
			);

			Self {
				_tempdir: tempdir,
				run: run.boxed(),
				workers,
				from_pool_tx,
				to_pool_rx,
				to_queue_tx,
				from_queue_rx,
			}
		}

		// Sends a message to the queue; it must be accepted without blocking.
		fn send_queue(&mut self, to_queue: ToQueue) {
			self.to_queue_tx.send(to_queue).now_or_never().unwrap().unwrap();
		}

		// Drives the queue until it produces a message for the host.
		async fn poll_and_recv_from_queue(&mut self) -> FromQueue {
			let from_queue_rx = &mut self.from_queue_rx;
			run_until(&mut self.run, async { from_queue_rx.next().await.unwrap() }.boxed()).await
		}

		// Simulates a message arriving from the pool; must be accepted without blocking.
		fn send_from_pool(&mut self, from_pool: pool::FromPool) {
			self.from_pool_tx.send(from_pool).now_or_never().unwrap().unwrap();
		}

		// Drives the queue until it sends a command to the pool.
		async fn poll_and_recv_to_pool(&mut self) -> pool::ToPool {
			let to_pool_rx = &mut self.to_pool_rx;
			run_until(&mut self.run, async { to_pool_rx.next().await.unwrap() }.boxed()).await
		}

		// Drives the queue for 500ms and panics if it sends anything to the pool in that time.
		async fn poll_ensure_to_pool_is_empty(&mut self) {
			use futures_timer::Delay;

			let to_pool_rx = &mut self.to_pool_rx;
			run_until(
				&mut self.run,
				async {
					futures::select! {
						_ = Delay::new(Duration::from_millis(500)).fuse() => (),
						_ = to_pool_rx.next().fuse() => {
							panic!("to pool supposed to be empty")
						}
					}
				}
				.boxed(),
			)
			.await
		}
	}

	// One enqueued PVF leads to one spawn and, after the worker concludes successfully, a
	// `FromQueue` message carrying the corresponding artifact id.
	#[tokio::test]
	async fn properly_concludes() {
		let mut test = Test::new(2, 2);

		test.send_queue(ToQueue::Enqueue { priority: Priority::Normal, pvf: pvf(1) });
		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);

		let w = test.workers.insert(());
		test.send_from_pool(pool::FromPool::Spawned(w));
		test.send_from_pool(pool::FromPool::Concluded {
			worker: w,
			rip: false,
			result: Ok(PrepareSuccess::default()),
		});

		assert_eq!(
			test.poll_and_recv_from_queue().await.artifact_id,
			ArtifactId::from_pvf_prep_data(&pvf(1))
		);
	}

	// With soft capacity 2 and three normal-priority jobs, only two workers are spawned; a
	// critical job then justifies spawning a third (up to the hard capacity).
	#[tokio::test]
	async fn dont_spawn_over_soft_limit_unless_critical() {
		let mut test = Test::new(2, 3);

		let priority = Priority::Normal;
		test.send_queue(ToQueue::Enqueue { priority, pvf: PvfPrepData::from_discriminator(1) });
		test.send_queue(ToQueue::Enqueue { priority, pvf: PvfPrepData::from_discriminator(2) });
		// Start a non-precheck preparation for this one.
		test.send_queue(ToQueue::Enqueue {
			priority,
			pvf: PvfPrepData::from_discriminator_and_timeout(3, TEST_PREPARATION_TIMEOUT * 3),
		});

		// Receive only two spawns.
		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);

		let w1 = test.workers.insert(());
		let w2 = test.workers.insert(());

		test.send_from_pool(pool::FromPool::Spawned(w1));
		test.send_from_pool(pool::FromPool::Spawned(w2));

		// Get two start works.
		assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });
		assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });

		test.send_from_pool(pool::FromPool::Concluded {
			worker: w1,
			rip: false,
			result: Ok(PrepareSuccess::default()),
		});

		assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });

		// Enqueue a critical job.
		test.send_queue(ToQueue::Enqueue {
			priority: Priority::Critical,
			pvf: PvfPrepData::from_discriminator(4),
		});

		// 2 out of 2 are working, but there is a critical job incoming. That means that spawning
		// another worker is warranted.
		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
	}

	// When the pool is over the soft capacity and a worker becomes idle with no work for it,
	// the queue culls (kills) it.
	#[tokio::test]
	async fn cull_unwanted() {
		let mut test = Test::new(1, 2);

		test.send_queue(ToQueue::Enqueue {
			priority: Priority::Normal,
			pvf: PvfPrepData::from_discriminator(1),
		});
		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
		let w1 = test.workers.insert(());
		test.send_from_pool(pool::FromPool::Spawned(w1));
		assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });

		// Enqueue a critical job, which warrants spawning over the soft limit.
		test.send_queue(ToQueue::Enqueue {
			priority: Priority::Critical,
			pvf: PvfPrepData::from_discriminator(2),
		});
		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);

		// However, before the new worker had a chance to spawn, the first worker finishes with its
		// job. The old worker will be killed while the new worker will be let live, even though
		// it's not instantiated.
		//
		// That's a bit silly in this context, but in production there will be an entire pool up
		// to the `soft_capacity` of workers and it doesn't matter which one to cull. Either way,
		// we just check that edge case of an edge case works.
		test.send_from_pool(pool::FromPool::Concluded {
			worker: w1,
			rip: false,
			result: Ok(PrepareSuccess::default()),
		});
		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Kill(w1));
	}

	// A worker getting ripped while jobs remain must trigger a replacement spawn, so the
	// queue keeps draining its backlog.
	#[tokio::test]
	async fn worker_mass_die_out_doesnt_stall_queue() {
		let mut test = Test::new(2, 2);

		let priority = Priority::Normal;
		test.send_queue(ToQueue::Enqueue { priority, pvf: PvfPrepData::from_discriminator(1) });
		test.send_queue(ToQueue::Enqueue { priority, pvf: PvfPrepData::from_discriminator(2) });
		// Start a non-precheck preparation for this one.
		test.send_queue(ToQueue::Enqueue {
			priority,
			pvf: PvfPrepData::from_discriminator_and_timeout(3, TEST_PREPARATION_TIMEOUT * 3),
		});

		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);

		let w1 = test.workers.insert(());
		let w2 = test.workers.insert(());

		test.send_from_pool(pool::FromPool::Spawned(w1));
		test.send_from_pool(pool::FromPool::Spawned(w2));

		assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });
		assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });

		// Conclude worker 1 and rip it.
		test.send_from_pool(pool::FromPool::Concluded {
			worker: w1,
			rip: true,
			result: Ok(PrepareSuccess::default()),
		});

		// Since there is still work, the queue requested one extra worker to spawn to handle the
		// remaining enqueued work items.
		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
		assert_eq!(
			test.poll_and_recv_from_queue().await.artifact_id,
			ArtifactId::from_pvf_prep_data(&pvf(1))
		);
	}

	// A ripped worker with nothing left in the queue must NOT be replaced.
	#[tokio::test]
	async fn doesnt_resurrect_ripped_worker_if_no_work() {
		let mut test = Test::new(2, 2);

		test.send_queue(ToQueue::Enqueue {
			priority: Priority::Normal,
			pvf: PvfPrepData::from_discriminator(1),
		});

		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);

		let w1 = test.workers.insert(());
		test.send_from_pool(pool::FromPool::Spawned(w1));

		assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });

		test.send_from_pool(pool::FromPool::Concluded {
			worker: w1,
			rip: true,
			result: Err(PrepareError::IoErr("test".into())),
		});
		test.poll_ensure_to_pool_is_empty().await;
	}

	// A rip notification racing with a just-issued `StartWork` must lead to a fresh spawn and
	// re-assignment of the job, not a lost job.
	#[tokio::test]
	async fn rip_for_start_work() {
		let mut test = Test::new(2, 2);

		test.send_queue(ToQueue::Enqueue {
			priority: Priority::Normal,
			pvf: PvfPrepData::from_discriminator(1),
		});

		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);

		let w1 = test.workers.insert(());
		test.send_from_pool(pool::FromPool::Spawned(w1));

		// Now, to the interesting part. After the queue normally issues the `start_work` command to
		// the pool, before receiving the command the queue may report that the worker ripped.
		assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });
		test.send_from_pool(pool::FromPool::Rip(w1));

		// In this case, the pool should spawn a new worker and request it to work on the item.
		assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);

		let w2 = test.workers.insert(());
		test.send_from_pool(pool::FromPool::Spawned(w2));
		assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });
	}
}
|
||||
@@ -0,0 +1,376 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Host interface to the prepare worker.
|
||||
|
||||
use crate::{
|
||||
artifacts::generate_artifact_path,
|
||||
metrics::Metrics,
|
||||
worker_interface::{
|
||||
clear_worker_dir_path, framed_recv, framed_send, spawn_with_program_path, IdleWorker,
|
||||
SpawnErr, WorkerDir, WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR,
|
||||
},
|
||||
LOG_TARGET,
|
||||
};
|
||||
use codec::{Decode, Encode};
|
||||
use pezkuwi_node_core_pvf_common::{
|
||||
error::{PrepareError, PrepareResult, PrepareWorkerResult},
|
||||
prepare::{PrepareStats, PrepareSuccess, PrepareWorkerSuccess},
|
||||
pvf::PvfPrepData,
|
||||
worker_dir, SecurityStatus,
|
||||
};
|
||||
|
||||
use sp_core::hexdisplay::HexDisplay;
|
||||
use std::{
|
||||
path::{Path, PathBuf},
|
||||
time::Duration,
|
||||
};
|
||||
use tokio::{io, net::UnixStream};
|
||||
|
||||
/// Spawns a new worker with the given program path that acts as the worker and the spawn timeout.
|
||||
///
|
||||
/// Sends a handshake message to the worker as soon as it is spawned.
|
||||
pub async fn spawn(
|
||||
program_path: &Path,
|
||||
cache_path: &Path,
|
||||
spawn_timeout: Duration,
|
||||
node_version: Option<&str>,
|
||||
security_status: SecurityStatus,
|
||||
) -> Result<(IdleWorker, WorkerHandle), SpawnErr> {
|
||||
let mut extra_args = vec!["prepare-worker"];
|
||||
if let Some(node_version) = node_version {
|
||||
extra_args.extend_from_slice(&["--node-impl-version", node_version]);
|
||||
}
|
||||
|
||||
spawn_with_program_path(
|
||||
"prepare",
|
||||
program_path,
|
||||
cache_path,
|
||||
&extra_args,
|
||||
spawn_timeout,
|
||||
security_status,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Outcome of PVF preparation.
///
/// If the idle worker token is not returned, it means the worker must be terminated.
pub enum Outcome {
	/// The worker has finished the work assigned to it.
	Concluded { worker: IdleWorker, result: PrepareResult },
	/// The host tried to reach the worker but failed. This is most likely because the worker was
	/// killed by the system.
	Unreachable,
	/// The temporary file for the artifact could not be created at the given cache path.
	CreateTmpFileErr { worker: IdleWorker, err: String },
	/// The response from the worker is received, but the tmp file cannot be renamed (moved) to the
	/// final destination location.
	RenameTmpFile {
		worker: IdleWorker,
		err: String,
		// Unfortunately `PathBuf` doesn't implement `Encode`/`Decode`, so we do a fallible
		// conversion to `Option<String>`.
		src: Option<String>,
		dest: Option<String>,
	},
	/// The worker cache could not be cleared for the given reason.
	ClearWorkerDir { err: String },
	/// The worker failed to finish the job until the given deadline.
	///
	/// The worker is no longer usable and should be killed.
	TimedOut,
	/// An IO error occurred while receiving the result from the worker process.
	///
	/// This doesn't return an idle worker instance, thus this worker is no longer usable.
	IoErr(String),
	/// The worker ran out of memory and is aborting. The worker should be ripped.
	OutOfMemory,
	/// The preparation job process died, due to OOM, a seccomp violation, or some other factor.
	///
	/// The worker might still be usable, but we kill it just in case.
	JobDied { err: String, job_pid: i32 },
}
|
||||
|
||||
/// Given the idle token of a worker and parameters of work, communicates with the worker and
/// returns the outcome.
///
/// NOTE: Returning the `TimedOut`, `IoErr` or `Unreachable` outcomes will trigger the child process
/// being killed.
pub async fn start_work(
	metrics: &Metrics,
	worker: IdleWorker,
	pvf: PvfPrepData,
	cache_path: PathBuf,
) -> Outcome {
	let IdleWorker { stream, pid, worker_dir } = worker;

	gum::debug!(
		target: LOG_TARGET,
		worker_pid = %pid,
		?worker_dir,
		"starting prepare for {:?}",
		pvf,
	);

	// The actual exchange with the worker runs inside `with_worker_dir_setup`, which creates
	// the temporary artifact file up-front and clears the worker dir afterwards.
	with_worker_dir_setup(
		worker_dir,
		stream,
		pid,
		|tmp_artifact_file, mut stream, worker_dir| async move {
			let preparation_timeout = pvf.prep_timeout();

			// Ask the worker to prepare the PVF; failure to even send means the worker is gone.
			if let Err(err) = send_request(&mut stream, &pvf).await {
				gum::warn!(
					target: LOG_TARGET,
					worker_pid = %pid,
					"failed to send a prepare request: {:?}",
					err,
				);
				return Outcome::Unreachable;
			}

			// Wait for the result from the worker, keeping in mind that there may be a timeout, the
			// worker may get killed, or something along these lines. In that case we should
			// propagate the error to the pool.
			//
			// We use a generous timeout here. This is in addition to the one in the child process,
			// in case the child stalls. We have a wall clock timeout here in the host, but a CPU
			// timeout in the child. We want to use CPU time because it varies less than wall clock
			// time under load, but the CPU resources of the child can only be measured from the
			// parent after the child process terminates.
			let timeout = preparation_timeout * JOB_TIMEOUT_WALL_CLOCK_FACTOR;
			let result = tokio::time::timeout(timeout, recv_response(&mut stream, pid)).await;

			match result {
				// Received bytes from worker within the time limit.
				Ok(Ok(prepare_worker_result)) =>
					handle_response(
						metrics,
						IdleWorker { stream, pid, worker_dir },
						prepare_worker_result,
						pid,
						tmp_artifact_file,
						&cache_path,
						preparation_timeout,
					)
					.await,
				Ok(Err(err)) => {
					// Communication error within the time limit.
					gum::warn!(
						target: LOG_TARGET,
						worker_pid = %pid,
						"failed to recv a prepare response: {}",
						err,
					);
					Outcome::IoErr(err.to_string())
				},
				Err(_) => {
					// Timed out here on the host.
					gum::warn!(
						target: LOG_TARGET,
						worker_pid = %pid,
						"did not recv a prepare response within the time limit",
					);
					Outcome::TimedOut
				},
			}
		},
	)
	.await
}
|
||||
|
||||
/// Handles the case where we successfully received response bytes on the host from the child.
|
||||
///
|
||||
/// Here we know the artifact exists, but is still located in a temporary file which will be cleared
|
||||
/// by [`with_worker_dir_setup`].
|
||||
async fn handle_response(
|
||||
metrics: &Metrics,
|
||||
worker: IdleWorker,
|
||||
result: PrepareWorkerResult,
|
||||
worker_pid: u32,
|
||||
tmp_file: PathBuf,
|
||||
cache_path: &Path,
|
||||
preparation_timeout: Duration,
|
||||
) -> Outcome {
|
||||
// TODO: Add `checksum` to `ArtifactPathId`. See:
|
||||
// https://github.com/pezkuwichain/pezkuwi-sdk/issues/122
|
||||
let PrepareWorkerSuccess {
|
||||
checksum,
|
||||
stats: PrepareStats { cpu_time_elapsed, memory_stats, observed_wasm_code_len },
|
||||
} = match result.clone() {
|
||||
Ok(result) => result,
|
||||
// Timed out on the child. This should already be logged by the child.
|
||||
Err(PrepareError::TimedOut) => return Outcome::TimedOut,
|
||||
Err(PrepareError::JobDied { err, job_pid }) => return Outcome::JobDied { err, job_pid },
|
||||
Err(PrepareError::OutOfMemory) => return Outcome::OutOfMemory,
|
||||
Err(err) => return Outcome::Concluded { worker, result: Err(err) },
|
||||
};
|
||||
|
||||
metrics.observe_code_size(observed_wasm_code_len as usize);
|
||||
|
||||
if cpu_time_elapsed > preparation_timeout {
|
||||
// The job didn't complete within the timeout.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
"prepare job took {}ms cpu time, exceeded preparation timeout {}ms. Clearing WIP artifact {}",
|
||||
cpu_time_elapsed.as_millis(),
|
||||
preparation_timeout.as_millis(),
|
||||
tmp_file.display(),
|
||||
);
|
||||
return Outcome::TimedOut;
|
||||
}
|
||||
|
||||
let size = match tokio::fs::metadata(cache_path).await {
|
||||
Ok(metadata) => metadata.len(),
|
||||
Err(err) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
?cache_path,
|
||||
"failed to read size of the artifact: {}",
|
||||
err,
|
||||
);
|
||||
return Outcome::IoErr(err.to_string());
|
||||
},
|
||||
};
|
||||
|
||||
// The file name should uniquely identify the artifact even across restarts. In case the cache
|
||||
// for some reason is not cleared correctly, we cannot
|
||||
// accidentally execute an artifact compiled under a different wasmtime version, host
|
||||
// environment, etc.
|
||||
let artifact_path = generate_artifact_path(cache_path);
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
"promoting WIP artifact {} to {}",
|
||||
tmp_file.display(),
|
||||
artifact_path.display(),
|
||||
);
|
||||
|
||||
let outcome = match tokio::fs::rename(&tmp_file, &artifact_path).await {
|
||||
Ok(()) => Outcome::Concluded {
|
||||
worker,
|
||||
result: Ok(PrepareSuccess {
|
||||
checksum,
|
||||
path: artifact_path,
|
||||
size,
|
||||
stats: PrepareStats {
|
||||
cpu_time_elapsed,
|
||||
memory_stats: memory_stats.clone(),
|
||||
observed_wasm_code_len,
|
||||
},
|
||||
}),
|
||||
},
|
||||
Err(err) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
"failed to rename the artifact from {} to {}: {:?}",
|
||||
tmp_file.display(),
|
||||
artifact_path.display(),
|
||||
err,
|
||||
);
|
||||
Outcome::RenameTmpFile {
|
||||
worker,
|
||||
err: format!("{:?}", err),
|
||||
src: tmp_file.to_str().map(String::from),
|
||||
dest: artifact_path.to_str().map(String::from),
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
// If there were no errors up until now, log the memory stats for a successful preparation, if
|
||||
// available.
|
||||
metrics.observe_preparation_memory_metrics(memory_stats);
|
||||
|
||||
outcome
|
||||
}
|
||||
|
||||
/// Create a temporary file for an artifact in the worker cache, execute the given future/closure
|
||||
/// passing the file path in, and clean up the worker cache.
|
||||
///
|
||||
/// Failure to clean up the worker cache results in an error - leaving any files here could be a
|
||||
/// security issue, and we should shut down the worker. This should be very rare.
|
||||
async fn with_worker_dir_setup<F, Fut>(
|
||||
worker_dir: WorkerDir,
|
||||
stream: UnixStream,
|
||||
pid: u32,
|
||||
f: F,
|
||||
) -> Outcome
|
||||
where
|
||||
Fut: futures::Future<Output = Outcome>,
|
||||
F: FnOnce(PathBuf, UnixStream, WorkerDir) -> Fut,
|
||||
{
|
||||
// Create the tmp file here so that the child doesn't need any file creation rights. This will
|
||||
// be cleared at the end of this function.
|
||||
let tmp_file = worker_dir::prepare_tmp_artifact(worker_dir.path());
|
||||
if let Err(err) = tokio::fs::File::create(&tmp_file).await {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
?worker_dir,
|
||||
"failed to create a temp file for the artifact: {:?}",
|
||||
err,
|
||||
);
|
||||
return Outcome::CreateTmpFileErr {
|
||||
worker: IdleWorker { stream, pid, worker_dir },
|
||||
err: format!("{:?}", err),
|
||||
};
|
||||
};
|
||||
|
||||
let worker_dir_path = worker_dir.path().to_owned();
|
||||
let outcome = f(tmp_file, stream, worker_dir).await;
|
||||
|
||||
// Try to clear the worker dir.
|
||||
if let Err(err) = clear_worker_dir_path(&worker_dir_path) {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
?worker_dir_path,
|
||||
"failed to clear worker cache after the job: {:?}",
|
||||
err,
|
||||
);
|
||||
return Outcome::ClearWorkerDir { err: format!("{:?}", err) };
|
||||
}
|
||||
|
||||
outcome
|
||||
}
|
||||
|
||||
async fn send_request(stream: &mut UnixStream, pvf: &PvfPrepData) -> io::Result<()> {
|
||||
framed_send(stream, &pvf.encode()).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn recv_response(stream: &mut UnixStream, pid: u32) -> io::Result<PrepareWorkerResult> {
|
||||
let result = framed_recv(stream).await?;
|
||||
let result = PrepareWorkerResult::decode(&mut &result[..]).map_err(|e| {
|
||||
// We received invalid bytes from the worker.
|
||||
let bound_bytes = &result[..result.len().min(4)];
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"received unexpected response from the prepare worker: {}",
|
||||
HexDisplay::from(&bound_bytes),
|
||||
);
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("prepare pvf recv_response: failed to decode result: {:?}", e),
|
||||
)
|
||||
})?;
|
||||
Ok(result)
|
||||
}
|
||||
@@ -0,0 +1,50 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use pezkuwi_node_subsystem::messages::PvfExecKind;
|
||||
|
||||
/// A priority assigned to preparation of a PVF.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Priority {
	/// Normal priority for things that do not require immediate response, but still need to be
	/// done pretty quick.
	///
	/// Backing falls into this category.
	Normal,
	/// This priority is used for requests that are required to be processed as soon as possible.
	///
	/// Disputes and approvals are on a critical path and require execution as soon as
	/// possible to not delay finality.
	Critical,
}

impl Priority {
	/// Returns `true` if `self` is `Critical`.
	pub fn is_critical(self) -> bool {
		matches!(self, Priority::Critical)
	}
}
|
||||
|
||||
impl From<PvfExecKind> for Priority {
|
||||
fn from(priority: PvfExecKind) -> Self {
|
||||
match priority {
|
||||
PvfExecKind::Dispute => Priority::Critical,
|
||||
PvfExecKind::Approval => Priority::Critical,
|
||||
PvfExecKind::BackingSystemParas(_) => Priority::Normal,
|
||||
PvfExecKind::Backing(_) => Priority::Normal,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,379 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use crate::{Config, SecurityStatus, LOG_TARGET};
|
||||
use futures::join;
|
||||
use std::{fmt, path::Path};
|
||||
|
||||
/// Run checks for supported security features.
///
/// # Returns
///
/// Returns the set of security features that we were able to enable. If an error occurs while
/// enabling a security feature we set the corresponding status to `false`.
///
/// # Errors
///
/// Returns an error only if we could not fully enforce the security level required by the current
/// configuration.
pub async fn check_security_status(config: &Config) -> Result<SecurityStatus, String> {
	let Config { prepare_worker_program_path, secure_validator_mode, cache_path, .. } = config;

	// Probe all security features concurrently; each check yields `Ok(())` iff the
	// corresponding feature can be enabled.
	let (landlock, seccomp, change_root, secure_clone) = join!(
		check_landlock(prepare_worker_program_path),
		check_seccomp(prepare_worker_program_path),
		check_can_unshare_user_namespace_and_change_root(prepare_worker_program_path, cache_path),
		check_can_do_secure_clone(prepare_worker_program_path),
	);

	let full_security_status = FullSecurityStatus::new(
		*secure_validator_mode,
		landlock,
		seccomp,
		change_root,
		secure_clone,
	);
	let security_status = full_security_status.as_partial();

	// Report any failed probes; bail out only if some failure is not tolerable under the
	// requested configuration (see `FullSecurityStatus::all_errs_allowed`).
	if full_security_status.err_occurred() {
		print_secure_mode_error_or_warning(&full_security_status);
		if !full_security_status.all_errs_allowed() {
			return Err("could not enable Secure Validator Mode; check logs".into());
		}
	}

	if security_status.secure_validator_mode {
		gum::info!(
			target: LOG_TARGET,
			"👮♀️ Running in Secure Validator Mode. \
			It is highly recommended that you operate according to our security guidelines. \
			\nMore information: https://wiki.network.pezkuwichain.io/docs/maintain-guides-secure-validator#secure-validator-mode"
		);
	}

	Ok(security_status)
}
|
||||
|
||||
/// Contains the full security status including error states.
struct FullSecurityStatus {
	// The reportable subset: which security features could actually be enabled.
	partial: SecurityStatus,
	// The errors produced by the individual feature probes, if any.
	errs: Vec<SecureModeError>,
}
|
||||
|
||||
impl FullSecurityStatus {
|
||||
fn new(
|
||||
secure_validator_mode: bool,
|
||||
landlock: SecureModeResult,
|
||||
seccomp: SecureModeResult,
|
||||
change_root: SecureModeResult,
|
||||
secure_clone: SecureModeResult,
|
||||
) -> Self {
|
||||
Self {
|
||||
partial: SecurityStatus {
|
||||
secure_validator_mode,
|
||||
can_enable_landlock: landlock.is_ok(),
|
||||
can_enable_seccomp: seccomp.is_ok(),
|
||||
can_unshare_user_namespace_and_change_root: change_root.is_ok(),
|
||||
can_do_secure_clone: secure_clone.is_ok(),
|
||||
},
|
||||
errs: [landlock, seccomp, change_root, secure_clone]
|
||||
.into_iter()
|
||||
.filter_map(|result| result.err())
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
|
||||
fn as_partial(&self) -> SecurityStatus {
|
||||
self.partial.clone()
|
||||
}
|
||||
|
||||
fn err_occurred(&self) -> bool {
|
||||
!self.errs.is_empty()
|
||||
}
|
||||
|
||||
fn all_errs_allowed(&self) -> bool {
|
||||
!self.partial.secure_validator_mode ||
|
||||
self.errs.iter().all(|err| err.is_allowed_in_secure_mode(&self.partial))
|
||||
}
|
||||
|
||||
fn errs_string(&self) -> String {
|
||||
self.errs
|
||||
.iter()
|
||||
.map(|err| {
|
||||
format!(
|
||||
"\n - {}{}",
|
||||
if err.is_allowed_in_secure_mode(&self.partial) { "Optional: " } else { "" },
|
||||
err
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
/// Outcome of probing a single security feature: `Ok(())` if it can be enabled, otherwise the
/// corresponding [`SecureModeError`].
type SecureModeResult = std::result::Result<(), SecureModeError>;
|
||||
|
||||
/// Errors related to enabling Secure Validator Mode.
#[derive(Debug)]
enum SecureModeError {
	/// Landlock could not be enabled; `abi` is the Landlock ABI version that was probed.
	CannotEnableLandlock { err: String, abi: u8 },
	/// seccomp could not be enabled.
	CannotEnableSeccomp(String),
	/// Unsharing the user namespace and changing root failed.
	CannotUnshareUserNamespaceAndChangeRoot(String),
	/// `clone` with the full set of sandboxing flags failed.
	CannotDoSecureClone(String),
}
|
||||
|
||||
impl SecureModeError {
|
||||
/// Whether this error is allowed with Secure Validator Mode enabled.
|
||||
fn is_allowed_in_secure_mode(&self, security_status: &SecurityStatus) -> bool {
|
||||
use SecureModeError::*;
|
||||
match self {
|
||||
// Landlock is present on relatively recent Linuxes. This is optional if the unshare
|
||||
// capability is present, providing FS sandboxing a different way.
|
||||
CannotEnableLandlock { .. } =>
|
||||
security_status.can_unshare_user_namespace_and_change_root,
|
||||
// seccomp should be present on all modern Linuxes unless it's been disabled.
|
||||
CannotEnableSeccomp(_) => false,
|
||||
// Should always be present on modern Linuxes. If not, Landlock also provides FS
|
||||
// sandboxing, so don't enforce this.
|
||||
CannotUnshareUserNamespaceAndChangeRoot(_) => security_status.can_enable_landlock,
|
||||
// We have not determined the kernel requirements for this capability, and it's also not
|
||||
// necessary for FS or networking restrictions.
|
||||
CannotDoSecureClone(_) => true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for SecureModeError {
	// User-facing description of each failed security feature; these strings end up in the
	// warning/error log lines shown to the node operator.
	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
		use SecureModeError::*;
		match self {
			CannotEnableLandlock{err, abi} => write!(f, "Cannot enable landlock (ABI {abi}), a Linux 5.13+ kernel security feature: {err}"),
			CannotEnableSeccomp(err) => write!(f, "Cannot enable seccomp, a Linux-specific kernel security feature: {err}"),
			CannotUnshareUserNamespaceAndChangeRoot(err) => write!(f, "Cannot unshare user namespace and change root, which are Linux-specific kernel security features: {err}"),
			CannotDoSecureClone(err) => write!(f, "Cannot call clone with all sandboxing flags, a Linux-specific kernel security features: {err}"),
		}
	}
}
|
||||
|
||||
/// Print an error if Secure Validator Mode and some mandatory errors occurred, warn otherwise.
|
||||
fn print_secure_mode_error_or_warning(security_status: &FullSecurityStatus) {
|
||||
let all_errs_allowed = security_status.all_errs_allowed();
|
||||
let errs_string = security_status.errs_string();
|
||||
|
||||
if all_errs_allowed {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
"{}{}",
|
||||
crate::SECURE_MODE_WARNING,
|
||||
errs_string,
|
||||
);
|
||||
} else {
|
||||
gum::error!(
|
||||
target: LOG_TARGET,
|
||||
"{}{}{}",
|
||||
crate::SECURE_MODE_ERROR,
|
||||
errs_string,
|
||||
crate::IGNORE_SECURE_MODE_TIP
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if we can change root to a new, sandboxed root and return an error if not.
|
||||
///
|
||||
/// We do this check by spawning a new process and trying to sandbox it. To get as close as possible
|
||||
/// to running the check in a worker, we try it... in a worker. The expected return status is 0 on
|
||||
/// success and -1 on failure.
|
||||
async fn check_can_unshare_user_namespace_and_change_root(
|
||||
prepare_worker_program_path: &Path,
|
||||
cache_path: &Path,
|
||||
) -> SecureModeResult {
|
||||
let cache_dir_tempdir = tempfile::Builder::new()
|
||||
.prefix("check-can-unshare-")
|
||||
.tempdir_in(cache_path)
|
||||
.map_err(|err| {
|
||||
SecureModeError::CannotUnshareUserNamespaceAndChangeRoot(format!(
|
||||
"could not create a temporary directory in {:?}: {}",
|
||||
cache_path, err
|
||||
))
|
||||
})?;
|
||||
spawn_process_for_security_check(
|
||||
prepare_worker_program_path,
|
||||
"--check-can-unshare-user-namespace-and-change-root",
|
||||
&[cache_dir_tempdir.path()],
|
||||
)
|
||||
.await
|
||||
.map_err(|err| SecureModeError::CannotUnshareUserNamespaceAndChangeRoot(err))
|
||||
}
|
||||
|
||||
/// Check if landlock is supported and return an error if not.
|
||||
///
|
||||
/// We do this check by spawning a new process and trying to sandbox it. To get as close as possible
|
||||
/// to running the check in a worker, we try it... in a worker. The expected return status is 0 on
|
||||
/// success and -1 on failure.
|
||||
async fn check_landlock(prepare_worker_program_path: &Path) -> SecureModeResult {
|
||||
let abi = pezkuwi_node_core_pvf_common::worker::security::landlock::LANDLOCK_ABI as u8;
|
||||
spawn_process_for_security_check(
|
||||
prepare_worker_program_path,
|
||||
"--check-can-enable-landlock",
|
||||
std::iter::empty::<&str>(),
|
||||
)
|
||||
.await
|
||||
.map_err(|err| SecureModeError::CannotEnableLandlock { err, abi })
|
||||
}
|
||||
|
||||
/// Check if seccomp is supported and return an error if not.
|
||||
///
|
||||
/// We do this check by spawning a new process and trying to sandbox it. To get as close as possible
|
||||
/// to running the check in a worker, we try it... in a worker. The expected return status is 0 on
|
||||
/// success and -1 on failure.
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
async fn check_seccomp(prepare_worker_program_path: &Path) -> SecureModeResult {
|
||||
spawn_process_for_security_check(
|
||||
prepare_worker_program_path,
|
||||
"--check-can-enable-seccomp",
|
||||
std::iter::empty::<&str>(),
|
||||
)
|
||||
.await
|
||||
.map_err(|err| SecureModeError::CannotEnableSeccomp(err))
|
||||
}
|
||||
|
||||
// Fallback for non-x86_64 targets: the seccomp check always fails there (per the error message,
// seccomp support in this host is limited to the x86_64 family — presumably the syscall filter
// lists are arch-specific; confirm against the worker-side filter definitions).
#[cfg(not(target_arch = "x86_64"))]
async fn check_seccomp(_: &Path) -> SecureModeResult {
	Err(SecureModeError::CannotEnableSeccomp(
		"only supported on CPUs from the x86_64 family (usually Intel or AMD)".into(),
	))
}
|
||||
|
||||
/// Check if we can call `clone` with all sandboxing flags, and return an error if not.
|
||||
///
|
||||
/// We do this check by spawning a new process and trying to sandbox it. To get as close as possible
|
||||
/// to running the check in a worker, we try it... in a worker. The expected return status is 0 on
|
||||
/// success and -1 on failure.
|
||||
async fn check_can_do_secure_clone(prepare_worker_program_path: &Path) -> SecureModeResult {
|
||||
spawn_process_for_security_check(
|
||||
prepare_worker_program_path,
|
||||
"--check-can-do-secure-clone",
|
||||
std::iter::empty::<&str>(),
|
||||
)
|
||||
.await
|
||||
.map_err(|err| SecureModeError::CannotDoSecureClone(err))
|
||||
}
|
||||
|
||||
async fn spawn_process_for_security_check<I, S>(
|
||||
prepare_worker_program_path: &Path,
|
||||
check_arg: &'static str,
|
||||
extra_args: I,
|
||||
) -> Result<(), String>
|
||||
where
|
||||
I: IntoIterator<Item = S>,
|
||||
S: AsRef<std::ffi::OsStr>,
|
||||
{
|
||||
let mut command = tokio::process::Command::new(prepare_worker_program_path);
|
||||
// Clear env vars. (In theory, running checks with different env vars could result in different
|
||||
// outcomes of the checks.)
|
||||
command.env_clear();
|
||||
// Add back any env vars we want to keep.
|
||||
if let Ok(value) = std::env::var("RUST_LOG") {
|
||||
command.env("RUST_LOG", value);
|
||||
}
|
||||
|
||||
match command.arg(check_arg).args(extra_args).output().await {
|
||||
Ok(output) if output.status.success() => Ok(()),
|
||||
Ok(output) => {
|
||||
let stderr = std::str::from_utf8(&output.stderr)
|
||||
.expect("child process writes a UTF-8 string to stderr; qed")
|
||||
.trim();
|
||||
if stderr.is_empty() {
|
||||
Err("not available".into())
|
||||
} else {
|
||||
Err(format!("not available: {}", stderr))
|
||||
}
|
||||
},
|
||||
Err(err) => Err(format!("could not start child process: {}", err)),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
	use super::*;

	/// Shorthand for building a [`SecurityStatus`] with the given capability flags, in
	/// declaration order.
	fn status(
		secure_validator_mode: bool,
		can_enable_landlock: bool,
		can_enable_seccomp: bool,
		can_unshare_user_namespace_and_change_root: bool,
		can_do_secure_clone: bool,
	) -> SecurityStatus {
		SecurityStatus {
			secure_validator_mode,
			can_enable_landlock,
			can_enable_seccomp,
			can_unshare_user_namespace_and_change_root,
			can_do_secure_clone,
		}
	}

	#[test]
	fn test_secure_mode_error_optionality() {
		// Landlock failure is tolerable iff unshare+chroot works as an FS-sandboxing substitute.
		let err = SecureModeError::CannotEnableLandlock { err: String::new(), abi: 3 };
		assert!(err.is_allowed_in_secure_mode(&status(true, false, false, true, true)));
		assert!(!err.is_allowed_in_secure_mode(&status(true, false, true, false, false)));

		// A seccomp failure is never tolerable in Secure Validator Mode.
		let err = SecureModeError::CannotEnableSeccomp(String::new());
		assert!(!err.is_allowed_in_secure_mode(&status(true, false, false, true, true)));
		assert!(!err.is_allowed_in_secure_mode(&status(true, false, true, false, false)));

		// Unshare+chroot failure is tolerable iff landlock works as a substitute.
		let err = SecureModeError::CannotUnshareUserNamespaceAndChangeRoot(String::new());
		assert!(err.is_allowed_in_secure_mode(&status(true, true, false, false, false)));
		assert!(!err.is_allowed_in_secure_mode(&status(true, false, true, false, false)));

		// A secure-clone failure is always tolerable.
		let err = SecureModeError::CannotDoSecureClone(String::new());
		assert!(err.is_allowed_in_secure_mode(&status(true, true, true, true, true)));
		assert!(err.is_allowed_in_secure_mode(&status(false, false, false, false, false)));
	}
}
|
||||
@@ -0,0 +1,134 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Various utilities for testing.
|
||||
|
||||
pub use crate::{
|
||||
host::{EXECUTE_BINARY_NAME, PREPARE_BINARY_NAME},
|
||||
worker_interface::{spawn_with_program_path, SpawnErr},
|
||||
};
|
||||
|
||||
use crate::{artifacts::ArtifactId, get_worker_version};
|
||||
use is_executable::IsExecutable;
|
||||
use pezkuwi_node_core_pvf_common::pvf::PvfPrepData;
|
||||
use pezkuwi_node_primitives::NODE_VERSION;
|
||||
use pezkuwi_primitives::ExecutorParams;
|
||||
use std::{
|
||||
path::PathBuf,
|
||||
sync::{Mutex, OnceLock},
|
||||
};
|
||||
|
||||
/// Emulates the behavior of the preparation and the execution workers, stitched together into a
/// single synchronous function: decompress, prevalidate, prepare, then execute.
///
/// Intended for tests only; real validation goes through the separate worker processes.
pub fn validate_candidate(
	code: &[u8],
	params: &[u8],
) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
	use pezkuwi_node_core_pvf_common::executor_interface::{prepare, prevalidate};
	use pezkuwi_node_core_pvf_execute_worker::execute_artifact;

	// 10 MiB is the decompressed-size bound applied here; failing to decompress is a programming
	// error in a test fixture, so panicking is acceptable.
	let code = sp_maybe_compressed_blob::decompress(code, 10 * 1024 * 1024)
		.expect("Decompressing code failed");

	// Preparation: validate the blob, then compile it with default executor parameters.
	let blob = prevalidate(&code)?;
	let executor_params = ExecutorParams::default();
	let compiled_artifact_blob = prepare(blob, &executor_params)?;

	// Execution: run the freshly prepared artifact against the given parameters.
	let result = unsafe {
		// SAFETY: This is trivially safe since the artifact is obtained by calling `prepare`
		// and is written into a temporary directory in an unmodified state.
		execute_artifact(&compiled_artifact_blob, &executor_params, params)?
	};

	Ok(result)
}
|
||||
|
||||
/// Retrieves the worker paths and builds workers as needed.
|
||||
///
|
||||
/// NOTE: This should only be called in dev code (tests, benchmarks) as it relies on the relative
|
||||
/// paths of the built workers.
|
||||
pub fn build_workers_and_get_paths() -> (PathBuf, PathBuf) {
|
||||
// Only needs to be called once for the current process.
|
||||
static WORKER_PATHS: OnceLock<Mutex<(PathBuf, PathBuf)>> = OnceLock::new();
|
||||
|
||||
fn build_workers() {
|
||||
let mut build_args = vec![
|
||||
"build",
|
||||
"--package=pezkuwi",
|
||||
"--bin=pezkuwi-prepare-worker",
|
||||
"--bin=pezkuwi-execute-worker",
|
||||
];
|
||||
|
||||
if cfg!(build_profile = "release") {
|
||||
build_args.push("--release");
|
||||
}
|
||||
|
||||
let mut cargo = std::process::Command::new("cargo");
|
||||
let cmd = cargo
|
||||
// wasm runtime not needed
|
||||
.env("SKIP_WASM_BUILD", "1")
|
||||
.args(build_args)
|
||||
.stdout(std::process::Stdio::piped());
|
||||
|
||||
println!("INFO: calling `{cmd:?}`");
|
||||
let exit_status = cmd.status().expect("Failed to run the build program");
|
||||
|
||||
if !exit_status.success() {
|
||||
eprintln!("ERROR: Failed to build workers: {}", exit_status.code().unwrap());
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
let mutex = WORKER_PATHS.get_or_init(|| {
|
||||
let mut workers_path = std::env::current_exe().unwrap();
|
||||
workers_path.pop();
|
||||
workers_path.pop();
|
||||
let mut prepare_worker_path = workers_path.clone();
|
||||
prepare_worker_path.push(PREPARE_BINARY_NAME);
|
||||
let mut execute_worker_path = workers_path.clone();
|
||||
execute_worker_path.push(EXECUTE_BINARY_NAME);
|
||||
|
||||
// explain why a build happens
|
||||
if !prepare_worker_path.is_executable() {
|
||||
println!("WARN: Prepare worker does not exist or is not executable. Workers directory: {:?}", workers_path);
|
||||
}
|
||||
if !execute_worker_path.is_executable() {
|
||||
println!("WARN: Execute worker does not exist or is not executable. Workers directory: {:?}", workers_path);
|
||||
}
|
||||
if let Ok(ver) = get_worker_version(&prepare_worker_path) {
|
||||
if ver != NODE_VERSION {
|
||||
println!("WARN: Prepare worker version {ver} does not match node version {NODE_VERSION}; worker path: {prepare_worker_path:?}");
|
||||
}
|
||||
}
|
||||
if let Ok(ver) = get_worker_version(&execute_worker_path) {
|
||||
if ver != NODE_VERSION {
|
||||
println!("WARN: Execute worker version {ver} does not match node version {NODE_VERSION}; worker path: {execute_worker_path:?}");
|
||||
}
|
||||
}
|
||||
|
||||
build_workers();
|
||||
|
||||
Mutex::new((prepare_worker_path, execute_worker_path))
|
||||
});
|
||||
|
||||
let guard = mutex.lock().unwrap();
|
||||
(guard.0.clone(), guard.1.clone())
|
||||
}
|
||||
|
||||
/// Creates the artifact id of a test PVF that is uniquely identified by the given discriminator
/// number.
pub fn artifact_id(discriminator: u32) -> ArtifactId {
	ArtifactId::from_pvf_prep_data(&PvfPrepData::from_discriminator(discriminator))
}
|
||||
@@ -0,0 +1,431 @@
|
||||
// Copyright (C) Parity Technologies (UK) Ltd.
|
||||
// This file is part of Pezkuwi.
|
||||
|
||||
// Pezkuwi is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Pezkuwi is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Pezkuwi. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Common logic for implementation of worker processes.
|
||||
|
||||
use crate::LOG_TARGET;
|
||||
use codec::Encode;
|
||||
use futures::FutureExt as _;
|
||||
use futures_timer::Delay;
|
||||
use pezkuwi_node_core_pvf_common::{SecurityStatus, WorkerHandshake};
|
||||
use pin_project::pin_project;
|
||||
use rand::Rng;
|
||||
use std::{
|
||||
fmt, mem,
|
||||
path::{Path, PathBuf},
|
||||
pin::Pin,
|
||||
task::{Context, Poll},
|
||||
time::Duration,
|
||||
};
|
||||
use tokio::{
|
||||
io::{self, AsyncRead, AsyncReadExt as _, AsyncWrite, AsyncWriteExt as _, ReadBuf},
|
||||
net::{UnixListener, UnixStream},
|
||||
process,
|
||||
};
|
||||
|
||||
/// A multiple of the job timeout (in CPU time) for which we are willing to wait on the host (in
/// wall clock time). This is lenient because CPU time may go slower than wall clock time.
///
/// E.g. a job with a 2s CPU-time budget is given up to 8s of wall-clock time.
pub const JOB_TIMEOUT_WALL_CLOCK_FACTOR: u32 = 4;
|
||||
|
||||
/// This is publicly exposed only for integration tests.
///
/// Spawns a worker process, waits for it to connect back on a transient Unix socket, and sends it
/// the initial handshake. On success returns the idle-worker token plus a handle that resolves
/// when the worker process terminates.
///
/// # Parameters
///
/// - `debug_id`: An identifier for the process (e.g. "execute" or "prepare").
///
/// - `program_path`: The path to the program.
///
/// - `cache_path`: The path to the artifact cache.
///
/// - `extra_args`: Optional extra CLI arguments to the program. NOTE: Should only contain data
///   required before the handshake, like node/worker versions for the version check. Other data
///   should go through the handshake.
///
/// - `spawn_timeout`: The amount of time to wait for the child process to spawn.
///
/// - `security_status`: contains the detected status of security features.
#[doc(hidden)]
pub async fn spawn_with_program_path(
	debug_id: &'static str,
	program_path: impl Into<PathBuf>,
	cache_path: &Path,
	extra_args: &[&str],
	spawn_timeout: Duration,
	security_status: SecurityStatus,
) -> Result<(IdleWorker, WorkerHandle), SpawnErr> {
	let program_path = program_path.into();
	let worker_dir = WorkerDir::new(debug_id, cache_path).await?;
	let extra_args: Vec<String> = extra_args.iter().map(|arg| arg.to_string()).collect();
	// Hack the borrow-checker.
	let program_path_clone = program_path.clone();
	let worker_dir_clone = worker_dir.path().to_owned();
	let extra_args_clone = extra_args.clone();

	with_transient_socket_path(debug_id, |socket_path| {
		let socket_path = socket_path.to_owned();

		async move {
			// Bind the listener before spawning the worker, so it can connect immediately.
			let listener = match UnixListener::bind(&socket_path) {
				Ok(ok) => ok,
				Err(err) => return Err(SpawnErr::Bind { socket_path, err: err.to_string() }),
			};

			let handle =
				WorkerHandle::spawn(&program_path, &extra_args, &socket_path, &worker_dir.path())
					.map_err(|err| SpawnErr::ProcessSpawn { program_path, err: err.to_string() })?;

			// Race the worker's connection against the spawn timeout.
			futures::select! {
				accept_result = listener.accept().fuse() => {
					let (mut stream, _) = accept_result
						.map_err(|err| SpawnErr::Accept { socket_path, err: err.to_string() })?;
					send_worker_handshake(&mut stream, WorkerHandshake { security_status })
						.await
						.map_err(|err| SpawnErr::Handshake { err: err.to_string() })?;
					Ok((IdleWorker { stream, pid: handle.id(), worker_dir }, handle))
				}
				_ = Delay::new(spawn_timeout).fuse() => Err(SpawnErr::AcceptTimeout{spawn_timeout}),
			}
		}
	})
	.await
	.map_err(|err| {
		// Log the spawn failure with full context before handing the error to the caller.
		gum::warn!(
			target: LOG_TARGET,
			%debug_id,
			program_path = ?program_path_clone,
			extra_args = ?extra_args_clone,
			worker_dir = ?worker_dir_clone,
			"error spawning worker: {}",
			err,
		);
		err
	})
}
|
||||
|
||||
/// A temporary, random, free path that is necessary only to establish socket communications. If a
/// directory exists at the path at the end of this function, it is removed then.
async fn with_transient_socket_path<T, F, Fut>(debug_id: &'static str, f: F) -> Result<T, SpawnErr>
where
	F: FnOnce(&Path) -> Fut,
	Fut: futures::Future<Output = Result<T, SpawnErr>> + 'static,
{
	/// Returns a path under [`std::env::temp_dir`]. The path name will start with the given prefix.
	///
	/// There is only a certain number of retries. If exceeded this function will give up and return
	/// an error.
	pub async fn tmppath(prefix: &str) -> io::Result<PathBuf> {
		// Builds `<dir>/<prefix><10 random alphanumeric chars>` without checking for existence.
		fn make_tmppath(prefix: &str, dir: &Path) -> PathBuf {
			use rand::distributions::Alphanumeric;

			const DISCRIMINATOR_LEN: usize = 10;

			let mut buf = Vec::with_capacity(prefix.len() + DISCRIMINATOR_LEN);
			buf.extend(prefix.as_bytes());
			buf.extend(rand::thread_rng().sample_iter(&Alphanumeric).take(DISCRIMINATOR_LEN));

			let s = std::str::from_utf8(&buf)
				.expect("the string is collected from a valid utf-8 sequence; qed");

			let mut path = dir.to_owned();
			path.push(s);
			path
		}

		const NUM_RETRIES: usize = 50;

		let dir = std::env::temp_dir();
		// NOTE(review): this is an existence check only, not an atomic claim of the path — a race
		// with another process between this check and the later bind appears possible, in which
		// case the bind would simply fail. Confirm this is acceptable.
		for _ in 0..NUM_RETRIES {
			let tmp_path = make_tmppath(prefix, &dir);
			if !tmp_path.exists() {
				return Ok(tmp_path);
			}
		}

		Err(io::Error::new(io::ErrorKind::Other, "failed to create a temporary path"))
	}

	let socket_path = tmppath(&format!("pvf-host-{}-", debug_id))
		.await
		.map_err(|_| SpawnErr::TmpPath)?;
	let result = f(&socket_path).await;

	// Best effort to remove the socket file. Under normal circumstances the socket will be removed
	// by the worker. We make sure that it is removed here, just in case a failed rendezvous.
	let _ = tokio::fs::remove_file(socket_path).await;

	result
}
|
||||
|
||||
/// A struct that represents an idle worker.
///
/// This struct is supposed to be used as a token that is passed by move into a subroutine that
/// initiates a job. If the worker dies on the duty, then the token is not returned.
///
/// Holding this token keeps the worker's socket connection and its worker dir alive.
#[derive(Debug)]
pub struct IdleWorker {
	/// The stream to which the child process is connected.
	pub stream: UnixStream,

	/// The identifier of this process. Used to reset the niceness.
	pub pid: u32,

	/// The temporary per-worker path. We clean up the worker dir between jobs and delete it when
	/// the worker dies.
	pub worker_dir: WorkerDir,
}
|
||||
|
||||
/// This is publicly exposed only for integration tests.
///
/// An error happened during spawning a worker process.
#[derive(thiserror::Error, Clone, Debug)]
#[doc(hidden)]
pub enum SpawnErr {
	/// Could not obtain a transient socket path or create the worker dir.
	#[error("cannot obtain a temporary path location")]
	TmpPath,
	/// Binding the Unix listener to the socket path failed.
	#[error("cannot bind the socket to the given path {socket_path:?}: {err}")]
	Bind { socket_path: PathBuf, err: String },
	/// Accepting the worker's connection on the socket failed.
	#[error(
		"an error happened during accepting a connection to the socket {socket_path:?}: {err}"
	)]
	Accept { socket_path: PathBuf, err: String },
	/// Spawning the worker process itself failed.
	#[error("an error happened during spawning the process at path {program_path:?}: {err}")]
	ProcessSpawn { program_path: PathBuf, err: String },
	/// The worker did not connect back within the spawn timeout.
	#[error("the deadline {}ms allotted for the worker spawning and connecting to the socket has elapsed", .spawn_timeout.as_millis())]
	AcceptTimeout { spawn_timeout: Duration },
	/// Sending the post-connect handshake failed.
	#[error("failed to send handshake after successful spawning was signaled: {err}")]
	Handshake { err: String },
}
|
||||
|
||||
/// This is a representation of a potentially running worker. Drop it and the process will be
/// killed.
///
/// A worker's handle is also a future that resolves when it's detected that the worker's process
/// has been terminated. Since the worker is running in another process it is obviously not
/// necessary to poll this future to make the worker run, it's only for termination detection.
///
/// This future relies on the fact that a child process's stdout `fd` is closed upon its
/// termination.
#[pin_project]
pub struct WorkerHandle {
	/// The child process; killed on drop (spawned with `kill_on_drop`).
	child: process::Child,
	/// The child's pid, captured at spawn time.
	child_id: u32,
	/// The child's piped stdout; `EOF` on it signals termination.
	#[pin]
	stdout: process::ChildStdout,
	/// The program path, kept for diagnostics.
	program: PathBuf,
	/// Scratch buffer for draining stdout while polling; see the comment in `spawn`.
	drop_box: Box<[u8]>,
}
|
||||
|
||||
impl WorkerHandle {
	/// Spawns the worker process and wires it up for termination tracking.
	///
	/// The child gets a cleared environment (only `RUST_LOG` is set, from the host's logging
	/// directives), the socket and worker-dir paths as CLI arguments, and a piped stdout used by
	/// the `Future` impl to detect termination. The process is killed when this handle is dropped.
	fn spawn(
		program: impl AsRef<Path>,
		extra_args: &[String],
		socket_path: impl AsRef<Path>,
		worker_dir_path: impl AsRef<Path>,
	) -> io::Result<Self> {
		// Clear all env vars from the spawned process.
		let mut command = process::Command::new(program.as_ref());
		command.env_clear();
		// Forward the logging directives so worker logs match the host's configuration.
		command.env("RUST_LOG", sc_tracing::logging::get_directives().join(","));

		let mut child = command
			.args(extra_args)
			.arg("--socket-path")
			.arg(socket_path.as_ref().as_os_str())
			.arg("--worker-dir-path")
			.arg(worker_dir_path.as_ref().as_os_str())
			.stdout(std::process::Stdio::piped())
			.kill_on_drop(true)
			.spawn()?;

		// `id()` returns `None` if the child has already exited; treat that as a spawn failure.
		let child_id = child
			.id()
			.ok_or(io::Error::new(io::ErrorKind::Other, "could not get id of spawned process"))?;
		let stdout = child
			.stdout
			.take()
			.expect("the process spawned with piped stdout should have the stdout handle");

		Ok(WorkerHandle {
			child,
			child_id,
			stdout,
			program: program.as_ref().to_path_buf(),
			// We don't expect the bytes to be ever read. But in case we do, we should not use a
			// buffer of a small size, because otherwise if the child process does return any data
			// we will end up issuing a syscall for each byte. We also prefer not to do allocate
			// that on the stack, since each poll the buffer will be allocated and initialized (and
			// that's due `poll_read` takes &mut [u8] and there are no guarantees that a `poll_read`
			// won't ever read from there even though that's unlikely).
			//
			// OTOH, we also don't want to be super smart here and we could just afford to allocate
			// a buffer for that here.
			drop_box: vec![0; 8192].into_boxed_slice(),
		})
	}

	/// Returns the process id of this worker.
	pub fn id(&self) -> u32 {
		self.child_id
	}
}
|
||||
|
||||
impl futures::Future for WorkerHandle {
|
||||
type Output = ();
|
||||
|
||||
fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
|
||||
let me = self.project();
|
||||
// Create a `ReadBuf` here instead of storing it in `WorkerHandle` to avoid a lifetime
|
||||
// parameter on `WorkerHandle`. Creating the `ReadBuf` is fairly cheap.
|
||||
let mut read_buf = ReadBuf::new(&mut *me.drop_box);
|
||||
match futures::ready!(AsyncRead::poll_read(me.stdout, cx, &mut read_buf)) {
|
||||
Ok(()) => {
|
||||
if read_buf.filled().len() > 0 {
|
||||
// weird, we've read something. Pretend that never happened and reschedule
|
||||
// ourselves.
|
||||
cx.waker().wake_by_ref();
|
||||
Poll::Pending
|
||||
} else {
|
||||
// Nothing read means `EOF` means the child was terminated. Resolve.
|
||||
Poll::Ready(())
|
||||
}
|
||||
},
|
||||
Err(err) => {
|
||||
// The implementation is guaranteed to not to return `WouldBlock` and Interrupted.
|
||||
// This leaves us with legit errors which we suppose were due to termination.
|
||||
|
||||
// Log the status code.
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %me.child_id,
|
||||
status_code = ?me.child.try_wait().ok().flatten().map(|c| c.to_string()),
|
||||
"pvf worker ({}): {:?}",
|
||||
me.program.display(),
|
||||
err,
|
||||
);
|
||||
Poll::Ready(())
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for WorkerHandle {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "WorkerHandle(pid={})", self.id())
|
||||
}
|
||||
}
|
||||
|
||||
/// Write some data prefixed by its length into `w`.
|
||||
pub async fn framed_send(w: &mut (impl AsyncWrite + Unpin), buf: &[u8]) -> io::Result<()> {
|
||||
let len_buf = buf.len().to_le_bytes();
|
||||
w.write_all(&len_buf).await?;
|
||||
w.write_all(buf).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Read some data prefixed by its length from `r`.
|
||||
pub async fn framed_recv(r: &mut (impl AsyncRead + Unpin)) -> io::Result<Vec<u8>> {
|
||||
let mut len_buf = [0u8; mem::size_of::<usize>()];
|
||||
r.read_exact(&mut len_buf).await?;
|
||||
let len = usize::from_le_bytes(len_buf);
|
||||
let mut buf = vec![0; len];
|
||||
r.read_exact(&mut buf).await?;
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
/// Sends a handshake with information for the worker.
///
/// The handshake is SCALE-encoded (via `codec::Encode`) and sent as a single length-prefixed
/// frame.
async fn send_worker_handshake(
	stream: &mut UnixStream,
	handshake: WorkerHandshake,
) -> io::Result<()> {
	framed_send(stream, &handshake.encode()).await
}
|
||||
|
||||
/// A temporary worker dir that contains only files needed by the worker. The worker will change its
/// root (the `/` directory) to this directory; it should have access to no other paths on its
/// filesystem.
///
/// NOTE: This struct cleans up its associated directory when it is dropped. Therefore it should not
/// implement `Clone`.
///
/// # File structure
///
/// The overall file structure for the PVF system is as follows. The `worker-dir-X`s are managed by
/// this struct.
///
/// ```nocompile
/// + /<cache_path>/
///   - artifact-1
///   - artifact-2
///   - [...]
///   - worker-dir-1/  (new `/` for worker-1)
///     + socket                            (created by host)
///     + tmp-artifact                      (created by host) (prepare-only)
///     + artifact     (link -> artifact-1) (created by host) (execute-only)
///   - worker-dir-2/  (new `/` for worker-2)
///     + [...]
/// ```
#[derive(Debug)]
pub struct WorkerDir {
	/// The underlying temp dir; it is deleted when this struct is dropped.
	tempdir: tempfile::TempDir,
}
|
||||
|
||||
/// Name prefix of the per-worker directories created under the cache path.
pub const WORKER_DIR_PREFIX: &str = "worker-dir";
|
||||
|
||||
impl WorkerDir {
|
||||
/// Creates a new, empty worker dir with a random name in the given cache dir.
|
||||
pub async fn new(debug_id: &'static str, cache_dir: &Path) -> Result<Self, SpawnErr> {
|
||||
let prefix = format!("{WORKER_DIR_PREFIX}-{debug_id}-");
|
||||
let tempdir = tempfile::Builder::new()
|
||||
.prefix(&prefix)
|
||||
.tempdir_in(cache_dir)
|
||||
.map_err(|_| SpawnErr::TmpPath)?;
|
||||
Ok(Self { tempdir })
|
||||
}
|
||||
|
||||
pub fn path(&self) -> &Path {
|
||||
self.tempdir.path()
|
||||
}
|
||||
}
|
||||
|
||||
// Not async since Rust has trouble with async recursion. There should be few files here anyway.
//
/// Clear the temporary worker dir without deleting it. Not deleting is important because the worker
/// has mounted its own separate filesystem here.
///
/// Should be called right after a job has finished. We don't want jobs to have access to
/// artifacts from previous jobs.
pub fn clear_worker_dir_path(worker_dir_path: &Path) -> io::Result<()> {
	// Recursively delete everything below `path`, keeping `path` itself in place.
	fn remove_dir_contents(path: &Path) -> io::Result<()> {
		for entry in std::fs::read_dir(path)? {
			let entry = entry?;
			let entry_path = entry.path();

			if entry.file_type()?.is_dir() {
				remove_dir_contents(&entry_path)?;
				std::fs::remove_dir(entry_path)?;
			} else {
				std::fs::remove_file(entry_path)?;
			}
		}
		Ok(())
	}

	// Note the worker dir may not exist anymore because of the worker dying and being cleaned up.
	match remove_dir_contents(worker_dir_path) {
		Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(()),
		other => other,
	}
}
|
||||
Reference in New Issue
Block a user