mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-18 03:41:02 +00:00
Preserve artifact cache unless stale (#1918)
Co-authored-by: Marcin S <marcin@realemail.net>
This commit is contained in:
@@ -16,10 +16,10 @@
|
||||
|
||||
//! PVF artifacts (final compiled code blobs).
|
||||
//!
|
||||
//! # Lifecycle of an artifact
|
||||
//! # Lifecycle of an artifact
|
||||
//!
|
||||
//! 1. During node start-up, the artifacts cache is cleaned up. This means that all local artifacts
|
||||
//! stored on-disk are cleared, and we start with an empty [`Artifacts`] table.
|
||||
//! 1. During node start-up, we will check the cached artifacts, if any. The stale and corrupted
|
||||
//! ones are pruned. The valid ones are registered in the [`Artifacts`] table.
|
||||
//!
|
||||
//! 2. In order to be executed, a PVF should be prepared first. This means that artifacts should
|
||||
//! have an [`ArtifactState::Prepared`] entry for that artifact in the table. If not, the
|
||||
@@ -55,18 +55,29 @@
|
||||
//! older by a predefined parameter. This process is run very rarely (say, once a day). Once the
|
||||
//! artifact is expired it is removed from disk eagerly and atomically.
|
||||
|
||||
use crate::host::PrepareResultSender;
|
||||
use crate::{host::PrecheckResultSender, LOG_TARGET};
|
||||
use always_assert::always;
|
||||
use polkadot_node_core_pvf_common::{error::PrepareError, prepare::PrepareStats, pvf::PvfPrepData};
|
||||
use polkadot_core_primitives::Hash;
|
||||
use polkadot_node_core_pvf_common::{
|
||||
error::PrepareError, prepare::PrepareStats, pvf::PvfPrepData, RUNTIME_VERSION,
|
||||
};
|
||||
use polkadot_node_primitives::NODE_VERSION;
|
||||
use polkadot_parachain_primitives::primitives::ValidationCodeHash;
|
||||
use polkadot_primitives::ExecutorParamsHash;
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
path::{Path, PathBuf},
|
||||
str::FromStr as _,
|
||||
time::{Duration, SystemTime},
|
||||
};
|
||||
|
||||
const RUNTIME_PREFIX: &str = "wasmtime_v";
|
||||
const NODE_PREFIX: &str = "polkadot_v";
|
||||
|
||||
fn artifact_prefix() -> String {
|
||||
format!("{}{}_{}{}", RUNTIME_PREFIX, RUNTIME_VERSION, NODE_PREFIX, NODE_VERSION)
|
||||
}
|
||||
|
||||
/// Identifier of an artifact. Encodes a code hash of the PVF and a hash of executor parameter set.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct ArtifactId {
|
||||
@@ -75,9 +86,6 @@ pub struct ArtifactId {
|
||||
}
|
||||
|
||||
impl ArtifactId {
|
||||
const PREFIX: &'static str = "wasmtime_";
|
||||
const NODE_VERSION_PREFIX: &'static str = "polkadot_v";
|
||||
|
||||
/// Creates a new artifact ID with the given hash.
|
||||
pub fn new(code_hash: ValidationCodeHash, executor_params_hash: ExecutorParamsHash) -> Self {
|
||||
Self { code_hash, executor_params_hash }
|
||||
@@ -88,38 +96,34 @@ impl ArtifactId {
|
||||
Self::new(pvf.code_hash(), pvf.executor_params().hash())
|
||||
}
|
||||
|
||||
/// Tries to recover the artifact id from the given file name.
|
||||
#[cfg(test)]
|
||||
pub fn from_file_name(file_name: &str) -> Option<Self> {
|
||||
use polkadot_core_primitives::Hash;
|
||||
use std::str::FromStr as _;
|
||||
|
||||
let file_name =
|
||||
file_name.strip_prefix(Self::PREFIX)?.strip_prefix(Self::NODE_VERSION_PREFIX)?;
|
||||
|
||||
// [ node version | code hash | param hash ]
|
||||
let parts: Vec<&str> = file_name.split('_').collect();
|
||||
let (_node_ver, code_hash_str, executor_params_hash_str) = (parts[0], parts[1], parts[2]);
|
||||
|
||||
let code_hash = Hash::from_str(code_hash_str).ok()?.into();
|
||||
let executor_params_hash =
|
||||
ExecutorParamsHash::from_hash(Hash::from_str(executor_params_hash_str).ok()?);
|
||||
|
||||
Some(Self { code_hash, executor_params_hash })
|
||||
}
|
||||
|
||||
/// Returns the expected path to this artifact given the root of the cache.
|
||||
pub fn path(&self, cache_path: &Path) -> PathBuf {
|
||||
/// Returns the canonical path to the concluded artifact.
|
||||
pub(crate) fn path(&self, cache_path: &Path, checksum: &str) -> PathBuf {
|
||||
let file_name = format!(
|
||||
"{}{}{}_{:#x}_{:#x}",
|
||||
Self::PREFIX,
|
||||
Self::NODE_VERSION_PREFIX,
|
||||
NODE_VERSION,
|
||||
"{}_{:#x}_{:#x}_0x{}",
|
||||
artifact_prefix(),
|
||||
self.code_hash,
|
||||
self.executor_params_hash
|
||||
self.executor_params_hash,
|
||||
checksum
|
||||
);
|
||||
cache_path.join(file_name)
|
||||
}
|
||||
|
||||
/// Tries to recover the artifact id from the given file name.
|
||||
/// Return `None` if the given file name is invalid.
|
||||
/// VALID_NAME := <PREFIX> _ <CODE_HASH> _ <PARAM_HASH> _ <CHECKSUM>
|
||||
fn from_file_name(file_name: &str) -> Option<Self> {
|
||||
let file_name = file_name.strip_prefix(&artifact_prefix())?.strip_prefix('_')?;
|
||||
let parts: Vec<&str> = file_name.split('_').collect();
|
||||
|
||||
if let [code_hash, param_hash, _checksum] = parts[..] {
|
||||
let code_hash = Hash::from_str(code_hash).ok()?.into();
|
||||
let executor_params_hash =
|
||||
ExecutorParamsHash::from_hash(Hash::from_str(param_hash).ok()?);
|
||||
return Some(Self { code_hash, executor_params_hash })
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// A bundle of the artifact ID and the path.
|
||||
@@ -136,8 +140,8 @@ pub struct ArtifactPathId {
|
||||
}
|
||||
|
||||
impl ArtifactPathId {
|
||||
pub(crate) fn new(artifact_id: ArtifactId, cache_path: &Path) -> Self {
|
||||
Self { path: artifact_id.path(cache_path), id: artifact_id }
|
||||
pub(crate) fn new(artifact_id: ArtifactId, path: &Path) -> Self {
|
||||
Self { id: artifact_id, path: path.to_owned() }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -148,6 +152,8 @@ pub enum ArtifactState {
|
||||
/// That means that the artifact should be accessible through the path obtained by the artifact
|
||||
/// id (unless, it was removed externally).
|
||||
Prepared {
|
||||
/// The path of the compiled artifact.
|
||||
path: PathBuf,
|
||||
/// The time when the artifact was last needed.
|
||||
///
|
||||
/// This is updated when we get the heads up for this artifact or when we just discover
|
||||
@@ -159,7 +165,7 @@ pub enum ArtifactState {
|
||||
/// A task to prepare this artifact is scheduled.
|
||||
Preparing {
|
||||
/// List of result senders that are waiting for a response.
|
||||
waiting_for_response: Vec<PrepareResultSender>,
|
||||
waiting_for_response: Vec<PrecheckResultSender>,
|
||||
/// The number of times this artifact has failed to prepare.
|
||||
num_failures: u32,
|
||||
},
|
||||
@@ -177,32 +183,148 @@ pub enum ArtifactState {
|
||||
|
||||
/// A container of all known artifact ids and their states.
|
||||
pub struct Artifacts {
|
||||
artifacts: HashMap<ArtifactId, ArtifactState>,
|
||||
inner: HashMap<ArtifactId, ArtifactState>,
|
||||
}
|
||||
|
||||
impl Artifacts {
|
||||
/// Initialize a blank cache at the given path. This will clear everything present at the
|
||||
/// given path, to be populated over time.
|
||||
///
|
||||
/// The recognized artifacts will be filled in the table and unrecognized will be removed.
|
||||
pub async fn new(cache_path: &Path) -> Self {
|
||||
// First delete the entire cache. This includes artifacts and any leftover worker dirs (see
|
||||
// [`WorkerDir`]). Nodes are long-running so this should populate shortly.
|
||||
let _ = tokio::fs::remove_dir_all(cache_path).await;
|
||||
// Make sure that the cache path directory and all its parents are created.
|
||||
let _ = tokio::fs::create_dir_all(cache_path).await;
|
||||
|
||||
Self { artifacts: HashMap::new() }
|
||||
#[cfg(test)]
|
||||
pub(crate) fn empty() -> Self {
|
||||
Self { inner: HashMap::new() }
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn empty() -> Self {
|
||||
Self { artifacts: HashMap::new() }
|
||||
pub(crate) fn len(&self) -> usize {
|
||||
self.inner.len()
|
||||
}
|
||||
|
||||
/// Create an empty table and populate it with valid artifacts as [`ArtifactState::Prepared`],
|
||||
/// if any. The existing caches will be checked by their file name to determine whether they are
|
||||
/// valid, e.g., matching the current node version. The ones deemed invalid will be pruned.
|
||||
pub async fn new_and_prune(cache_path: &Path) -> Self {
|
||||
let mut artifacts = Self { inner: HashMap::new() };
|
||||
artifacts.insert_and_prune(cache_path).await;
|
||||
artifacts
|
||||
}
|
||||
|
||||
async fn insert_and_prune(&mut self, cache_path: &Path) {
|
||||
async fn is_corrupted(path: &Path) -> bool {
|
||||
let checksum = match tokio::fs::read(path).await {
|
||||
Ok(bytes) => blake3::hash(&bytes),
|
||||
Err(err) => {
|
||||
// just remove the file if we cannot read it
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
?err,
|
||||
"unable to read artifact {:?} when checking integrity, removing...",
|
||||
path,
|
||||
);
|
||||
return true
|
||||
},
|
||||
};
|
||||
|
||||
if let Some(file_name) = path.file_name() {
|
||||
if let Some(file_name) = file_name.to_str() {
|
||||
return !file_name.ends_with(checksum.to_hex().as_str())
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
// Insert the entry into the artifacts table if it is valid.
|
||||
// Otherwise, prune it.
|
||||
async fn insert_or_prune(
|
||||
artifacts: &mut Artifacts,
|
||||
entry: &tokio::fs::DirEntry,
|
||||
cache_path: &Path,
|
||||
) {
|
||||
let file_type = entry.file_type().await;
|
||||
let file_name = entry.file_name();
|
||||
|
||||
match file_type {
|
||||
Ok(file_type) =>
|
||||
if !file_type.is_file() {
|
||||
return
|
||||
},
|
||||
Err(err) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
?err,
|
||||
"unable to get file type for {:?}",
|
||||
file_name,
|
||||
);
|
||||
return
|
||||
},
|
||||
}
|
||||
|
||||
if let Some(file_name) = file_name.to_str() {
|
||||
let id = ArtifactId::from_file_name(file_name);
|
||||
let path = cache_path.join(file_name);
|
||||
|
||||
if id.is_none() || is_corrupted(&path).await {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
"discarding invalid artifact {:?}",
|
||||
&path,
|
||||
);
|
||||
let _ = tokio::fs::remove_file(&path).await;
|
||||
return
|
||||
}
|
||||
|
||||
if let Some(id) = id {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
"reusing existing {:?} for node version v{}",
|
||||
&path,
|
||||
NODE_VERSION,
|
||||
);
|
||||
artifacts.insert_prepared(id, path, SystemTime::now(), Default::default());
|
||||
}
|
||||
} else {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
"non-Unicode file name {:?} found in {:?}",
|
||||
file_name,
|
||||
cache_path,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure that the cache path directory and all its parents are created.
|
||||
let _ = tokio::fs::create_dir_all(cache_path).await;
|
||||
|
||||
let mut dir = match tokio::fs::read_dir(cache_path).await {
|
||||
Ok(dir) => dir,
|
||||
Err(err) => {
|
||||
gum::error!(
|
||||
target: LOG_TARGET,
|
||||
?err,
|
||||
"failed to read dir {:?}",
|
||||
cache_path,
|
||||
);
|
||||
return
|
||||
},
|
||||
};
|
||||
|
||||
loop {
|
||||
match dir.next_entry().await {
|
||||
Ok(Some(entry)) => insert_or_prune(self, &entry, cache_path).await,
|
||||
Ok(None) => break,
|
||||
Err(err) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
?err,
|
||||
"error processing artifacts in {:?}",
|
||||
cache_path,
|
||||
);
|
||||
break
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the state of the given artifact by its ID.
|
||||
pub fn artifact_state_mut(&mut self, artifact_id: &ArtifactId) -> Option<&mut ArtifactState> {
|
||||
self.artifacts.get_mut(artifact_id)
|
||||
self.inner.get_mut(artifact_id)
|
||||
}
|
||||
|
||||
/// Inform the table about the artifact with the given ID. The state will be set to "preparing".
|
||||
@@ -212,53 +334,52 @@ impl Artifacts {
|
||||
pub fn insert_preparing(
|
||||
&mut self,
|
||||
artifact_id: ArtifactId,
|
||||
waiting_for_response: Vec<PrepareResultSender>,
|
||||
waiting_for_response: Vec<PrecheckResultSender>,
|
||||
) {
|
||||
// See the precondition.
|
||||
always!(self
|
||||
.artifacts
|
||||
.inner
|
||||
.insert(artifact_id, ArtifactState::Preparing { waiting_for_response, num_failures: 0 })
|
||||
.is_none());
|
||||
}
|
||||
|
||||
/// Insert an artifact with the given ID as "prepared".
|
||||
///
|
||||
/// This function must be used only for brand-new artifacts and should never be used for
|
||||
/// replacing existing ones.
|
||||
#[cfg(test)]
|
||||
pub fn insert_prepared(
|
||||
/// This function should only be used to build the artifact table at startup with valid
|
||||
/// artifact caches.
|
||||
pub(crate) fn insert_prepared(
|
||||
&mut self,
|
||||
artifact_id: ArtifactId,
|
||||
path: PathBuf,
|
||||
last_time_needed: SystemTime,
|
||||
prepare_stats: PrepareStats,
|
||||
) {
|
||||
// See the precondition.
|
||||
always!(self
|
||||
.artifacts
|
||||
.insert(artifact_id, ArtifactState::Prepared { last_time_needed, prepare_stats })
|
||||
.inner
|
||||
.insert(artifact_id, ArtifactState::Prepared { path, last_time_needed, prepare_stats })
|
||||
.is_none());
|
||||
}
|
||||
|
||||
/// Remove and retrieve the artifacts from the table that are older than the supplied
|
||||
/// Time-To-Live.
|
||||
pub fn prune(&mut self, artifact_ttl: Duration) -> Vec<ArtifactId> {
|
||||
/// Remove artifacts older than the given TTL and return id and path of the removed ones.
|
||||
pub fn prune(&mut self, artifact_ttl: Duration) -> Vec<(ArtifactId, PathBuf)> {
|
||||
let now = SystemTime::now();
|
||||
|
||||
let mut to_remove = vec![];
|
||||
for (k, v) in self.artifacts.iter() {
|
||||
if let ArtifactState::Prepared { last_time_needed, .. } = *v {
|
||||
for (k, v) in self.inner.iter() {
|
||||
if let ArtifactState::Prepared { last_time_needed, ref path, .. } = *v {
|
||||
if now
|
||||
.duration_since(last_time_needed)
|
||||
.map(|age| age > artifact_ttl)
|
||||
.unwrap_or(false)
|
||||
{
|
||||
to_remove.push(k.clone());
|
||||
to_remove.push((k.clone(), path.clone()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for artifact in &to_remove {
|
||||
self.artifacts.remove(artifact);
|
||||
self.inner.remove(&artifact.0);
|
||||
}
|
||||
|
||||
to_remove
|
||||
@@ -267,13 +388,72 @@ impl Artifacts {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{ArtifactId, Artifacts, NODE_VERSION};
|
||||
use super::{artifact_prefix as prefix, ArtifactId, Artifacts, NODE_VERSION, RUNTIME_VERSION};
|
||||
use polkadot_primitives::ExecutorParamsHash;
|
||||
use rand::Rng;
|
||||
use sp_core::H256;
|
||||
use std::{path::Path, str::FromStr};
|
||||
use std::{
|
||||
fs,
|
||||
io::Write,
|
||||
path::{Path, PathBuf},
|
||||
str::FromStr,
|
||||
};
|
||||
|
||||
fn file_name(code_hash: &str, param_hash: &str) -> String {
|
||||
format!("wasmtime_polkadot_v{}_0x{}_0x{}", NODE_VERSION, code_hash, param_hash)
|
||||
fn rand_hash(len: usize) -> String {
|
||||
let mut rng = rand::thread_rng();
|
||||
let hex: Vec<_> = "0123456789abcdef".chars().collect();
|
||||
(0..len).map(|_| hex[rng.gen_range(0..hex.len())]).collect()
|
||||
}
|
||||
|
||||
fn file_name(code_hash: &str, param_hash: &str, checksum: &str) -> String {
|
||||
format!("{}_0x{}_0x{}_0x{}", prefix(), code_hash, param_hash, checksum)
|
||||
}
|
||||
|
||||
fn create_artifact(
|
||||
dir: impl AsRef<Path>,
|
||||
prefix: &str,
|
||||
code_hash: impl AsRef<str>,
|
||||
params_hash: impl AsRef<str>,
|
||||
) -> (PathBuf, String) {
|
||||
fn artifact_path_without_checksum(
|
||||
dir: impl AsRef<Path>,
|
||||
prefix: &str,
|
||||
code_hash: impl AsRef<str>,
|
||||
params_hash: impl AsRef<str>,
|
||||
) -> PathBuf {
|
||||
let mut path = dir.as_ref().to_path_buf();
|
||||
let file_name =
|
||||
format!("{}_0x{}_0x{}", prefix, code_hash.as_ref(), params_hash.as_ref(),);
|
||||
path.push(file_name);
|
||||
path
|
||||
}
|
||||
|
||||
let (code_hash, params_hash) = (code_hash.as_ref(), params_hash.as_ref());
|
||||
let path = artifact_path_without_checksum(dir, prefix, code_hash, params_hash);
|
||||
let mut file = fs::File::create(&path).unwrap();
|
||||
|
||||
let content = format!("{}{}", code_hash, params_hash).into_bytes();
|
||||
file.write_all(&content).unwrap();
|
||||
let checksum = blake3::hash(&content).to_hex().to_string();
|
||||
|
||||
(path, checksum)
|
||||
}
|
||||
|
||||
fn create_rand_artifact(dir: impl AsRef<Path>, prefix: &str) -> (PathBuf, String) {
|
||||
create_artifact(dir, prefix, rand_hash(64), rand_hash(64))
|
||||
}
|
||||
|
||||
/// Returns `path` with `_0x<checksum>` appended to its final component.
///
/// Panics if `path` has no final file-name component.
fn concluded_path(path: impl AsRef<Path>, checksum: &str) -> PathBuf {
    let original = path.as_ref();
    let suffix = format!("_0x{}", checksum);

    let mut concluded_name = original.file_name().unwrap().to_os_string();
    concluded_name.push(suffix);

    original.with_file_name(concluded_name)
}
|
||||
|
||||
/// The prefix must embed both the runtime and the node version.
#[test]
fn artifact_prefix() {
    let expected = format!("wasmtime_v{}_polkadot_v{}", RUNTIME_VERSION, NODE_VERSION);
    assert_eq!(prefix(), expected);
}
|
||||
|
||||
#[test]
|
||||
@@ -284,6 +464,7 @@ mod tests {
|
||||
let file_name = file_name(
|
||||
"0022800000000000000000000000000000000000000000000000000000000000",
|
||||
"0033900000000000000000000000000000000000000000000000000000000000",
|
||||
"00000000000000000000000000000000",
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
@@ -305,40 +486,54 @@ mod tests {
|
||||
let dir = Path::new("/test");
|
||||
let code_hash = "1234567890123456789012345678901234567890123456789012345678901234";
|
||||
let params_hash = "4321098765432109876543210987654321098765432109876543210987654321";
|
||||
let file_name = file_name(code_hash, params_hash);
|
||||
let checksum = "34567890123456789012345678901234";
|
||||
let file_name = file_name(code_hash, params_hash, checksum);
|
||||
|
||||
let code_hash = H256::from_str(code_hash).unwrap();
|
||||
let params_hash = H256::from_str(params_hash).unwrap();
|
||||
let path = ArtifactId::new(code_hash.into(), ExecutorParamsHash::from_hash(params_hash))
|
||||
.path(dir, checksum);
|
||||
|
||||
assert_eq!(
|
||||
ArtifactId::new(code_hash.into(), ExecutorParamsHash::from_hash(params_hash))
|
||||
.path(dir)
|
||||
.to_str(),
|
||||
Some(format!("/test/{}", file_name).as_str()),
|
||||
);
|
||||
assert_eq!(path.to_str().unwrap(), format!("/test/{}", file_name));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn artifacts_removes_cache_on_startup() {
|
||||
let fake_cache_path = crate::worker_intf::tmppath("test-cache").await.unwrap();
|
||||
let fake_artifact_path = {
|
||||
let mut p = fake_cache_path.clone();
|
||||
p.push("wasmtime_0x1234567890123456789012345678901234567890123456789012345678901234");
|
||||
p
|
||||
};
|
||||
async fn remove_stale_cache_on_startup() {
|
||||
let cache_dir = crate::worker_intf::tmppath("test-cache").await.unwrap();
|
||||
fs::create_dir_all(&cache_dir).unwrap();
|
||||
|
||||
// create a tmp cache with 1 artifact.
|
||||
// invalid prefix
|
||||
create_rand_artifact(&cache_dir, "");
|
||||
create_rand_artifact(&cache_dir, "wasmtime_polkadot_v");
|
||||
create_rand_artifact(&cache_dir, "wasmtime_v8.0.0_polkadot_v1.0.0");
|
||||
|
||||
std::fs::create_dir_all(&fake_cache_path).unwrap();
|
||||
std::fs::File::create(fake_artifact_path).unwrap();
|
||||
let prefix = prefix();
|
||||
|
||||
// this should remove it and re-create.
|
||||
// no checksum
|
||||
create_rand_artifact(&cache_dir, &prefix);
|
||||
|
||||
let p = &fake_cache_path;
|
||||
Artifacts::new(p).await;
|
||||
// invalid hashes
|
||||
let (path, checksum) = create_artifact(&cache_dir, &prefix, "000", "000001");
|
||||
let new_path = concluded_path(&path, &checksum);
|
||||
fs::rename(&path, &new_path).unwrap();
|
||||
|
||||
assert_eq!(std::fs::read_dir(&fake_cache_path).unwrap().count(), 0);
|
||||
// checksum tampered
|
||||
let (path, checksum) = create_rand_artifact(&cache_dir, &prefix);
|
||||
let new_path = concluded_path(&path, checksum.chars().rev().collect::<String>().as_str());
|
||||
fs::rename(&path, &new_path).unwrap();
|
||||
|
||||
std::fs::remove_dir_all(fake_cache_path).unwrap();
|
||||
// valid
|
||||
let (path, checksum) = create_rand_artifact(&cache_dir, &prefix);
|
||||
let new_path = concluded_path(&path, &checksum);
|
||||
fs::rename(&path, &new_path).unwrap();
|
||||
|
||||
assert_eq!(fs::read_dir(&cache_dir).unwrap().count(), 7);
|
||||
|
||||
let artifacts = Artifacts::new_and_prune(&cache_dir).await;
|
||||
|
||||
assert_eq!(fs::read_dir(&cache_dir).unwrap().count(), 1);
|
||||
assert_eq!(artifacts.len(), 1);
|
||||
|
||||
fs::remove_dir_all(cache_dir).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,14 +32,15 @@ use futures::{
|
||||
Future, FutureExt, SinkExt, StreamExt,
|
||||
};
|
||||
use polkadot_node_core_pvf_common::{
|
||||
error::{PrepareError, PrepareResult},
|
||||
error::{PrecheckResult, PrepareError},
|
||||
prepare::PrepareSuccess,
|
||||
pvf::PvfPrepData,
|
||||
};
|
||||
use polkadot_node_subsystem::SubsystemResult;
|
||||
use polkadot_parachain_primitives::primitives::ValidationResult;
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
path::{Path, PathBuf},
|
||||
path::PathBuf,
|
||||
time::{Duration, SystemTime},
|
||||
};
|
||||
|
||||
@@ -63,7 +64,7 @@ pub const EXECUTE_BINARY_NAME: &str = "polkadot-execute-worker";
|
||||
pub(crate) type ResultSender = oneshot::Sender<Result<ValidationResult, ValidationError>>;
|
||||
|
||||
/// Transmission end used for sending the PVF preparation result.
|
||||
pub(crate) type PrepareResultSender = oneshot::Sender<PrepareResult>;
|
||||
pub(crate) type PrecheckResultSender = oneshot::Sender<PrecheckResult>;
|
||||
|
||||
/// A handle to the async process serving the validation host requests.
|
||||
#[derive(Clone)]
|
||||
@@ -83,7 +84,7 @@ impl ValidationHost {
|
||||
pub async fn precheck_pvf(
|
||||
&mut self,
|
||||
pvf: PvfPrepData,
|
||||
result_tx: PrepareResultSender,
|
||||
result_tx: PrecheckResultSender,
|
||||
) -> Result<(), String> {
|
||||
self.to_host_tx
|
||||
.send(ToHost::PrecheckPvf { pvf, result_tx })
|
||||
@@ -133,7 +134,7 @@ impl ValidationHost {
|
||||
}
|
||||
|
||||
enum ToHost {
|
||||
PrecheckPvf { pvf: PvfPrepData, result_tx: PrepareResultSender },
|
||||
PrecheckPvf { pvf: PvfPrepData, result_tx: PrecheckResultSender },
|
||||
ExecutePvf(ExecutePvfInputs),
|
||||
HeadsUp { active_pvfs: Vec<PvfPrepData> },
|
||||
}
|
||||
@@ -249,10 +250,9 @@ pub async fn start(
|
||||
let run_sweeper = sweeper_task(to_sweeper_rx);
|
||||
|
||||
let run_host = async move {
|
||||
let artifacts = Artifacts::new(&config.cache_path).await;
|
||||
let artifacts = Artifacts::new_and_prune(&config.cache_path).await;
|
||||
|
||||
run(Inner {
|
||||
cache_path: config.cache_path,
|
||||
cleanup_pulse_interval: Duration::from_secs(3600),
|
||||
artifact_ttl: Duration::from_secs(3600 * 24),
|
||||
artifacts,
|
||||
@@ -296,7 +296,6 @@ impl AwaitingPrepare {
|
||||
}
|
||||
|
||||
struct Inner {
|
||||
cache_path: PathBuf,
|
||||
cleanup_pulse_interval: Duration,
|
||||
artifact_ttl: Duration,
|
||||
artifacts: Artifacts,
|
||||
@@ -317,7 +316,6 @@ struct Fatal;
|
||||
|
||||
async fn run(
|
||||
Inner {
|
||||
cache_path,
|
||||
cleanup_pulse_interval,
|
||||
artifact_ttl,
|
||||
mut artifacts,
|
||||
@@ -361,7 +359,6 @@ async fn run(
|
||||
// will notice it.
|
||||
|
||||
break_if_fatal!(handle_cleanup_pulse(
|
||||
&cache_path,
|
||||
&mut to_sweeper_tx,
|
||||
&mut artifacts,
|
||||
artifact_ttl,
|
||||
@@ -380,7 +377,6 @@ async fn run(
|
||||
// If the artifact failed before, it could be re-scheduled for preparation here if
|
||||
// the preparation failure cooldown has elapsed.
|
||||
break_if_fatal!(handle_to_host(
|
||||
&cache_path,
|
||||
&mut artifacts,
|
||||
&mut to_prepare_queue_tx,
|
||||
&mut to_execute_queue_tx,
|
||||
@@ -402,7 +398,6 @@ async fn run(
|
||||
// We could be eager in terms of reporting and plumb the result from the preparation
|
||||
// worker but we don't for the sake of simplicity.
|
||||
break_if_fatal!(handle_prepare_done(
|
||||
&cache_path,
|
||||
&mut artifacts,
|
||||
&mut to_execute_queue_tx,
|
||||
&mut awaiting_prepare,
|
||||
@@ -414,7 +409,6 @@ async fn run(
|
||||
}
|
||||
|
||||
async fn handle_to_host(
|
||||
cache_path: &Path,
|
||||
artifacts: &mut Artifacts,
|
||||
prepare_queue: &mut mpsc::Sender<prepare::ToQueue>,
|
||||
execute_queue: &mut mpsc::Sender<execute::ToQueue>,
|
||||
@@ -426,15 +420,8 @@ async fn handle_to_host(
|
||||
handle_precheck_pvf(artifacts, prepare_queue, pvf, result_tx).await?;
|
||||
},
|
||||
ToHost::ExecutePvf(inputs) => {
|
||||
handle_execute_pvf(
|
||||
cache_path,
|
||||
artifacts,
|
||||
prepare_queue,
|
||||
execute_queue,
|
||||
awaiting_prepare,
|
||||
inputs,
|
||||
)
|
||||
.await?;
|
||||
handle_execute_pvf(artifacts, prepare_queue, execute_queue, awaiting_prepare, inputs)
|
||||
.await?;
|
||||
},
|
||||
ToHost::HeadsUp { active_pvfs } =>
|
||||
handle_heads_up(artifacts, prepare_queue, active_pvfs).await?,
|
||||
@@ -454,21 +441,21 @@ async fn handle_precheck_pvf(
|
||||
artifacts: &mut Artifacts,
|
||||
prepare_queue: &mut mpsc::Sender<prepare::ToQueue>,
|
||||
pvf: PvfPrepData,
|
||||
result_sender: PrepareResultSender,
|
||||
result_sender: PrecheckResultSender,
|
||||
) -> Result<(), Fatal> {
|
||||
let artifact_id = ArtifactId::from_pvf_prep_data(&pvf);
|
||||
|
||||
if let Some(state) = artifacts.artifact_state_mut(&artifact_id) {
|
||||
match state {
|
||||
ArtifactState::Prepared { last_time_needed, prepare_stats } => {
|
||||
ArtifactState::Prepared { last_time_needed, .. } => {
|
||||
*last_time_needed = SystemTime::now();
|
||||
let _ = result_sender.send(Ok(prepare_stats.clone()));
|
||||
let _ = result_sender.send(Ok(()));
|
||||
},
|
||||
ArtifactState::Preparing { waiting_for_response, num_failures: _ } =>
|
||||
waiting_for_response.push(result_sender),
|
||||
ArtifactState::FailedToProcess { error, .. } => {
|
||||
// Do not retry an artifact that previously failed preparation.
|
||||
let _ = result_sender.send(PrepareResult::Err(error.clone()));
|
||||
let _ = result_sender.send(PrecheckResult::Err(error.clone()));
|
||||
},
|
||||
}
|
||||
} else {
|
||||
@@ -491,7 +478,6 @@ async fn handle_precheck_pvf(
|
||||
/// When preparing for execution, we use a more lenient timeout ([`LENIENT_PREPARATION_TIMEOUT`])
|
||||
/// than when prechecking.
|
||||
async fn handle_execute_pvf(
|
||||
cache_path: &Path,
|
||||
artifacts: &mut Artifacts,
|
||||
prepare_queue: &mut mpsc::Sender<prepare::ToQueue>,
|
||||
execute_queue: &mut mpsc::Sender<execute::ToQueue>,
|
||||
@@ -504,8 +490,8 @@ async fn handle_execute_pvf(
|
||||
|
||||
if let Some(state) = artifacts.artifact_state_mut(&artifact_id) {
|
||||
match state {
|
||||
ArtifactState::Prepared { last_time_needed, .. } => {
|
||||
let file_metadata = std::fs::metadata(artifact_id.path(cache_path));
|
||||
ArtifactState::Prepared { ref path, last_time_needed, .. } => {
|
||||
let file_metadata = std::fs::metadata(path);
|
||||
|
||||
if file_metadata.is_ok() {
|
||||
*last_time_needed = SystemTime::now();
|
||||
@@ -514,7 +500,7 @@ async fn handle_execute_pvf(
|
||||
send_execute(
|
||||
execute_queue,
|
||||
execute::ToQueue::Enqueue {
|
||||
artifact: ArtifactPathId::new(artifact_id, cache_path),
|
||||
artifact: ArtifactPathId::new(artifact_id, path),
|
||||
pending_execution_request: PendingExecutionRequest {
|
||||
exec_timeout,
|
||||
params,
|
||||
@@ -677,7 +663,6 @@ async fn handle_heads_up(
|
||||
}
|
||||
|
||||
async fn handle_prepare_done(
|
||||
cache_path: &Path,
|
||||
artifacts: &mut Artifacts,
|
||||
execute_queue: &mut mpsc::Sender<execute::ToQueue>,
|
||||
awaiting_prepare: &mut AwaitingPrepare,
|
||||
@@ -718,7 +703,8 @@ async fn handle_prepare_done(
|
||||
state
|
||||
{
|
||||
for result_sender in waiting_for_response.drain(..) {
|
||||
let _ = result_sender.send(result.clone());
|
||||
let result = result.clone().map(|_| ());
|
||||
let _ = result_sender.send(result);
|
||||
}
|
||||
num_failures
|
||||
} else {
|
||||
@@ -738,16 +724,18 @@ async fn handle_prepare_done(
|
||||
continue
|
||||
}
|
||||
|
||||
// Don't send failed artifacts to the execution's queue.
|
||||
if let Err(ref error) = result {
|
||||
let _ = result_tx.send(Err(ValidationError::from(error.clone())));
|
||||
continue
|
||||
}
|
||||
let path = match &result {
|
||||
Ok(success) => success.path.clone(),
|
||||
Err(error) => {
|
||||
let _ = result_tx.send(Err(ValidationError::from(error.clone())));
|
||||
continue
|
||||
},
|
||||
};
|
||||
|
||||
send_execute(
|
||||
execute_queue,
|
||||
execute::ToQueue::Enqueue {
|
||||
artifact: ArtifactPathId::new(artifact_id.clone(), cache_path),
|
||||
artifact: ArtifactPathId::new(artifact_id.clone(), &path),
|
||||
pending_execution_request: PendingExecutionRequest {
|
||||
exec_timeout,
|
||||
params,
|
||||
@@ -760,8 +748,8 @@ async fn handle_prepare_done(
|
||||
}
|
||||
|
||||
*state = match result {
|
||||
Ok(prepare_stats) =>
|
||||
ArtifactState::Prepared { last_time_needed: SystemTime::now(), prepare_stats },
|
||||
Ok(PrepareSuccess { path, stats: prepare_stats }) =>
|
||||
ArtifactState::Prepared { path, last_time_needed: SystemTime::now(), prepare_stats },
|
||||
Err(error) => {
|
||||
let last_time_failed = SystemTime::now();
|
||||
let num_failures = *num_failures + 1;
|
||||
@@ -814,7 +802,6 @@ async fn enqueue_prepare_for_execute(
|
||||
}
|
||||
|
||||
async fn handle_cleanup_pulse(
|
||||
cache_path: &Path,
|
||||
sweeper_tx: &mut mpsc::Sender<PathBuf>,
|
||||
artifacts: &mut Artifacts,
|
||||
artifact_ttl: Duration,
|
||||
@@ -825,14 +812,13 @@ async fn handle_cleanup_pulse(
|
||||
"PVF pruning: {} artifacts reached their end of life",
|
||||
to_remove.len(),
|
||||
);
|
||||
for artifact_id in to_remove {
|
||||
for (artifact_id, path) in to_remove {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
validation_code_hash = ?artifact_id.code_hash,
|
||||
"pruning artifact",
|
||||
);
|
||||
let artifact_path = artifact_id.path(cache_path);
|
||||
sweeper_tx.send(artifact_path).await.map_err(|_| Fatal)?;
|
||||
sweeper_tx.send(path).await.map_err(|_| Fatal)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -890,7 +876,11 @@ pub(crate) mod tests {
|
||||
use crate::InvalidCandidate;
|
||||
use assert_matches::assert_matches;
|
||||
use futures::future::BoxFuture;
|
||||
use polkadot_node_core_pvf_common::{error::PrepareError, prepare::PrepareStats};
|
||||
use polkadot_node_core_pvf_common::{
|
||||
error::PrepareError,
|
||||
prepare::{PrepareStats, PrepareSuccess},
|
||||
};
|
||||
use sp_core::hexdisplay::AsBytesRef;
|
||||
|
||||
const TEST_EXECUTION_TIMEOUT: Duration = Duration::from_secs(3);
|
||||
pub(crate) const TEST_PREPARATION_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
@@ -910,12 +900,16 @@ pub(crate) mod tests {
|
||||
}
|
||||
|
||||
/// Creates a new PVF whose artifact id can be uniquely identified by the given number.
|
||||
fn artifact_id(descriminator: u32) -> ArtifactId {
|
||||
ArtifactId::from_pvf_prep_data(&PvfPrepData::from_discriminator(descriminator))
|
||||
fn artifact_id(discriminator: u32) -> ArtifactId {
|
||||
ArtifactId::from_pvf_prep_data(&PvfPrepData::from_discriminator(discriminator))
|
||||
}
|
||||
|
||||
fn artifact_path(descriminator: u32) -> PathBuf {
|
||||
artifact_id(descriminator).path(&PathBuf::from(std::env::temp_dir())).to_owned()
|
||||
fn artifact_path(discriminator: u32) -> PathBuf {
|
||||
let pvf = PvfPrepData::from_discriminator(discriminator);
|
||||
let checksum = blake3::hash(pvf.code().as_bytes_ref());
|
||||
artifact_id(discriminator)
|
||||
.path(&PathBuf::from(std::env::temp_dir()), checksum.to_hex().as_str())
|
||||
.to_owned()
|
||||
}
|
||||
|
||||
struct Builder {
|
||||
@@ -953,8 +947,6 @@ pub(crate) mod tests {
|
||||
|
||||
impl Test {
|
||||
fn new(Builder { cleanup_pulse_interval, artifact_ttl, artifacts }: Builder) -> Self {
|
||||
let cache_path = PathBuf::from(std::env::temp_dir());
|
||||
|
||||
let (to_host_tx, to_host_rx) = mpsc::channel(10);
|
||||
let (to_prepare_queue_tx, to_prepare_queue_rx) = mpsc::channel(10);
|
||||
let (from_prepare_queue_tx, from_prepare_queue_rx) = mpsc::unbounded();
|
||||
@@ -962,7 +954,6 @@ pub(crate) mod tests {
|
||||
let (to_sweeper_tx, to_sweeper_rx) = mpsc::channel(10);
|
||||
|
||||
let run = run(Inner {
|
||||
cache_path,
|
||||
cleanup_pulse_interval,
|
||||
artifact_ttl,
|
||||
artifacts,
|
||||
@@ -1111,12 +1102,18 @@ pub(crate) mod tests {
|
||||
let mut builder = Builder::default();
|
||||
builder.cleanup_pulse_interval = Duration::from_millis(100);
|
||||
builder.artifact_ttl = Duration::from_millis(500);
|
||||
builder
|
||||
.artifacts
|
||||
.insert_prepared(artifact_id(1), mock_now, PrepareStats::default());
|
||||
builder
|
||||
.artifacts
|
||||
.insert_prepared(artifact_id(2), mock_now, PrepareStats::default());
|
||||
builder.artifacts.insert_prepared(
|
||||
artifact_id(1),
|
||||
artifact_path(1),
|
||||
mock_now,
|
||||
PrepareStats::default(),
|
||||
);
|
||||
builder.artifacts.insert_prepared(
|
||||
artifact_id(2),
|
||||
artifact_path(2),
|
||||
mock_now,
|
||||
PrepareStats::default(),
|
||||
);
|
||||
let mut test = builder.build();
|
||||
let mut host = test.host_handle();
|
||||
|
||||
@@ -1188,7 +1185,7 @@ pub(crate) mod tests {
|
||||
test.from_prepare_queue_tx
|
||||
.send(prepare::FromQueue {
|
||||
artifact_id: artifact_id(1),
|
||||
result: Ok(PrepareStats::default()),
|
||||
result: Ok(PrepareSuccess::default()),
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1204,7 +1201,7 @@ pub(crate) mod tests {
|
||||
test.from_prepare_queue_tx
|
||||
.send(prepare::FromQueue {
|
||||
artifact_id: artifact_id(2),
|
||||
result: Ok(PrepareStats::default()),
|
||||
result: Ok(PrepareSuccess::default()),
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1258,7 +1255,7 @@ pub(crate) mod tests {
|
||||
test.from_prepare_queue_tx
|
||||
.send(prepare::FromQueue {
|
||||
artifact_id: artifact_id(1),
|
||||
result: Ok(PrepareStats::default()),
|
||||
result: Ok(PrepareSuccess::default()),
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1371,7 +1368,7 @@ pub(crate) mod tests {
|
||||
test.from_prepare_queue_tx
|
||||
.send(prepare::FromQueue {
|
||||
artifact_id: artifact_id(2),
|
||||
result: Ok(PrepareStats::default()),
|
||||
result: Ok(PrepareSuccess::default()),
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1527,7 +1524,7 @@ pub(crate) mod tests {
|
||||
test.from_prepare_queue_tx
|
||||
.send(prepare::FromQueue {
|
||||
artifact_id: artifact_id(1),
|
||||
result: Ok(PrepareStats::default()),
|
||||
result: Ok(PrepareSuccess::default()),
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1703,7 +1700,7 @@ pub(crate) mod tests {
|
||||
test.from_prepare_queue_tx
|
||||
.send(prepare::FromQueue {
|
||||
artifact_id: artifact_id(1),
|
||||
result: Ok(PrepareStats::default()),
|
||||
result: Ok(PrepareSuccess::default()),
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
@@ -84,7 +84,7 @@
|
||||
//! A pruning task will run at a fixed interval of time. This task will remove all artifacts that
|
||||
//! weren't used or received a heads up signal for a while.
|
||||
//!
|
||||
//! ## Execution
|
||||
//! ## Execution
|
||||
//!
|
||||
//! The execute workers will be fed by the requests from the execution queue, which is basically a
|
||||
//! combination of a path to the compiled artifact and the
|
||||
|
||||
@@ -68,7 +68,7 @@ pub enum ToPool {
|
||||
///
|
||||
/// In either case, the worker is considered busy and no further `StartWork` messages should be
|
||||
/// sent until either `Concluded` or `Rip` message is received.
|
||||
StartWork { worker: Worker, pvf: PvfPrepData, artifact_path: PathBuf },
|
||||
StartWork { worker: Worker, pvf: PvfPrepData, cache_path: PathBuf },
|
||||
}
|
||||
|
||||
/// A message sent from pool to its client.
|
||||
@@ -232,7 +232,7 @@ fn handle_to_pool(
|
||||
.boxed(),
|
||||
);
|
||||
},
|
||||
ToPool::StartWork { worker, pvf, artifact_path } => {
|
||||
ToPool::StartWork { worker, pvf, cache_path } => {
|
||||
if let Some(data) = spawned.get_mut(worker) {
|
||||
if let Some(idle) = data.idle.take() {
|
||||
let preparation_timer = metrics.time_preparation();
|
||||
@@ -242,7 +242,7 @@ fn handle_to_pool(
|
||||
worker,
|
||||
idle,
|
||||
pvf,
|
||||
artifact_path,
|
||||
cache_path,
|
||||
preparation_timer,
|
||||
)
|
||||
.boxed(),
|
||||
@@ -303,10 +303,10 @@ async fn start_work_task<Timer>(
|
||||
worker: Worker,
|
||||
idle: IdleWorker,
|
||||
pvf: PvfPrepData,
|
||||
artifact_path: PathBuf,
|
||||
cache_path: PathBuf,
|
||||
_preparation_timer: Option<Timer>,
|
||||
) -> PoolEvent {
|
||||
let outcome = worker_intf::start_work(&metrics, idle, pvf, artifact_path).await;
|
||||
let outcome = worker_intf::start_work(&metrics, idle, pvf, cache_path).await;
|
||||
PoolEvent::StartWork(worker, outcome)
|
||||
}
|
||||
|
||||
|
||||
@@ -268,12 +268,12 @@ fn find_idle_worker(queue: &mut Queue) -> Option<Worker> {
|
||||
}
|
||||
|
||||
async fn handle_from_pool(queue: &mut Queue, from_pool: pool::FromPool) -> Result<(), Fatal> {
|
||||
use pool::FromPool::*;
|
||||
use pool::FromPool;
|
||||
match from_pool {
|
||||
Spawned(worker) => handle_worker_spawned(queue, worker).await?,
|
||||
Concluded { worker, rip, result } =>
|
||||
FromPool::Spawned(worker) => handle_worker_spawned(queue, worker).await?,
|
||||
FromPool::Concluded { worker, rip, result } =>
|
||||
handle_worker_concluded(queue, worker, rip, result).await?,
|
||||
Rip(worker) => handle_worker_rip(queue, worker).await?,
|
||||
FromPool::Rip(worker) => handle_worker_rip(queue, worker).await?,
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -424,17 +424,17 @@ async fn spawn_extra_worker(queue: &mut Queue, critical: bool) -> Result<(), Fat
|
||||
/// Attaches the work to the given worker, telling the pool about the job.
|
||||
async fn assign(queue: &mut Queue, worker: Worker, job: Job) -> Result<(), Fatal> {
|
||||
let job_data = &mut queue.jobs[job];
|
||||
|
||||
let artifact_id = ArtifactId::from_pvf_prep_data(&job_data.pvf);
|
||||
let artifact_path = artifact_id.path(&queue.cache_path);
|
||||
|
||||
job_data.worker = Some(worker);
|
||||
|
||||
queue.workers[worker].job = Some(job);
|
||||
|
||||
send_pool(
|
||||
&mut queue.to_pool_tx,
|
||||
pool::ToPool::StartWork { worker, pvf: job_data.pvf.clone(), artifact_path },
|
||||
pool::ToPool::StartWork {
|
||||
worker,
|
||||
pvf: job_data.pvf.clone(),
|
||||
cache_path: queue.cache_path.clone(),
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
||||
@@ -491,7 +491,7 @@ mod tests {
|
||||
use crate::host::tests::TEST_PREPARATION_TIMEOUT;
|
||||
use assert_matches::assert_matches;
|
||||
use futures::{future::BoxFuture, FutureExt};
|
||||
use polkadot_node_core_pvf_common::{error::PrepareError, prepare::PrepareStats};
|
||||
use polkadot_node_core_pvf_common::{error::PrepareError, prepare::PrepareSuccess};
|
||||
use slotmap::SlotMap;
|
||||
use std::task::Poll;
|
||||
|
||||
@@ -612,7 +612,7 @@ mod tests {
|
||||
test.send_from_pool(pool::FromPool::Concluded {
|
||||
worker: w,
|
||||
rip: false,
|
||||
result: Ok(PrepareStats::default()),
|
||||
result: Ok(PrepareSuccess::default()),
|
||||
});
|
||||
|
||||
assert_eq!(
|
||||
@@ -651,7 +651,7 @@ mod tests {
|
||||
test.send_from_pool(pool::FromPool::Concluded {
|
||||
worker: w1,
|
||||
rip: false,
|
||||
result: Ok(PrepareStats::default()),
|
||||
result: Ok(PrepareSuccess::default()),
|
||||
});
|
||||
|
||||
assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });
|
||||
@@ -697,7 +697,7 @@ mod tests {
|
||||
test.send_from_pool(pool::FromPool::Concluded {
|
||||
worker: w1,
|
||||
rip: false,
|
||||
result: Ok(PrepareStats::default()),
|
||||
result: Ok(PrepareSuccess::default()),
|
||||
});
|
||||
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Kill(w1));
|
||||
}
|
||||
@@ -731,7 +731,7 @@ mod tests {
|
||||
test.send_from_pool(pool::FromPool::Concluded {
|
||||
worker: w1,
|
||||
rip: true,
|
||||
result: Ok(PrepareStats::default()),
|
||||
result: Ok(PrepareSuccess::default()),
|
||||
});
|
||||
|
||||
// Since there is still work, the queue requested one extra worker to spawn to handle the
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
//! Host interface to the prepare worker.
|
||||
|
||||
use crate::{
|
||||
artifacts::ArtifactId,
|
||||
metrics::Metrics,
|
||||
security,
|
||||
worker_intf::{
|
||||
@@ -27,8 +28,8 @@ use crate::{
|
||||
};
|
||||
use parity_scale_codec::{Decode, Encode};
|
||||
use polkadot_node_core_pvf_common::{
|
||||
error::{PrepareError, PrepareResult},
|
||||
prepare::PrepareStats,
|
||||
error::{PrepareError, PrepareResult, PrepareWorkerResult},
|
||||
prepare::{PrepareStats, PrepareSuccess, PrepareWorkerSuccess},
|
||||
pvf::PvfPrepData,
|
||||
worker_dir, SecurityStatus,
|
||||
};
|
||||
@@ -81,7 +82,7 @@ pub enum Outcome {
|
||||
/// final destination location.
|
||||
RenameTmpFile {
|
||||
worker: IdleWorker,
|
||||
result: PrepareResult,
|
||||
result: PrepareWorkerResult,
|
||||
err: String,
|
||||
// Unfortunately `PathBuf` doesn't implement `Encode`/`Decode`, so we do a fallible
|
||||
// conversion to `Option<String>`.
|
||||
@@ -115,7 +116,7 @@ pub async fn start_work(
|
||||
metrics: &Metrics,
|
||||
worker: IdleWorker,
|
||||
pvf: PvfPrepData,
|
||||
artifact_path: PathBuf,
|
||||
cache_path: PathBuf,
|
||||
) -> Outcome {
|
||||
let IdleWorker { stream, pid, worker_dir } = worker;
|
||||
|
||||
@@ -123,8 +124,8 @@ pub async fn start_work(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
?worker_dir,
|
||||
"starting prepare for {}",
|
||||
artifact_path.display(),
|
||||
"starting prepare for {:?}",
|
||||
pvf,
|
||||
);
|
||||
|
||||
with_worker_dir_setup(
|
||||
@@ -135,7 +136,7 @@ pub async fn start_work(
|
||||
let preparation_timeout = pvf.prep_timeout();
|
||||
let audit_log_file = security::AuditLogFile::try_open_and_seek_to_end().await;
|
||||
|
||||
if let Err(err) = send_request(&mut stream, pvf.clone()).await {
|
||||
if let Err(err) = send_request(&mut stream, &pvf).await {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
@@ -159,7 +160,7 @@ pub async fn start_work(
|
||||
|
||||
match result {
|
||||
// Received bytes from worker within the time limit.
|
||||
Ok(Ok(prepare_result)) => {
|
||||
Ok(Ok(prepare_worker_result)) => {
|
||||
// Check if any syscall violations occurred during the job. For now this is only
|
||||
// informative, as we are not enforcing the seccomp policy yet.
|
||||
for syscall in security::check_seccomp_violations_for_worker(audit_log_file, pid).await {
|
||||
@@ -175,10 +176,11 @@ pub async fn start_work(
|
||||
handle_response(
|
||||
metrics,
|
||||
IdleWorker { stream, pid, worker_dir },
|
||||
prepare_result,
|
||||
prepare_worker_result,
|
||||
pid,
|
||||
tmp_artifact_file,
|
||||
artifact_path,
|
||||
&pvf,
|
||||
&cache_path,
|
||||
preparation_timeout,
|
||||
)
|
||||
.await
|
||||
@@ -215,20 +217,22 @@ pub async fn start_work(
|
||||
async fn handle_response(
|
||||
metrics: &Metrics,
|
||||
worker: IdleWorker,
|
||||
result: PrepareResult,
|
||||
result: PrepareWorkerResult,
|
||||
worker_pid: u32,
|
||||
tmp_file: PathBuf,
|
||||
artifact_path: PathBuf,
|
||||
pvf: &PvfPrepData,
|
||||
cache_path: &PathBuf,
|
||||
preparation_timeout: Duration,
|
||||
) -> Outcome {
|
||||
let PrepareStats { cpu_time_elapsed, memory_stats } = match result.clone() {
|
||||
Ok(result) => result,
|
||||
// Timed out on the child. This should already be logged by the child.
|
||||
Err(PrepareError::TimedOut) => return Outcome::TimedOut,
|
||||
Err(PrepareError::JobDied(err)) => return Outcome::JobDied(err),
|
||||
Err(PrepareError::OutOfMemory) => return Outcome::OutOfMemory,
|
||||
Err(_) => return Outcome::Concluded { worker, result },
|
||||
};
|
||||
let PrepareWorkerSuccess { checksum, stats: PrepareStats { cpu_time_elapsed, memory_stats } } =
|
||||
match result.clone() {
|
||||
Ok(result) => result,
|
||||
// Timed out on the child. This should already be logged by the child.
|
||||
Err(PrepareError::TimedOut) => return Outcome::TimedOut,
|
||||
Err(PrepareError::JobDied(err)) => return Outcome::JobDied(err),
|
||||
Err(PrepareError::OutOfMemory) => return Outcome::OutOfMemory,
|
||||
Err(err) => return Outcome::Concluded { worker, result: Err(err) },
|
||||
};
|
||||
|
||||
if cpu_time_elapsed > preparation_timeout {
|
||||
// The job didn't complete within the timeout.
|
||||
@@ -243,6 +247,9 @@ async fn handle_response(
|
||||
return Outcome::TimedOut
|
||||
}
|
||||
|
||||
let artifact_id = ArtifactId::from_pvf_prep_data(pvf);
|
||||
let artifact_path = artifact_id.path(cache_path, &checksum);
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
%worker_pid,
|
||||
@@ -252,7 +259,13 @@ async fn handle_response(
|
||||
);
|
||||
|
||||
let outcome = match tokio::fs::rename(&tmp_file, &artifact_path).await {
|
||||
Ok(()) => Outcome::Concluded { worker, result },
|
||||
Ok(()) => Outcome::Concluded {
|
||||
worker,
|
||||
result: Ok(PrepareSuccess {
|
||||
path: artifact_path,
|
||||
stats: PrepareStats { cpu_time_elapsed, memory_stats: memory_stats.clone() },
|
||||
}),
|
||||
},
|
||||
Err(err) => {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
@@ -329,14 +342,14 @@ where
|
||||
outcome
|
||||
}
|
||||
|
||||
async fn send_request(stream: &mut UnixStream, pvf: PvfPrepData) -> io::Result<()> {
|
||||
async fn send_request(stream: &mut UnixStream, pvf: &PvfPrepData) -> io::Result<()> {
|
||||
framed_send(stream, &pvf.encode()).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn recv_response(stream: &mut UnixStream, pid: u32) -> io::Result<PrepareResult> {
|
||||
async fn recv_response(stream: &mut UnixStream, pid: u32) -> io::Result<PrepareWorkerResult> {
|
||||
let result = framed_recv(stream).await?;
|
||||
let result = PrepareResult::decode(&mut &result[..]).map_err(|e| {
|
||||
let result = PrepareWorkerResult::decode(&mut &result[..]).map_err(|e| {
|
||||
// We received invalid bytes from the worker.
|
||||
let bound_bytes = &result[..result.len().min(4)];
|
||||
gum::warn!(
|
||||
|
||||
@@ -198,7 +198,7 @@ pub async fn tmppath_in(prefix: &str, dir: &Path) -> io::Result<PathBuf> {
|
||||
|
||||
/// The same as [`tmppath_in`], but uses [`std::env::temp_dir`] as the directory.
|
||||
pub async fn tmppath(prefix: &str) -> io::Result<PathBuf> {
|
||||
let temp_dir = PathBuf::from(std::env::temp_dir());
|
||||
let temp_dir = std::env::temp_dir();
|
||||
tmppath_in(prefix, &temp_dir).await
|
||||
}
|
||||
|
||||
@@ -453,7 +453,7 @@ impl Drop for WorkerDir {
|
||||
/// artifacts from previous jobs.
|
||||
pub fn clear_worker_dir_path(worker_dir_path: &Path) -> io::Result<()> {
|
||||
fn remove_dir_contents(path: &Path) -> io::Result<()> {
|
||||
for entry in std::fs::read_dir(&path)? {
|
||||
for entry in std::fs::read_dir(path)? {
|
||||
let entry = entry?;
|
||||
let path = entry.path();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user