PVF: Add worker check during tests and benches (#1771)

This commit is contained in:
Marcin S
2023-10-24 16:22:15 +02:00
committed by GitHub
parent 8cba5b90f2
commit e39253c022
19 changed files with 285 additions and 113 deletions
+1
View File
@@ -141,6 +141,7 @@ impl ArtifactPathId {
}
}
#[derive(Debug)]
pub enum ArtifactState {
/// The artifact is ready to be used by the executor.
///
+5 -5
View File
@@ -446,7 +446,8 @@ async fn handle_to_host(
/// This tries to prepare the PVF by compiling the WASM blob within a timeout set in
/// `PvfPrepData`.
///
/// If the prepare job failed previously, we may retry it under certain conditions.
/// We don't retry artifacts that previously failed preparation. We don't expect multiple
/// pre-checking requests.
async fn handle_precheck_pvf(
artifacts: &mut Artifacts,
prepare_queue: &mut mpsc::Sender<prepare::ToQueue>,
@@ -464,8 +465,7 @@ async fn handle_precheck_pvf(
ArtifactState::Preparing { waiting_for_response, num_failures: _ } =>
waiting_for_response.push(result_sender),
ArtifactState::FailedToProcess { error, .. } => {
// Do not retry failed preparation if another pre-check request comes in. We do not
// retry pre-checking, anyway.
// Do not retry an artifact that previously failed preparation.
let _ = result_sender.send(PrepareResult::Err(error.clone()));
},
}
@@ -764,7 +764,7 @@ async fn handle_prepare_done(
let last_time_failed = SystemTime::now();
let num_failures = *num_failures + 1;
gum::warn!(
gum::error!(
target: LOG_TARGET,
?artifact_id,
time_failed = ?last_time_failed,
@@ -846,7 +846,7 @@ async fn sweeper_task(mut sweeper_rx: mpsc::Receiver<PathBuf>) {
gum::trace!(
target: LOG_TARGET,
?result,
"Sweeping the artifact file {}",
"Sweeped the artifact file {}",
condemned.display(),
);
},
+13
View File
@@ -116,5 +116,18 @@ pub use polkadot_node_core_pvf_common::{
SecurityStatus,
};
use std::{path::Path, process::Command};
/// The log target for this crate.
pub const LOG_TARGET: &str = "parachain::pvf";
/// Utility to get the version of a worker, used for version checks.
///
/// The worker's existence at the given path must be checked separately.
pub fn get_worker_version(worker_path: &Path) -> std::io::Result<String> {
let worker_version = Command::new(worker_path).args(["--version"]).output()?.stdout;
Ok(std::str::from_utf8(&worker_version)
.expect("version is printed as a string; qed")
.trim()
.to_string())
}
+54 -4
View File
@@ -15,13 +15,20 @@
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! Various things for testing other crates.
//!
//! N.B. This is not guarded with some feature flag. Overexposing items here may affect the final
//! artifact even for production builds.
pub use crate::worker_intf::{spawn_with_program_path, SpawnErr};
pub use crate::{
host::{EXECUTE_BINARY_NAME, PREPARE_BINARY_NAME},
worker_intf::{spawn_with_program_path, SpawnErr},
};
use crate::get_worker_version;
use is_executable::IsExecutable;
use polkadot_node_primitives::NODE_VERSION;
use polkadot_primitives::ExecutorParams;
use std::{
path::PathBuf,
sync::{Mutex, OnceLock},
};
/// A function that emulates the stitches together behaviors of the preparation and the execution
/// worker in a single synchronous function.
@@ -47,3 +54,46 @@ pub fn validate_candidate(
Ok(result)
}
/// Retrieves the worker paths, checks that they exist and does a version check.
///
/// NOTE: This should only be called in dev code (tests, benchmarks) as it relies on the relative
/// paths of the built workers.
pub fn get_and_check_worker_paths() -> (PathBuf, PathBuf) {
// Only needs to be called once for the current process.
static WORKER_PATHS: OnceLock<Mutex<(PathBuf, PathBuf)>> = OnceLock::new();
let mutex = WORKER_PATHS.get_or_init(|| {
let mut workers_path = std::env::current_exe().unwrap();
workers_path.pop();
workers_path.pop();
let mut prepare_worker_path = workers_path.clone();
prepare_worker_path.push(PREPARE_BINARY_NAME);
let mut execute_worker_path = workers_path.clone();
execute_worker_path.push(EXECUTE_BINARY_NAME);
// Check that the workers are valid.
if !prepare_worker_path.is_executable() || !execute_worker_path.is_executable() {
panic!("ERROR: Workers do not exist or are not executable. Workers directory: {:?}", workers_path);
}
let worker_version =
get_worker_version(&prepare_worker_path).expect("checked for worker existence");
if worker_version != NODE_VERSION {
panic!("ERROR: Prepare worker version {worker_version} does not match node version {NODE_VERSION}; worker path: {prepare_worker_path:?}");
}
let worker_version =
get_worker_version(&execute_worker_path).expect("checked for worker existence");
if worker_version != NODE_VERSION {
panic!("ERROR: Execute worker version {worker_version} does not match node version {NODE_VERSION}; worker path: {execute_worker_path:?}");
}
// We don't want to check against the commit hash because we'd have to always rebuild
// the calling crate on every commit.
eprintln!("WARNING: Workers match the node version, but may have changed in recent commits. Please rebuild them if anything funny happens. Workers path: {workers_path:?}");
Mutex::new((prepare_worker_path, execute_worker_path))
});
let guard = mutex.lock().unwrap();
(guard.0.clone(), guard.1.clone())
}
@@ -283,6 +283,7 @@ impl WorkerHandle {
if let Ok(value) = std::env::var("RUST_LOG") {
command.env("RUST_LOG", value);
}
let mut child = command
.args(extra_args)
.arg("--socket-path")