diff --git a/polkadot/Cargo.lock b/polkadot/Cargo.lock
index 85f446c3d6..6f6d3024f2 100644
--- a/polkadot/Cargo.lock
+++ b/polkadot/Cargo.lock
@@ -7161,6 +7161,7 @@ dependencies = [
"sp-maybe-compressed-blob",
"sp-tracing",
"sp-wasm-interface",
+ "substrate-build-script-utils",
"tempfile",
"test-parachain-adder",
"test-parachain-halt",
diff --git a/polkadot/cli/src/cli.rs b/polkadot/cli/src/cli.rs
index 37083eb912..c78399788a 100644
--- a/polkadot/cli/src/cli.rs
+++ b/polkadot/cli/src/cli.rs
@@ -79,7 +79,11 @@ pub enum Subcommand {
#[derive(Debug, Parser)]
pub struct ValidationWorkerCommand {
/// The path to the validation host's socket.
+ #[arg(long)]
pub socket_path: String,
+ /// Calling node implementation version
+ #[arg(long)]
+ pub node_impl_version: String,
}
#[allow(missing_docs)]
diff --git a/polkadot/cli/src/command.rs b/polkadot/cli/src/command.rs
index 0d1a3f8163..e6eaf6f095 100644
--- a/polkadot/cli/src/command.rs
+++ b/polkadot/cli/src/command.rs
@@ -494,7 +494,10 @@ pub fn run() -> Result<()> {
#[cfg(not(target_os = "android"))]
{
- polkadot_node_core_pvf::prepare_worker_entrypoint(&cmd.socket_path);
+ polkadot_node_core_pvf::prepare_worker_entrypoint(
+ &cmd.socket_path,
+ Some(&cmd.node_impl_version),
+ );
Ok(())
}
},
@@ -513,7 +516,10 @@ pub fn run() -> Result<()> {
#[cfg(not(target_os = "android"))]
{
- polkadot_node_core_pvf::execute_worker_entrypoint(&cmd.socket_path);
+ polkadot_node_core_pvf::execute_worker_entrypoint(
+ &cmd.socket_path,
+ Some(&cmd.node_impl_version),
+ );
Ok(())
}
},
diff --git a/polkadot/node/core/pvf/Cargo.toml b/polkadot/node/core/pvf/Cargo.toml
index b6c56c6595..6478edc441 100644
--- a/polkadot/node/core/pvf/Cargo.toml
+++ b/polkadot/node/core/pvf/Cargo.toml
@@ -15,6 +15,7 @@ cpu-time = "1.0.0"
futures = "0.3.21"
futures-timer = "3.0.2"
gum = { package = "tracing-gum", path = "../../gum" }
+libc = "0.2.139"
pin-project = "1.0.9"
rand = "0.8.5"
rayon = "1.5.1"
@@ -41,8 +42,10 @@ sp-wasm-interface = { git = "https://github.com/paritytech/substrate", branch =
sp-maybe-compressed-blob = { git = "https://github.com/paritytech/substrate", branch = "master" }
sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master" }
+[build-dependencies]
+substrate-build-script-utils = { git = "https://github.com/paritytech/substrate", branch = "master" }
+
[target.'cfg(target_os = "linux")'.dependencies]
-libc = "0.2.139"
tikv-jemalloc-ctl = "0.5.0"
[dev-dependencies]
diff --git a/polkadot/node/core/pvf/build.rs b/polkadot/node/core/pvf/build.rs
new file mode 100644
index 0000000000..805fa3446f
--- /dev/null
+++ b/polkadot/node/core/pvf/build.rs
@@ -0,0 +1,19 @@
+// Copyright 2017-2023 Parity Technologies (UK) Ltd.
+// This file is part of Polkadot.
+
+// Polkadot is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Polkadot is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Polkadot. If not, see .
+
+fn main() {
+ substrate_build_script_utils::generate_cargo_keys();
+}
diff --git a/polkadot/node/core/pvf/src/execute/worker.rs b/polkadot/node/core/pvf/src/execute/worker.rs
index 523be67da2..04357d8704 100644
--- a/polkadot/node/core/pvf/src/execute/worker.rs
+++ b/polkadot/node/core/pvf/src/execute/worker.rs
@@ -47,9 +47,13 @@ pub async fn spawn(
executor_params: ExecutorParams,
spawn_timeout: Duration,
) -> Result<(IdleWorker, WorkerHandle), SpawnErr> {
- let (mut idle_worker, worker_handle) =
- spawn_with_program_path("execute", program_path, &["execute-worker"], spawn_timeout)
- .await?;
+ let (mut idle_worker, worker_handle) = spawn_with_program_path(
+ "execute",
+ program_path,
+ &["execute-worker", "--node-impl-version", env!("SUBSTRATE_CLI_IMPL_VERSION")],
+ spawn_timeout,
+ )
+ .await?;
send_handshake(&mut idle_worker.stream, Handshake { executor_params })
.await
.map_err(|error| {
@@ -260,11 +264,25 @@ impl Response {
}
/// The entrypoint that the spawned execute worker should start with. The `socket_path` specifies
-/// the path to the socket used to communicate with the host.
-pub fn worker_entrypoint(socket_path: &str) {
+/// the path to the socket used to communicate with the host. The `node_version`, if `Some`,
+/// is checked against the worker version. A mismatch results in immediate worker termination.
+/// `None` is used for tests and in other situations when version check is not necessary.
+pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) {
worker_event_loop("execute", socket_path, |rt_handle, mut stream| async move {
- let handshake = recv_handshake(&mut stream).await?;
+ let worker_pid = std::process::id();
+ if let Some(version) = node_version {
+ if version != env!("SUBSTRATE_CLI_IMPL_VERSION") {
+ gum::error!(
+ target: LOG_TARGET,
+ %worker_pid,
+ "Node and worker version mismatch, node needs restarting, forcing shutdown",
+ );
+ crate::kill_parent_node_in_emergency();
+ return Err(io::Error::new(io::ErrorKind::Unsupported, "Version mismatch"))
+ }
+ }
+ let handshake = recv_handshake(&mut stream).await?;
let executor = Arc::new(Executor::new(handshake.executor_params).map_err(|e| {
io::Error::new(io::ErrorKind::Other, format!("cannot create executor: {}", e))
})?);
@@ -273,7 +291,7 @@ pub fn worker_entrypoint(socket_path: &str) {
let (artifact_path, params, execution_timeout) = recv_request(&mut stream).await?;
gum::debug!(
target: LOG_TARGET,
- worker_pid = %std::process::id(),
+ %worker_pid,
"worker: validating artifact {}",
artifact_path.display(),
);
@@ -307,7 +325,7 @@ pub fn worker_entrypoint(socket_path: &str) {
// Log if we exceed the timeout and the other thread hasn't finished.
gum::warn!(
target: LOG_TARGET,
- worker_pid = %std::process::id(),
+ %worker_pid,
"execute job took {}ms cpu time, exceeded execute timeout {}ms",
cpu_time_elapsed.as_millis(),
execution_timeout.as_millis(),
diff --git a/polkadot/node/core/pvf/src/lib.rs b/polkadot/node/core/pvf/src/lib.rs
index 8c40bbb8b9..88134529bc 100644
--- a/polkadot/node/core/pvf/src/lib.rs
+++ b/polkadot/node/core/pvf/src/lib.rs
@@ -114,6 +114,7 @@ pub use pvf::PvfPrepData;
pub use host::{start, Config, ValidationHost};
pub use metrics::Metrics;
+pub(crate) use worker_common::kill_parent_node_in_emergency;
pub use worker_common::JOB_TIMEOUT_WALL_CLOCK_FACTOR;
pub use execute::worker_entrypoint as execute_worker_entrypoint;
diff --git a/polkadot/node/core/pvf/src/prepare/worker.rs b/polkadot/node/core/pvf/src/prepare/worker.rs
index 962ad2742b..1ccba603c1 100644
--- a/polkadot/node/core/pvf/src/prepare/worker.rs
+++ b/polkadot/node/core/pvf/src/prepare/worker.rs
@@ -52,7 +52,13 @@ pub async fn spawn(
program_path: &Path,
spawn_timeout: Duration,
) -> Result<(IdleWorker, WorkerHandle), SpawnErr> {
- spawn_with_program_path("prepare", program_path, &["prepare-worker"], spawn_timeout).await
+ spawn_with_program_path(
+ "prepare",
+ program_path,
+ &["prepare-worker", "--node-impl-version", env!("SUBSTRATE_CLI_IMPL_VERSION")],
+ spawn_timeout,
+ )
+ .await
}
pub enum Outcome {
@@ -321,7 +327,9 @@ async fn recv_response(stream: &mut UnixStream, pid: u32) -> io::Result io::Result) {
worker_event_loop("prepare", socket_path, |rt_handle, mut stream| async move {
+ let worker_pid = std::process::id();
+ if let Some(version) = node_version {
+ if version != env!("SUBSTRATE_CLI_IMPL_VERSION") {
+ gum::error!(
+ target: LOG_TARGET,
+ %worker_pid,
+ "Node and worker version mismatch, node needs restarting, forcing shutdown",
+ );
+ crate::kill_parent_node_in_emergency();
+ return Err(io::Error::new(io::ErrorKind::Unsupported, "Version mismatch"))
+ }
+ }
+
loop {
- let worker_pid = std::process::id();
let (pvf, dest) = recv_request(&mut stream).await?;
gum::debug!(
target: LOG_TARGET,
diff --git a/polkadot/node/core/pvf/src/testing.rs b/polkadot/node/core/pvf/src/testing.rs
index f0b6a0a60f..9ff6d5a15e 100644
--- a/polkadot/node/core/pvf/src/testing.rs
+++ b/polkadot/node/core/pvf/src/testing.rs
@@ -61,22 +61,34 @@ macro_rules! decl_puppet_worker_main {
$crate::sp_tracing::try_init_simple();
let args = std::env::args().collect::>();
- if args.len() < 2 {
+ if args.len() < 3 {
panic!("wrong number of arguments");
}
+ let mut version = None;
+ let mut socket_path: &str = "";
+
+ for i in 2..args.len() {
+ match args[i].as_ref() {
+ "--socket-path" => socket_path = args[i + 1].as_str(),
+ "--node-version" => version = Some(args[i + 1].as_str()),
+ _ => (),
+ }
+ }
+
let subcommand = &args[1];
match subcommand.as_ref() {
+ "exit" => {
+ std::process::exit(1);
+ },
"sleep" => {
std::thread::sleep(std::time::Duration::from_secs(5));
},
"prepare-worker" => {
- let socket_path = &args[2];
- $crate::prepare_worker_entrypoint(socket_path);
+ $crate::prepare_worker_entrypoint(&socket_path, version);
},
"execute-worker" => {
- let socket_path = &args[2];
- $crate::execute_worker_entrypoint(socket_path);
+ $crate::execute_worker_entrypoint(&socket_path, version);
},
other => panic!("unknown subcommand: {}", other),
}
diff --git a/polkadot/node/core/pvf/src/worker_common.rs b/polkadot/node/core/pvf/src/worker_common.rs
index 430a6950fb..3ed2994a2f 100644
--- a/polkadot/node/core/pvf/src/worker_common.rs
+++ b/polkadot/node/core/pvf/src/worker_common.rs
@@ -61,6 +61,8 @@ pub async fn spawn_with_program_path(
gum::warn!(
target: LOG_TARGET,
%debug_id,
+ ?program_path,
+ ?extra_args,
"cannot bind unix socket: {:?}",
err,
);
@@ -68,10 +70,12 @@ pub async fn spawn_with_program_path(
})?;
let handle =
- WorkerHandle::spawn(program_path, extra_args, socket_path).map_err(|err| {
+ WorkerHandle::spawn(&program_path, extra_args, socket_path).map_err(|err| {
gum::warn!(
target: LOG_TARGET,
%debug_id,
+ ?program_path,
+ ?extra_args,
"cannot spawn a worker: {:?}",
err,
);
@@ -84,6 +88,8 @@ pub async fn spawn_with_program_path(
gum::warn!(
target: LOG_TARGET,
%debug_id,
+ ?program_path,
+ ?extra_args,
"cannot accept a worker: {:?}",
err,
);
@@ -92,6 +98,14 @@ pub async fn spawn_with_program_path(
Ok((IdleWorker { stream, pid: handle.id() }, handle))
}
_ = Delay::new(spawn_timeout).fuse() => {
+ gum::warn!(
+ target: LOG_TARGET,
+ %debug_id,
+ ?program_path,
+ ?extra_args,
+ ?spawn_timeout,
+ "spawning and connecting to socket timed out",
+ );
Err(SpawnErr::AcceptTimeout)
}
}
@@ -162,6 +176,13 @@ where
F: FnMut(Handle, UnixStream) -> Fut,
Fut: futures::Future