mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-12 10:01:17 +00:00
New PVF validation host (#2710)
* Implement PVF validation host * WIP: Diener * Increase the alloted compilation time * Add more comments * Minor clean up * Apply suggestions from code review Co-authored-by: Bastian Köcher <bkchr@users.noreply.github.com> * Fix pruning artifact removal * Fix formatting and newlines * Fix the thread pool * Update node/core/pvf/src/executor_intf.rs Co-authored-by: Bastian Köcher <bkchr@users.noreply.github.com> * Remove redundant test declaration * Don't convert the path into an intermediate string * Try to workaround the test failure * Use the puppet_worker trick again * Fix a blip * Move `ensure_wasmtime_version` under the tests mod * Add a macro for puppet_workers * fix build for not real-overseer * Rename the puppet worker for adder collator * play it safe with the name of adder puppet worker * Typo: triggered * Add more comments * Do not kill exec worker on every error * Plumb Duration for timeouts * typo: critical * Add proofs * Clean unused imports * Revert "WIP: Diener" This reverts commit b9f54e513366c7a6dfdd117ac19fbdc46b900b4d. * Sync version of wasmtime * Update cargo.lock * Update Substrate * Merge fixes still * Update wasmtime version in test * bastifmt Co-authored-by: Bastian Köcher <bkchr@users.noreply.github.com> * Squash spaces * Trailing new line for testing.rs * Remove controversial code * comment about biasing * Fix suggestion * Add comments * make it more clear why unwrap_err * tmpfile retry * proper proofs for claim_idle * Remove mutex from ValidationHost * Add some more logging * Extract exec timeout into a constant * Add some clarifying logging * Use blake2_256 * Clean up the merge Specifically the leftovers after removing real-overseer * Update parachain/test-parachains/adder/collator/Cargo.toml Co-authored-by: Andronik Ordian <write@reusable.software> Co-authored-by: Bastian Köcher <bkchr@users.noreply.github.com> Co-authored-by: Andronik Ordian <write@reusable.software>
This commit is contained in:
Generated
+58
-42
@@ -120,9 +120,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.34"
|
||||
version = "1.0.39"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bf8dcb5b4bbaa28653b647d8c77bd4ed40183b48882e130c1f1ffb73de069fd7"
|
||||
checksum = "81cddc5f91628367664cc7c69714ff08deee8a3efc54623011c772544d7b2767"
|
||||
|
||||
[[package]]
|
||||
name = "approx"
|
||||
@@ -204,6 +204,16 @@ version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9"
|
||||
|
||||
[[package]]
|
||||
name = "async-attributes"
|
||||
version = "1.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a3203e79f4dd9bdda415ed03cf14dae5a2bf775c683a00f94e9cd1faf0f596e5"
|
||||
dependencies = [
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-channel"
|
||||
version = "1.5.1"
|
||||
@@ -293,6 +303,7 @@ version = "1.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8f9f84f1280a2b436a2c77c2582602732b6c2f4321d5494d6e799e6c367859a8"
|
||||
dependencies = [
|
||||
"async-attributes",
|
||||
"async-channel",
|
||||
"async-global-executor",
|
||||
"async-io",
|
||||
@@ -5579,7 +5590,7 @@ dependencies = [
|
||||
"frame-benchmarking-cli",
|
||||
"futures 0.3.13",
|
||||
"log",
|
||||
"polkadot-parachain",
|
||||
"polkadot-node-core-pvf",
|
||||
"polkadot-service",
|
||||
"sc-cli",
|
||||
"sc-service",
|
||||
@@ -5823,8 +5834,10 @@ name = "polkadot-node-core-candidate-validation"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"assert_matches",
|
||||
"async-trait",
|
||||
"futures 0.3.13",
|
||||
"parity-scale-codec",
|
||||
"polkadot-node-core-pvf",
|
||||
"polkadot-node-primitives",
|
||||
"polkadot-node-subsystem",
|
||||
"polkadot-node-subsystem-test-helpers",
|
||||
@@ -5893,6 +5906,38 @@ dependencies = [
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polkadot-node-core-pvf"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"always-assert",
|
||||
"assert_matches",
|
||||
"async-process",
|
||||
"async-std",
|
||||
"futures 0.3.13",
|
||||
"futures-timer 3.0.2",
|
||||
"hex-literal",
|
||||
"libc",
|
||||
"parity-scale-codec",
|
||||
"pin-project 1.0.4",
|
||||
"polkadot-core-primitives",
|
||||
"polkadot-parachain",
|
||||
"rand 0.8.3",
|
||||
"sc-executor",
|
||||
"sc-executor-common",
|
||||
"sc-executor-wasmtime",
|
||||
"slotmap",
|
||||
"sp-core",
|
||||
"sp-externalities",
|
||||
"sp-io",
|
||||
"sp-wasm-interface",
|
||||
"tempfile",
|
||||
"test-parachain-adder",
|
||||
"test-parachain-halt",
|
||||
"tracing",
|
||||
"wasmtime-jit",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polkadot-node-core-runtime-api"
|
||||
version = "0.1.0"
|
||||
@@ -6074,25 +6119,13 @@ name = "polkadot-parachain"
|
||||
version = "0.8.30"
|
||||
dependencies = [
|
||||
"derive_more",
|
||||
"futures 0.3.13",
|
||||
"libc",
|
||||
"log",
|
||||
"parity-scale-codec",
|
||||
"parity-util-mem",
|
||||
"parking_lot 0.11.1",
|
||||
"polkadot-core-primitives",
|
||||
"raw_sync",
|
||||
"sc-executor",
|
||||
"serde",
|
||||
"shared_memory",
|
||||
"sp-core",
|
||||
"sp-externalities",
|
||||
"sp-io",
|
||||
"sp-runtime",
|
||||
"sp-std",
|
||||
"sp-wasm-interface",
|
||||
"static_assertions",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -7031,19 +7064,6 @@ dependencies = [
|
||||
"rand_core 0.5.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "raw_sync"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2a34bde3561f980a51c70495164200569a11662644fe5af017f0b5d7015688cc"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.10",
|
||||
"libc",
|
||||
"nix",
|
||||
"rand 0.8.3",
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rawpointer"
|
||||
version = "0.2.1"
|
||||
@@ -8704,20 +8724,6 @@ dependencies = [
|
||||
"loom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shared_memory"
|
||||
version = "0.11.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b854a362375dfe8ab12ea8a98228040d37293c988f85fbac9fa0f83336387966"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.10",
|
||||
"libc",
|
||||
"nix",
|
||||
"quick-error 2.0.0",
|
||||
"rand 0.8.3",
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shlex"
|
||||
version = "0.1.1"
|
||||
@@ -8778,6 +8784,15 @@ dependencies = [
|
||||
"sp-std",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "slotmap"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ab3003725ae562cf995f3dc82bb99e70926e09000396816765bb6d7adbe740b1"
|
||||
dependencies = [
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
version = "0.6.13"
|
||||
@@ -9868,6 +9883,7 @@ dependencies = [
|
||||
"log",
|
||||
"parity-scale-codec",
|
||||
"polkadot-cli",
|
||||
"polkadot-node-core-pvf",
|
||||
"polkadot-node-primitives",
|
||||
"polkadot-node-subsystem",
|
||||
"polkadot-parachain",
|
||||
|
||||
@@ -55,6 +55,7 @@ members = [
|
||||
"node/core/chain-api",
|
||||
"node/core/proposer",
|
||||
"node/core/provisioner",
|
||||
"node/core/pvf",
|
||||
"node/core/runtime-api",
|
||||
"node/network/approval-distribution",
|
||||
"node/network/bridge",
|
||||
|
||||
@@ -22,7 +22,7 @@ wasm-bindgen-futures = { version = "0.4.19", optional = true }
|
||||
futures = "0.3.12"
|
||||
|
||||
service = { package = "polkadot-service", path = "../node/service", default-features = false, optional = true }
|
||||
polkadot-parachain = { path = "../parachain", optional = true }
|
||||
polkadot-node-core-pvf = { path = "../node/core/pvf", optional = true }
|
||||
|
||||
sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" }
|
||||
frame-benchmarking-cli = { git = "https://github.com/paritytech/substrate", branch = "master", optional = true }
|
||||
@@ -39,8 +39,8 @@ sp-trie = { git = "https://github.com/paritytech/substrate", branch = "master",
|
||||
substrate-build-script-utils = { git = "https://github.com/paritytech/substrate", branch = "master" }
|
||||
|
||||
[features]
|
||||
default = [ "wasmtime", "db", "cli", "full-node", "trie-memory-tracker", "polkadot-parachain" ]
|
||||
wasmtime = [ "sc-cli/wasmtime", "polkadot-parachain/wasmtime" ]
|
||||
default = [ "wasmtime", "db", "cli", "full-node", "trie-memory-tracker" ]
|
||||
wasmtime = [ "sc-cli/wasmtime" ]
|
||||
db = [ "service/db" ]
|
||||
cli = [
|
||||
"structopt",
|
||||
@@ -48,6 +48,7 @@ cli = [
|
||||
"sc-service",
|
||||
"frame-benchmarking-cli",
|
||||
"try-runtime-cli",
|
||||
"polkadot-node-core-pvf",
|
||||
]
|
||||
browser = [
|
||||
"wasm-bindgen",
|
||||
|
||||
@@ -43,8 +43,12 @@ pub enum Subcommand {
|
||||
Revert(sc_cli::RevertCmd),
|
||||
|
||||
#[allow(missing_docs)]
|
||||
#[structopt(name = "validation-worker", setting = structopt::clap::AppSettings::Hidden)]
|
||||
ValidationWorker(ValidationWorkerCommand),
|
||||
#[structopt(name = "prepare-worker", setting = structopt::clap::AppSettings::Hidden)]
|
||||
PvfPrepareWorker(ValidationWorkerCommand),
|
||||
|
||||
#[allow(missing_docs)]
|
||||
#[structopt(name = "execute-worker", setting = structopt::clap::AppSettings::Hidden)]
|
||||
PvfExecuteWorker(ValidationWorkerCommand),
|
||||
|
||||
/// The custom benchmark subcommand benchmarking runtime pallets.
|
||||
#[structopt(
|
||||
@@ -64,11 +68,8 @@ pub enum Subcommand {
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Debug, StructOpt)]
|
||||
pub struct ValidationWorkerCommand {
|
||||
/// The path that the executor can use for its caching purposes.
|
||||
pub cache_base_path: std::path::PathBuf,
|
||||
|
||||
#[allow(missing_docs)]
|
||||
pub mem_id: String,
|
||||
/// The path to the validation host's socket.
|
||||
pub socket_path: String,
|
||||
}
|
||||
|
||||
#[allow(missing_docs)]
|
||||
|
||||
@@ -256,19 +256,39 @@ pub fn run() -> Result<()> {
|
||||
Ok((cmd.run(client, backend).map_err(Error::SubstrateCli), task_manager))
|
||||
})?)
|
||||
},
|
||||
Some(Subcommand::ValidationWorker(cmd)) => {
|
||||
Some(Subcommand::PvfPrepareWorker(cmd)) => {
|
||||
let mut builder = sc_cli::LoggerBuilder::new("");
|
||||
builder.with_colors(false);
|
||||
let _ = builder.init();
|
||||
|
||||
if cfg!(feature = "browser") || cfg!(target_os = "android") {
|
||||
Err(sc_cli::Error::Input("Cannot run validation worker in browser".into()).into())
|
||||
} else {
|
||||
#[cfg(not(any(target_os = "android", feature = "browser")))]
|
||||
polkadot_parachain::wasm_executor::run_worker(
|
||||
&cmd.mem_id,
|
||||
Some(cmd.cache_base_path.clone()),
|
||||
)?;
|
||||
#[cfg(any(target_os = "android", feature = "browser"))]
|
||||
{
|
||||
return Err(
|
||||
sc_cli::Error::Input("PVF preparation workers are not supported under this platform".into()).into()
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(not(any(target_os = "android", feature = "browser")))]
|
||||
{
|
||||
polkadot_node_core_pvf::prepare_worker_entrypoint(&cmd.socket_path);
|
||||
Ok(())
|
||||
}
|
||||
},
|
||||
Some(Subcommand::PvfExecuteWorker(cmd)) => {
|
||||
let mut builder = sc_cli::LoggerBuilder::new("");
|
||||
builder.with_colors(false);
|
||||
let _ = builder.init();
|
||||
|
||||
#[cfg(any(target_os = "android", feature = "browser"))]
|
||||
{
|
||||
return Err(
|
||||
sc_cli::Error::Input("PVF execution workers are not supported under this platform".into()).into()
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(not(any(target_os = "android", feature = "browser")))]
|
||||
{
|
||||
polkadot_node_core_pvf::execute_worker_entrypoint(&cmd.socket_path);
|
||||
Ok(())
|
||||
}
|
||||
},
|
||||
|
||||
@@ -5,10 +5,10 @@ authors = ["Parity Technologies <admin@parity.io>"]
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
async-trait = "0.1.42"
|
||||
futures = "0.3.12"
|
||||
tracing = "0.1.25"
|
||||
|
||||
sp-core = { package = "sp-core", git = "https://github.com/paritytech/substrate", branch = "master" }
|
||||
sp-maybe-compressed-blob = { package = "sp-maybe-compressed-blob", git = "https://github.com/paritytech/substrate", branch = "master" }
|
||||
parity-scale-codec = { version = "2.0.0", default-features = false, features = ["bit-vec", "derive"] }
|
||||
|
||||
@@ -18,8 +18,12 @@ polkadot-node-primitives = { path = "../../primitives" }
|
||||
polkadot-subsystem = { package = "polkadot-node-subsystem", path = "../../subsystem" }
|
||||
polkadot-node-subsystem-util = { path = "../../subsystem-util" }
|
||||
|
||||
[target.'cfg(not(any(target_os = "android", target_os = "unknown")))'.dependencies]
|
||||
polkadot-node-core-pvf = { path = "../pvf" }
|
||||
|
||||
[dev-dependencies]
|
||||
sp-keyring = { git = "https://github.com/paritytech/substrate", branch = "master" }
|
||||
futures = { version = "0.3.12", features = ["thread-pool"] }
|
||||
assert_matches = "1.4.0"
|
||||
polkadot-node-subsystem-test-helpers = { path = "../../subsystem-test-helpers" }
|
||||
sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" }
|
||||
|
||||
@@ -40,44 +40,51 @@ use polkadot_primitives::v1::{
|
||||
ValidationCode, CandidateDescriptor, PersistedValidationData,
|
||||
OccupiedCoreAssumption, Hash, CandidateCommitments,
|
||||
};
|
||||
use polkadot_parachain::wasm_executor::{
|
||||
self, IsolationStrategy, ValidationError, InvalidCandidate as WasmInvalidCandidate
|
||||
};
|
||||
use polkadot_parachain::primitives::{ValidationResult as WasmValidationResult, ValidationParams};
|
||||
use polkadot_parachain::primitives::{ValidationParams, ValidationResult as WasmValidationResult};
|
||||
use polkadot_node_core_pvf::{Pvf, ValidationHost, ValidationError, InvalidCandidate as WasmInvalidCandidate};
|
||||
|
||||
use parity_scale_codec::Encode;
|
||||
use sp_core::traits::SpawnNamed;
|
||||
|
||||
use futures::channel::oneshot;
|
||||
use futures::prelude::*;
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use async_trait::async_trait;
|
||||
|
||||
const LOG_TARGET: &'static str = "parachain::candidate-validation";
|
||||
|
||||
/// The candidate validation subsystem.
|
||||
pub struct CandidateValidationSubsystem<S> {
|
||||
spawn: S,
|
||||
metrics: Metrics,
|
||||
isolation_strategy: IsolationStrategy,
|
||||
/// Configuration for the candidate validation subsystem
|
||||
pub struct Config {
|
||||
/// The path where candidate validation can store compiled artifacts for PVFs.
|
||||
pub artifacts_cache_path: PathBuf,
|
||||
/// The path to the executable which can be used for spawning PVF compilation & validation
|
||||
/// workers.
|
||||
pub program_path: PathBuf,
|
||||
}
|
||||
|
||||
impl<S> CandidateValidationSubsystem<S> {
|
||||
/// The candidate validation subsystem.
|
||||
pub struct CandidateValidationSubsystem {
|
||||
metrics: Metrics,
|
||||
config: Config,
|
||||
}
|
||||
|
||||
impl CandidateValidationSubsystem {
|
||||
/// Create a new `CandidateValidationSubsystem` with the given task spawner and isolation
|
||||
/// strategy.
|
||||
///
|
||||
/// Check out [`IsolationStrategy`] to get more details.
|
||||
pub fn new(spawn: S, metrics: Metrics, isolation_strategy: IsolationStrategy) -> Self {
|
||||
CandidateValidationSubsystem { spawn, metrics, isolation_strategy }
|
||||
pub fn with_config(config: Config, metrics: Metrics) -> Self {
|
||||
CandidateValidationSubsystem { config, metrics, }
|
||||
}
|
||||
}
|
||||
|
||||
impl<S, C> Subsystem<C> for CandidateValidationSubsystem<S> where
|
||||
impl<C> Subsystem<C> for CandidateValidationSubsystem where
|
||||
C: SubsystemContext<Message = CandidateValidationMessage>,
|
||||
S: SpawnNamed + Clone + 'static,
|
||||
{
|
||||
fn start(self, ctx: C) -> SpawnedSubsystem {
|
||||
let future = run(ctx, self.spawn, self.metrics, self.isolation_strategy)
|
||||
let future = run(ctx, self.metrics, self.config.artifacts_cache_path, self.config.program_path)
|
||||
.map_err(|e| SubsystemError::with_origin("candidate-validation", e))
|
||||
.boxed();
|
||||
SpawnedSubsystem {
|
||||
@@ -87,13 +94,18 @@ impl<S, C> Subsystem<C> for CandidateValidationSubsystem<S> where
|
||||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip(ctx, spawn, metrics), fields(subsystem = LOG_TARGET))]
|
||||
#[tracing::instrument(skip(ctx, metrics), fields(subsystem = LOG_TARGET))]
|
||||
async fn run(
|
||||
mut ctx: impl SubsystemContext<Message = CandidateValidationMessage>,
|
||||
spawn: impl SpawnNamed + Clone + 'static,
|
||||
metrics: Metrics,
|
||||
isolation_strategy: IsolationStrategy,
|
||||
cache_path: PathBuf,
|
||||
program_path: PathBuf,
|
||||
) -> SubsystemResult<()> {
|
||||
let (mut validation_host, task) = polkadot_node_core_pvf::start(
|
||||
polkadot_node_core_pvf::Config::new(cache_path, program_path),
|
||||
);
|
||||
ctx.spawn_blocking("pvf-validation-host", task.boxed()).await?;
|
||||
|
||||
loop {
|
||||
match ctx.recv().await? {
|
||||
FromOverseer::Signal(OverseerSignal::ActiveLeaves(_)) => {}
|
||||
@@ -109,10 +121,9 @@ async fn run(
|
||||
|
||||
let res = spawn_validate_from_chain_state(
|
||||
&mut ctx,
|
||||
isolation_strategy.clone(),
|
||||
&mut validation_host,
|
||||
descriptor,
|
||||
pov,
|
||||
spawn.clone(),
|
||||
&metrics,
|
||||
).await;
|
||||
|
||||
@@ -133,14 +144,12 @@ async fn run(
|
||||
) => {
|
||||
let _timer = metrics.time_validate_from_exhaustive();
|
||||
|
||||
let res = spawn_validate_exhaustive(
|
||||
&mut ctx,
|
||||
isolation_strategy.clone(),
|
||||
let res = validate_candidate_exhaustive(
|
||||
&mut validation_host,
|
||||
persisted_validation_data,
|
||||
validation_code,
|
||||
descriptor,
|
||||
pov,
|
||||
spawn.clone(),
|
||||
&metrics,
|
||||
).await;
|
||||
|
||||
@@ -268,13 +277,16 @@ async fn find_assumed_validation_data(
|
||||
Ok(AssumptionCheckOutcome::DoesNotMatch)
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip(ctx, pov, spawn, metrics), fields(subsystem = LOG_TARGET))]
|
||||
#[tracing::instrument(
|
||||
level = "trace",
|
||||
skip(ctx, validation_host, pov, metrics),
|
||||
fields(subsystem = LOG_TARGET),
|
||||
)]
|
||||
async fn spawn_validate_from_chain_state(
|
||||
ctx: &mut impl SubsystemContext<Message = CandidateValidationMessage>,
|
||||
isolation_strategy: IsolationStrategy,
|
||||
validation_host: &mut ValidationHost,
|
||||
descriptor: CandidateDescriptor,
|
||||
pov: Arc<PoV>,
|
||||
spawn: impl SpawnNamed + 'static,
|
||||
metrics: &Metrics,
|
||||
) -> SubsystemResult<Result<ValidationResult, ValidationFailed>> {
|
||||
let (validation_data, validation_code) =
|
||||
@@ -293,14 +305,12 @@ async fn spawn_validate_from_chain_state(
|
||||
}
|
||||
};
|
||||
|
||||
let validation_result = spawn_validate_exhaustive(
|
||||
ctx,
|
||||
isolation_strategy,
|
||||
let validation_result = validate_candidate_exhaustive(
|
||||
validation_host,
|
||||
validation_data,
|
||||
validation_code,
|
||||
descriptor.clone(),
|
||||
pov,
|
||||
spawn,
|
||||
metrics,
|
||||
)
|
||||
.await;
|
||||
@@ -330,35 +340,132 @@ async fn spawn_validate_from_chain_state(
|
||||
validation_result
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip(ctx, validation_code, pov, spawn, metrics), fields(subsystem = LOG_TARGET))]
|
||||
async fn spawn_validate_exhaustive(
|
||||
ctx: &mut impl SubsystemContext<Message = CandidateValidationMessage>,
|
||||
isolation_strategy: IsolationStrategy,
|
||||
#[tracing::instrument(
|
||||
level = "trace",
|
||||
skip(validation_backend, validation_code, pov, metrics),
|
||||
fields(subsystem = LOG_TARGET),
|
||||
)]
|
||||
async fn validate_candidate_exhaustive(
|
||||
mut validation_backend: impl ValidationBackend,
|
||||
persisted_validation_data: PersistedValidationData,
|
||||
validation_code: ValidationCode,
|
||||
descriptor: CandidateDescriptor,
|
||||
pov: Arc<PoV>,
|
||||
spawn: impl SpawnNamed + 'static,
|
||||
metrics: &Metrics,
|
||||
) -> SubsystemResult<Result<ValidationResult, ValidationFailed>> {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
let metrics = metrics.clone();
|
||||
let fut = async move {
|
||||
let res = validate_candidate_exhaustive::<RealValidationBackend, _>(
|
||||
isolation_strategy,
|
||||
persisted_validation_data,
|
||||
validation_code,
|
||||
descriptor,
|
||||
pov,
|
||||
spawn,
|
||||
&metrics,
|
||||
);
|
||||
let _timer = metrics.time_validate_candidate_exhaustive();
|
||||
|
||||
let _ = tx.send(res);
|
||||
if let Err(e) = perform_basic_checks(
|
||||
&descriptor,
|
||||
persisted_validation_data.max_pov_size,
|
||||
&*pov,
|
||||
&validation_code,
|
||||
) {
|
||||
return Ok(Ok(ValidationResult::Invalid(e)));
|
||||
}
|
||||
|
||||
let raw_validation_code = match sp_maybe_compressed_blob::decompress(
|
||||
&validation_code.0,
|
||||
VALIDATION_CODE_BOMB_LIMIT,
|
||||
) {
|
||||
Ok(code) => code,
|
||||
Err(e) => {
|
||||
tracing::debug!(target: LOG_TARGET, err=?e, "Invalid validation code");
|
||||
|
||||
// If the validation code is invalid, the candidate certainly is.
|
||||
return Ok(Ok(ValidationResult::Invalid(InvalidCandidate::CodeDecompressionFailure)));
|
||||
}
|
||||
};
|
||||
|
||||
ctx.spawn_blocking("blocking-candidate-validation-task", fut.boxed()).await?;
|
||||
rx.await.map_err(Into::into)
|
||||
let raw_block_data = match sp_maybe_compressed_blob::decompress(
|
||||
&pov.block_data.0,
|
||||
POV_BOMB_LIMIT,
|
||||
) {
|
||||
Ok(block_data) => BlockData(block_data.to_vec()),
|
||||
Err(e) => {
|
||||
tracing::debug!(target: LOG_TARGET, err=?e, "Invalid PoV code");
|
||||
|
||||
// If the PoV is invalid, the candidate certainly is.
|
||||
return Ok(Ok(ValidationResult::Invalid(InvalidCandidate::PoVDecompressionFailure)));
|
||||
}
|
||||
};
|
||||
|
||||
let params = ValidationParams {
|
||||
parent_head: persisted_validation_data.parent_head.clone(),
|
||||
block_data: raw_block_data,
|
||||
relay_parent_number: persisted_validation_data.relay_parent_number,
|
||||
relay_parent_storage_root: persisted_validation_data.relay_parent_storage_root,
|
||||
};
|
||||
|
||||
let result =
|
||||
validation_backend.validate_candidate(
|
||||
raw_validation_code.to_vec(),
|
||||
params
|
||||
)
|
||||
.await;
|
||||
|
||||
let result = match result {
|
||||
Err(ValidationError::InternalError(e)) => Err(ValidationFailed(e)),
|
||||
|
||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::HardTimeout)) =>
|
||||
Ok(ValidationResult::Invalid(InvalidCandidate::Timeout)),
|
||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::WorkerReportedError(e))) =>
|
||||
Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(e))),
|
||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbigiousWorkerDeath)) =>
|
||||
Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError("ambigious worker death".to_string()))),
|
||||
|
||||
Ok(res) => {
|
||||
if res.head_data.hash() != descriptor.para_head {
|
||||
Ok(ValidationResult::Invalid(InvalidCandidate::ParaHeadHashMismatch))
|
||||
} else {
|
||||
let outputs = CandidateCommitments {
|
||||
head_data: res.head_data,
|
||||
upward_messages: res.upward_messages,
|
||||
horizontal_messages: res.horizontal_messages,
|
||||
new_validation_code: res.new_validation_code,
|
||||
processed_downward_messages: res.processed_downward_messages,
|
||||
hrmp_watermark: res.hrmp_watermark,
|
||||
};
|
||||
Ok(ValidationResult::Valid(outputs, persisted_validation_data))
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
trait ValidationBackend {
|
||||
async fn validate_candidate(
|
||||
&mut self,
|
||||
raw_validation_code: Vec<u8>,
|
||||
params: ValidationParams
|
||||
) -> Result<WasmValidationResult, ValidationError>;
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ValidationBackend for &'_ mut ValidationHost {
|
||||
async fn validate_candidate(
|
||||
&mut self,
|
||||
raw_validation_code: Vec<u8>,
|
||||
params: ValidationParams
|
||||
) -> Result<WasmValidationResult, ValidationError> {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
if let Err(err) = self.execute_pvf(
|
||||
Pvf::from_code(raw_validation_code),
|
||||
params.encode(),
|
||||
polkadot_node_core_pvf::Priority::Normal,
|
||||
tx,
|
||||
).await {
|
||||
return Err(ValidationError::InternalError(format!("cannot send pvf to the validation host: {:?}", err)));
|
||||
}
|
||||
|
||||
let validation_result = rx
|
||||
.await
|
||||
.map_err(|_| ValidationError::InternalError("validation was cancelled".into()))?;
|
||||
|
||||
validation_result
|
||||
}
|
||||
}
|
||||
|
||||
/// Does basic checks of a candidate. Provide the encoded PoV-block. Returns `Ok` if basic checks
|
||||
@@ -393,126 +500,6 @@ fn perform_basic_checks(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
trait ValidationBackend {
|
||||
type Arg;
|
||||
|
||||
fn validate<S: SpawnNamed + 'static>(
|
||||
arg: Self::Arg,
|
||||
raw_validation_code: &[u8],
|
||||
params: ValidationParams,
|
||||
spawn: S,
|
||||
) -> Result<WasmValidationResult, ValidationError>;
|
||||
}
|
||||
|
||||
struct RealValidationBackend;
|
||||
|
||||
impl ValidationBackend for RealValidationBackend {
|
||||
type Arg = IsolationStrategy;
|
||||
|
||||
fn validate<S: SpawnNamed + 'static>(
|
||||
isolation_strategy: IsolationStrategy,
|
||||
raw_validation_code: &[u8],
|
||||
params: ValidationParams,
|
||||
spawn: S,
|
||||
) -> Result<WasmValidationResult, ValidationError> {
|
||||
wasm_executor::validate_candidate(
|
||||
&raw_validation_code,
|
||||
params,
|
||||
&isolation_strategy,
|
||||
spawn,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Validates the candidate from exhaustive parameters.
|
||||
///
|
||||
/// Sends the result of validation on the channel once complete.
|
||||
#[tracing::instrument(level = "trace", skip(backend_arg, validation_code, pov, spawn, metrics), fields(subsystem = LOG_TARGET))]
|
||||
fn validate_candidate_exhaustive<B: ValidationBackend, S: SpawnNamed + 'static>(
|
||||
backend_arg: B::Arg,
|
||||
persisted_validation_data: PersistedValidationData,
|
||||
validation_code: ValidationCode,
|
||||
descriptor: CandidateDescriptor,
|
||||
pov: Arc<PoV>,
|
||||
spawn: S,
|
||||
metrics: &Metrics,
|
||||
) -> Result<ValidationResult, ValidationFailed> {
|
||||
let _timer = metrics.time_validate_candidate_exhaustive();
|
||||
|
||||
if let Err(e) = perform_basic_checks(
|
||||
&descriptor,
|
||||
persisted_validation_data.max_pov_size,
|
||||
&*pov,
|
||||
&validation_code,
|
||||
) {
|
||||
return Ok(ValidationResult::Invalid(e))
|
||||
}
|
||||
|
||||
let raw_validation_code = match sp_maybe_compressed_blob::decompress(
|
||||
&validation_code.0,
|
||||
VALIDATION_CODE_BOMB_LIMIT,
|
||||
) {
|
||||
Ok(code) => code,
|
||||
Err(e) => {
|
||||
tracing::debug!(target: LOG_TARGET, err=?e, "Invalid validation code");
|
||||
|
||||
// If the validation code is invalid, the candidate certainly is.
|
||||
return Ok(ValidationResult::Invalid(InvalidCandidate::CodeDecompressionFailure));
|
||||
}
|
||||
};
|
||||
|
||||
let raw_block_data = match sp_maybe_compressed_blob::decompress(
|
||||
&pov.block_data.0,
|
||||
POV_BOMB_LIMIT,
|
||||
) {
|
||||
Ok(block_data) => BlockData(block_data.to_vec()),
|
||||
Err(e) => {
|
||||
tracing::debug!(target: LOG_TARGET, err=?e, "Invalid PoV code");
|
||||
|
||||
// If the PoV is invalid, the candidate certainly is.
|
||||
return Ok(ValidationResult::Invalid(InvalidCandidate::PoVDecompressionFailure));
|
||||
}
|
||||
};
|
||||
|
||||
let params = ValidationParams {
|
||||
parent_head: persisted_validation_data.parent_head.clone(),
|
||||
block_data: raw_block_data,
|
||||
relay_parent_number: persisted_validation_data.relay_parent_number,
|
||||
relay_parent_storage_root: persisted_validation_data.relay_parent_storage_root,
|
||||
};
|
||||
|
||||
match B::validate(backend_arg, &raw_validation_code, params, spawn) {
|
||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::Timeout)) =>
|
||||
Ok(ValidationResult::Invalid(InvalidCandidate::Timeout)),
|
||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::ParamsTooLarge(l, _))) =>
|
||||
Ok(ValidationResult::Invalid(InvalidCandidate::ParamsTooLarge(l as u64))),
|
||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::CodeTooLarge(l, _))) =>
|
||||
Ok(ValidationResult::Invalid(InvalidCandidate::CodeTooLarge(l as u64))),
|
||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::BadReturn)) =>
|
||||
Ok(ValidationResult::Invalid(InvalidCandidate::BadReturn)),
|
||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::WasmExecutor(e))) =>
|
||||
Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(e.to_string()))),
|
||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::ExternalWasmExecutor(e))) =>
|
||||
Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(e.to_string()))),
|
||||
Err(ValidationError::Internal(e)) => Err(ValidationFailed(e.to_string())),
|
||||
Ok(res) => {
|
||||
if res.head_data.hash() != descriptor.para_head {
|
||||
return Ok(ValidationResult::Invalid(InvalidCandidate::ParaHeadHashMismatch));
|
||||
}
|
||||
|
||||
let outputs = CandidateCommitments {
|
||||
head_data: res.head_data,
|
||||
upward_messages: res.upward_messages,
|
||||
horizontal_messages: res.horizontal_messages,
|
||||
new_validation_code: res.new_validation_code,
|
||||
processed_downward_messages: res.processed_downward_messages,
|
||||
hrmp_watermark: res.hrmp_watermark,
|
||||
};
|
||||
Ok(ValidationResult::Valid(outputs, persisted_validation_data))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct MetricsInner {
|
||||
validation_requests: prometheus::CounterVec<prometheus::U64>,
|
||||
@@ -605,7 +592,7 @@ impl metrics::Metrics for Metrics {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use super::*;
|
||||
use polkadot_node_subsystem_test_helpers as test_helpers;
|
||||
use polkadot_primitives::v1::{HeadData, UpwardMessage};
|
||||
use sp_core::testing::TaskExecutor;
|
||||
@@ -613,25 +600,6 @@ mod tests {
|
||||
use assert_matches::assert_matches;
|
||||
use sp_keyring::Sr25519Keyring;
|
||||
|
||||
struct MockValidationBackend;
|
||||
|
||||
struct MockValidationArg {
|
||||
result: Result<WasmValidationResult, ValidationError>,
|
||||
}
|
||||
|
||||
impl ValidationBackend for MockValidationBackend {
|
||||
type Arg = MockValidationArg;
|
||||
|
||||
fn validate<S: SpawnNamed + 'static>(
|
||||
arg: Self::Arg,
|
||||
_raw_validation_code: &[u8],
|
||||
_params: ValidationParams,
|
||||
_spawn: S,
|
||||
) -> Result<WasmValidationResult, ValidationError> {
|
||||
arg.result
|
||||
}
|
||||
}
|
||||
|
||||
fn collator_sign(descriptor: &mut CandidateDescriptor, collator: Sr25519Keyring) {
|
||||
descriptor.collator = collator.public().into();
|
||||
let payload = polkadot_primitives::v1::collator_signature_payload(
|
||||
@@ -924,6 +892,29 @@ mod tests {
|
||||
executor::block_on(test_fut);
|
||||
}
|
||||
|
||||
struct MockValidatorBackend {
|
||||
result: Result<WasmValidationResult, ValidationError>,
|
||||
}
|
||||
|
||||
impl MockValidatorBackend {
|
||||
fn with_hardcoded_result(result: Result<WasmValidationResult, ValidationError>) -> Self {
|
||||
Self {
|
||||
result,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ValidationBackend for MockValidatorBackend {
|
||||
async fn validate_candidate(
|
||||
&mut self,
|
||||
_raw_validation_code: Vec<u8>,
|
||||
_params: ValidationParams
|
||||
) -> Result<WasmValidationResult, ValidationError> {
|
||||
self.result.clone()
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn candidate_validation_ok_is_ok() {
|
||||
let validation_data = PersistedValidationData { max_pov_size: 1024, ..Default::default() };
|
||||
@@ -955,15 +946,16 @@ mod tests {
|
||||
hrmp_watermark: 0,
|
||||
};
|
||||
|
||||
let v = validate_candidate_exhaustive::<MockValidationBackend, _>(
|
||||
MockValidationArg { result: Ok(validation_result) },
|
||||
let v = executor::block_on(validate_candidate_exhaustive(
|
||||
MockValidatorBackend::with_hardcoded_result(Ok(validation_result)),
|
||||
validation_data.clone(),
|
||||
validation_code,
|
||||
descriptor,
|
||||
Arc::new(pov),
|
||||
TaskExecutor::new(),
|
||||
&Default::default(),
|
||||
).unwrap();
|
||||
))
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
assert_matches!(v, ValidationResult::Valid(outputs, used_validation_data) => {
|
||||
assert_eq!(outputs.head_data, HeadData(vec![1, 1, 1]));
|
||||
@@ -995,21 +987,20 @@ mod tests {
|
||||
);
|
||||
assert!(check.is_ok());
|
||||
|
||||
let v = validate_candidate_exhaustive::<MockValidationBackend, _>(
|
||||
MockValidationArg {
|
||||
result: Err(ValidationError::InvalidCandidate(
|
||||
WasmInvalidCandidate::BadReturn
|
||||
))
|
||||
},
|
||||
let v = executor::block_on(validate_candidate_exhaustive(
|
||||
MockValidatorBackend::with_hardcoded_result(
|
||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbigiousWorkerDeath))
|
||||
),
|
||||
validation_data,
|
||||
validation_code,
|
||||
descriptor,
|
||||
Arc::new(pov),
|
||||
TaskExecutor::new(),
|
||||
&Default::default(),
|
||||
).unwrap();
|
||||
))
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
assert_matches!(v, ValidationResult::Invalid(InvalidCandidate::BadReturn));
|
||||
assert_matches!(v, ValidationResult::Invalid(InvalidCandidate::ExecutionError(_)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1032,19 +1023,17 @@ mod tests {
|
||||
);
|
||||
assert!(check.is_ok());
|
||||
|
||||
let v = validate_candidate_exhaustive::<MockValidationBackend, _>(
|
||||
MockValidationArg {
|
||||
result: Err(ValidationError::InvalidCandidate(
|
||||
WasmInvalidCandidate::Timeout
|
||||
))
|
||||
},
|
||||
let v = executor::block_on(validate_candidate_exhaustive(
|
||||
MockValidatorBackend::with_hardcoded_result(
|
||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::HardTimeout)),
|
||||
),
|
||||
validation_data,
|
||||
validation_code,
|
||||
descriptor,
|
||||
Arc::new(pov),
|
||||
TaskExecutor::new(),
|
||||
&Default::default(),
|
||||
);
|
||||
))
|
||||
.unwrap();
|
||||
|
||||
assert_matches!(v, Ok(ValidationResult::Invalid(InvalidCandidate::Timeout)));
|
||||
}
|
||||
@@ -1069,19 +1058,18 @@ mod tests {
|
||||
);
|
||||
assert_matches!(check, Err(InvalidCandidate::CodeHashMismatch));
|
||||
|
||||
let v = validate_candidate_exhaustive::<MockValidationBackend, _>(
|
||||
MockValidationArg {
|
||||
result: Err(ValidationError::InvalidCandidate(
|
||||
WasmInvalidCandidate::BadReturn
|
||||
))
|
||||
},
|
||||
let v = executor::block_on(validate_candidate_exhaustive(
|
||||
MockValidatorBackend::with_hardcoded_result(
|
||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::HardTimeout)),
|
||||
),
|
||||
validation_data,
|
||||
validation_code,
|
||||
descriptor,
|
||||
Arc::new(pov),
|
||||
TaskExecutor::new(),
|
||||
&Default::default(),
|
||||
).unwrap();
|
||||
))
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
assert_matches!(v, ValidationResult::Invalid(InvalidCandidate::CodeHashMismatch));
|
||||
}
|
||||
@@ -1115,15 +1103,15 @@ mod tests {
|
||||
hrmp_watermark: 0,
|
||||
};
|
||||
|
||||
let v = validate_candidate_exhaustive::<MockValidationBackend, _>(
|
||||
MockValidationArg { result: Ok(validation_result) },
|
||||
let v = executor::block_on(validate_candidate_exhaustive(
|
||||
MockValidatorBackend::with_hardcoded_result(Ok(validation_result)),
|
||||
validation_data,
|
||||
validation_code,
|
||||
descriptor,
|
||||
Arc::new(pov),
|
||||
TaskExecutor::new(),
|
||||
&Default::default(),
|
||||
);
|
||||
))
|
||||
.unwrap();
|
||||
|
||||
assert_matches!(v, Ok(ValidationResult::Valid(_, _)));
|
||||
}
|
||||
@@ -1157,15 +1145,15 @@ mod tests {
|
||||
hrmp_watermark: 0,
|
||||
};
|
||||
|
||||
let v = validate_candidate_exhaustive::<MockValidationBackend, _>(
|
||||
MockValidationArg { result: Ok(validation_result) },
|
||||
let v = executor::block_on(validate_candidate_exhaustive(
|
||||
MockValidatorBackend::with_hardcoded_result(Ok(validation_result)),
|
||||
validation_data,
|
||||
validation_code,
|
||||
descriptor,
|
||||
Arc::new(pov),
|
||||
TaskExecutor::new(),
|
||||
&Default::default(),
|
||||
);
|
||||
))
|
||||
.unwrap();
|
||||
|
||||
assert_matches!(
|
||||
v,
|
||||
@@ -1206,15 +1194,15 @@ mod tests {
|
||||
hrmp_watermark: 0,
|
||||
};
|
||||
|
||||
let v = validate_candidate_exhaustive::<MockValidationBackend, _>(
|
||||
MockValidationArg { result: Ok(validation_result) },
|
||||
let v = executor::block_on(validate_candidate_exhaustive(
|
||||
MockValidatorBackend::with_hardcoded_result(Ok(validation_result)),
|
||||
validation_data,
|
||||
validation_code,
|
||||
descriptor,
|
||||
Arc::new(pov),
|
||||
TaskExecutor::new(),
|
||||
&Default::default(),
|
||||
);
|
||||
))
|
||||
.unwrap();
|
||||
|
||||
assert_matches!(
|
||||
v,
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
[package]
|
||||
name = "polkadot-node-core-pvf"
|
||||
version = "0.1.0"
|
||||
authors = ["Parity Technologies <admin@parity.io>"]
|
||||
edition = "2018"
|
||||
|
||||
[[bin]]
|
||||
name = "puppet_worker"
|
||||
path = "bin/puppet_worker.rs"
|
||||
|
||||
[dependencies]
|
||||
always-assert = "0.1"
|
||||
async-std = { version = "1.8.0", features = ["attributes"] }
|
||||
async-process = "1.0.1"
|
||||
assert_matches = "1.4.0"
|
||||
futures = "0.3.12"
|
||||
futures-timer = "3.0.2"
|
||||
libc = "0.2.81"
|
||||
slotmap = "1.0"
|
||||
tracing = "0.1.22"
|
||||
pin-project = "1.0.4"
|
||||
rand = "0.8.3"
|
||||
parity-scale-codec = { version = "2.0.0", default-features = false, features = ["derive"] }
|
||||
polkadot-parachain = { path = "../../../parachain" }
|
||||
polkadot-core-primitives = { path = "../../../core-primitives" }
|
||||
sc-executor = { git = "https://github.com/paritytech/substrate", branch = "master" }
|
||||
sc-executor-wasmtime = { git = "https://github.com/paritytech/substrate", branch = "master" }
|
||||
sc-executor-common = { git = "https://github.com/paritytech/substrate", branch = "master" }
|
||||
sp-externalities = { git = "https://github.com/paritytech/substrate", branch = "master" }
|
||||
sp-io = { git = "https://github.com/paritytech/substrate", branch = "master" }
|
||||
sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" }
|
||||
sp-wasm-interface = { git = "https://github.com/paritytech/substrate", branch = "master" }
|
||||
|
||||
[dev-dependencies]
|
||||
adder = { package = "test-parachain-adder", path = "../../../parachain/test-parachains/adder" }
|
||||
halt = { package = "test-parachain-halt", path = "../../../parachain/test-parachains/halt" }
|
||||
hex-literal = "0.3.1"
|
||||
tempfile = "3.2.0"
|
||||
|
||||
# PVF execution leverages compiled artifacts provided by wasmtime. The contents of the artifacts
|
||||
# depends on the version of wasmtime. In this crate we persist the artifacts on disk so we should
|
||||
# be careful about the updates. In order to handle this, we depend on the wasmtime version here
|
||||
# that we think is used by the sc-executor. If wasmtime is updated in Substrate and wasn't updated
|
||||
# here then there will be linking errors like
|
||||
#
|
||||
# `multiple definitions of `set_vmctx_memory`.
|
||||
#
|
||||
# or similar, because wasmtime exports these symbols and does not support multiple versions compiled
|
||||
# in at the same time.
|
||||
#
|
||||
# Another safeguard is a test `ensure_wasmtime_version` that will fail on each bump and prompt the
|
||||
# developer to correspondingly act upon the change.
|
||||
wasmtime-jit = "0.24"
|
||||
@@ -0,0 +1,17 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
polkadot_node_core_pvf::decl_puppet_worker_main!();
|
||||
@@ -0,0 +1,311 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use crate::LOG_TARGET;
|
||||
use always_assert::always;
|
||||
use async_std::{
|
||||
io,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
use polkadot_core_primitives::Hash;
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
time::{Duration, SystemTime},
|
||||
};
|
||||
use parity_scale_codec::{Encode, Decode};
|
||||
use futures::StreamExt;
|
||||
|
||||
/// A final product of preparation process. Contains either a ready to run compiled artifact or
|
||||
/// a description what went wrong.
|
||||
#[derive(Encode, Decode)]
|
||||
pub enum Artifact {
|
||||
/// During the prevalidation stage of preparation an issue was found with the PVF.
|
||||
PrevalidationErr(String),
|
||||
/// Compilation failed for the given PVF.
|
||||
PreparationErr(String),
|
||||
/// This state indicates that the process assigned to prepare the artifact wasn't responsible
|
||||
/// or were killed. This state is reported by the validation host (not by the worker).
|
||||
DidntMakeIt,
|
||||
/// The PVF passed all the checks and is ready for execution.
|
||||
Compiled { compiled_artifact: Vec<u8> },
|
||||
}
|
||||
|
||||
impl Artifact {
|
||||
/// Serializes this struct into a byte buffer.
|
||||
pub fn serialize(&self) -> Vec<u8> {
|
||||
self.encode()
|
||||
}
|
||||
|
||||
/// Deserialize the given byte buffer to an artifact.
|
||||
pub fn deserialize(mut bytes: &[u8]) -> Result<Self, String> {
|
||||
Artifact::decode(&mut bytes).map_err(|e| format!("{:?}", e))
|
||||
}
|
||||
}
|
||||
|
||||
/// Identifier of an artifact. Right now it only encodes a code hash of the PVF. But if we get to
|
||||
/// multiple engine implementations the artifact ID should include the engine type as well.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct ArtifactId {
|
||||
code_hash: Hash,
|
||||
}
|
||||
|
||||
impl ArtifactId {
|
||||
const PREFIX: &'static str = "wasmtime_1_";
|
||||
|
||||
/// Creates a new artifact ID with the given hash.
|
||||
pub fn new(code_hash: Hash) -> Self {
|
||||
Self { code_hash }
|
||||
}
|
||||
|
||||
/// Tries to recover the artifact id from the given file name.
|
||||
pub fn from_file_name(file_name: &str) -> Option<Self> {
|
||||
use std::str::FromStr as _;
|
||||
|
||||
let file_name = file_name.strip_prefix(Self::PREFIX)?;
|
||||
let code_hash = Hash::from_str(file_name).ok()?;
|
||||
|
||||
Some(Self { code_hash })
|
||||
}
|
||||
|
||||
/// Returns the expected path to this artifact given the root of the cache.
|
||||
pub fn path(&self, cache_path: &Path) -> PathBuf {
|
||||
let file_name = format!("{}{}", Self::PREFIX, self.code_hash.to_string());
|
||||
cache_path.join(file_name)
|
||||
}
|
||||
}
|
||||
|
||||
pub enum ArtifactState {
|
||||
/// The artifact is ready to be used by the executor.
|
||||
///
|
||||
/// That means that the artifact should be accessible through the path obtained by the artifact
|
||||
/// id (unless, it was removed externally).
|
||||
Prepared {
|
||||
/// The time when the artifact was the last time needed.
|
||||
///
|
||||
/// This is updated when we get the heads up for this artifact or when we just discover
|
||||
/// this file.
|
||||
last_time_needed: SystemTime,
|
||||
},
|
||||
/// A task to prepare this artifact is scheduled.
|
||||
Preparing,
|
||||
}
|
||||
|
||||
/// A container of all known artifact ids and their states.
|
||||
pub struct Artifacts {
|
||||
artifacts: HashMap<ArtifactId, ArtifactState>,
|
||||
}
|
||||
|
||||
impl Artifacts {
|
||||
/// Scan the given cache root for the artifacts.
|
||||
///
|
||||
/// The recognized artifacts will be filled in the table and unrecognized will be removed.
|
||||
pub async fn new(cache_path: &Path) -> Self {
|
||||
// Make sure that the cache path directory and all it's parents are created.
|
||||
let _ = async_std::fs::create_dir_all(cache_path).await;
|
||||
|
||||
let artifacts = match scan_for_known_artifacts(cache_path).await {
|
||||
Ok(a) => a,
|
||||
Err(err) => {
|
||||
tracing::warn!(
|
||||
target: LOG_TARGET,
|
||||
"unable to seed the artifacts in memory cache: {:?}. Starting with a clean one",
|
||||
err,
|
||||
);
|
||||
HashMap::new()
|
||||
}
|
||||
};
|
||||
|
||||
Self { artifacts }
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn empty() -> Self {
|
||||
Self {
|
||||
artifacts: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the state of the given artifact by its ID.
|
||||
pub fn artifact_state_mut(&mut self, artifact_id: &ArtifactId) -> Option<&mut ArtifactState> {
|
||||
self.artifacts.get_mut(artifact_id)
|
||||
}
|
||||
|
||||
/// Inform the table about the artifact with the given ID. The state will be set to "preparing".
|
||||
///
|
||||
/// This function must be used only for brand new artifacts and should never be used for
|
||||
/// replacing existing ones.
|
||||
pub fn insert_preparing(&mut self, artifact_id: ArtifactId) {
|
||||
// See the precondition.
|
||||
always!(self
|
||||
.artifacts
|
||||
.insert(artifact_id, ArtifactState::Preparing)
|
||||
.is_none());
|
||||
}
|
||||
|
||||
/// Insert an artifact with the given ID as "prepared".
|
||||
///
|
||||
/// This function must be used only for brand new artifacts and should never be used for
|
||||
/// replacing existing ones.
|
||||
#[cfg(test)]
|
||||
pub fn insert_prepared(&mut self, artifact_id: ArtifactId, last_time_needed: SystemTime) {
|
||||
// See the precondition.
|
||||
always!(self
|
||||
.artifacts
|
||||
.insert(artifact_id, ArtifactState::Prepared { last_time_needed })
|
||||
.is_none());
|
||||
}
|
||||
|
||||
/// Remove and retrive the artifacts from the table that are older than the supplied Time-To-Live.
|
||||
pub fn prune(&mut self, artifact_ttl: Duration) -> Vec<ArtifactId> {
|
||||
let now = SystemTime::now();
|
||||
|
||||
let mut to_remove = vec![];
|
||||
for (k, v) in self.artifacts.iter() {
|
||||
if let ArtifactState::Prepared {
|
||||
last_time_needed, ..
|
||||
} = *v {
|
||||
if now
|
||||
.duration_since(last_time_needed)
|
||||
.map(|age| age > artifact_ttl)
|
||||
.unwrap_or(false)
|
||||
{
|
||||
to_remove.push(k.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for artifact in &to_remove {
|
||||
self.artifacts.remove(artifact);
|
||||
}
|
||||
|
||||
to_remove
|
||||
}
|
||||
}
|
||||
|
||||
/// Goes over all files in the given directory, collecting all recognizable artifacts. All files
|
||||
/// that do not look like artifacts are removed.
|
||||
///
|
||||
/// All recognized artifacts will be created with the current datetime.
|
||||
async fn scan_for_known_artifacts(
|
||||
cache_path: &Path,
|
||||
) -> io::Result<HashMap<ArtifactId, ArtifactState>> {
|
||||
let mut result = HashMap::new();
|
||||
let now = SystemTime::now();
|
||||
|
||||
let mut dir = async_std::fs::read_dir(cache_path).await?;
|
||||
while let Some(res) = dir.next().await {
|
||||
let entry = res?;
|
||||
|
||||
if entry.file_type().await?.is_dir() {
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
"{} is a dir, and dirs do not belong to us. Removing",
|
||||
entry.path().display(),
|
||||
);
|
||||
let _ = async_std::fs::remove_dir_all(entry.path()).await;
|
||||
}
|
||||
|
||||
let path = entry.path();
|
||||
let file_name = match path.file_name() {
|
||||
None => {
|
||||
// A file without a file name? Weird, just skip it.
|
||||
continue;
|
||||
}
|
||||
Some(file_name) => file_name,
|
||||
};
|
||||
|
||||
let file_name = match file_name.to_str() {
|
||||
None => {
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
"{} is not utf-8. Removing",
|
||||
path.display(),
|
||||
);
|
||||
let _ = async_std::fs::remove_file(&path).await;
|
||||
continue;
|
||||
}
|
||||
Some(file_name) => file_name,
|
||||
};
|
||||
|
||||
let artifact_id = match ArtifactId::from_file_name(file_name) {
|
||||
None => {
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
"{} is not a recognized artifact. Removing",
|
||||
path.display(),
|
||||
);
|
||||
let _ = async_std::fs::remove_file(&path).await;
|
||||
continue;
|
||||
}
|
||||
Some(artifact_id) => artifact_id,
|
||||
};
|
||||
|
||||
// A sanity check so that we really can access the artifact through the artifact id.
|
||||
if artifact_id.path(cache_path).is_file().await {
|
||||
result.insert(
|
||||
artifact_id,
|
||||
ArtifactState::Prepared {
|
||||
last_time_needed: now,
|
||||
},
|
||||
);
|
||||
} else {
|
||||
tracing::warn!(
|
||||
target: LOG_TARGET,
|
||||
"{} is not accessible by artifact_id {:?}",
|
||||
cache_path.display(),
|
||||
artifact_id,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::ArtifactId;
|
||||
|
||||
#[test]
|
||||
fn ensure_wasmtime_version() {
|
||||
assert_eq!(
|
||||
wasmtime_jit::VERSION,
|
||||
"0.24.0",
|
||||
"wasmtime version is updated. Check the prefix.",
|
||||
);
|
||||
// If the version bump is significant, change `ArtifactId::PREFIX`.
|
||||
//
|
||||
// If in doubt bump it. This will lead to removal of the existing artifacts in the on-disk cache
|
||||
// and recompilation.
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn from_file_name() {
|
||||
assert!(ArtifactId::from_file_name("").is_none());
|
||||
assert!(ArtifactId::from_file_name("junk").is_none());
|
||||
|
||||
assert_eq!(
|
||||
ArtifactId::from_file_name(
|
||||
"wasmtime_1_0x0022800000000000000000000000000000000000000000000000000000000000"
|
||||
),
|
||||
Some(ArtifactId::new(
|
||||
hex_literal::hex![
|
||||
"0022800000000000000000000000000000000000000000000000000000000000"
|
||||
]
|
||||
.into()
|
||||
)),
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,56 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
/// A error raised during validation of the candidate.
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum ValidationError {
|
||||
/// The error was raised because the candidate is invalid.
|
||||
InvalidCandidate(InvalidCandidate),
|
||||
/// This error is raised due to inability to serve the request.
|
||||
InternalError(String),
|
||||
}
|
||||
|
||||
/// A description of an error raised during executing a PVF and can be attributed to the combination
|
||||
/// of the candidate [`polkadot_parachain::primitives::ValidationParams`] and the PVF.
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum InvalidCandidate {
|
||||
/// The failure is reported by the worker. The string contains the error message.
|
||||
///
|
||||
/// This also includes the errors reported by the preparation pipeline.
|
||||
WorkerReportedError(String),
|
||||
/// The worker has died during validation of a candidate. That may fall in one of the following
|
||||
/// categories, which we cannot distinguish programmatically:
|
||||
///
|
||||
/// (a) Some sort of transient glitch caused the worker process to abort. An example would be that
|
||||
/// the host machine ran out of free memory and the OOM killer started killing the processes,
|
||||
/// and in order to save the parent it will "sacrifice child" first.
|
||||
///
|
||||
/// (b) The candidate triggered a code path that has lead to the process death. For example,
|
||||
/// the PVF found a way to consume unbounded amount of resources and then it either exceeded
|
||||
/// an rlimit (if set) or, again, invited OOM killer. Another possibility is a bug in
|
||||
/// wasmtime allowed the PVF to gain control over the execution worker.
|
||||
///
|
||||
/// We attribute such an event to an invalid candidate in either case.
|
||||
///
|
||||
/// The rationale for this is that a glitch may lead to unfair rejecting candidate by a single
|
||||
/// validator. If the glitch is somewhat more persistant the validator will reject all candidate
|
||||
/// thrown at it and hopefully the operator notices it by decreased reward performance of the
|
||||
/// validator. On the other hand, if the worker died because of (b) we would have better chances
|
||||
/// to stop the attack.
|
||||
AmbigiousWorkerDeath,
|
||||
/// PVF execution (compilation is not included) took more time than was allotted.
|
||||
HardTimeout,
|
||||
}
|
||||
+10
-13
@@ -1,4 +1,4 @@
|
||||
// Copyright 2019-2020 Parity Technologies (UK) Ltd.
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
@@ -14,17 +14,14 @@
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
mod adder;
|
||||
mod wasm_executor;
|
||||
//! Execution part of the pipeline.
|
||||
//!
|
||||
//! The validation host [runs the queue][`start`] communicating with it by sending [`ToQueue`]
|
||||
//! messages. The queue will spawn workers in new processes. Those processes should jump to
|
||||
//! [`worker_entrypoint`].
|
||||
|
||||
use parachain::wasm_executor::run_worker;
|
||||
mod queue;
|
||||
mod worker;
|
||||
|
||||
// This is not an actual test, but rather an entry point for out-of process WASM executor.
|
||||
// When executing tests the executor spawns currently executing binary, which happens to be test binary.
|
||||
// It then passes "validation_worker" on CLI effectivly making rust test executor to run this single test.
|
||||
#[test]
|
||||
fn validation_worker() {
|
||||
if let Some(id) = std::env::args().find(|a| a.starts_with("/shmem_")) {
|
||||
run_worker(&id, None).unwrap()
|
||||
}
|
||||
}
|
||||
pub use queue::{ToQueue, start};
|
||||
pub use worker::worker_entrypoint;
|
||||
@@ -0,0 +1,344 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! A queue that handles requests for PVF execution.
|
||||
|
||||
use crate::{
|
||||
worker_common::{IdleWorker, WorkerHandle},
|
||||
host::ResultSender,
|
||||
LOG_TARGET, InvalidCandidate, ValidationError,
|
||||
};
|
||||
use super::worker::Outcome;
|
||||
use std::{collections::VecDeque, fmt, time::Duration};
|
||||
use futures::{
|
||||
Future, FutureExt,
|
||||
channel::mpsc,
|
||||
future::BoxFuture,
|
||||
stream::{FuturesUnordered, StreamExt as _},
|
||||
};
|
||||
use async_std::path::PathBuf;
|
||||
use slotmap::HopSlotMap;
|
||||
|
||||
slotmap::new_key_type! { struct Worker; }
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ToQueue {
|
||||
Enqueue {
|
||||
artifact_path: PathBuf,
|
||||
params: Vec<u8>,
|
||||
result_tx: ResultSender,
|
||||
},
|
||||
}
|
||||
|
||||
struct ExecuteJob {
|
||||
artifact_path: PathBuf,
|
||||
params: Vec<u8>,
|
||||
result_tx: ResultSender,
|
||||
}
|
||||
|
||||
struct WorkerData {
|
||||
idle: Option<IdleWorker>,
|
||||
handle: WorkerHandle,
|
||||
}
|
||||
|
||||
impl fmt::Debug for WorkerData {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "WorkerData(pid={})", self.handle.id())
|
||||
}
|
||||
}
|
||||
|
||||
struct Workers {
|
||||
/// The registry of running workers.
|
||||
running: HopSlotMap<Worker, WorkerData>,
|
||||
|
||||
/// The number of spawning but not yet spawned workers.
|
||||
spawn_inflight: usize,
|
||||
|
||||
/// The maximum number of workers queue can have at once.
|
||||
capacity: usize,
|
||||
}
|
||||
|
||||
impl Workers {
|
||||
fn can_afford_one_more(&self) -> bool {
|
||||
self.spawn_inflight + self.running.len() < self.capacity
|
||||
}
|
||||
|
||||
fn find_available(&self) -> Option<Worker> {
|
||||
self.running
|
||||
.iter()
|
||||
.find_map(|d| if d.1.idle.is_some() { Some(d.0) } else { None })
|
||||
}
|
||||
|
||||
/// Find the associated data by the worker token and extract it's [`IdleWorker`] token.
|
||||
///
|
||||
/// Returns `None` if either worker is not recognized or idle token is absent.
|
||||
fn claim_idle(&mut self, worker: Worker) -> Option<IdleWorker> {
|
||||
self
|
||||
.running
|
||||
.get_mut(worker)?
|
||||
.idle
|
||||
.take()
|
||||
}
|
||||
}
|
||||
|
||||
enum QueueEvent {
|
||||
Spawn((IdleWorker, WorkerHandle)),
|
||||
StartWork(Worker, Outcome, ResultSender),
|
||||
}
|
||||
|
||||
type Mux = FuturesUnordered<BoxFuture<'static, QueueEvent>>;
|
||||
|
||||
struct Queue {
|
||||
/// The receiver that receives messages to the pool.
|
||||
to_queue_rx: mpsc::Receiver<ToQueue>,
|
||||
|
||||
program_path: PathBuf,
|
||||
spawn_timeout: Duration,
|
||||
|
||||
/// The queue of jobs that are waiting for a worker to pick up.
|
||||
queue: VecDeque<ExecuteJob>,
|
||||
workers: Workers,
|
||||
mux: Mux,
|
||||
}
|
||||
|
||||
impl Queue {
|
||||
fn new(
|
||||
program_path: PathBuf,
|
||||
worker_capacity: usize,
|
||||
spawn_timeout: Duration,
|
||||
to_queue_rx: mpsc::Receiver<ToQueue>,
|
||||
) -> Self {
|
||||
Self {
|
||||
program_path,
|
||||
spawn_timeout,
|
||||
to_queue_rx,
|
||||
queue: VecDeque::new(),
|
||||
mux: Mux::new(),
|
||||
workers: Workers {
|
||||
running: HopSlotMap::with_capacity_and_key(10),
|
||||
spawn_inflight: 0,
|
||||
capacity: worker_capacity,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
async fn run(mut self) {
|
||||
loop {
|
||||
futures::select! {
|
||||
to_queue = self.to_queue_rx.next() => {
|
||||
if let Some(to_queue) = to_queue {
|
||||
handle_to_queue(&mut self, to_queue);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
ev = self.mux.select_next_some() => handle_mux(&mut self, ev).await,
|
||||
}
|
||||
|
||||
purge_dead(&mut self.workers).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn purge_dead(workers: &mut Workers) {
|
||||
let mut to_remove = vec![];
|
||||
for (worker, data) in workers.running.iter_mut() {
|
||||
if futures::poll!(&mut data.handle).is_ready() {
|
||||
// a resolved future means that the worker has terminated. Weed it out.
|
||||
to_remove.push(worker);
|
||||
}
|
||||
}
|
||||
for w in to_remove {
|
||||
let _ = workers.running.remove(w);
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_to_queue(queue: &mut Queue, to_queue: ToQueue) {
|
||||
let ToQueue::Enqueue {
|
||||
artifact_path,
|
||||
params,
|
||||
result_tx,
|
||||
} = to_queue;
|
||||
|
||||
let job = ExecuteJob {
|
||||
artifact_path,
|
||||
params,
|
||||
result_tx,
|
||||
};
|
||||
|
||||
if let Some(available) = queue.workers.find_available() {
|
||||
assign(queue, available, job);
|
||||
} else {
|
||||
if queue.workers.can_afford_one_more() {
|
||||
spawn_extra_worker(queue);
|
||||
}
|
||||
queue.queue.push_back(job);
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_mux(queue: &mut Queue, event: QueueEvent) {
|
||||
match event {
|
||||
QueueEvent::Spawn((idle, handle)) => {
|
||||
queue.workers.spawn_inflight -= 1;
|
||||
|
||||
let worker = queue.workers.running.insert(WorkerData {
|
||||
idle: Some(idle),
|
||||
handle,
|
||||
});
|
||||
|
||||
if let Some(job) = queue.queue.pop_front() {
|
||||
assign(queue, worker, job);
|
||||
}
|
||||
}
|
||||
QueueEvent::StartWork(worker, outcome, result_tx) => {
|
||||
handle_job_finish(queue, worker, outcome, result_tx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// If there are pending jobs in the queue, schedules the next of them onto the just freed up
|
||||
/// worker. Otherwise, puts back into the available workers list.
|
||||
fn handle_job_finish(queue: &mut Queue, worker: Worker, outcome: Outcome, result_tx: ResultSender) {
|
||||
let (idle_worker, result) = match outcome {
|
||||
Outcome::Ok {
|
||||
result_descriptor,
|
||||
duration_ms,
|
||||
idle_worker,
|
||||
} => {
|
||||
// TODO: propagate the soft timeout
|
||||
drop(duration_ms);
|
||||
|
||||
(Some(idle_worker), Ok(result_descriptor))
|
||||
}
|
||||
Outcome::InvalidCandidate { err, idle_worker } => (
|
||||
Some(idle_worker),
|
||||
Err(ValidationError::InvalidCandidate(
|
||||
InvalidCandidate::WorkerReportedError(err),
|
||||
)),
|
||||
),
|
||||
Outcome::InternalError { err, idle_worker } => (
|
||||
Some(idle_worker),
|
||||
Err(ValidationError::InternalError(err)),
|
||||
),
|
||||
Outcome::HardTimeout => (
|
||||
None,
|
||||
Err(ValidationError::InvalidCandidate(
|
||||
InvalidCandidate::HardTimeout,
|
||||
)),
|
||||
),
|
||||
Outcome::IoErr => (
|
||||
None,
|
||||
Err(ValidationError::InvalidCandidate(
|
||||
InvalidCandidate::AmbigiousWorkerDeath,
|
||||
)),
|
||||
),
|
||||
};
|
||||
|
||||
// First we send the result. It may fail due the other end of the channel being dropped, that's
|
||||
// legitimate and we don't treat that as an error.
|
||||
let _ = result_tx.send(result);
|
||||
|
||||
// Then, we should deal with the worker:
|
||||
//
|
||||
// - if the `idle_worker` token was returned we should either schedule the next task or just put
|
||||
// it back so that the next incoming job will be able to claim it
|
||||
//
|
||||
// - if the `idle_worker` token was consumed, all the metadata pertaining to that worker should
|
||||
// be removed.
|
||||
if let Some(idle_worker) = idle_worker {
|
||||
if let Some(data) = queue.workers.running.get_mut(worker) {
|
||||
data.idle = Some(idle_worker);
|
||||
|
||||
if let Some(job) = queue.queue.pop_front() {
|
||||
assign(queue, worker, job);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Note it's possible that the worker was purged already by `purge_dead`
|
||||
queue.workers.running.remove(worker);
|
||||
|
||||
if !queue.queue.is_empty() {
|
||||
// The worker has died and we still have work we have to do. Request an extra worker.
|
||||
//
|
||||
// That can potentially overshoot, but that should be OK.
|
||||
spawn_extra_worker(queue);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn spawn_extra_worker(queue: &mut Queue) {
|
||||
queue
|
||||
.mux
|
||||
.push(spawn_worker_task(queue.program_path.clone(), queue.spawn_timeout).boxed());
|
||||
queue.workers.spawn_inflight += 1;
|
||||
}
|
||||
|
||||
async fn spawn_worker_task(program_path: PathBuf, spawn_timeout: Duration) -> QueueEvent {
|
||||
use futures_timer::Delay;
|
||||
|
||||
loop {
|
||||
match super::worker::spawn(&program_path, spawn_timeout).await {
|
||||
Ok((idle, handle)) => break QueueEvent::Spawn((idle, handle)),
|
||||
Err(err) => {
|
||||
tracing::warn!(
|
||||
target: LOG_TARGET,
|
||||
"failed to spawn an execute worker: {:?}",
|
||||
err,
|
||||
);
|
||||
|
||||
// Assume that the failure intermittent and retry after a delay.
|
||||
Delay::new(Duration::from_secs(3)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Ask the given worker to perform the given job.
|
||||
///
|
||||
/// The worker must be running and idle.
|
||||
fn assign(queue: &mut Queue, worker: Worker, job: ExecuteJob) {
|
||||
let idle = queue
|
||||
.workers
|
||||
.claim_idle(worker)
|
||||
.expect(
|
||||
"this caller must supply a worker which is idle and running;
|
||||
thus claim_idle cannot return None;
|
||||
qed."
|
||||
);
|
||||
queue.mux.push(
|
||||
async move {
|
||||
let outcome = super::worker::start_work(idle, job.artifact_path, job.params).await;
|
||||
QueueEvent::StartWork(worker, outcome, job.result_tx)
|
||||
}
|
||||
.boxed(),
|
||||
);
|
||||
}
|
||||
|
||||
pub fn start(
|
||||
program_path: PathBuf,
|
||||
worker_capacity: usize,
|
||||
spawn_timeout: Duration,
|
||||
) -> (mpsc::Sender<ToQueue>, impl Future<Output = ()>) {
|
||||
let (to_queue_tx, to_queue_rx) = mpsc::channel(20);
|
||||
let run = Queue::new(
|
||||
program_path,
|
||||
worker_capacity,
|
||||
spawn_timeout,
|
||||
to_queue_rx,
|
||||
)
|
||||
.run();
|
||||
(to_queue_tx, run)
|
||||
}
|
||||
@@ -0,0 +1,272 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use crate::{
|
||||
artifacts::Artifact,
|
||||
LOG_TARGET,
|
||||
executor_intf::TaskExecutor,
|
||||
worker_common::{
|
||||
IdleWorker, SpawnErr, WorkerHandle, bytes_to_path, framed_recv, framed_send, path_to_bytes,
|
||||
spawn_with_program_path, worker_event_loop,
|
||||
},
|
||||
};
|
||||
use std::time::{Duration, Instant};
|
||||
use async_std::{
|
||||
io,
|
||||
os::unix::net::UnixStream,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
use futures::FutureExt;
|
||||
use futures_timer::Delay;
|
||||
use polkadot_parachain::primitives::ValidationResult;
|
||||
use parity_scale_codec::{Encode, Decode};
|
||||
|
||||
const EXECUTION_TIMEOUT: Duration = Duration::from_secs(3);
|
||||
|
||||
/// Spawns a new worker with the given program path that acts as the worker and the spawn timeout.
|
||||
///
|
||||
/// The program should be able to handle `<program-path> execute-worker <socket-path>` invocation.
|
||||
pub async fn spawn(
|
||||
program_path: &Path,
|
||||
spawn_timeout: Duration,
|
||||
) -> Result<(IdleWorker, WorkerHandle), SpawnErr> {
|
||||
spawn_with_program_path(
|
||||
"execute",
|
||||
program_path,
|
||||
&["execute-worker"],
|
||||
spawn_timeout,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Outcome of PVF execution.
|
||||
pub enum Outcome {
|
||||
/// PVF execution completed successfully and the result is returned. The worker is ready for
|
||||
/// another job.
|
||||
Ok {
|
||||
result_descriptor: ValidationResult,
|
||||
duration_ms: u64,
|
||||
idle_worker: IdleWorker,
|
||||
},
|
||||
/// The candidate validation failed. It may be for example because the preparation process
|
||||
/// produced an error or the wasm execution triggered a trap.
|
||||
InvalidCandidate {
|
||||
err: String,
|
||||
idle_worker: IdleWorker,
|
||||
},
|
||||
/// An internal error happened during the validation. Such an error is most likely related to
|
||||
/// some transient glitch.
|
||||
InternalError {
|
||||
err: String,
|
||||
idle_worker: IdleWorker,
|
||||
},
|
||||
/// The execution time exceeded the hard limit. The worker is terminated.
|
||||
HardTimeout,
|
||||
/// An I/O error happened during communication with the worker. This may mean that the worker
|
||||
/// process already died. The token is not returned in any case.
|
||||
IoErr,
|
||||
}
|
||||
|
||||
/// Given the idle token of a worker and parameters of work, communicates with the worker and
|
||||
/// returns the outcome.
|
||||
pub async fn start_work(
|
||||
worker: IdleWorker,
|
||||
artifact_path: PathBuf,
|
||||
validation_params: Vec<u8>,
|
||||
) -> Outcome {
|
||||
let IdleWorker { mut stream, pid } = worker;
|
||||
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"starting execute for {}",
|
||||
artifact_path.display(),
|
||||
);
|
||||
|
||||
if send_request(&mut stream, &artifact_path, &validation_params).await.is_err() {
|
||||
return Outcome::IoErr;
|
||||
}
|
||||
|
||||
let response = futures::select! {
|
||||
response = recv_response(&mut stream).fuse() => {
|
||||
match response {
|
||||
Err(_err) => return Outcome::IoErr,
|
||||
Ok(response) => response,
|
||||
}
|
||||
},
|
||||
_ = Delay::new(EXECUTION_TIMEOUT).fuse() => return Outcome::HardTimeout,
|
||||
};
|
||||
|
||||
match response {
|
||||
Response::Ok {
|
||||
result_descriptor,
|
||||
duration_ms,
|
||||
} => Outcome::Ok {
|
||||
result_descriptor,
|
||||
duration_ms,
|
||||
idle_worker: IdleWorker { stream, pid },
|
||||
},
|
||||
Response::InvalidCandidate(err) => Outcome::InvalidCandidate {
|
||||
err,
|
||||
idle_worker: IdleWorker { stream, pid },
|
||||
},
|
||||
Response::InternalError(err) => Outcome::InternalError {
|
||||
err,
|
||||
idle_worker: IdleWorker { stream, pid },
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
async fn send_request(
|
||||
stream: &mut UnixStream,
|
||||
artifact_path: &Path,
|
||||
validation_params: &[u8],
|
||||
) -> io::Result<()> {
|
||||
framed_send(stream, path_to_bytes(artifact_path)).await?;
|
||||
framed_send(stream, validation_params).await
|
||||
}
|
||||
|
||||
async fn recv_request(stream: &mut UnixStream) -> io::Result<(PathBuf, Vec<u8>)> {
|
||||
let artifact_path = framed_recv(stream).await?;
|
||||
let artifact_path = bytes_to_path(&artifact_path).ok_or_else(|| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
"execute pvf recv_request: non utf-8 artifact path".to_string(),
|
||||
)
|
||||
})?;
|
||||
let params = framed_recv(stream).await?;
|
||||
Ok((artifact_path, params))
|
||||
}
|
||||
|
||||
async fn send_response(stream: &mut UnixStream, response: Response) -> io::Result<()> {
|
||||
framed_send(stream, &response.encode()).await
|
||||
}
|
||||
|
||||
async fn recv_response(stream: &mut UnixStream) -> io::Result<Response> {
|
||||
let response_bytes = framed_recv(stream).await?;
|
||||
Response::decode(&mut &response_bytes[..]).map_err(|e| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("execute pvf recv_response: decode error: {:?}", e),
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
#[derive(Encode, Decode)]
|
||||
enum Response {
|
||||
Ok {
|
||||
result_descriptor: ValidationResult,
|
||||
duration_ms: u64,
|
||||
},
|
||||
InvalidCandidate(String),
|
||||
InternalError(String),
|
||||
}
|
||||
|
||||
impl Response {
|
||||
fn format_invalid(ctx: &'static str, msg: &str) -> Self {
|
||||
if msg.is_empty() {
|
||||
Self::InvalidCandidate(ctx.to_string())
|
||||
} else {
|
||||
Self::InvalidCandidate(format!("{}: {}", ctx, msg))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The entrypoint that the spawned execute worker should start with. The socket_path specifies
|
||||
/// the path to the socket used to communicate with the host.
|
||||
pub fn worker_entrypoint(socket_path: &str) {
|
||||
worker_event_loop("execute", socket_path, |mut stream| async move {
|
||||
let executor = TaskExecutor::new().map_err(|e| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("cannot create task executor: {}", e),
|
||||
)
|
||||
})?;
|
||||
loop {
|
||||
let (artifact_path, params) = recv_request(&mut stream).await?;
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %std::process::id(),
|
||||
"worker: validating artifact {}",
|
||||
artifact_path.display(),
|
||||
);
|
||||
let response = validate_using_artifact(&artifact_path, ¶ms, &executor).await;
|
||||
send_response(&mut stream, response).await?;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
async fn validate_using_artifact(
|
||||
artifact_path: &Path,
|
||||
params: &[u8],
|
||||
spawner: &TaskExecutor,
|
||||
) -> Response {
|
||||
let artifact_bytes = match async_std::fs::read(artifact_path).await {
|
||||
Err(e) => {
|
||||
return Response::InternalError(format!(
|
||||
"failed to read the artifact at {}: {:?}",
|
||||
artifact_path.display(),
|
||||
e,
|
||||
))
|
||||
}
|
||||
Ok(b) => b,
|
||||
};
|
||||
|
||||
let artifact = match Artifact::deserialize(&artifact_bytes) {
|
||||
Err(e) => return Response::InternalError(format!("artifact deserialization: {:?}", e)),
|
||||
Ok(a) => a,
|
||||
};
|
||||
|
||||
let compiled_artifact = match &artifact {
|
||||
Artifact::PrevalidationErr(msg) => {
|
||||
return Response::format_invalid("prevalidation", msg);
|
||||
}
|
||||
Artifact::PreparationErr(msg) => {
|
||||
return Response::format_invalid("preparation", msg);
|
||||
}
|
||||
Artifact::DidntMakeIt => {
|
||||
return Response::format_invalid("preparation timeout", "");
|
||||
}
|
||||
|
||||
Artifact::Compiled { compiled_artifact } => compiled_artifact,
|
||||
};
|
||||
|
||||
let validation_started_at = Instant::now();
|
||||
let descriptor_bytes =
|
||||
match crate::executor_intf::execute(compiled_artifact, params, spawner.clone()) {
|
||||
Err(err) => {
|
||||
return Response::format_invalid("execute", &err.to_string());
|
||||
}
|
||||
Ok(d) => d,
|
||||
};
|
||||
|
||||
let duration_ms = validation_started_at.elapsed().as_millis() as u64;
|
||||
|
||||
let result_descriptor = match ValidationResult::decode(&mut &descriptor_bytes[..]) {
|
||||
Err(err) => {
|
||||
return Response::InvalidCandidate(format!(
|
||||
"validation result decoding failed: {}",
|
||||
err
|
||||
))
|
||||
}
|
||||
Ok(r) => r,
|
||||
};
|
||||
|
||||
Response::Ok {
|
||||
result_descriptor,
|
||||
duration_ms,
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,239 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Interface to the Substrate Executor
|
||||
|
||||
use std::any::{TypeId, Any};
|
||||
use sc_executor_common::{
|
||||
runtime_blob::RuntimeBlob,
|
||||
wasm_runtime::{InvokeMethod, WasmModule as _},
|
||||
};
|
||||
use sc_executor_wasmtime::{Config, Semantics};
|
||||
use sp_core::{
|
||||
storage::{ChildInfo, TrackedStorageKey},
|
||||
};
|
||||
use sp_wasm_interface::HostFunctions as _;
|
||||
|
||||
const CONFIG: Config = Config {
|
||||
// TODO: Make sure we don't use more than 1GB: https://github.com/paritytech/polkadot/issues/699
|
||||
heap_pages: 1024,
|
||||
allow_missing_func_imports: true,
|
||||
cache_path: None,
|
||||
semantics: Semantics {
|
||||
fast_instance_reuse: false,
|
||||
stack_depth_metering: false,
|
||||
},
|
||||
};
|
||||
|
||||
/// Runs the prevaldation on the given code. Returns a [`RuntimeBlob`] if it succeeds.
|
||||
pub fn prevalidate(code: &[u8]) -> Result<RuntimeBlob, sc_executor_common::error::WasmError> {
|
||||
let blob = RuntimeBlob::new(code)?;
|
||||
// It's assumed this function will take care of any prevalidation logic
|
||||
// that needs to be done.
|
||||
//
|
||||
// Do nothing for now.
|
||||
Ok(blob)
|
||||
}
|
||||
|
||||
/// Runs preparation on the given runtime blob. If successful, it returns a serialized compiled
|
||||
/// artifact which can then be used to pass into [`execute`].
|
||||
pub fn prepare(blob: RuntimeBlob) -> Result<Vec<u8>, sc_executor_common::error::WasmError> {
|
||||
sc_executor_wasmtime::prepare_runtime_artifact(blob, &CONFIG.semantics)
|
||||
}
|
||||
|
||||
/// Executes the given PVF in the form of a compiled artifact and returns the result of execution
|
||||
/// upon success.
|
||||
pub fn execute(
|
||||
compiled_artifact: &[u8],
|
||||
params: &[u8],
|
||||
spawner: impl sp_core::traits::SpawnNamed + 'static,
|
||||
) -> Result<Vec<u8>, sc_executor_common::error::Error> {
|
||||
let mut extensions = sp_externalities::Extensions::new();
|
||||
|
||||
extensions.register(sp_core::traits::TaskExecutorExt::new(spawner));
|
||||
|
||||
let mut ext = ValidationExternalities(extensions);
|
||||
|
||||
sc_executor::with_externalities_safe(&mut ext, || {
|
||||
let runtime = sc_executor_wasmtime::create_runtime(
|
||||
sc_executor_wasmtime::CodeSupplyMode::Artifact { compiled_artifact },
|
||||
CONFIG,
|
||||
HostFunctions::host_functions(),
|
||||
)?;
|
||||
runtime
|
||||
.new_instance()?
|
||||
.call(InvokeMethod::Export("validate_block"), params)
|
||||
})?
|
||||
}
|
||||
|
||||
type HostFunctions = sp_io::SubstrateHostFunctions;
|
||||
|
||||
/// The validation externalities that will panic on any storage related access.
|
||||
struct ValidationExternalities(sp_externalities::Extensions);
|
||||
|
||||
impl sp_externalities::Externalities for ValidationExternalities {
|
||||
fn storage(&self, _: &[u8]) -> Option<Vec<u8>> {
|
||||
panic!("storage: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn storage_hash(&self, _: &[u8]) -> Option<Vec<u8>> {
|
||||
panic!("storage_hash: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn child_storage_hash(&self, _: &ChildInfo, _: &[u8]) -> Option<Vec<u8>> {
|
||||
panic!("child_storage_hash: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn child_storage(&self, _: &ChildInfo, _: &[u8]) -> Option<Vec<u8>> {
|
||||
panic!("child_storage: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn kill_child_storage(&mut self, _: &ChildInfo, _: Option<u32>) -> (bool, u32) {
|
||||
panic!("kill_child_storage: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn clear_prefix(&mut self, _: &[u8]) {
|
||||
panic!("clear_prefix: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn clear_child_prefix(&mut self, _: &ChildInfo, _: &[u8]) {
|
||||
panic!("clear_child_prefix: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn place_storage(&mut self, _: Vec<u8>, _: Option<Vec<u8>>) {
|
||||
panic!("place_storage: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn place_child_storage(&mut self, _: &ChildInfo, _: Vec<u8>, _: Option<Vec<u8>>) {
|
||||
panic!("place_child_storage: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn storage_root(&mut self) -> Vec<u8> {
|
||||
panic!("storage_root: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn child_storage_root(&mut self, _: &ChildInfo) -> Vec<u8> {
|
||||
panic!("child_storage_root: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn storage_changes_root(&mut self, _: &[u8]) -> Result<Option<Vec<u8>>, ()> {
|
||||
panic!("storage_changes_root: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn next_child_storage_key(&self, _: &ChildInfo, _: &[u8]) -> Option<Vec<u8>> {
|
||||
panic!("next_child_storage_key: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn next_storage_key(&self, _: &[u8]) -> Option<Vec<u8>> {
|
||||
panic!("next_storage_key: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn storage_append(&mut self, _key: Vec<u8>, _value: Vec<u8>) {
|
||||
panic!("storage_append: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn storage_start_transaction(&mut self) {
|
||||
panic!("storage_start_transaction: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn storage_rollback_transaction(&mut self) -> Result<(), ()> {
|
||||
panic!("storage_rollback_transaction: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn storage_commit_transaction(&mut self) -> Result<(), ()> {
|
||||
panic!("storage_commit_transaction: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn wipe(&mut self) {
|
||||
panic!("wipe: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn commit(&mut self) {
|
||||
panic!("commit: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn read_write_count(&self) -> (u32, u32, u32, u32) {
|
||||
panic!("read_write_count: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn reset_read_write_count(&mut self) {
|
||||
panic!("reset_read_write_count: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn get_whitelist(&self) -> Vec<TrackedStorageKey> {
|
||||
panic!("get_whitelist: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn set_whitelist(&mut self, _: Vec<TrackedStorageKey>) {
|
||||
panic!("set_whitelist: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn set_offchain_storage(&mut self, _: &[u8], _: std::option::Option<&[u8]>) {
|
||||
panic!("set_offchain_storage: unsupported feature for parachain validation")
|
||||
}
|
||||
}
|
||||
|
||||
impl sp_externalities::ExtensionStore for ValidationExternalities {
|
||||
fn extension_by_type_id(&mut self, type_id: TypeId) -> Option<&mut dyn Any> {
|
||||
self.0.get_mut(type_id)
|
||||
}
|
||||
|
||||
fn register_extension_with_type_id(
|
||||
&mut self,
|
||||
type_id: TypeId,
|
||||
extension: Box<dyn sp_externalities::Extension>,
|
||||
) -> Result<(), sp_externalities::Error> {
|
||||
self.0.register_with_type_id(type_id, extension)
|
||||
}
|
||||
|
||||
fn deregister_extension_by_type_id(
|
||||
&mut self,
|
||||
type_id: TypeId,
|
||||
) -> Result<(), sp_externalities::Error> {
|
||||
if self.0.deregister(type_id) {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(sp_externalities::Error::ExtensionIsNotRegistered(type_id))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An implementation of `SpawnNamed` on top of a futures' thread pool.
|
||||
///
|
||||
/// This is a light handle meaning it will only clone the handle not create a new thread pool.
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct TaskExecutor(futures::executor::ThreadPool);
|
||||
|
||||
impl TaskExecutor {
|
||||
pub(crate) fn new() -> Result<Self, String> {
|
||||
futures::executor::ThreadPoolBuilder::new()
|
||||
.pool_size(4)
|
||||
.name_prefix("pvf-task-executor")
|
||||
.create()
|
||||
.map_err(|e| e.to_string())
|
||||
.map(Self)
|
||||
}
|
||||
}
|
||||
|
||||
impl sp_core::traits::SpawnNamed for TaskExecutor {
|
||||
fn spawn_blocking(&self, _: &'static str, future: futures::future::BoxFuture<'static, ()>) {
|
||||
self.0.spawn_ok(future);
|
||||
}
|
||||
|
||||
fn spawn(&self, _: &'static str, future: futures::future::BoxFuture<'static, ()>) {
|
||||
self.0.spawn_ok(future);
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,100 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#![warn(missing_docs)]
|
||||
|
||||
//! A crate that implements PVF validation host.
|
||||
//!
|
||||
//! This crate provides a simple API. You first [`start`] the validation host, which gives you the
|
||||
//! [handle][`ValidationHost`] and the future you need to poll.
|
||||
//!
|
||||
//! Then using the handle the client can send two types of requests:
|
||||
//!
|
||||
//! (a) PVF execution. This accepts the PVF [params][`polkadot_parachain::primitives::ValidationParams`]
|
||||
//! and the PVF [code][`Pvf`], prepares (verifies and compiles) the code, and then executes PVF
|
||||
//! with the params.
|
||||
//!
|
||||
//! (b) Heads up. This request allows to signal that the given PVF may be needed soon and that it
|
||||
//! should be prepared for execution.
|
||||
//!
|
||||
//! The preparation results are cached for some time after they either used or was signalled in heads up.
|
||||
//! All requests that depends on preparation of the same PVF are bundled together and will be executed
|
||||
//! as soon as the artifact is prepared.
|
||||
//!
|
||||
//! # Priority
|
||||
//!
|
||||
//! PVF execution requests can specify the [priority][`Priority`] with which the given request should
|
||||
//! be handled. Different priority levels have different effects. This is discussed below.
|
||||
//!
|
||||
//! Preparation started by a heads up signal always starts in with the background priority. If there
|
||||
//! is already a request for that PVF preparation under way the priority is inherited. If after heads
|
||||
//! up, a new PVF execution request comes in with a higher priority, then the original task's priority
|
||||
//! will be adjusted to match the new one if it's larger.
|
||||
//!
|
||||
//! Priority can never go down, only up.
|
||||
//!
|
||||
//! # Under the hood
|
||||
//!
|
||||
//! Under the hood, the validation host is built using a bunch of communicating processes, not
|
||||
//! dissimilar to actors. Each of such "processes" is a future task that contains an event loop that
|
||||
//! processes incoming messages, potentially delegating sub-tasks to other "processes".
|
||||
//!
|
||||
//! Two of these processes are queues. The first one is for preparation jobs and the second one is for
|
||||
//! execution. Both of the queues are backed by separate pools of workers of different kind.
|
||||
//!
|
||||
//! Preparation workers handle preparation requests by preverifying and instrumenting PVF wasm code,
|
||||
//! and then passing it into the compiler, to prepare the artifact.
|
||||
//!
|
||||
//! Artifact is a final product of preparation. If the preparation succeeded, then the artifact will
|
||||
//! contain the compiled code usable for quick execution by a worker later on.
|
||||
//!
|
||||
//! If the preparation failed, then the worker will still write the artifact with the error message.
|
||||
//! We save the artifact with the error so that we don't try to prepare the artifacts that are broken
|
||||
//! repeatedly.
|
||||
//!
|
||||
//! The artifact is saved on disk and is also tracked by an in memory table. This in memory table
|
||||
//! doesn't contain the artifact contents though, only a flag that the given artifact is compiled.
|
||||
//!
|
||||
//! The execute workers will be fed by the requests from the execution queue, which is basically a
|
||||
//! combination of a path to the compiled artifact and the
|
||||
//! [params][`polkadot_parachain::primitives::ValidationParams`].
|
||||
//!
|
||||
//! Each fixed interval of time a pruning task will run. This task will remove all artifacts that
|
||||
//! weren't used or received a heads up signal for a while.
|
||||
|
||||
mod artifacts;
|
||||
mod error;
|
||||
mod execute;
|
||||
mod executor_intf;
|
||||
mod host;
|
||||
mod prepare;
|
||||
mod priority;
|
||||
mod pvf;
|
||||
mod worker_common;
|
||||
|
||||
#[doc(hidden)]
|
||||
pub mod testing;
|
||||
|
||||
pub use error::{ValidationError, InvalidCandidate};
|
||||
pub use priority::Priority;
|
||||
pub use pvf::Pvf;
|
||||
|
||||
pub use host::{start, Config, ValidationHost};
|
||||
|
||||
pub use execute::worker_entrypoint as execute_worker_entrypoint;
|
||||
pub use prepare::worker_entrypoint as prepare_worker_entrypoint;
|
||||
|
||||
const LOG_TARGET: &str = "parachain::pvf";
|
||||
@@ -0,0 +1,31 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Preparation part of pipeline
|
||||
//!
|
||||
//! The validation host spins up two processes: the queue (by running [`start_queue`]) and the pool
|
||||
//! (by running [`start_pool`]).
|
||||
//!
|
||||
//! The pool will spawn workers in new processes and those should execute pass control to
|
||||
//! [`worker_entrypoint`].
|
||||
|
||||
mod pool;
|
||||
mod queue;
|
||||
mod worker;
|
||||
|
||||
pub use queue::{ToQueue, FromQueue, start as start_queue};
|
||||
pub use pool::start as start_pool;
|
||||
pub use worker::worker_entrypoint;
|
||||
@@ -0,0 +1,336 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use crate::{
|
||||
worker_common::{IdleWorker, WorkerHandle},
|
||||
LOG_TARGET,
|
||||
};
|
||||
use super::{
|
||||
worker::{self, Outcome},
|
||||
};
|
||||
use std::{fmt, sync::Arc, task::Poll, time::Duration};
|
||||
use async_std::path::{Path, PathBuf};
|
||||
use futures::{
|
||||
Future, FutureExt, StreamExt, channel::mpsc, future::BoxFuture, stream::FuturesUnordered,
|
||||
};
|
||||
use slotmap::HopSlotMap;
|
||||
use assert_matches::assert_matches;
|
||||
use always_assert::never;
|
||||
|
||||
slotmap::new_key_type! { pub struct Worker; }
|
||||
|
||||
/// Messages that the pool handles.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum ToPool {
|
||||
/// Request a new worker to spawn.
|
||||
///
|
||||
/// This request won't fail in case if the worker cannot be created. Instead, we consider
|
||||
/// the failures transient and we try to spawn a worker after a delay.
|
||||
///
|
||||
/// [`FromPool::Spawned`] will be returned as soon as the worker is spawned.
|
||||
///
|
||||
/// The client should anticipate a [`FromPool::Rip`] message, in case the spawned worker was
|
||||
/// stopped for some reason.
|
||||
Spawn,
|
||||
|
||||
/// Kill the given worker. No-op if the given worker is not running.
|
||||
///
|
||||
/// [`FromPool::Rip`] won't be sent in this case. However, the client should be prepared to
|
||||
/// receive [`FromPool::Rip`] nonetheless, since the worker may be have been ripped before
|
||||
/// this message is processed.
|
||||
Kill(Worker),
|
||||
|
||||
/// If the given worker was started with the background priority, then it will be raised up to
|
||||
/// normal priority. Otherwise, it's no-op.
|
||||
BumpPriority(Worker),
|
||||
|
||||
/// Request the given worker to start working on the given code.
|
||||
///
|
||||
/// Once the job either succeeded or failed, a [`FromPool::Concluded`] message will be sent back.
|
||||
///
|
||||
/// This should not be sent again until the concluded message is received.
|
||||
StartWork {
|
||||
worker: Worker,
|
||||
code: Arc<Vec<u8>>,
|
||||
artifact_path: PathBuf,
|
||||
background_priority: bool,
|
||||
},
|
||||
}
|
||||
|
||||
/// A message sent from pool to its client.
|
||||
#[derive(Debug)]
|
||||
pub enum FromPool {
|
||||
/// The given worker was just spawned and is ready to be used.
|
||||
Spawned(Worker),
|
||||
|
||||
/// The given worker either succeeded or failed the given job. Under any circumstances the
|
||||
/// artifact file has been written. The bool says whether the worker ripped.
|
||||
Concluded(Worker, bool),
|
||||
|
||||
/// The given worker ceased to exist.
|
||||
Rip(Worker),
|
||||
}
|
||||
|
||||
struct WorkerData {
|
||||
idle: Option<IdleWorker>,
|
||||
handle: WorkerHandle,
|
||||
}
|
||||
|
||||
impl fmt::Debug for WorkerData {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "WorkerData(pid={})", self.handle.id())
|
||||
}
|
||||
}
|
||||
|
||||
enum PoolEvent {
|
||||
Spawn(IdleWorker, WorkerHandle),
|
||||
StartWork(Worker, Outcome),
|
||||
}
|
||||
|
||||
type Mux = FuturesUnordered<BoxFuture<'static, PoolEvent>>;
|
||||
|
||||
struct Pool {
|
||||
program_path: PathBuf,
|
||||
spawn_timeout: Duration,
|
||||
to_pool: mpsc::Receiver<ToPool>,
|
||||
from_pool: mpsc::UnboundedSender<FromPool>,
|
||||
spawned: HopSlotMap<Worker, WorkerData>,
|
||||
mux: Mux,
|
||||
}
|
||||
|
||||
/// A fatal error that warrants stopping the event loop of the pool.
|
||||
struct Fatal;
|
||||
|
||||
async fn run(
|
||||
Pool {
|
||||
program_path,
|
||||
spawn_timeout,
|
||||
to_pool,
|
||||
mut from_pool,
|
||||
mut spawned,
|
||||
mut mux,
|
||||
}: Pool,
|
||||
) {
|
||||
macro_rules! break_if_fatal {
|
||||
($expr:expr) => {
|
||||
match $expr {
|
||||
Err(Fatal) => break,
|
||||
Ok(v) => v,
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
let mut to_pool = to_pool.fuse();
|
||||
|
||||
loop {
|
||||
futures::select! {
|
||||
to_pool = to_pool.next() => {
|
||||
let to_pool = break_if_fatal!(to_pool.ok_or(Fatal));
|
||||
handle_to_pool(
|
||||
&program_path,
|
||||
spawn_timeout,
|
||||
&mut spawned,
|
||||
&mut mux,
|
||||
to_pool,
|
||||
)
|
||||
}
|
||||
ev = mux.select_next_some() => break_if_fatal!(handle_mux(&mut from_pool, &mut spawned, ev)),
|
||||
}
|
||||
|
||||
break_if_fatal!(purge_dead(&mut from_pool, &mut spawned).await);
|
||||
}
|
||||
}
|
||||
|
||||
async fn purge_dead(
|
||||
from_pool: &mut mpsc::UnboundedSender<FromPool>,
|
||||
spawned: &mut HopSlotMap<Worker, WorkerData>,
|
||||
) -> Result<(), Fatal> {
|
||||
let mut to_remove = vec![];
|
||||
for (worker, data) in spawned.iter_mut() {
|
||||
if data.idle.is_none() {
|
||||
// The idle token is missing, meaning this worker is now occupied: skip it. This is
|
||||
// because the worker process is observed by the work task and should it reach the
|
||||
// deadline or be terminated it will be handled by the corresponding mux event.
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Poll::Ready(()) = futures::poll!(&mut data.handle) {
|
||||
// a resolved future means that the worker has terminated. Weed it out.
|
||||
to_remove.push(worker);
|
||||
}
|
||||
}
|
||||
for w in to_remove {
|
||||
let _ = spawned.remove(w);
|
||||
reply(from_pool, FromPool::Rip(w))?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn handle_to_pool(
|
||||
program_path: &Path,
|
||||
spawn_timeout: Duration,
|
||||
spawned: &mut HopSlotMap<Worker, WorkerData>,
|
||||
mux: &mut Mux,
|
||||
to_pool: ToPool,
|
||||
) {
|
||||
match to_pool {
|
||||
ToPool::Spawn => {
|
||||
mux.push(spawn_worker_task(program_path.to_owned(), spawn_timeout).boxed());
|
||||
}
|
||||
ToPool::StartWork {
|
||||
worker,
|
||||
code,
|
||||
artifact_path,
|
||||
background_priority,
|
||||
} => {
|
||||
if let Some(data) = spawned.get_mut(worker) {
|
||||
if let Some(idle) = data.idle.take() {
|
||||
mux.push(
|
||||
start_work_task(worker, idle, code, artifact_path, background_priority)
|
||||
.boxed(),
|
||||
);
|
||||
} else {
|
||||
// idle token is present after spawn and after a job is concluded;
|
||||
// the precondition for `StartWork` is it should be sent only if all previous work
|
||||
// items concluded;
|
||||
// thus idle token is Some;
|
||||
// qed.
|
||||
never!("unexpected abscence of the idle token in prepare pool");
|
||||
}
|
||||
} else {
|
||||
// That's a relatively normal situation since the queue may send `start_work` and
|
||||
// before receiving it the pool would report that the worker died.
|
||||
}
|
||||
}
|
||||
ToPool::Kill(worker) => {
|
||||
// It may be absent if it were previously already removed by `purge_dead`.
|
||||
let _ = spawned.remove(worker);
|
||||
}
|
||||
ToPool::BumpPriority(worker) => {
|
||||
if let Some(data) = spawned.get(worker) {
|
||||
worker::bump_priority(&data.handle);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn spawn_worker_task(program_path: PathBuf, spawn_timeout: Duration) -> PoolEvent {
|
||||
use futures_timer::Delay;
|
||||
|
||||
loop {
|
||||
match worker::spawn(&program_path, spawn_timeout).await {
|
||||
Ok((idle, handle)) => break PoolEvent::Spawn(idle, handle),
|
||||
Err(err) => {
|
||||
tracing::warn!(
|
||||
target: LOG_TARGET,
|
||||
"failed to spawn a prepare worker: {:?}",
|
||||
err,
|
||||
);
|
||||
|
||||
// Assume that the failure intermittent and retry after a delay.
|
||||
Delay::new(Duration::from_secs(3)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn start_work_task(
|
||||
worker: Worker,
|
||||
idle: IdleWorker,
|
||||
code: Arc<Vec<u8>>,
|
||||
artifact_path: PathBuf,
|
||||
background_priority: bool,
|
||||
) -> PoolEvent {
|
||||
let outcome = worker::start_work(idle, code, artifact_path, background_priority).await;
|
||||
PoolEvent::StartWork(worker, outcome)
|
||||
}
|
||||
|
||||
fn handle_mux(
|
||||
from_pool: &mut mpsc::UnboundedSender<FromPool>,
|
||||
spawned: &mut HopSlotMap<Worker, WorkerData>,
|
||||
event: PoolEvent,
|
||||
) -> Result<(), Fatal> {
|
||||
match event {
|
||||
PoolEvent::Spawn(idle, handle) => {
|
||||
let worker = spawned.insert(WorkerData {
|
||||
idle: Some(idle),
|
||||
handle,
|
||||
});
|
||||
|
||||
reply(from_pool, FromPool::Spawned(worker))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
PoolEvent::StartWork(worker, outcome) => {
|
||||
match outcome {
|
||||
Outcome::Concluded(idle) => {
|
||||
let data = match spawned.get_mut(worker) {
|
||||
None => {
|
||||
// Perhaps the worker was killed meanwhile and the result is no longer
|
||||
// relevant.
|
||||
return Ok(());
|
||||
}
|
||||
Some(data) => data,
|
||||
};
|
||||
|
||||
// We just replace the idle worker that was loaned from this option during
|
||||
// the work starting.
|
||||
let old = data.idle.replace(idle);
|
||||
assert_matches!(old, None, "attempt to overwrite an idle worker");
|
||||
|
||||
reply(from_pool, FromPool::Concluded(worker, false))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Outcome::DidntMakeIt => {
|
||||
if let Some(_data) = spawned.remove(worker) {
|
||||
reply(from_pool, FromPool::Concluded(worker, true))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn reply(from_pool: &mut mpsc::UnboundedSender<FromPool>, m: FromPool) -> Result<(), Fatal> {
|
||||
from_pool.unbounded_send(m).map_err(|_| Fatal)
|
||||
}
|
||||
|
||||
/// Spins up the pool and returns the future that should be polled to make the pool functional.
|
||||
pub fn start(
|
||||
program_path: PathBuf,
|
||||
spawn_timeout: Duration,
|
||||
) -> (
|
||||
mpsc::Sender<ToPool>,
|
||||
mpsc::UnboundedReceiver<FromPool>,
|
||||
impl Future<Output = ()>,
|
||||
) {
|
||||
let (to_pool_tx, to_pool_rx) = mpsc::channel(10);
|
||||
let (from_pool_tx, from_pool_rx) = mpsc::unbounded();
|
||||
|
||||
let run = run(Pool {
|
||||
program_path,
|
||||
spawn_timeout,
|
||||
to_pool: to_pool_rx,
|
||||
from_pool: from_pool_tx,
|
||||
spawned: HopSlotMap::with_capacity_and_key(20),
|
||||
mux: Mux::new(),
|
||||
});
|
||||
|
||||
(to_pool_tx, from_pool_rx, run)
|
||||
}
|
||||
@@ -0,0 +1,894 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! A queue that handles requests for PVF preparation.
|
||||
|
||||
use super::{
|
||||
pool::{self, Worker},
|
||||
};
|
||||
use crate::{LOG_TARGET, Priority, Pvf, artifacts::ArtifactId};
|
||||
use futures::{Future, SinkExt, channel::mpsc, stream::StreamExt as _};
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use async_std::path::PathBuf;
|
||||
use always_assert::{always, never};
|
||||
|
||||
/// A request to pool.
|
||||
#[derive(Debug)]
|
||||
pub enum ToQueue {
|
||||
/// This schedules preparation of the given PVF.
|
||||
///
|
||||
/// Note that it is incorrect to enqueue the same PVF again without first receiving the
|
||||
/// [`FromQueue::Prepared`] response. In case there is a need to bump the priority, use
|
||||
/// [`ToQueue::Amend`].
|
||||
Enqueue { priority: Priority, pvf: Pvf },
|
||||
/// Amends the priority for the given [`ArtifactId`] if it is running. If it's not, then it's noop.
|
||||
Amend {
|
||||
priority: Priority,
|
||||
artifact_id: ArtifactId,
|
||||
},
|
||||
}
|
||||
|
||||
/// A response from queue.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum FromQueue {
|
||||
Prepared(ArtifactId),
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
struct Limits {
|
||||
/// The maximum number of workers this pool can ever host. This is expected to be a small
|
||||
/// number, e.g. within a dozen.
|
||||
hard_capacity: usize,
|
||||
|
||||
/// The number of workers we want aim to have. If there is a critical job and we are already
|
||||
/// at `soft_capacity`, we are allowed to grow up to `hard_capacity`. Thus this should be equal
|
||||
/// or smaller than `hard_capacity`.
|
||||
soft_capacity: usize,
|
||||
}
|
||||
|
||||
impl Limits {
|
||||
/// Returns `true` if the queue is allowed to request one more worker.
|
||||
fn can_afford_one_more(&self, spawned_num: usize, critical: bool) -> bool {
|
||||
let cap = if critical {
|
||||
self.hard_capacity
|
||||
} else {
|
||||
self.soft_capacity
|
||||
};
|
||||
spawned_num < cap
|
||||
}
|
||||
|
||||
/// Offer the worker back to the pool. The passed worker ID must be considered unusable unless
|
||||
/// it wasn't taken by the pool, in which case it will be returned as `Some`.
|
||||
fn should_cull(&mut self, spawned_num: usize) -> bool {
|
||||
spawned_num > self.soft_capacity
|
||||
}
|
||||
}
|
||||
|
||||
slotmap::new_key_type! { pub struct Job; }
|
||||
|
||||
struct JobData {
|
||||
/// The priority of this job. Can be bumped.
|
||||
priority: Priority,
|
||||
pvf: Pvf,
|
||||
worker: Option<Worker>,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
struct WorkerData {
|
||||
job: Option<Job>,
|
||||
}
|
||||
|
||||
impl WorkerData {
|
||||
fn is_idle(&self) -> bool {
|
||||
self.job.is_none()
|
||||
}
|
||||
}
|
||||
|
||||
/// A queue structured like this is prone to starving, however, we don't care that much since we expect
|
||||
/// there is going to be a limited number of critical jobs and we don't really care if background starve.
|
||||
#[derive(Default)]
|
||||
struct Unscheduled {
|
||||
background: VecDeque<Job>,
|
||||
normal: VecDeque<Job>,
|
||||
critical: VecDeque<Job>,
|
||||
}
|
||||
|
||||
impl Unscheduled {
|
||||
fn queue_mut(&mut self, prio: Priority) -> &mut VecDeque<Job> {
|
||||
match prio {
|
||||
Priority::Background => &mut self.background,
|
||||
Priority::Normal => &mut self.normal,
|
||||
Priority::Critical => &mut self.critical,
|
||||
}
|
||||
}
|
||||
|
||||
fn add(&mut self, prio: Priority, job: Job) {
|
||||
self.queue_mut(prio).push_back(job);
|
||||
}
|
||||
|
||||
fn readd(&mut self, prio: Priority, job: Job) {
|
||||
self.queue_mut(prio).push_front(job);
|
||||
}
|
||||
|
||||
fn is_empty(&self) -> bool {
|
||||
self.background.is_empty() && self.normal.is_empty() && self.critical.is_empty()
|
||||
}
|
||||
|
||||
fn next(&mut self) -> Option<Job> {
|
||||
let mut check = |prio: Priority| self.queue_mut(prio).pop_front();
|
||||
check(Priority::Critical)
|
||||
.or_else(|| check(Priority::Normal))
|
||||
.or_else(|| check(Priority::Background))
|
||||
}
|
||||
}
|
||||
|
||||
struct Queue {
|
||||
to_queue_rx: mpsc::Receiver<ToQueue>,
|
||||
from_queue_tx: mpsc::UnboundedSender<FromQueue>,
|
||||
|
||||
to_pool_tx: mpsc::Sender<pool::ToPool>,
|
||||
from_pool_rx: mpsc::UnboundedReceiver<pool::FromPool>,
|
||||
|
||||
cache_path: PathBuf,
|
||||
limits: Limits,
|
||||
|
||||
jobs: slotmap::SlotMap<Job, JobData>,
|
||||
|
||||
/// A mapping from artifact id to a job.
|
||||
artifact_id_to_job: HashMap<ArtifactId, Job>,
|
||||
/// The registry of all workers.
|
||||
workers: slotmap::SparseSecondaryMap<Worker, WorkerData>,
|
||||
/// The number of workers requested to spawn but not yet spawned.
|
||||
spawn_inflight: usize,
|
||||
|
||||
/// The jobs that are not yet scheduled. These are waiting until the next `poll` where they are
|
||||
/// processed all at once.
|
||||
unscheduled: Unscheduled,
|
||||
}
|
||||
|
||||
/// A fatal error that warrants stopping the queue.
|
||||
struct Fatal;
|
||||
|
||||
impl Queue {
|
||||
fn new(
|
||||
soft_capacity: usize,
|
||||
hard_capacity: usize,
|
||||
cache_path: PathBuf,
|
||||
to_queue_rx: mpsc::Receiver<ToQueue>,
|
||||
from_queue_tx: mpsc::UnboundedSender<FromQueue>,
|
||||
to_pool_tx: mpsc::Sender<pool::ToPool>,
|
||||
from_pool_rx: mpsc::UnboundedReceiver<pool::FromPool>,
|
||||
) -> Self {
|
||||
Self {
|
||||
to_queue_rx,
|
||||
from_queue_tx,
|
||||
to_pool_tx,
|
||||
from_pool_rx,
|
||||
cache_path,
|
||||
spawn_inflight: 0,
|
||||
limits: Limits {
|
||||
hard_capacity,
|
||||
soft_capacity,
|
||||
},
|
||||
jobs: slotmap::SlotMap::with_key(),
|
||||
unscheduled: Unscheduled::default(),
|
||||
artifact_id_to_job: HashMap::new(),
|
||||
workers: slotmap::SparseSecondaryMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
async fn run(mut self) {
|
||||
macro_rules! break_if_fatal {
|
||||
($expr:expr) => {
|
||||
if let Err(Fatal) = $expr {
|
||||
break;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
loop {
|
||||
// biased to make it behave deterministically for tests.
|
||||
futures::select_biased! {
|
||||
to_queue = self.to_queue_rx.select_next_some() =>
|
||||
break_if_fatal!(handle_to_queue(&mut self, to_queue).await),
|
||||
from_pool = self.from_pool_rx.select_next_some() =>
|
||||
break_if_fatal!(handle_from_pool(&mut self, from_pool).await),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_to_queue(queue: &mut Queue, to_queue: ToQueue) -> Result<(), Fatal> {
|
||||
match to_queue {
|
||||
ToQueue::Enqueue { priority, pvf } => {
|
||||
handle_enqueue(queue, priority, pvf).await?;
|
||||
}
|
||||
ToQueue::Amend {
|
||||
priority,
|
||||
artifact_id,
|
||||
} => {
|
||||
handle_amend(queue, priority, artifact_id).await?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_enqueue(queue: &mut Queue, priority: Priority, pvf: Pvf) -> Result<(), Fatal> {
|
||||
let artifact_id = pvf.as_artifact_id();
|
||||
if never!(
|
||||
queue.artifact_id_to_job.contains_key(&artifact_id),
|
||||
"second Enqueue sent for a known artifact"
|
||||
) {
|
||||
// This function is called in response to a `Enqueue` message;
|
||||
// Precondtion for `Enqueue` is that it is sent only once for a PVF;
|
||||
// Thus this should always be `false`;
|
||||
// qed.
|
||||
tracing::warn!(
|
||||
target: LOG_TARGET,
|
||||
"duplicate `enqueue` command received for {:?}",
|
||||
artifact_id,
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let job = queue.jobs.insert(JobData {
|
||||
priority,
|
||||
pvf,
|
||||
worker: None,
|
||||
});
|
||||
queue.artifact_id_to_job.insert(artifact_id, job);
|
||||
|
||||
if let Some(available) = find_idle_worker(queue) {
|
||||
// This may seem not fair (w.r.t priority) on the first glance, but it should be. This is
|
||||
// because as soon as a worker finishes with the job it's immediatelly given the next one.
|
||||
assign(queue, available, job).await?;
|
||||
} else {
|
||||
spawn_extra_worker(queue, priority.is_critical()).await?;
|
||||
queue.unscheduled.add(priority, job);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn find_idle_worker(queue: &mut Queue) -> Option<Worker> {
|
||||
queue
|
||||
.workers
|
||||
.iter()
|
||||
.filter(|(_, data)| data.is_idle())
|
||||
.map(|(k, _)| k)
|
||||
.next()
|
||||
}
|
||||
|
||||
async fn handle_amend(
|
||||
queue: &mut Queue,
|
||||
priority: Priority,
|
||||
artifact_id: ArtifactId,
|
||||
) -> Result<(), Fatal> {
|
||||
if let Some(&job) = queue.artifact_id_to_job.get(&artifact_id) {
|
||||
let mut job_data: &mut JobData = &mut queue.jobs[job];
|
||||
|
||||
if job_data.priority < priority {
|
||||
// The new priority is higher. We should do two things:
|
||||
// - if the worker was already spawned with the background prio and the new one is not
|
||||
// (it's already the case, if we are in this branch but we still do the check for
|
||||
// clarity), then we should tell the pool to bump the priority for the worker.
|
||||
//
|
||||
// - save the new priority in the job.
|
||||
|
||||
if let Some(worker) = job_data.worker {
|
||||
if job_data.priority.is_background() && !priority.is_background() {
|
||||
send_pool(&mut queue.to_pool_tx, pool::ToPool::BumpPriority(worker)).await?;
|
||||
}
|
||||
}
|
||||
|
||||
job_data.priority = priority;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_from_pool(queue: &mut Queue, from_pool: pool::FromPool) -> Result<(), Fatal> {
|
||||
use pool::FromPool::*;
|
||||
match from_pool {
|
||||
Spawned(worker) => handle_worker_spawned(queue, worker).await?,
|
||||
Concluded(worker, rip) => handle_worker_concluded(queue, worker, rip).await?,
|
||||
Rip(worker) => handle_worker_rip(queue, worker).await?,
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_worker_spawned(queue: &mut Queue, worker: Worker) -> Result<(), Fatal> {
|
||||
queue.workers.insert(worker, WorkerData::default());
|
||||
queue.spawn_inflight -= 1;
|
||||
|
||||
if let Some(job) = queue.unscheduled.next() {
|
||||
assign(queue, worker, job).await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_worker_concluded(
|
||||
queue: &mut Queue,
|
||||
worker: Worker,
|
||||
rip: bool,
|
||||
) -> Result<(), Fatal> {
|
||||
macro_rules! never_none {
|
||||
($expr:expr) => {
|
||||
match $expr {
|
||||
Some(v) => v,
|
||||
None => {
|
||||
// Precondition of calling this is that the $expr is never none;
|
||||
// Assume the conditions holds, then this never is not hit;
|
||||
// qed.
|
||||
never!("never_none, {}", stringify!($expr));
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// Find out on which artifact was the worker working.
|
||||
|
||||
// workers are registered upon spawn and removed in one of the following cases:
|
||||
// 1. received rip signal
|
||||
// 2. received concluded signal with rip=true;
|
||||
// concluded signal only comes from a spawned worker and only once;
|
||||
// rip signal is not sent after conclusion with rip=true;
|
||||
// the worker should be registered;
|
||||
// this can't be None;
|
||||
// qed.
|
||||
let worker_data = never_none!(queue.workers.get_mut(worker));
|
||||
|
||||
// worker_data.job is set only by `assign` and removed only here for a worker;
|
||||
// concluded signal only comes for a worker that was previously assigned and only once;
|
||||
// the worker should have the job;
|
||||
// this can't be None;
|
||||
// qed.
|
||||
let job = never_none!(worker_data.job.take());
|
||||
|
||||
// job_data is inserted upon enqueue and removed only here;
|
||||
// as was established above, this worker was previously `assign`ed to the job;
|
||||
// that implies that the job was enqueued;
|
||||
// conclude signal only comes once;
|
||||
// we are just to remove the job for the first and the only time;
|
||||
// this can't be None;
|
||||
// qed.
|
||||
let job_data = never_none!(queue.jobs.remove(job));
|
||||
let artifact_id = job_data.pvf.as_artifact_id();
|
||||
|
||||
queue.artifact_id_to_job.remove(&artifact_id);
|
||||
|
||||
reply(&mut queue.from_queue_tx, FromQueue::Prepared(artifact_id))?;
|
||||
|
||||
// Figure out what to do with the worker.
|
||||
if rip {
|
||||
let worker_data = queue.workers.remove(worker);
|
||||
// worker should exist, it's asserted above;
|
||||
// qed.
|
||||
always!(worker_data.is_some());
|
||||
|
||||
if !queue.unscheduled.is_empty() {
|
||||
// That is unconditionally not critical just to not accidentally fill up
|
||||
// the pool up to the hard cap.
|
||||
spawn_extra_worker(queue, false).await?;
|
||||
}
|
||||
} else {
|
||||
if queue
|
||||
.limits
|
||||
.should_cull(queue.workers.len() + queue.spawn_inflight)
|
||||
{
|
||||
// We no longer need services of this worker. Kill it.
|
||||
queue.workers.remove(worker);
|
||||
send_pool(&mut queue.to_pool_tx, pool::ToPool::Kill(worker)).await?;
|
||||
} else {
|
||||
// see if there are more work available and schedule it.
|
||||
if let Some(job) = queue.unscheduled.next() {
|
||||
assign(queue, worker, job).await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_worker_rip(queue: &mut Queue, worker: Worker) -> Result<(), Fatal> {
|
||||
let worker_data = queue.workers.remove(worker);
|
||||
|
||||
if let Some(WorkerData { job: Some(job), .. }) = worker_data {
|
||||
// This is an edge case where the worker ripped after we sent assignment but before it
|
||||
// was received by the pool.
|
||||
let priority = queue
|
||||
.jobs
|
||||
.get(job)
|
||||
.map(|data| data.priority)
|
||||
.unwrap_or_else(|| {
|
||||
// job is inserted upon enqueue and removed on concluded signal;
|
||||
// this is enclosed in the if statement that narrows the situation to before
|
||||
// conclusion;
|
||||
// that means that the job still exists and is known;
|
||||
// this path cannot be hit;
|
||||
// qed.
|
||||
never!("the job of the ripped worker must be known but it is not");
|
||||
Priority::Normal
|
||||
});
|
||||
queue.unscheduled.readd(priority, job);
|
||||
}
|
||||
|
||||
// If there are still jobs left, spawn another worker to replace the ripped one (but only if it
|
||||
// was indeed removed). That is unconditionally not critical just to not accidentally fill up
|
||||
// the pool up to the hard cap.
|
||||
if worker_data.is_some() && !queue.unscheduled.is_empty() {
|
||||
spawn_extra_worker(queue, false).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Spawns an extra worker if possible.
|
||||
async fn spawn_extra_worker(queue: &mut Queue, critical: bool) -> Result<(), Fatal> {
|
||||
if queue
|
||||
.limits
|
||||
.can_afford_one_more(queue.workers.len() + queue.spawn_inflight, critical)
|
||||
{
|
||||
queue.spawn_inflight += 1;
|
||||
send_pool(&mut queue.to_pool_tx, pool::ToPool::Spawn).await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Attaches the work to the given worker telling the poll about the job.
|
||||
async fn assign(queue: &mut Queue, worker: Worker, job: Job) -> Result<(), Fatal> {
|
||||
let job_data = &mut queue.jobs[job];
|
||||
|
||||
let artifact_id = job_data.pvf.as_artifact_id();
|
||||
let artifact_path = artifact_id.path(&queue.cache_path);
|
||||
|
||||
job_data.worker = Some(worker);
|
||||
|
||||
queue.workers[worker].job = Some(job);
|
||||
|
||||
send_pool(
|
||||
&mut queue.to_pool_tx,
|
||||
pool::ToPool::StartWork {
|
||||
worker,
|
||||
code: job_data.pvf.code.clone(),
|
||||
artifact_path,
|
||||
background_priority: job_data.priority.is_background(),
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn reply(from_queue_tx: &mut mpsc::UnboundedSender<FromQueue>, m: FromQueue) -> Result<(), Fatal> {
|
||||
from_queue_tx.unbounded_send(m).map_err(|_| {
|
||||
// The host has hung up and thus it's fatal and we should shutdown ourselves.
|
||||
Fatal
|
||||
})
|
||||
}
|
||||
|
||||
async fn send_pool(
|
||||
to_pool_tx: &mut mpsc::Sender<pool::ToPool>,
|
||||
m: pool::ToPool,
|
||||
) -> Result<(), Fatal> {
|
||||
to_pool_tx.send(m).await.map_err(|_| {
|
||||
// The pool has hung up and thus we are no longer are able to fulfill our duties. Shutdown.
|
||||
Fatal
|
||||
})
|
||||
}
|
||||
|
||||
/// Spins up the queue and returns the future that should be polled to make the queue functional.
|
||||
pub fn start(
|
||||
soft_capacity: usize,
|
||||
hard_capacity: usize,
|
||||
cache_path: PathBuf,
|
||||
to_pool_tx: mpsc::Sender<pool::ToPool>,
|
||||
from_pool_rx: mpsc::UnboundedReceiver<pool::FromPool>,
|
||||
) -> (
|
||||
mpsc::Sender<ToQueue>,
|
||||
mpsc::UnboundedReceiver<FromQueue>,
|
||||
impl Future<Output = ()>,
|
||||
) {
|
||||
let (to_queue_tx, to_queue_rx) = mpsc::channel(150);
|
||||
let (from_queue_tx, from_queue_rx) = mpsc::unbounded();
|
||||
|
||||
let run = Queue::new(
|
||||
soft_capacity,
|
||||
hard_capacity,
|
||||
cache_path,
|
||||
to_queue_rx,
|
||||
from_queue_tx,
|
||||
to_pool_tx,
|
||||
from_pool_rx,
|
||||
)
|
||||
.run();
|
||||
|
||||
(to_queue_tx, from_queue_rx, run)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use slotmap::SlotMap;
|
||||
use assert_matches::assert_matches;
|
||||
use futures::{FutureExt, future::BoxFuture};
|
||||
use std::task::Poll;
|
||||
use super::*;
|
||||
|
||||
/// Creates a new pvf which artifact id can be uniquely identified by the given number.
|
||||
fn pvf(descriminator: u32) -> Pvf {
|
||||
Pvf::from_discriminator(descriminator)
|
||||
}
|
||||
|
||||
async fn run_until<R>(
|
||||
task: &mut (impl Future<Output = ()> + Unpin),
|
||||
mut fut: (impl Future<Output = R> + Unpin),
|
||||
) -> R {
|
||||
let start = std::time::Instant::now();
|
||||
let fut = &mut fut;
|
||||
loop {
|
||||
if start.elapsed() > std::time::Duration::from_secs(1) {
|
||||
// We expect that this will take only a couple of iterations and thus to take way
|
||||
// less than a second.
|
||||
panic!("timeout");
|
||||
}
|
||||
|
||||
if let Poll::Ready(r) = futures::poll!(&mut *fut) {
|
||||
break r;
|
||||
}
|
||||
|
||||
if futures::poll!(&mut *task).is_ready() {
|
||||
panic!()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct Test {
|
||||
_tempdir: tempfile::TempDir,
|
||||
run: BoxFuture<'static, ()>,
|
||||
workers: SlotMap<Worker, ()>,
|
||||
from_pool_tx: mpsc::UnboundedSender<pool::FromPool>,
|
||||
to_pool_rx: mpsc::Receiver<pool::ToPool>,
|
||||
to_queue_tx: mpsc::Sender<ToQueue>,
|
||||
from_queue_rx: mpsc::UnboundedReceiver<FromQueue>,
|
||||
}
|
||||
|
||||
impl Test {
|
||||
fn new(soft_capacity: usize, hard_capacity: usize) -> Self {
|
||||
let tempdir = tempfile::tempdir().unwrap();
|
||||
|
||||
let (to_pool_tx, to_pool_rx) = mpsc::channel(10);
|
||||
let (from_pool_tx, from_pool_rx) = mpsc::unbounded();
|
||||
|
||||
let workers: SlotMap<Worker, ()> = SlotMap::with_key();
|
||||
|
||||
let (to_queue_tx, from_queue_rx, run) = start(
|
||||
soft_capacity,
|
||||
hard_capacity,
|
||||
tempdir.path().to_owned().into(),
|
||||
to_pool_tx,
|
||||
from_pool_rx,
|
||||
);
|
||||
|
||||
Self {
|
||||
_tempdir: tempdir,
|
||||
run: run.boxed(),
|
||||
workers,
|
||||
from_pool_tx,
|
||||
to_pool_rx,
|
||||
to_queue_tx,
|
||||
from_queue_rx,
|
||||
}
|
||||
}
|
||||
|
||||
fn send_queue(&mut self, to_queue: ToQueue) {
|
||||
self.to_queue_tx
|
||||
.send(to_queue)
|
||||
.now_or_never()
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
async fn poll_and_recv_from_queue(&mut self) -> FromQueue {
|
||||
let from_queue_rx = &mut self.from_queue_rx;
|
||||
run_until(
|
||||
&mut self.run,
|
||||
async { from_queue_rx.next().await.unwrap() }.boxed(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
fn send_from_pool(&mut self, from_pool: pool::FromPool) {
|
||||
self.from_pool_tx
|
||||
.send(from_pool)
|
||||
.now_or_never()
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
async fn poll_and_recv_to_pool(&mut self) -> pool::ToPool {
|
||||
let to_pool_rx = &mut self.to_pool_rx;
|
||||
run_until(
|
||||
&mut self.run,
|
||||
async { to_pool_rx.next().await.unwrap() }.boxed(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn poll_ensure_to_pool_is_empty(&mut self) {
|
||||
use futures_timer::Delay;
|
||||
use std::time::Duration;
|
||||
|
||||
let to_pool_rx = &mut self.to_pool_rx;
|
||||
run_until(
|
||||
&mut self.run,
|
||||
async {
|
||||
futures::select! {
|
||||
_ = Delay::new(Duration::from_millis(500)).fuse() => (),
|
||||
_ = to_pool_rx.next().fuse() => {
|
||||
panic!("to pool supposed to be empty")
|
||||
}
|
||||
}
|
||||
}
|
||||
.boxed(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
#[async_std::test]
|
||||
async fn properly_concludes() {
|
||||
let mut test = Test::new(2, 2);
|
||||
|
||||
test.send_queue(ToQueue::Enqueue {
|
||||
priority: Priority::Background,
|
||||
pvf: pvf(1),
|
||||
});
|
||||
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
|
||||
|
||||
let w = test.workers.insert(());
|
||||
test.send_from_pool(pool::FromPool::Spawned(w));
|
||||
test.send_from_pool(pool::FromPool::Concluded(w, false));
|
||||
|
||||
assert_eq!(
|
||||
test.poll_and_recv_from_queue().await,
|
||||
FromQueue::Prepared(pvf(1).as_artifact_id())
|
||||
);
|
||||
}
|
||||
|
||||
#[async_std::test]
|
||||
async fn dont_spawn_over_soft_limit_unless_critical() {
|
||||
let mut test = Test::new(2, 3);
|
||||
|
||||
test.send_queue(ToQueue::Enqueue {
|
||||
priority: Priority::Normal,
|
||||
pvf: pvf(1),
|
||||
});
|
||||
test.send_queue(ToQueue::Enqueue {
|
||||
priority: Priority::Normal,
|
||||
pvf: pvf(2),
|
||||
});
|
||||
test.send_queue(ToQueue::Enqueue {
|
||||
priority: Priority::Normal,
|
||||
pvf: pvf(3),
|
||||
});
|
||||
|
||||
// Receive only two spawns.
|
||||
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
|
||||
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
|
||||
|
||||
let w1 = test.workers.insert(());
|
||||
let w2 = test.workers.insert(());
|
||||
|
||||
test.send_from_pool(pool::FromPool::Spawned(w1));
|
||||
test.send_from_pool(pool::FromPool::Spawned(w2));
|
||||
|
||||
// Get two start works.
|
||||
assert_matches!(
|
||||
test.poll_and_recv_to_pool().await,
|
||||
pool::ToPool::StartWork { .. }
|
||||
);
|
||||
assert_matches!(
|
||||
test.poll_and_recv_to_pool().await,
|
||||
pool::ToPool::StartWork { .. }
|
||||
);
|
||||
|
||||
test.send_from_pool(pool::FromPool::Concluded(w1, false));
|
||||
|
||||
assert_matches!(
|
||||
test.poll_and_recv_to_pool().await,
|
||||
pool::ToPool::StartWork { .. }
|
||||
);
|
||||
|
||||
// Enqueue a critical job.
|
||||
test.send_queue(ToQueue::Enqueue {
|
||||
priority: Priority::Critical,
|
||||
pvf: pvf(4),
|
||||
});
|
||||
|
||||
// 2 out of 2 are working, but there is a critical job incoming. That means that spawning
|
||||
// another worker is warranted.
|
||||
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
|
||||
}
|
||||
|
||||
#[async_std::test]
|
||||
async fn cull_unwanted() {
|
||||
let mut test = Test::new(1, 2);
|
||||
|
||||
test.send_queue(ToQueue::Enqueue {
|
||||
priority: Priority::Normal,
|
||||
pvf: pvf(1),
|
||||
});
|
||||
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
|
||||
let w1 = test.workers.insert(());
|
||||
test.send_from_pool(pool::FromPool::Spawned(w1));
|
||||
assert_matches!(
|
||||
test.poll_and_recv_to_pool().await,
|
||||
pool::ToPool::StartWork { .. }
|
||||
);
|
||||
|
||||
// Enqueue a critical job, which warrants spawning over the soft limit.
|
||||
test.send_queue(ToQueue::Enqueue {
|
||||
priority: Priority::Critical,
|
||||
pvf: pvf(2),
|
||||
});
|
||||
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
|
||||
|
||||
// However, before the new worker had a chance to spawn, the first worker finishes with its
|
||||
// job. The old worker will be killed while the new worker will be let live, even though
|
||||
// it's not instantiated.
|
||||
//
|
||||
// That's a bit silly in this context, but in production there will be an entire pool up
|
||||
// to the `soft_capacity` of workers and it doesn't matter which one to cull. Either way,
|
||||
// we just check that edge case of an edge case works.
|
||||
test.send_from_pool(pool::FromPool::Concluded(w1, false));
|
||||
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Kill(w1));
|
||||
}
|
||||
|
||||
#[async_std::test]
|
||||
async fn bump_prio_on_urgency_change() {
|
||||
let mut test = Test::new(2, 2);
|
||||
|
||||
test.send_queue(ToQueue::Enqueue {
|
||||
priority: Priority::Background,
|
||||
pvf: pvf(1),
|
||||
});
|
||||
|
||||
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
|
||||
|
||||
let w = test.workers.insert(());
|
||||
test.send_from_pool(pool::FromPool::Spawned(w));
|
||||
|
||||
assert_matches!(
|
||||
test.poll_and_recv_to_pool().await,
|
||||
pool::ToPool::StartWork { .. }
|
||||
);
|
||||
test.send_queue(ToQueue::Amend {
|
||||
priority: Priority::Normal,
|
||||
artifact_id: pvf(1).as_artifact_id(),
|
||||
});
|
||||
|
||||
assert_eq!(
|
||||
test.poll_and_recv_to_pool().await,
|
||||
pool::ToPool::BumpPriority(w)
|
||||
);
|
||||
}
|
||||
|
||||
#[async_std::test]
|
||||
async fn worker_mass_die_out_doesnt_stall_queue() {
|
||||
let mut test = Test::new(2, 2);
|
||||
|
||||
test.send_queue(ToQueue::Enqueue {
|
||||
priority: Priority::Normal,
|
||||
pvf: pvf(1),
|
||||
});
|
||||
test.send_queue(ToQueue::Enqueue {
|
||||
priority: Priority::Normal,
|
||||
pvf: pvf(2),
|
||||
});
|
||||
test.send_queue(ToQueue::Enqueue {
|
||||
priority: Priority::Normal,
|
||||
pvf: pvf(3),
|
||||
});
|
||||
|
||||
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
|
||||
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
|
||||
|
||||
let w1 = test.workers.insert(());
|
||||
let w2 = test.workers.insert(());
|
||||
|
||||
test.send_from_pool(pool::FromPool::Spawned(w1));
|
||||
test.send_from_pool(pool::FromPool::Spawned(w2));
|
||||
|
||||
assert_matches!(
|
||||
test.poll_and_recv_to_pool().await,
|
||||
pool::ToPool::StartWork { .. }
|
||||
);
|
||||
assert_matches!(
|
||||
test.poll_and_recv_to_pool().await,
|
||||
pool::ToPool::StartWork { .. }
|
||||
);
|
||||
|
||||
// Conclude worker 1 and rip it.
|
||||
test.send_from_pool(pool::FromPool::Concluded(w1, true));
|
||||
|
||||
// Since there is still work, the queue requested one extra worker to spawn to handle the
|
||||
// remaining enqueued work items.
|
||||
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
|
||||
assert_eq!(
|
||||
test.poll_and_recv_from_queue().await,
|
||||
FromQueue::Prepared(pvf(1).as_artifact_id())
|
||||
);
|
||||
}
|
||||
|
||||
#[async_std::test]
|
||||
async fn doesnt_resurrect_ripped_worker_if_no_work() {
|
||||
let mut test = Test::new(2, 2);
|
||||
|
||||
test.send_queue(ToQueue::Enqueue {
|
||||
priority: Priority::Normal,
|
||||
pvf: pvf(1),
|
||||
});
|
||||
|
||||
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
|
||||
|
||||
let w1 = test.workers.insert(());
|
||||
test.send_from_pool(pool::FromPool::Spawned(w1));
|
||||
|
||||
assert_matches!(
|
||||
test.poll_and_recv_to_pool().await,
|
||||
pool::ToPool::StartWork { .. }
|
||||
);
|
||||
|
||||
test.send_from_pool(pool::FromPool::Concluded(w1, true));
|
||||
test.poll_ensure_to_pool_is_empty().await;
|
||||
}
|
||||
|
||||
#[async_std::test]
|
||||
async fn rip_for_start_work() {
|
||||
let mut test = Test::new(2, 2);
|
||||
|
||||
test.send_queue(ToQueue::Enqueue {
|
||||
priority: Priority::Normal,
|
||||
pvf: pvf(1),
|
||||
});
|
||||
|
||||
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
|
||||
|
||||
let w1 = test.workers.insert(());
|
||||
test.send_from_pool(pool::FromPool::Spawned(w1));
|
||||
|
||||
// Now, to the interesting part. After the queue normally issues the start_work command to
|
||||
// the pool, before receiving the command the queue may report that the worker ripped.
|
||||
assert_matches!(
|
||||
test.poll_and_recv_to_pool().await,
|
||||
pool::ToPool::StartWork { .. }
|
||||
);
|
||||
test.send_from_pool(pool::FromPool::Rip(w1));
|
||||
|
||||
// In this case, the pool should spawn a new worker and request it to work on the item.
|
||||
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
|
||||
|
||||
let w2 = test.workers.insert(());
|
||||
test.send_from_pool(pool::FromPool::Spawned(w2));
|
||||
assert_matches!(
|
||||
test.poll_and_recv_to_pool().await,
|
||||
pool::ToPool::StartWork { .. }
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,213 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use crate::{
|
||||
LOG_TARGET,
|
||||
artifacts::Artifact,
|
||||
worker_common::{
|
||||
IdleWorker, SpawnErr, WorkerHandle, bytes_to_path, framed_recv, framed_send, path_to_bytes,
|
||||
spawn_with_program_path, tmpfile, worker_event_loop,
|
||||
},
|
||||
};
|
||||
use async_std::{
|
||||
io,
|
||||
os::unix::net::UnixStream,
|
||||
path::{PathBuf, Path},
|
||||
};
|
||||
use futures::FutureExt as _;
|
||||
use futures_timer::Delay;
|
||||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
const NICENESS_BACKGROUND: i32 = 10;
|
||||
const NICENESS_FOREGROUND: i32 = 0;
|
||||
|
||||
const COMPILATION_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
|
||||
/// Spawns a new worker with the given program path that acts as the worker and the spawn timeout.
|
||||
///
|
||||
/// The program should be able to handle `<program-path> prepare-worker <socket-path>` invocation.
|
||||
pub async fn spawn(
|
||||
program_path: &Path,
|
||||
spawn_timeout: Duration,
|
||||
) -> Result<(IdleWorker, WorkerHandle), SpawnErr> {
|
||||
spawn_with_program_path(
|
||||
"prepare",
|
||||
program_path,
|
||||
&["prepare-worker"],
|
||||
spawn_timeout,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
pub enum Outcome {
|
||||
/// The worker has finished the work assigned to it.
|
||||
Concluded(IdleWorker),
|
||||
/// The execution was interrupted abruptly and the worker is not available anymore. For example,
|
||||
/// this could've happen because the worker hadn't finished the work until the given deadline.
|
||||
///
|
||||
/// Note that in this case the artifact file is written (unless there was an error writing the
|
||||
/// the artifact).
|
||||
///
|
||||
/// This doesn't return an idle worker instance, thus this worker is no longer usable.
|
||||
DidntMakeIt,
|
||||
}
|
||||
|
||||
/// Given the idle token of a worker and parameters of work, communicates with the worker and
|
||||
/// returns the outcome.
|
||||
pub async fn start_work(
|
||||
worker: IdleWorker,
|
||||
code: Arc<Vec<u8>>,
|
||||
artifact_path: PathBuf,
|
||||
background_priority: bool,
|
||||
) -> Outcome {
|
||||
let IdleWorker { mut stream, pid } = worker;
|
||||
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
%background_priority,
|
||||
"starting prepare for {}",
|
||||
artifact_path.display(),
|
||||
);
|
||||
|
||||
if background_priority {
|
||||
renice(pid, NICENESS_BACKGROUND);
|
||||
}
|
||||
|
||||
if let Err(err) = send_request(&mut stream, code).await {
|
||||
tracing::warn!("failed to send a prepare request to pid={}: {:?}", pid, err);
|
||||
return Outcome::DidntMakeIt;
|
||||
}
|
||||
|
||||
// Wait for the result from the worker, keeping in mind that there may be a timeout, the
|
||||
// worker may get killed, or something along these lines.
|
||||
//
|
||||
// In that case we should handle these gracefully by writing the artifact file by ourselves.
|
||||
// We may potentially overwrite the artifact in rare cases where the worker didn't make
|
||||
// it to report back the result.
|
||||
|
||||
enum Selected {
|
||||
Done,
|
||||
IoErr,
|
||||
Deadline,
|
||||
}
|
||||
|
||||
let selected = futures::select! {
|
||||
artifact_path_bytes = framed_recv(&mut stream).fuse() => {
|
||||
match artifact_path_bytes {
|
||||
Ok(bytes) => {
|
||||
if let Some(tmp_path) = bytes_to_path(&bytes) {
|
||||
async_std::fs::rename(tmp_path, &artifact_path)
|
||||
.await
|
||||
.map(|_| Selected::Done)
|
||||
.unwrap_or(Selected::IoErr)
|
||||
} else {
|
||||
Selected::IoErr
|
||||
}
|
||||
},
|
||||
Err(_) => Selected::IoErr,
|
||||
}
|
||||
},
|
||||
_ = Delay::new(COMPILATION_TIMEOUT).fuse() => Selected::Deadline,
|
||||
};
|
||||
|
||||
match selected {
|
||||
Selected::Done => {
|
||||
renice(pid, NICENESS_FOREGROUND);
|
||||
Outcome::Concluded(IdleWorker { stream, pid })
|
||||
}
|
||||
Selected::IoErr | Selected::Deadline => {
|
||||
let bytes = Artifact::DidntMakeIt.serialize();
|
||||
// best effort: there is nothing we can do here if the write fails.
|
||||
let _ = async_std::fs::write(&artifact_path, &bytes).await;
|
||||
Outcome::DidntMakeIt
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn send_request(stream: &mut UnixStream, code: Arc<Vec<u8>>) -> io::Result<()> {
|
||||
framed_send(stream, &*code).await
|
||||
}
|
||||
|
||||
async fn recv_request(stream: &mut UnixStream) -> io::Result<Vec<u8>> {
|
||||
framed_recv(stream).await
|
||||
}
|
||||
|
||||
pub fn bump_priority(handle: &WorkerHandle) {
|
||||
let pid = handle.id();
|
||||
renice(pid, NICENESS_FOREGROUND);
|
||||
}
|
||||
|
||||
fn renice(pid: u32, niceness: i32) {
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"changing niceness to {}",
|
||||
niceness,
|
||||
);
|
||||
|
||||
// Consider upstreaming this to the `nix` crate.
|
||||
unsafe {
|
||||
if -1 == libc::setpriority(libc::PRIO_PROCESS, pid, niceness) {
|
||||
let err = std::io::Error::last_os_error();
|
||||
tracing::warn!(target: LOG_TARGET, "failed to set the priority: {:?}", err,);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The entrypoint that the spawned prepare worker should start with. The socket_path specifies
|
||||
/// the path to the socket used to communicate with the host.
|
||||
pub fn worker_entrypoint(socket_path: &str) {
|
||||
worker_event_loop("prepare", socket_path, |mut stream| async move {
|
||||
loop {
|
||||
let code = recv_request(&mut stream).await?;
|
||||
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %std::process::id(),
|
||||
"worker: preparing artifact",
|
||||
);
|
||||
let artifact_bytes = prepare_artifact(&code).serialize();
|
||||
|
||||
// Write the serialized artifact into into a temp file.
|
||||
let dest = tmpfile("prepare-artifact-").await?;
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %std::process::id(),
|
||||
"worker: writing artifact to {}",
|
||||
dest.display(),
|
||||
);
|
||||
async_std::fs::write(&dest, &artifact_bytes).await?;
|
||||
|
||||
// Communicate the results back to the host.
|
||||
framed_send(&mut stream, &path_to_bytes(&dest)).await?;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
fn prepare_artifact(code: &[u8]) -> Artifact {
|
||||
let blob = match crate::executor_intf::prevalidate(code) {
|
||||
Err(err) => {
|
||||
return Artifact::PrevalidationErr(format!("{:?}", err));
|
||||
}
|
||||
Ok(b) => b,
|
||||
};
|
||||
|
||||
match crate::executor_intf::prepare(blob) {
|
||||
Ok(compiled_artifact) => Artifact::Compiled { compiled_artifact },
|
||||
Err(err) => Artifact::PreparationErr(format!("{:?}", err)),
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
/// A priority assigned to execution of a PVF.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum Priority {
|
||||
/// Jobs in this priority will be executed in the background, meaning that they will be only
|
||||
/// given spare CPU time.
|
||||
///
|
||||
/// This is mainly for cache warmings.
|
||||
Background,
|
||||
/// Normal priority for things that do not require immediate response, but still need to be
|
||||
/// done pretty quick.
|
||||
///
|
||||
/// Approvals and disputes fall into this category.
|
||||
Normal,
|
||||
/// This priority is used for requests that are required to be processed as soon as possible.
|
||||
///
|
||||
/// For example, backing is on critical path and require execution as soon as possible.
|
||||
Critical,
|
||||
}
|
||||
|
||||
impl Priority {
|
||||
/// Returns `true` if `self` is `Crticial`
|
||||
pub fn is_critical(self) -> bool {
|
||||
self == Priority::Critical
|
||||
}
|
||||
|
||||
/// Returns `true` if `self` is `Background`
|
||||
pub fn is_background(self) -> bool {
|
||||
self == Priority::Background
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,56 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use crate::artifacts::ArtifactId;
|
||||
use polkadot_core_primitives::Hash;
|
||||
use sp_core::blake2_256;
|
||||
use std::{fmt, sync::Arc};
|
||||
|
||||
/// A struct that carries code of a parachain validation function and it's hash.
|
||||
///
|
||||
/// Should be cheap to clone.
|
||||
#[derive(Clone)]
|
||||
pub struct Pvf {
|
||||
pub(crate) code: Arc<Vec<u8>>,
|
||||
pub(crate) code_hash: Hash,
|
||||
}
|
||||
|
||||
impl fmt::Debug for Pvf {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "Pvf {{ code, code_hash: {:?} }}", self.code_hash)
|
||||
}
|
||||
}
|
||||
|
||||
impl Pvf {
|
||||
/// Returns an instance of the PVF out of the given PVF code.
|
||||
pub fn from_code(code: Vec<u8>) -> Self {
|
||||
let code = Arc::new(code);
|
||||
let code_hash = blake2_256(&code).into();
|
||||
Self { code, code_hash }
|
||||
}
|
||||
|
||||
/// Creates a new pvf which artifact id can be uniquely identified by the given number.
|
||||
#[cfg(test)]
|
||||
pub(crate) fn from_discriminator(num: u32) -> Self {
|
||||
let descriminator_buf = num.to_le_bytes().to_vec();
|
||||
Pvf::from_code(descriminator_buf)
|
||||
}
|
||||
|
||||
/// Returns the artifact ID that corresponds to this PVF.
|
||||
pub(crate) fn as_artifact_id(&self) -> ArtifactId {
|
||||
ArtifactId::new(self.code_hash)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,70 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Various things for testing other crates.
|
||||
//!
|
||||
//! N.B. This is not guarded with some feature flag. Overexposing items here may affect the final
|
||||
//! artifact even for production builds.
|
||||
|
||||
pub mod worker_common {
|
||||
pub use crate::worker_common::{spawn_with_program_path, SpawnErr};
|
||||
}
|
||||
|
||||
/// A function that emulates the stitches together behaviors of the preparation and the execution
|
||||
/// worker in a single synchronous function.
|
||||
pub fn validate_candidate(
|
||||
code: &[u8],
|
||||
params: &[u8],
|
||||
) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
|
||||
use crate::executor_intf::{prevalidate, prepare, execute, TaskExecutor};
|
||||
|
||||
let blob = prevalidate(code)?;
|
||||
let artifact = prepare(blob)?;
|
||||
let executor = TaskExecutor::new()?;
|
||||
let result = execute(&artifact, params, executor)?;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Use this macro to declare a `fn main() {}` that will check the arguments and dispatch them to
|
||||
/// the appropriate worker, making the executable that can be used for spawning workers.
|
||||
#[macro_export]
|
||||
macro_rules! decl_puppet_worker_main {
|
||||
() => {
|
||||
fn main() {
|
||||
let args = std::env::args().collect::<Vec<_>>();
|
||||
if args.len() < 2 {
|
||||
panic!("wrong number of arguments");
|
||||
}
|
||||
|
||||
let subcommand = &args[1];
|
||||
match subcommand.as_ref() {
|
||||
"sleep" => {
|
||||
std::thread::sleep(std::time::Duration::from_secs(5));
|
||||
}
|
||||
"prepare-worker" => {
|
||||
let socket_path = &args[2];
|
||||
$crate::prepare_worker_entrypoint(socket_path);
|
||||
}
|
||||
"execute-worker" => {
|
||||
let socket_path = &args[2];
|
||||
$crate::execute_worker_entrypoint(socket_path);
|
||||
}
|
||||
other => panic!("unknown subcommand: {}", other),
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -0,0 +1,294 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Common logic for implementation of worker processes.
|
||||
|
||||
use crate::LOG_TARGET;
|
||||
use async_std::{
|
||||
io,
|
||||
os::unix::net::{UnixListener, UnixStream},
|
||||
path::{PathBuf, Path},
|
||||
};
|
||||
use futures::{
|
||||
AsyncRead, AsyncWrite, AsyncReadExt as _, AsyncWriteExt as _, FutureExt as _, never::Never,
|
||||
};
|
||||
use futures_timer::Delay;
|
||||
use rand::Rng;
|
||||
use std::{
|
||||
fmt, mem,
|
||||
pin::Pin,
|
||||
task::{Context, Poll},
|
||||
time::Duration,
|
||||
};
|
||||
use pin_project::pin_project;
|
||||
|
||||
/// This is publicly exposed only for integration tests.
|
||||
#[doc(hidden)]
|
||||
pub async fn spawn_with_program_path(
|
||||
debug_id: &'static str,
|
||||
program_path: impl Into<PathBuf>,
|
||||
extra_args: &'static [&'static str],
|
||||
spawn_timeout: Duration,
|
||||
) -> Result<(IdleWorker, WorkerHandle), SpawnErr> {
|
||||
let program_path = program_path.into();
|
||||
with_transient_socket_path(debug_id, |socket_path| {
|
||||
let socket_path = socket_path.to_owned();
|
||||
async move {
|
||||
let listener = UnixListener::bind(&socket_path)
|
||||
.await
|
||||
.map_err(|_| SpawnErr::Bind)?;
|
||||
|
||||
let handle = WorkerHandle::spawn(program_path, extra_args, socket_path)
|
||||
.map_err(|_| SpawnErr::ProcessSpawn)?;
|
||||
|
||||
futures::select! {
|
||||
accept_result = listener.accept().fuse() => {
|
||||
let (stream, _) = accept_result.map_err(|_| SpawnErr::Accept)?;
|
||||
Ok((IdleWorker { stream, pid: handle.id() }, handle))
|
||||
}
|
||||
_ = Delay::new(spawn_timeout).fuse() => {
|
||||
Err(SpawnErr::AcceptTimeout)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
async fn with_transient_socket_path<T, F, Fut>(debug_id: &'static str, f: F) -> Result<T, SpawnErr>
|
||||
where
|
||||
F: FnOnce(&Path) -> Fut,
|
||||
Fut: futures::Future<Output = Result<T, SpawnErr>> + 'static,
|
||||
{
|
||||
let socket_path = tmpfile(&format!("pvf-host-{}", debug_id))
|
||||
.await
|
||||
.map_err(|_| SpawnErr::TmpFile)?;
|
||||
let result = f(&socket_path).await;
|
||||
|
||||
// Best effort to remove the socket file. Under normal circumstances the socket will be removed
|
||||
// by the worker. We make sure that it is removed here, just in case a failed rendezvous.
|
||||
let _ = async_std::fs::remove_file(socket_path).await;
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Returns a path under the location for temporary files. The file name will start with the given
|
||||
/// prefix.
|
||||
///
|
||||
/// There is only a certain number of retries. If exceeded this function will give up and return an
|
||||
/// error.
|
||||
pub async fn tmpfile(prefix: &str) -> io::Result<PathBuf> {
|
||||
fn tmppath(prefix: &str) -> PathBuf {
|
||||
use rand::distributions::Alphanumeric;
|
||||
|
||||
const DESCRIMINATOR_LEN: usize = 10;
|
||||
|
||||
let mut buf = Vec::with_capacity(prefix.len() + DESCRIMINATOR_LEN);
|
||||
buf.extend(prefix.as_bytes());
|
||||
buf.extend(
|
||||
rand::thread_rng()
|
||||
.sample_iter(&Alphanumeric)
|
||||
.take(DESCRIMINATOR_LEN),
|
||||
);
|
||||
|
||||
let s = std::str::from_utf8(&buf)
|
||||
.expect("the string is collected from a valid utf-8 sequence; qed");
|
||||
|
||||
let mut temp_dir = PathBuf::from(std::env::temp_dir());
|
||||
temp_dir.push(s);
|
||||
temp_dir
|
||||
}
|
||||
|
||||
const NUM_RETRIES: usize = 50;
|
||||
|
||||
for _ in 0..NUM_RETRIES {
|
||||
let candidate_path = tmppath(prefix);
|
||||
if !candidate_path.exists().await {
|
||||
return Ok(candidate_path)
|
||||
}
|
||||
}
|
||||
|
||||
Err(
|
||||
io::Error::new(io::ErrorKind::Other, "failed to create a temporary file")
|
||||
)
|
||||
}
|
||||
|
||||
pub fn worker_event_loop<F, Fut>(debug_id: &'static str, socket_path: &str, mut event_loop: F)
|
||||
where
|
||||
F: FnMut(UnixStream) -> Fut,
|
||||
Fut: futures::Future<Output = io::Result<Never>>,
|
||||
{
|
||||
let err = async_std::task::block_on::<_, io::Result<Never>>(async move {
|
||||
let stream = UnixStream::connect(socket_path).await?;
|
||||
let _ = async_std::fs::remove_file(socket_path).await;
|
||||
|
||||
event_loop(stream).await
|
||||
})
|
||||
.unwrap_err(); // it's never `Ok` because it's `Ok(Never)`
|
||||
|
||||
tracing::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %std::process::id(),
|
||||
"pvf worker ({}): {:?}",
|
||||
debug_id,
|
||||
err,
|
||||
);
|
||||
}
|
||||
|
||||
/// A struct that represents an idle worker.
|
||||
///
|
||||
/// This struct is supposed to be used as a token that is passed by move into a subroutine that
|
||||
/// initiates a job. If the worker dies on the duty, then the token is not returned back.
|
||||
#[derive(Debug)]
|
||||
pub struct IdleWorker {
|
||||
/// The stream to which the child process is connected.
|
||||
pub stream: UnixStream,
|
||||
|
||||
/// The identifier of this process. Used to reset the niceness.
|
||||
pub pid: u32,
|
||||
}
|
||||
|
||||
/// An error happened during spawning a worker process.
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum SpawnErr {
|
||||
/// Cannot obtain a temporary file location.
|
||||
TmpFile,
|
||||
/// Cannot bind the socket to the given path.
|
||||
Bind,
|
||||
/// An error happened during accepting a connection to the socket.
|
||||
Accept,
|
||||
/// An error happened during spawning the process.
|
||||
ProcessSpawn,
|
||||
/// The deadline alloted for the worker spawning and connecting to the socket has elapsed.
|
||||
AcceptTimeout,
|
||||
}
|
||||
|
||||
/// This is a representation of a potentially running worker. Drop it and the process will be killed.
|
||||
///
|
||||
/// A worker's handle is also a future that resolves when it's detected that the worker's process
|
||||
/// has been terminated. Since the worker is running in another process it is obviously not necessarily
|
||||
/// to poll this future to make the worker run, it's only for termination detection.
|
||||
///
|
||||
/// This future relies on the fact that a child process's stdout fd is closed upon it's termination.
|
||||
#[pin_project]
|
||||
pub struct WorkerHandle {
|
||||
child: async_process::Child,
|
||||
#[pin]
|
||||
stdout: async_process::ChildStdout,
|
||||
drop_box: Box<[u8]>,
|
||||
}
|
||||
|
||||
impl WorkerHandle {
|
||||
fn spawn(
|
||||
program: impl AsRef<Path>,
|
||||
extra_args: &[&str],
|
||||
socket_path: impl AsRef<Path>,
|
||||
) -> io::Result<Self> {
|
||||
let mut child = async_process::Command::new(program.as_ref())
|
||||
.args(extra_args)
|
||||
.arg(socket_path.as_ref().as_os_str())
|
||||
.stdout(async_process::Stdio::piped())
|
||||
.kill_on_drop(true)
|
||||
.spawn()?;
|
||||
|
||||
let stdout = child
|
||||
.stdout
|
||||
.take()
|
||||
.expect("the process spawned with piped stdout should have the stdout handle");
|
||||
|
||||
Ok(WorkerHandle {
|
||||
child,
|
||||
stdout,
|
||||
// We don't expect the bytes to be ever read. But in case we do, we should not use a buffer
|
||||
// of a small size, because otherwise if the child process does return any data we will end up
|
||||
// issuing a syscall for each byte. We also prefer not to do allocate that on the stack, since
|
||||
// each poll the buffer will be allocated and initialized (and that's due poll_read takes &mut [u8]
|
||||
// and there are no guarantees that a `poll_read` won't ever read from there even though that's
|
||||
// unlikely).
|
||||
//
|
||||
// OTOH, we also don't want to be super smart here and we could just afford to allocate a buffer
|
||||
// for that here.
|
||||
drop_box: vec![0; 8192].into_boxed_slice(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the process id of this worker.
|
||||
pub fn id(&self) -> u32 {
|
||||
self.child.id()
|
||||
}
|
||||
}
|
||||
|
||||
impl futures::Future for WorkerHandle {
|
||||
type Output = ();
|
||||
|
||||
fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
|
||||
let me = self.project();
|
||||
match futures::ready!(AsyncRead::poll_read(me.stdout, cx, &mut *me.drop_box)) {
|
||||
Ok(0) => {
|
||||
// 0 means EOF means the child was terminated. Resolve.
|
||||
Poll::Ready(())
|
||||
}
|
||||
Ok(_bytes_read) => {
|
||||
// weird, we've read something. Pretend that never happened and reschedule ourselves.
|
||||
cx.waker().wake_by_ref();
|
||||
Poll::Pending
|
||||
}
|
||||
Err(_) => {
|
||||
// The implementation is guaranteed to not to return WouldBlock and Interrupted. This
|
||||
// leaves us with a legit errors which we suppose were due to termination.
|
||||
Poll::Ready(())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for WorkerHandle {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "WorkerHandle(pid={})", self.id())
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the given path into a byte buffer.
|
||||
pub fn path_to_bytes(path: &Path) -> &[u8] {
|
||||
// Ideally, we take the OsStr of the path, send that and reconstruct this on the other side.
|
||||
// However, libstd doesn't provide us with such an option. There are crates out there that
|
||||
// allow for extraction of a path, but TBH it doesn't seem to be a real issue.
|
||||
//
|
||||
// However, should be there reports we can incorporate such a crate here.
|
||||
path.to_str().expect("non-UTF-8 path").as_bytes()
|
||||
}
|
||||
|
||||
/// Interprets the given bytes as a path. Returns `None` if the given bytes do not constitute a
|
||||
/// a proper utf-8 string.
|
||||
pub fn bytes_to_path(bytes: &[u8]) -> Option<PathBuf> {
|
||||
std::str::from_utf8(bytes).ok().map(PathBuf::from)
|
||||
}
|
||||
|
||||
pub async fn framed_send(w: &mut (impl AsyncWrite + Unpin), buf: &[u8]) -> io::Result<()> {
|
||||
let len_buf = buf.len().to_le_bytes();
|
||||
w.write_all(&len_buf).await?;
|
||||
w.write_all(buf).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn framed_recv(r: &mut (impl AsyncRead + Unpin)) -> io::Result<Vec<u8>> {
|
||||
let mut len_buf = [0u8; mem::size_of::<usize>()];
|
||||
r.read_exact(&mut len_buf).await?;
|
||||
let len = usize::from_le_bytes(len_buf);
|
||||
let mut buf = vec![0; len];
|
||||
r.read_exact(&mut buf).await?;
|
||||
Ok(buf)
|
||||
}
|
||||
@@ -0,0 +1,131 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use super::TestHost;
|
||||
use polkadot_parachain::{
|
||||
primitives::{
|
||||
RelayChainBlockNumber, BlockData as GenericBlockData, HeadData as GenericHeadData,
|
||||
ValidationParams,
|
||||
},
|
||||
};
|
||||
use parity_scale_codec::{Decode, Encode};
|
||||
use adder::{HeadData, BlockData, hash_state};
|
||||
|
||||
#[async_std::test]
|
||||
async fn execute_good_on_parent() {
|
||||
let parent_head = HeadData {
|
||||
number: 0,
|
||||
parent_hash: [0; 32],
|
||||
post_state: hash_state(0),
|
||||
};
|
||||
|
||||
let block_data = BlockData { state: 0, add: 512 };
|
||||
|
||||
let host = TestHost::new();
|
||||
|
||||
let ret = host
|
||||
.validate_candidate(
|
||||
adder::wasm_binary_unwrap(),
|
||||
ValidationParams {
|
||||
parent_head: GenericHeadData(parent_head.encode()),
|
||||
block_data: GenericBlockData(block_data.encode()),
|
||||
relay_parent_number: 1,
|
||||
relay_parent_storage_root: Default::default(),
|
||||
},
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let new_head = HeadData::decode(&mut &ret.head_data.0[..]).unwrap();
|
||||
|
||||
assert_eq!(new_head.number, 1);
|
||||
assert_eq!(new_head.parent_hash, parent_head.hash());
|
||||
assert_eq!(new_head.post_state, hash_state(512));
|
||||
}
|
||||
|
||||
#[async_std::test]
|
||||
async fn execute_good_chain_on_parent() {
|
||||
let mut number = 0;
|
||||
let mut parent_hash = [0; 32];
|
||||
let mut last_state = 0;
|
||||
|
||||
let host = TestHost::new();
|
||||
|
||||
for add in 0..10 {
|
||||
let parent_head = HeadData {
|
||||
number,
|
||||
parent_hash,
|
||||
post_state: hash_state(last_state),
|
||||
};
|
||||
|
||||
let block_data = BlockData {
|
||||
state: last_state,
|
||||
add,
|
||||
};
|
||||
|
||||
let ret = host
|
||||
.validate_candidate(
|
||||
adder::wasm_binary_unwrap(),
|
||||
ValidationParams {
|
||||
parent_head: GenericHeadData(parent_head.encode()),
|
||||
block_data: GenericBlockData(block_data.encode()),
|
||||
relay_parent_number: number as RelayChainBlockNumber + 1,
|
||||
relay_parent_storage_root: Default::default(),
|
||||
},
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let new_head = HeadData::decode(&mut &ret.head_data.0[..]).unwrap();
|
||||
|
||||
assert_eq!(new_head.number, number + 1);
|
||||
assert_eq!(new_head.parent_hash, parent_head.hash());
|
||||
assert_eq!(new_head.post_state, hash_state(last_state + add));
|
||||
|
||||
number += 1;
|
||||
parent_hash = new_head.hash();
|
||||
last_state += add;
|
||||
}
|
||||
}
|
||||
|
||||
#[async_std::test]
|
||||
async fn execute_bad_on_parent() {
|
||||
let parent_head = HeadData {
|
||||
number: 0,
|
||||
parent_hash: [0; 32],
|
||||
post_state: hash_state(0),
|
||||
};
|
||||
|
||||
let block_data = BlockData {
|
||||
state: 256, // start state is wrong.
|
||||
add: 256,
|
||||
};
|
||||
|
||||
let host = TestHost::new();
|
||||
|
||||
let _ret = host
|
||||
.validate_candidate(
|
||||
adder::wasm_binary_unwrap(),
|
||||
ValidationParams {
|
||||
parent_head: GenericHeadData(parent_head.encode()),
|
||||
block_data: GenericBlockData(block_data.encode()),
|
||||
relay_parent_number: 1,
|
||||
relay_parent_storage_root: Default::default(),
|
||||
},
|
||||
)
|
||||
.await
|
||||
.unwrap_err();
|
||||
}
|
||||
@@ -0,0 +1,152 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use polkadot_node_core_pvf::{Pvf, ValidationHost, start, Config, InvalidCandidate, ValidationError};
|
||||
use polkadot_parachain::{
|
||||
primitives::{BlockData, ValidationParams, ValidationResult},
|
||||
};
|
||||
use parity_scale_codec::Encode as _;
|
||||
use async_std::sync::Mutex;
|
||||
|
||||
mod adder;
|
||||
mod worker_common;
|
||||
|
||||
const PUPPET_EXE: &str = env!("CARGO_BIN_EXE_puppet_worker");
|
||||
|
||||
struct TestHost {
|
||||
_cache_dir: tempfile::TempDir,
|
||||
host: Mutex<ValidationHost>,
|
||||
}
|
||||
|
||||
impl TestHost {
|
||||
fn new() -> Self {
|
||||
Self::new_with_config(|_| ())
|
||||
}
|
||||
|
||||
fn new_with_config<F>(f: F) -> Self
|
||||
where
|
||||
F: FnOnce(&mut Config),
|
||||
{
|
||||
let cache_dir = tempfile::tempdir().unwrap();
|
||||
let program_path = std::path::PathBuf::from(PUPPET_EXE);
|
||||
let mut config = Config::new(cache_dir.path().to_owned(), program_path);
|
||||
f(&mut config);
|
||||
let (host, task) = start(config);
|
||||
let _ = async_std::task::spawn(task);
|
||||
Self {
|
||||
_cache_dir: cache_dir,
|
||||
host: Mutex::new(host),
|
||||
}
|
||||
}
|
||||
|
||||
async fn validate_candidate(
|
||||
&self,
|
||||
code: &[u8],
|
||||
params: ValidationParams,
|
||||
) -> Result<ValidationResult, ValidationError> {
|
||||
let (result_tx, result_rx) = futures::channel::oneshot::channel();
|
||||
self.host
|
||||
.lock()
|
||||
.await
|
||||
.execute_pvf(
|
||||
Pvf::from_code(code.to_vec()),
|
||||
params.encode(),
|
||||
polkadot_node_core_pvf::Priority::Normal,
|
||||
result_tx,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
result_rx.await.unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_std::test]
|
||||
async fn terminates_on_timeout() {
|
||||
let host = TestHost::new();
|
||||
|
||||
let result = host
|
||||
.validate_candidate(
|
||||
halt::wasm_binary_unwrap(),
|
||||
ValidationParams {
|
||||
block_data: BlockData(Vec::new()),
|
||||
parent_head: Default::default(),
|
||||
relay_parent_number: 1,
|
||||
relay_parent_storage_root: Default::default(),
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Err(ValidationError::InvalidCandidate(InvalidCandidate::HardTimeout)) => {}
|
||||
r => panic!("{:?}", r),
|
||||
}
|
||||
}
|
||||
|
||||
#[async_std::test]
|
||||
async fn parallel_execution() {
|
||||
let host = TestHost::new();
|
||||
let execute_pvf_future_1 = host.validate_candidate(
|
||||
halt::wasm_binary_unwrap(),
|
||||
ValidationParams {
|
||||
block_data: BlockData(Vec::new()),
|
||||
parent_head: Default::default(),
|
||||
relay_parent_number: 1,
|
||||
relay_parent_storage_root: Default::default(),
|
||||
},
|
||||
);
|
||||
let execute_pvf_future_2 = host.validate_candidate(
|
||||
halt::wasm_binary_unwrap(),
|
||||
ValidationParams {
|
||||
block_data: BlockData(Vec::new()),
|
||||
parent_head: Default::default(),
|
||||
relay_parent_number: 1,
|
||||
relay_parent_storage_root: Default::default(),
|
||||
},
|
||||
);
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
let (_, _) = futures::join!(execute_pvf_future_1, execute_pvf_future_2);
|
||||
|
||||
// total time should be < 2 x EXECUTION_TIMEOUT_SEC
|
||||
const EXECUTION_TIMEOUT_SEC: u64 = 3;
|
||||
assert!(
|
||||
std::time::Instant::now().duration_since(start)
|
||||
< std::time::Duration::from_secs(EXECUTION_TIMEOUT_SEC * 2)
|
||||
);
|
||||
}
|
||||
|
||||
#[async_std::test]
|
||||
async fn execute_queue_doesnt_stall_if_workers_died() {
|
||||
let host = TestHost::new_with_config(|cfg| {
|
||||
assert_eq!(cfg.execute_workers_max_num, 5);
|
||||
});
|
||||
|
||||
// Here we spawn 8 validation jobs for the `halt` PVF and share those between 5 workers. The
|
||||
// first five jobs should timeout and the workers killed. For the next 3 jobs a new batch of
|
||||
// workers should be spun up.
|
||||
futures::future::join_all((0u8..=8).map(|_| {
|
||||
host.validate_candidate(
|
||||
halt::wasm_binary_unwrap(),
|
||||
ValidationParams {
|
||||
block_data: BlockData(Vec::new()),
|
||||
parent_head: Default::default(),
|
||||
relay_parent_number: 1,
|
||||
relay_parent_storage_root: Default::default(),
|
||||
},
|
||||
)
|
||||
}))
|
||||
.await;
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use crate::PUPPET_EXE;
|
||||
use polkadot_node_core_pvf::testing::worker_common::{spawn_with_program_path, SpawnErr};
|
||||
use std::time::Duration;
|
||||
|
||||
#[async_std::test]
|
||||
async fn spawn_timeout() {
|
||||
let result = spawn_with_program_path(
|
||||
"integration-test",
|
||||
PUPPET_EXE,
|
||||
&["sleep"],
|
||||
Duration::from_secs(2),
|
||||
)
|
||||
.await;
|
||||
assert!(matches!(result, Err(SpawnErr::AcceptTimeout)));
|
||||
}
|
||||
|
||||
#[async_std::test]
|
||||
async fn should_connect() {
|
||||
let _ = spawn_with_program_path(
|
||||
"integration-test",
|
||||
PUPPET_EXE,
|
||||
&["prepare-worker"],
|
||||
Duration::from_secs(2),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
@@ -31,6 +31,7 @@ use {
|
||||
polkadot_node_core_av_store::Config as AvailabilityConfig,
|
||||
polkadot_node_core_av_store::Error as AvailabilityError,
|
||||
polkadot_node_core_approval_voting::Config as ApprovalVotingConfig,
|
||||
polkadot_node_core_candidate_validation::Config as CandidateValidationConfig,
|
||||
polkadot_node_core_proposer::ProposerFactory,
|
||||
polkadot_overseer::{AllSubsystems, BlockInfo, Overseer, OverseerHandler},
|
||||
polkadot_primitives::v1::ParachainHost,
|
||||
@@ -60,7 +61,6 @@ use telemetry::{Telemetry, TelemetryWorker, TelemetryWorkerHandle};
|
||||
pub use self::client::{AbstractClient, Client, ClientHandle, ExecuteWithClient, RuntimeApiCollection};
|
||||
pub use chain_spec::{PolkadotChainSpec, KusamaChainSpec, WestendChainSpec, RococoChainSpec};
|
||||
pub use consensus_common::{Proposal, SelectChain, BlockImport, block_validation::Chain};
|
||||
pub use polkadot_parachain::wasm_executor::IsolationStrategy;
|
||||
pub use polkadot_primitives::v1::{Block, BlockId, CollatorPair, Hash, Id as ParaId};
|
||||
pub use sc_client_api::{Backend, ExecutionStrategy, CallExecutor};
|
||||
pub use sc_consensus::LongestChain;
|
||||
@@ -426,7 +426,7 @@ fn real_overseer<Spawner, RuntimeClient>(
|
||||
registry: Option<&Registry>,
|
||||
spawner: Spawner,
|
||||
is_collator: IsCollator,
|
||||
isolation_strategy: IsolationStrategy,
|
||||
candidate_validation_config: CandidateValidationConfig,
|
||||
) -> Result<(Overseer<Spawner, Arc<RuntimeClient>>, OverseerHandler), Error>
|
||||
where
|
||||
RuntimeClient: 'static + ProvideRuntimeApi<Block> + HeaderBackend<Block> + AuxStore,
|
||||
@@ -484,10 +484,9 @@ where
|
||||
keystore.clone(),
|
||||
Metrics::register(registry)?,
|
||||
),
|
||||
candidate_validation: CandidateValidationSubsystem::new(
|
||||
spawner.clone(),
|
||||
candidate_validation: CandidateValidationSubsystem::with_config(
|
||||
candidate_validation_config,
|
||||
Metrics::register(registry)?,
|
||||
isolation_strategy,
|
||||
),
|
||||
chain_api: ChainApiSubsystem::new(
|
||||
runtime_client.clone(),
|
||||
@@ -673,8 +672,8 @@ pub fn new_full<RuntimeApi, Executor>(
|
||||
is_collator: IsCollator,
|
||||
grandpa_pause: Option<(u32, u32)>,
|
||||
jaeger_agent: Option<std::net::SocketAddr>,
|
||||
isolation_strategy: IsolationStrategy,
|
||||
telemetry_worker_handle: Option<TelemetryWorkerHandle>,
|
||||
program_path: Option<std::path::PathBuf>,
|
||||
) -> Result<NewFull<Arc<FullClient<RuntimeApi, Executor>>>, Error>
|
||||
where
|
||||
RuntimeApi: ConstructRuntimeApi<Block, FullClient<RuntimeApi, Executor>> + Send + Sync + 'static,
|
||||
@@ -785,6 +784,17 @@ pub fn new_full<RuntimeApi, Executor>(
|
||||
slot_duration_millis: slot_duration.as_millis() as u64,
|
||||
};
|
||||
|
||||
let candidate_validation_config = CandidateValidationConfig {
|
||||
artifacts_cache_path: config.database
|
||||
.path()
|
||||
.ok_or(Error::DatabasePathRequired)?
|
||||
.join("pvf-artifacts"),
|
||||
program_path: match program_path {
|
||||
None => std::env::current_exe()?,
|
||||
Some(p) => p,
|
||||
},
|
||||
};
|
||||
|
||||
let chain_spec = config.chain_spec.cloned_box();
|
||||
let rpc_handlers = service::spawn_tasks(service::SpawnTasksParams {
|
||||
config,
|
||||
@@ -863,7 +873,7 @@ pub fn new_full<RuntimeApi, Executor>(
|
||||
prometheus_registry.as_ref(),
|
||||
spawner,
|
||||
is_collator,
|
||||
isolation_strategy,
|
||||
candidate_validation_config,
|
||||
)?;
|
||||
let overseer_handler_clone = overseer_handler.clone();
|
||||
|
||||
@@ -1216,27 +1226,14 @@ pub fn build_full(
|
||||
jaeger_agent: Option<std::net::SocketAddr>,
|
||||
telemetry_worker_handle: Option<TelemetryWorkerHandle>,
|
||||
) -> Result<NewFull<Client>, Error> {
|
||||
let isolation_strategy = {
|
||||
#[cfg(not(any(target_os = "android", target_os = "unknown")))]
|
||||
{
|
||||
let cache_base_path = config.database.path();
|
||||
IsolationStrategy::external_process_with_caching(cache_base_path)
|
||||
}
|
||||
|
||||
#[cfg(any(target_os = "android", target_os = "unknown"))]
|
||||
{
|
||||
IsolationStrategy::InProcess
|
||||
}
|
||||
};
|
||||
|
||||
if config.chain_spec.is_rococo() {
|
||||
new_full::<rococo_runtime::RuntimeApi, RococoExecutor>(
|
||||
config,
|
||||
is_collator,
|
||||
grandpa_pause,
|
||||
jaeger_agent,
|
||||
isolation_strategy,
|
||||
telemetry_worker_handle,
|
||||
None,
|
||||
).map(|full| full.with_client(Client::Rococo))
|
||||
} else if config.chain_spec.is_kusama() {
|
||||
new_full::<kusama_runtime::RuntimeApi, KusamaExecutor>(
|
||||
@@ -1244,8 +1241,8 @@ pub fn build_full(
|
||||
is_collator,
|
||||
grandpa_pause,
|
||||
jaeger_agent,
|
||||
isolation_strategy,
|
||||
telemetry_worker_handle,
|
||||
None,
|
||||
).map(|full| full.with_client(Client::Kusama))
|
||||
} else if config.chain_spec.is_westend() {
|
||||
new_full::<westend_runtime::RuntimeApi, WestendExecutor>(
|
||||
@@ -1253,8 +1250,8 @@ pub fn build_full(
|
||||
is_collator,
|
||||
grandpa_pause,
|
||||
jaeger_agent,
|
||||
isolation_strategy,
|
||||
telemetry_worker_handle,
|
||||
None,
|
||||
).map(|full| full.with_client(Client::Westend))
|
||||
} else {
|
||||
new_full::<polkadot_runtime::RuntimeApi, PolkadotExecutor>(
|
||||
@@ -1262,8 +1259,8 @@ pub fn build_full(
|
||||
is_collator,
|
||||
grandpa_pause,
|
||||
jaeger_agent,
|
||||
isolation_strategy,
|
||||
telemetry_worker_handle,
|
||||
None,
|
||||
).map(|full| full.with_client(Client::Polkadot))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -53,7 +53,7 @@ use sp_blockchain::HeaderBackend;
|
||||
use sp_keyring::Sr25519Keyring;
|
||||
use sp_runtime::{codec::Encode, generic, traits::IdentifyAccount, MultiSigner};
|
||||
use sp_state_machine::BasicExternalities;
|
||||
use std::sync::Arc;
|
||||
use std::{sync::Arc, path::PathBuf};
|
||||
use substrate_test_client::{BlockchainEventsExt, RpcHandlersExt, RpcTransactionOutput, RpcTransactionError};
|
||||
|
||||
native_executor_instance!(
|
||||
@@ -73,6 +73,7 @@ pub use polkadot_service::FullBackend;
|
||||
pub fn new_full(
|
||||
config: Configuration,
|
||||
is_collator: IsCollator,
|
||||
worker_program_path: Option<PathBuf>,
|
||||
) -> Result<
|
||||
NewFull<Arc<Client>>,
|
||||
Error,
|
||||
@@ -82,8 +83,8 @@ pub fn new_full(
|
||||
is_collator,
|
||||
None,
|
||||
None,
|
||||
polkadot_parachain::wasm_executor::IsolationStrategy::InProcess,
|
||||
None,
|
||||
worker_program_path,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -214,11 +215,12 @@ pub fn run_validator_node(
|
||||
key: Sr25519Keyring,
|
||||
storage_update_func: impl Fn(),
|
||||
boot_nodes: Vec<MultiaddrWithPeerId>,
|
||||
worker_program_path: Option<PathBuf>,
|
||||
) -> PolkadotTestNode {
|
||||
let config = node_config(storage_update_func, task_executor, key, boot_nodes, true);
|
||||
let multiaddr = config.network.listen_addresses[0].clone();
|
||||
let NewFull { task_manager, client, network, rpc_handlers, overseer_handler, .. } =
|
||||
new_full(config, IsCollator::No).expect("could not create Polkadot test service");
|
||||
new_full(config, IsCollator::No, worker_program_path).expect("could not create Polkadot test service");
|
||||
|
||||
let overseer_handler = overseer_handler.expect("test node must have an overseer handler");
|
||||
let peer_id = network.local_peer_id().clone();
|
||||
@@ -261,7 +263,7 @@ pub fn run_collator_node(
|
||||
rpc_handlers,
|
||||
overseer_handler,
|
||||
..
|
||||
} = new_full(config, IsCollator::Yes(collator_pair))
|
||||
} = new_full(config, IsCollator::Yes(collator_pair), None)
|
||||
.expect("could not create Polkadot test service");
|
||||
|
||||
let overseer_handler = overseer_handler.expect("test node must have an overseer handler");
|
||||
|
||||
@@ -30,12 +30,14 @@ async fn ensure_test_service_build_blocks(task_executor: TaskExecutor) {
|
||||
Sr25519Keyring::Alice,
|
||||
|| {},
|
||||
Vec::new(),
|
||||
None,
|
||||
);
|
||||
let mut bob = run_validator_node(
|
||||
task_executor.clone(),
|
||||
Sr25519Keyring::Bob,
|
||||
|| {},
|
||||
vec![alice.addr.clone()],
|
||||
None,
|
||||
);
|
||||
|
||||
{
|
||||
|
||||
@@ -20,7 +20,7 @@ use sp_keyring::Sr25519Keyring::{Alice, Bob};
|
||||
|
||||
#[substrate_test_utils::test]
|
||||
async fn call_function_actually_work(task_executor: TaskExecutor) {
|
||||
let alice = run_validator_node(task_executor, Alice, || {}, Vec::new());
|
||||
let alice = run_validator_node(task_executor, Alice, || {}, Vec::new(), None);
|
||||
|
||||
let function = polkadot_test_runtime::Call::Balances(pallet_balances::Call::transfer(
|
||||
Default::default(),
|
||||
|
||||
@@ -14,47 +14,21 @@ parity-util-mem = { version = "0.9.0", optional = true }
|
||||
sp-std = { git = "https://github.com/paritytech/substrate", branch = "master", default-features = false }
|
||||
sp-runtime = { git = "https://github.com/paritytech/substrate", branch = "master", default-features = false }
|
||||
sp-core = { git = "https://github.com/paritytech/substrate", branch = "master", default-features = false }
|
||||
sp-wasm-interface = { git = "https://github.com/paritytech/substrate", branch = "master", default-features = false }
|
||||
polkadot-core-primitives = { path = "../core-primitives", default-features = false }
|
||||
derive_more = "0.99.11"
|
||||
|
||||
# all optional crates.
|
||||
thiserror = { version = "1.0.22", optional = true }
|
||||
serde = { version = "1.0.117", default-features = false, features = [ "derive" ], optional = true }
|
||||
sp-externalities = { git = "https://github.com/paritytech/substrate", branch = "master", optional = true }
|
||||
sc-executor = { git = "https://github.com/paritytech/substrate", branch = "master", optional = true }
|
||||
sp-io = { git = "https://github.com/paritytech/substrate", branch = "master", optional = true }
|
||||
parking_lot = { version = "0.11.1", optional = true }
|
||||
log = { version = "0.4.11", optional = true }
|
||||
futures = { version = "0.3.8", optional = true }
|
||||
static_assertions = { version = "1.1", optional = true }
|
||||
libc = { version = "0.2.81", optional = true }
|
||||
|
||||
[target.'cfg(not(any(target_os = "android", target_os = "unknown")))'.dependencies]
|
||||
shared_memory = { version = "0.11.0", optional = true }
|
||||
raw_sync = { version = "0.1", optional = true }
|
||||
|
||||
[features]
|
||||
default = ["std"]
|
||||
wasmtime = [ "sc-executor/wasmtime" ]
|
||||
wasm-api = []
|
||||
std = [
|
||||
"parity-scale-codec/std",
|
||||
"thiserror",
|
||||
"serde/std",
|
||||
"sp-std/std",
|
||||
"sp-runtime/std",
|
||||
"shared_memory",
|
||||
"raw_sync",
|
||||
"sp-core/std",
|
||||
"parking_lot",
|
||||
"static_assertions",
|
||||
"log",
|
||||
"libc",
|
||||
"parity-util-mem",
|
||||
"sp-externalities",
|
||||
"sc-executor",
|
||||
"sp-io",
|
||||
"polkadot-core-primitives/std",
|
||||
"futures",
|
||||
]
|
||||
|
||||
@@ -14,6 +14,8 @@
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#![warn(unused_crate_dependencies)]
|
||||
|
||||
//! Defines primitive types for creating or validating a parachain.
|
||||
//!
|
||||
//! When compiled with standard library support, this crate exports a `wasm`
|
||||
@@ -43,8 +45,6 @@
|
||||
|
||||
#![cfg_attr(not(feature = "std"), no_std)]
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
pub mod wasm_executor;
|
||||
pub mod primitives;
|
||||
|
||||
mod wasm_api;
|
||||
|
||||
@@ -1,401 +0,0 @@
|
||||
// Copyright 2017-2020 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! WASM re-execution of a parachain candidate.
|
||||
//! In the context of relay-chain candidate evaluation, there are some additional
|
||||
//! steps to ensure that the provided input parameters are correct.
|
||||
//! Assuming the parameters are correct, this module provides a wrapper around
|
||||
//! a WASM VM for re-execution of a parachain candidate.
|
||||
|
||||
use std::{any::{TypeId, Any}, path::{Path, PathBuf}};
|
||||
use crate::primitives::{ValidationParams, ValidationResult};
|
||||
use parity_scale_codec::{Decode, Encode};
|
||||
use sp_core::{storage::{ChildInfo, TrackedStorageKey}, traits::{CallInWasm, SpawnNamed}};
|
||||
use sp_externalities::Extensions;
|
||||
use sp_wasm_interface::HostFunctions as _;
|
||||
|
||||
#[cfg(not(any(target_os = "android", target_os = "unknown")))]
|
||||
pub use validation_host::{run_worker, ValidationPool, EXECUTION_TIMEOUT_SEC, WORKER_ARGS};
|
||||
|
||||
mod validation_host;
|
||||
|
||||
/// The strategy we employ for isolating execution of wasm parachain validation function (PVF).
|
||||
///
|
||||
/// For a typical validator an external process is the default way to run PVF. The rationale is based
|
||||
/// on the following observations:
|
||||
///
|
||||
/// (a) PVF is completely under control of parachain developers who may or may not be malicious.
|
||||
/// (b) Collators are in charge of providing PoV who also may or may not be malicious.
|
||||
/// (c) PVF is executed by a wasm engine based on optimizing compiler which is a very complex piece
|
||||
/// of machinery.
|
||||
///
|
||||
/// (a) and (b) may lead to a situation where due to a combination of PVF and PoV the validation work
|
||||
/// can stuck in an infinite loop, which can open up resource exhaustion or DoS attack vectors.
|
||||
///
|
||||
/// While some execution engines provide functionality to interrupt execution of wasm module from
|
||||
/// another thread, there are also some caveats to that: there is no clean way to interrupt execution
|
||||
/// if the control flow is in the host side and at the moment we haven't rigoriously vetted that all
|
||||
/// host functions terminate or, at least, return in a short amount of time. Additionally, we want
|
||||
/// some freedom on choosing wasm execution environment.
|
||||
///
|
||||
/// On top of that, execution in a separate process helps to minimize impact of (c) if exploited.
|
||||
/// It's not only the risk of miscompilation, but it also includes risk of JIT-bombs, i.e. cases
|
||||
/// of specially crafted code that take enourmous amounts of time and memory to compile.
|
||||
///
|
||||
/// At the same time, since PVF validates self-contained candidates, validation workers don't require
|
||||
/// extensive communication with polkadot host, therefore there should be no observable performance penalty
|
||||
/// coming from inter process communication.
|
||||
///
|
||||
/// All of the above should give a sense why isolation is crucial for a typical use-case.
|
||||
///
|
||||
/// However, in some cases, e.g. when running PVF validation on android (for whatever reason), we
|
||||
/// cannot afford the luxury of process isolation and thus there is an option to run validation in
|
||||
/// process. Also, running in process is convenient for testing.
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum IsolationStrategy {
|
||||
/// The validation worker is ran in a thread inside the same process.
|
||||
InProcess,
|
||||
/// The validation worker is ran using the process' executable and the subcommand `validation-worker` is passed
|
||||
/// following by the address of the shared memory.
|
||||
#[cfg(not(any(target_os = "android", target_os = "unknown")))]
|
||||
ExternalProcessSelfHost {
|
||||
pool: ValidationPool,
|
||||
cache_base_path: Option<String>,
|
||||
},
|
||||
/// The validation worker is ran using the command provided and the argument provided. The address of the shared
|
||||
/// memory is added at the end of the arguments.
|
||||
#[cfg(not(any(target_os = "android", target_os = "unknown")))]
|
||||
ExternalProcessCustomHost {
|
||||
/// Validation pool.
|
||||
pool: ValidationPool,
|
||||
/// Path to the validation worker. The file must exists and be executable.
|
||||
binary: PathBuf,
|
||||
/// List of arguments passed to the validation worker. The address of the shared memory will be automatically
|
||||
/// added after the arguments.
|
||||
args: Vec<String>,
|
||||
},
|
||||
}
|
||||
|
||||
impl IsolationStrategy {
|
||||
#[cfg(not(any(target_os = "android", target_os = "unknown")))]
|
||||
pub fn external_process_with_caching(cache_base_path: Option<&Path>) -> Self {
|
||||
// Convert cache path to string here so that we don't have to do that each time we launch
|
||||
// validation worker.
|
||||
let cache_base_path = cache_base_path.map(|path| path.display().to_string());
|
||||
|
||||
Self::ExternalProcessSelfHost {
|
||||
pool: ValidationPool::new(),
|
||||
cache_base_path,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
/// Candidate validation error.
|
||||
pub enum ValidationError {
|
||||
/// Validation failed due to internal reasons. The candidate might still be valid.
|
||||
#[error(transparent)]
|
||||
Internal(#[from] InternalError),
|
||||
/// Candidate is invalid.
|
||||
#[error(transparent)]
|
||||
InvalidCandidate(#[from] InvalidCandidate),
|
||||
}
|
||||
|
||||
/// Error type that indicates invalid candidate.
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum InvalidCandidate {
|
||||
/// Wasm executor error.
|
||||
#[error("WASM executor error")]
|
||||
WasmExecutor(#[from] sc_executor::error::Error),
|
||||
/// Call data is too large.
|
||||
#[error("Validation parameters are {0} bytes, max allowed is {1}")]
|
||||
ParamsTooLarge(usize, usize),
|
||||
/// Code size it too large.
|
||||
#[error("WASM code is {0} bytes, max allowed is {1}")]
|
||||
CodeTooLarge(usize, usize),
|
||||
/// Error decoding returned data.
|
||||
#[error("Validation function returned invalid data.")]
|
||||
BadReturn,
|
||||
#[error("Validation function timeout.")]
|
||||
Timeout,
|
||||
#[error("External WASM execution error: {0}")]
|
||||
ExternalWasmExecutor(String),
|
||||
}
|
||||
|
||||
impl core::convert::From<String> for InvalidCandidate {
|
||||
fn from(s: String) -> Self {
|
||||
Self::ExternalWasmExecutor(s)
|
||||
}
|
||||
}
|
||||
|
||||
/// Host error during candidate validation. This does not indicate an invalid candidate.
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum InternalError {
|
||||
#[error("IO error: {0}")]
|
||||
Io(#[from] std::io::Error),
|
||||
|
||||
#[error("System error: {0}")]
|
||||
System(#[from] Box<dyn std::error::Error + Send + Sync + 'static>),
|
||||
|
||||
#[cfg(not(any(target_os = "android", target_os = "unknown")))]
|
||||
#[error("Failed to create shared memory: {0}")]
|
||||
WorkerStartTimeout(String),
|
||||
|
||||
#[cfg(not(any(target_os = "android", target_os = "unknown")))]
|
||||
#[error("Failed to create shared memory: {0}")]
|
||||
FailedToCreateSharedMemory(String),
|
||||
|
||||
#[cfg(not(any(target_os = "android", target_os = "unknown")))]
|
||||
#[error("Failed to send a singal to worker: {0}")]
|
||||
FailedToSignal(String),
|
||||
|
||||
#[cfg(not(any(target_os = "android", target_os = "unknown")))]
|
||||
#[error("Failed to send data to worker: {0}")]
|
||||
FailedToWriteData(&'static str),
|
||||
|
||||
#[error("WASM worker error: {0}")]
|
||||
WasmWorker(String),
|
||||
}
|
||||
|
||||
/// A cache of executors for different parachain Wasm instances.
|
||||
///
|
||||
/// This should be reused across candidate validation instances.
|
||||
pub struct ExecutorCache(sc_executor::WasmExecutor);
|
||||
|
||||
impl ExecutorCache {
|
||||
/// Returns a new instance of an executor cache.
|
||||
///
|
||||
/// `cache_base_path` allows to specify a directory where the executor is allowed to store files
|
||||
/// for caching, e.g. compilation artifacts.
|
||||
pub fn new(cache_base_path: Option<PathBuf>) -> ExecutorCache {
|
||||
ExecutorCache(sc_executor::WasmExecutor::new(
|
||||
#[cfg(all(feature = "wasmtime", not(any(target_os = "android", target_os = "unknown"))))]
|
||||
sc_executor::WasmExecutionMethod::Compiled,
|
||||
#[cfg(any(not(feature = "wasmtime"), target_os = "android", target_os = "unknown"))]
|
||||
sc_executor::WasmExecutionMethod::Interpreted,
|
||||
// TODO: Make sure we don't use more than 1GB: https://github.com/paritytech/polkadot/issues/699
|
||||
Some(1024),
|
||||
HostFunctions::host_functions(),
|
||||
8,
|
||||
cache_base_path,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// Validate a candidate under the given validation code.
|
||||
///
|
||||
/// This will fail if the validation code is not a proper parachain validation module.
|
||||
pub fn validate_candidate(
|
||||
validation_code: &[u8],
|
||||
params: ValidationParams,
|
||||
isolation_strategy: &IsolationStrategy,
|
||||
spawner: impl SpawnNamed + 'static,
|
||||
) -> Result<ValidationResult, ValidationError> {
|
||||
match isolation_strategy {
|
||||
IsolationStrategy::InProcess => {
|
||||
validate_candidate_internal(
|
||||
&ExecutorCache::new(None),
|
||||
validation_code,
|
||||
¶ms.encode(),
|
||||
spawner,
|
||||
)
|
||||
},
|
||||
#[cfg(not(any(target_os = "android", target_os = "unknown")))]
|
||||
IsolationStrategy::ExternalProcessSelfHost { pool, cache_base_path } => {
|
||||
pool.validate_candidate(validation_code, params, cache_base_path.as_deref())
|
||||
},
|
||||
#[cfg(not(any(target_os = "android", target_os = "unknown")))]
|
||||
IsolationStrategy::ExternalProcessCustomHost { pool, binary, args } => {
|
||||
let args: Vec<&str> = args.iter().map(|x| x.as_str()).collect();
|
||||
pool.validate_candidate_custom(validation_code, params, binary, &args)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// The host functions provided by the wasm executor to the parachain wasm blob.
|
||||
type HostFunctions = sp_io::SubstrateHostFunctions;
|
||||
|
||||
/// Validate a candidate under the given validation code.
|
||||
///
|
||||
/// This will fail if the validation code is not a proper parachain validation module.
|
||||
pub fn validate_candidate_internal(
|
||||
executor: &ExecutorCache,
|
||||
validation_code: &[u8],
|
||||
encoded_call_data: &[u8],
|
||||
spawner: impl SpawnNamed + 'static,
|
||||
) -> Result<ValidationResult, ValidationError> {
|
||||
let executor = &executor.0;
|
||||
|
||||
let mut extensions = Extensions::new();
|
||||
extensions.register(sp_core::traits::TaskExecutorExt::new(spawner));
|
||||
extensions.register(sp_core::traits::CallInWasmExt::new(executor.clone()));
|
||||
|
||||
let mut ext = ValidationExternalities(extensions);
|
||||
|
||||
// Expensive, but not more-so than recompiling the wasm module.
|
||||
// And we need this hash to access the `sc_executor` cache.
|
||||
let code_hash = {
|
||||
use polkadot_core_primitives::{BlakeTwo256, HashT};
|
||||
BlakeTwo256::hash(validation_code)
|
||||
};
|
||||
|
||||
let res = executor.call_in_wasm(
|
||||
validation_code,
|
||||
Some(code_hash.as_bytes().to_vec()),
|
||||
"validate_block",
|
||||
encoded_call_data,
|
||||
&mut ext,
|
||||
sp_core::traits::MissingHostFunctions::Allow,
|
||||
).map_err(|e| ValidationError::InvalidCandidate(e.into()))?;
|
||||
|
||||
ValidationResult::decode(&mut &res[..])
|
||||
.map_err(|_| ValidationError::InvalidCandidate(InvalidCandidate::BadReturn).into())
|
||||
}
|
||||
|
||||
/// The validation externalities that will panic on any storage related access. They just provide
|
||||
/// access to the parachain extension.
|
||||
struct ValidationExternalities(Extensions);
|
||||
|
||||
impl sp_externalities::Externalities for ValidationExternalities {
|
||||
fn storage(&self, _: &[u8]) -> Option<Vec<u8>> {
|
||||
panic!("storage: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn storage_hash(&self, _: &[u8]) -> Option<Vec<u8>> {
|
||||
panic!("storage_hash: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn child_storage_hash(&self, _: &ChildInfo, _: &[u8]) -> Option<Vec<u8>> {
|
||||
panic!("child_storage_hash: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn child_storage(&self, _: &ChildInfo, _: &[u8]) -> Option<Vec<u8>> {
|
||||
panic!("child_storage: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn kill_child_storage(&mut self, _: &ChildInfo, _: Option<u32>) -> (bool, u32) {
|
||||
panic!("kill_child_storage: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn clear_prefix(&mut self, _: &[u8]) {
|
||||
panic!("clear_prefix: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn clear_child_prefix(&mut self, _: &ChildInfo, _: &[u8]) {
|
||||
panic!("clear_child_prefix: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn place_storage(&mut self, _: Vec<u8>, _: Option<Vec<u8>>) {
|
||||
panic!("place_storage: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn place_child_storage(&mut self, _: &ChildInfo, _: Vec<u8>, _: Option<Vec<u8>>) {
|
||||
panic!("place_child_storage: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn storage_root(&mut self) -> Vec<u8> {
|
||||
panic!("storage_root: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn child_storage_root(&mut self, _: &ChildInfo) -> Vec<u8> {
|
||||
panic!("child_storage_root: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn storage_changes_root(&mut self, _: &[u8]) -> Result<Option<Vec<u8>>, ()> {
|
||||
panic!("storage_changes_root: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn next_child_storage_key(&self, _: &ChildInfo, _: &[u8]) -> Option<Vec<u8>> {
|
||||
panic!("next_child_storage_key: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn next_storage_key(&self, _: &[u8]) -> Option<Vec<u8>> {
|
||||
panic!("next_storage_key: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn storage_append(
|
||||
&mut self,
|
||||
_key: Vec<u8>,
|
||||
_value: Vec<u8>,
|
||||
) {
|
||||
panic!("storage_append: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn storage_start_transaction(&mut self) {
|
||||
panic!("storage_start_transaction: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn storage_rollback_transaction(&mut self) -> Result<(), ()> {
|
||||
panic!("storage_rollback_transaction: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn storage_commit_transaction(&mut self) -> Result<(), ()> {
|
||||
panic!("storage_commit_transaction: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn wipe(&mut self) {
|
||||
panic!("wipe: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn commit(&mut self) {
|
||||
panic!("commit: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn read_write_count(&self) -> (u32, u32, u32, u32) {
|
||||
panic!("read_write_count: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn reset_read_write_count(&mut self) {
|
||||
panic!("reset_read_write_count: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn get_whitelist(&self) -> Vec<TrackedStorageKey> {
|
||||
panic!("get_whitelist: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn set_whitelist(&mut self, _: Vec<TrackedStorageKey>) {
|
||||
panic!("set_whitelist: unsupported feature for parachain validation")
|
||||
}
|
||||
|
||||
fn set_offchain_storage(&mut self, _: &[u8], _: std::option::Option<&[u8]>) {
|
||||
panic!("set_offchain_storage: unsupported feature for parachain validation")
|
||||
}
|
||||
}
|
||||
|
||||
impl sp_externalities::ExtensionStore for ValidationExternalities {
|
||||
fn extension_by_type_id(&mut self, type_id: TypeId) -> Option<&mut dyn Any> {
|
||||
self.0.get_mut(type_id)
|
||||
}
|
||||
|
||||
fn register_extension_with_type_id(
|
||||
&mut self,
|
||||
type_id: TypeId,
|
||||
extension: Box<dyn sp_externalities::Extension>,
|
||||
) -> Result<(), sp_externalities::Error> {
|
||||
self.0.register_with_type_id(type_id, extension)
|
||||
}
|
||||
|
||||
fn deregister_extension_by_type_id(
|
||||
&mut self,
|
||||
type_id: TypeId,
|
||||
) -> Result<(), sp_externalities::Error> {
|
||||
if self.0.deregister(type_id) {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(sp_externalities::Error::ExtensionIsNotRegistered(type_id))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,357 +0,0 @@
|
||||
// Copyright 2019-2020 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#![cfg(not(any(target_os = "android", target_os = "unknown")))]
|
||||
|
||||
use std::{env, path::PathBuf, process, sync::Arc, sync::atomic};
|
||||
use crate::primitives::{ValidationParams, ValidationResult};
|
||||
use super::{validate_candidate_internal, ValidationError, InvalidCandidate, InternalError};
|
||||
use parking_lot::Mutex;
|
||||
use log::{debug, trace};
|
||||
use futures::executor::ThreadPool;
|
||||
use sp_core::traits::SpawnNamed;
|
||||
|
||||
const WORKER_ARG: &'static str = "validation-worker";
|
||||
/// CLI Argument to start in validation worker mode.
|
||||
pub const WORKER_ARGS: &[&'static str] = &[WORKER_ARG];
|
||||
|
||||
const LOG_TARGET: &'static str = "parachain::validation-worker";
|
||||
|
||||
mod workspace;
|
||||
|
||||
/// Execution timeout in seconds;
|
||||
#[cfg(debug_assertions)]
|
||||
pub const EXECUTION_TIMEOUT_SEC: u64 = 30;
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
pub const EXECUTION_TIMEOUT_SEC: u64 = 5;
|
||||
|
||||
#[derive(Clone)]
|
||||
struct TaskExecutor(ThreadPool);
|
||||
|
||||
impl TaskExecutor {
|
||||
fn new() -> Result<Self, String> {
|
||||
ThreadPool::new().map_err(|e| e.to_string()).map(Self)
|
||||
}
|
||||
}
|
||||
|
||||
impl SpawnNamed for TaskExecutor {
|
||||
fn spawn_blocking(&self, _: &'static str, future: futures::future::BoxFuture<'static, ()>) {
|
||||
self.0.spawn_ok(future);
|
||||
}
|
||||
|
||||
fn spawn(&self, _: &'static str, future: futures::future::BoxFuture<'static, ()>) {
|
||||
self.0.spawn_ok(future);
|
||||
}
|
||||
}
|
||||
|
||||
/// A pool of hosts.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ValidationPool {
|
||||
hosts: Arc<Vec<Mutex<ValidationHost>>>,
|
||||
}
|
||||
|
||||
const DEFAULT_NUM_HOSTS: usize = 8;
|
||||
|
||||
impl ValidationPool {
|
||||
/// Creates a validation pool with the default configuration.
|
||||
pub fn new() -> ValidationPool {
|
||||
ValidationPool {
|
||||
hosts: Arc::new((0..DEFAULT_NUM_HOSTS).map(|_| Default::default()).collect()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Validate a candidate under the given validation code using the next free validation host.
|
||||
///
|
||||
/// This will fail if the validation code is not a proper parachain validation module.
|
||||
///
|
||||
/// This function will use `std::env::current_exe()` with the arguments that consist of [`WORKER_ARGS`]
|
||||
/// with appended `cache_base_path` (if any).
|
||||
pub fn validate_candidate(
|
||||
&self,
|
||||
validation_code: &[u8],
|
||||
params: ValidationParams,
|
||||
cache_base_path: Option<&str>,
|
||||
) -> Result<ValidationResult, ValidationError> {
|
||||
use std::{iter, borrow::Cow};
|
||||
|
||||
let worker_cli_args = match cache_base_path {
|
||||
Some(cache_base_path) => {
|
||||
let worker_cli_args: Vec<&str> = WORKER_ARGS
|
||||
.into_iter()
|
||||
.cloned()
|
||||
.chain(iter::once(cache_base_path))
|
||||
.collect();
|
||||
Cow::from(worker_cli_args)
|
||||
}
|
||||
None => Cow::from(WORKER_ARGS),
|
||||
};
|
||||
|
||||
self.validate_candidate_custom(
|
||||
validation_code,
|
||||
params,
|
||||
&env::current_exe().map_err(|err| ValidationError::Internal(err.into()))?,
|
||||
&worker_cli_args,
|
||||
)
|
||||
}
|
||||
|
||||
/// Validate a candidate under the given validation code using the next free validation host.
|
||||
///
|
||||
/// This will fail if the validation code is not a proper parachain validation module.
|
||||
///
|
||||
/// This function will use the command and the arguments provided in the function's arguments to run the worker.
|
||||
pub fn validate_candidate_custom(
|
||||
&self,
|
||||
validation_code: &[u8],
|
||||
params: ValidationParams,
|
||||
command: &PathBuf,
|
||||
args: &[&str],
|
||||
) -> Result<ValidationResult, ValidationError> {
|
||||
for host in self.hosts.iter() {
|
||||
if let Some(mut host) = host.try_lock() {
|
||||
return host.validate_candidate(validation_code, params, command, args);
|
||||
}
|
||||
}
|
||||
|
||||
// all workers are busy, just wait for the first one
|
||||
self.hosts[0]
|
||||
.lock()
|
||||
.validate_candidate(validation_code, params, command, args)
|
||||
}
|
||||
}
|
||||
|
||||
/// Validation worker process entry point. Runs a loop waiting for candidates to validate
|
||||
/// and sends back results via shared memory.
|
||||
pub fn run_worker(mem_id: &str, cache_base_path: Option<PathBuf>) -> Result<(), String> {
|
||||
let mut worker_handle = match workspace::open(mem_id) {
|
||||
Err(e) => {
|
||||
debug!(
|
||||
target: LOG_TARGET,
|
||||
"{} Error opening shared memory: {:?}",
|
||||
process::id(),
|
||||
e
|
||||
);
|
||||
return Err(e);
|
||||
}
|
||||
Ok(h) => h,
|
||||
};
|
||||
|
||||
let exit = Arc::new(atomic::AtomicBool::new(false));
|
||||
let task_executor = TaskExecutor::new()?;
|
||||
// spawn parent monitor thread
|
||||
let watch_exit = exit.clone();
|
||||
std::thread::spawn(move || {
|
||||
use std::io::Read;
|
||||
let mut in_data = Vec::new();
|
||||
// pipe terminates when parent process exits
|
||||
std::io::stdin().read_to_end(&mut in_data).ok();
|
||||
debug!(
|
||||
target: LOG_TARGET,
|
||||
"{} Parent process is dead. Exiting",
|
||||
process::id()
|
||||
);
|
||||
exit.store(true, atomic::Ordering::Relaxed);
|
||||
});
|
||||
|
||||
worker_handle.signal_ready()?;
|
||||
|
||||
let executor = super::ExecutorCache::new(cache_base_path);
|
||||
|
||||
loop {
|
||||
if watch_exit.load(atomic::Ordering::Relaxed) {
|
||||
break;
|
||||
}
|
||||
|
||||
debug!(
|
||||
target: LOG_TARGET,
|
||||
"{} Waiting for candidate",
|
||||
process::id()
|
||||
);
|
||||
let work_item = match worker_handle.wait_for_work(3) {
|
||||
Err(workspace::WaitForWorkErr::Wait(e)) => {
|
||||
trace!(
|
||||
target: LOG_TARGET,
|
||||
"{} Timeout waiting for candidate: {:?}",
|
||||
process::id(),
|
||||
e
|
||||
);
|
||||
continue;
|
||||
}
|
||||
Err(workspace::WaitForWorkErr::FailedToDecode(e)) => {
|
||||
return Err(e);
|
||||
}
|
||||
Ok(work_item) => work_item,
|
||||
};
|
||||
|
||||
debug!(target: LOG_TARGET, "{} Processing candidate", process::id());
|
||||
let result = validate_candidate_internal(
|
||||
&executor,
|
||||
work_item.code,
|
||||
work_item.params,
|
||||
task_executor.clone(),
|
||||
);
|
||||
|
||||
debug!(
|
||||
target: LOG_TARGET,
|
||||
"{} Candidate validated: {:?}",
|
||||
process::id(),
|
||||
result
|
||||
);
|
||||
|
||||
let result_header = match result {
|
||||
Ok(r) => workspace::ValidationResultHeader::Ok(r),
|
||||
Err(ValidationError::Internal(e)) => workspace::ValidationResultHeader::Error(
|
||||
workspace::WorkerValidationError::InternalError(e.to_string()),
|
||||
),
|
||||
Err(ValidationError::InvalidCandidate(e)) => workspace::ValidationResultHeader::Error(
|
||||
workspace::WorkerValidationError::ValidationError(e.to_string()),
|
||||
),
|
||||
};
|
||||
worker_handle
|
||||
.report_result(result_header)
|
||||
.map_err(|e| format!("error reporting result: {:?}", e))?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
unsafe impl Send for ValidationHost {}
|
||||
|
||||
#[derive(Default, Debug)]
|
||||
struct ValidationHost {
|
||||
worker: Option<process::Child>,
|
||||
host_handle: Option<workspace::HostHandle>,
|
||||
id: u32,
|
||||
}
|
||||
|
||||
impl Drop for ValidationHost {
|
||||
fn drop(&mut self) {
|
||||
if let Some(ref mut worker) = &mut self.worker {
|
||||
worker.kill().ok();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ValidationHost {
|
||||
fn start_worker(&mut self, cmd: &PathBuf, args: &[&str]) -> Result<(), InternalError> {
|
||||
if let Some(ref mut worker) = self.worker {
|
||||
// Check if still alive
|
||||
if let Ok(None) = worker.try_wait() {
|
||||
// Still running
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
let host_handle =
|
||||
workspace::create().map_err(|msg| InternalError::FailedToCreateSharedMemory(msg))?;
|
||||
|
||||
debug!(
|
||||
target: LOG_TARGET,
|
||||
"Starting worker at {:?} with arguments: {:?} and {:?}",
|
||||
cmd,
|
||||
args,
|
||||
host_handle.id(),
|
||||
);
|
||||
let worker = process::Command::new(cmd)
|
||||
.args(args)
|
||||
.arg(host_handle.id())
|
||||
.stdin(process::Stdio::piped())
|
||||
.spawn()?;
|
||||
self.id = worker.id();
|
||||
self.worker = Some(worker);
|
||||
|
||||
host_handle
|
||||
.wait_until_ready(EXECUTION_TIMEOUT_SEC)
|
||||
.map_err(|e| InternalError::WorkerStartTimeout(format!("{:?}", e)))?;
|
||||
self.host_handle = Some(host_handle);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Validate a candidate under the given validation code.
|
||||
///
|
||||
/// This will fail if the validation code is not a proper parachain validation module.
|
||||
pub fn validate_candidate(
|
||||
&mut self,
|
||||
validation_code: &[u8],
|
||||
params: ValidationParams,
|
||||
binary: &PathBuf,
|
||||
args: &[&str],
|
||||
) -> Result<ValidationResult, ValidationError> {
|
||||
// First, check if need to spawn the child process
|
||||
self.start_worker(binary, args)?;
|
||||
|
||||
let host_handle = self
|
||||
.host_handle
|
||||
.as_mut()
|
||||
.expect("host_handle is always `Some` after `start_worker` completes successfully");
|
||||
|
||||
debug!(target: LOG_TARGET, "{} Signaling candidate", self.id);
|
||||
match host_handle.request_validation(validation_code, params) {
|
||||
Ok(()) => {}
|
||||
Err(workspace::RequestValidationErr::CodeTooLarge { actual, max }) => {
|
||||
return Err(ValidationError::InvalidCandidate(
|
||||
InvalidCandidate::CodeTooLarge(actual, max),
|
||||
));
|
||||
}
|
||||
Err(workspace::RequestValidationErr::ParamsTooLarge { actual, max }) => {
|
||||
return Err(ValidationError::InvalidCandidate(
|
||||
InvalidCandidate::ParamsTooLarge(actual, max),
|
||||
));
|
||||
}
|
||||
Err(workspace::RequestValidationErr::Signal(msg)) => {
|
||||
return Err(ValidationError::Internal(InternalError::FailedToSignal(msg)));
|
||||
}
|
||||
Err(workspace::RequestValidationErr::WriteData(msg)) => {
|
||||
return Err(ValidationError::Internal(InternalError::FailedToWriteData(msg)));
|
||||
}
|
||||
}
|
||||
|
||||
debug!(target: LOG_TARGET, "{} Waiting for results", self.id);
|
||||
let result_header = match host_handle.wait_for_result(EXECUTION_TIMEOUT_SEC) {
|
||||
Ok(inner_result) => inner_result,
|
||||
Err(assumed_timeout) => {
|
||||
debug!(target: LOG_TARGET, "Worker timeout: {:?}", assumed_timeout);
|
||||
if let Some(mut worker) = self.worker.take() {
|
||||
worker.kill().ok();
|
||||
}
|
||||
return Err(ValidationError::InvalidCandidate(InvalidCandidate::Timeout));
|
||||
}
|
||||
};
|
||||
|
||||
match result_header {
|
||||
workspace::ValidationResultHeader::Ok(result) => Ok(result),
|
||||
workspace::ValidationResultHeader::Error(
|
||||
workspace::WorkerValidationError::InternalError(e),
|
||||
) => {
|
||||
debug!(
|
||||
target: LOG_TARGET,
|
||||
"{} Internal validation error: {}", self.id, e
|
||||
);
|
||||
Err(ValidationError::Internal(InternalError::WasmWorker(e)))
|
||||
}
|
||||
workspace::ValidationResultHeader::Error(
|
||||
workspace::WorkerValidationError::ValidationError(e),
|
||||
) => {
|
||||
debug!(
|
||||
target: LOG_TARGET,
|
||||
"{} External validation error: {}", self.id, e
|
||||
);
|
||||
Err(ValidationError::InvalidCandidate(
|
||||
InvalidCandidate::ExternalWasmExecutor(e),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,614 +0,0 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! This module implements a "workspace" - basically a wrapper around a shared memory that
|
||||
//! is used as an IPC channel for communication between the validation host and it's validation
|
||||
//! worker.
|
||||
|
||||
use crate::primitives::{ValidationParams, ValidationResult};
|
||||
use super::LOG_TARGET;
|
||||
use parity_scale_codec::{Decode, Encode};
|
||||
use raw_sync::{
|
||||
events::{Event, EventImpl, EventInit, EventState},
|
||||
Timeout,
|
||||
};
|
||||
use shared_memory::{Shmem, ShmemConf};
|
||||
use std::{
|
||||
error::Error,
|
||||
fmt,
|
||||
io::{Cursor, Write},
|
||||
slice,
|
||||
sync::atomic::AtomicBool,
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
// maximum memory in bytes
|
||||
const MAX_PARAMS_MEM: usize = 16 * 1024 * 1024; // 16 MiB
|
||||
const MAX_CODE_MEM: usize = 16 * 1024 * 1024; // 16 MiB
|
||||
|
||||
/// The size of the shared workspace region. The maximum amount
|
||||
const SHARED_WORKSPACE_SIZE: usize = MAX_PARAMS_MEM + MAX_CODE_MEM + (1024 * 1024);
|
||||
|
||||
/// Params header in shared memory. All offsets should be aligned to WASM page size.
|
||||
#[derive(Encode, Decode, Debug)]
|
||||
struct ValidationHeader {
|
||||
code_size: u64,
|
||||
params_size: u64,
|
||||
}
|
||||
|
||||
/// An error that could happen during validation of a candidate.
|
||||
#[derive(Encode, Decode, Debug, PartialEq, Eq, Clone)]
|
||||
pub enum WorkerValidationError {
|
||||
InternalError(String),
|
||||
ValidationError(String),
|
||||
}
|
||||
|
||||
/// An enum that is used to marshal a validation result in order to pass it through the shared memory.
|
||||
#[derive(Encode, Decode, Debug, PartialEq, Eq, Clone)]
|
||||
pub enum ValidationResultHeader {
|
||||
Ok(ValidationResult),
|
||||
Error(WorkerValidationError),
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
|
||||
enum Mode {
|
||||
Initialize,
|
||||
Attach,
|
||||
}
|
||||
|
||||
fn stringify_err(err: Box<dyn Error>) -> String {
|
||||
format!("{:?}", err)
|
||||
}
|
||||
|
||||
struct Inner {
|
||||
shmem: Shmem,
|
||||
candidate_ready_ev: Box<dyn EventImpl>,
|
||||
result_ready_ev: Box<dyn EventImpl>,
|
||||
worker_ready_ev: Box<dyn EventImpl>,
|
||||
|
||||
/// Flag that indicates that the worker side is attached to this workspace.
|
||||
///
|
||||
/// While there are apparent problems attaching multiple workers to the same workspace, we don't
|
||||
/// need that anyway. So to make our reasoning a little bit simpler just add a flag and check
|
||||
/// it before attaching.
|
||||
attached: *mut AtomicBool,
|
||||
|
||||
/// The number of bytes reserved by the auxilary stuff like events from the beginning of the
|
||||
/// shared memory area.
|
||||
///
|
||||
/// We expect this to be way smaller than the whole shmem size.
|
||||
consumed: usize,
|
||||
}
|
||||
|
||||
impl Inner {
|
||||
fn layout(shmem: Shmem, mode: Mode) -> Self {
|
||||
unsafe {
|
||||
let base_ptr = shmem.as_ptr();
|
||||
let mut consumed = 0;
|
||||
|
||||
let candidate_ready_ev = add_event(base_ptr, &mut consumed, mode);
|
||||
let result_ready_ev = add_event(base_ptr, &mut consumed, mode);
|
||||
let worker_ready_ev = add_event(base_ptr, &mut consumed, mode);
|
||||
|
||||
// The size of AtomicBool is guaranteed to be the same as the bool, however, docs
|
||||
// on the bool primitve doesn't actually state that the in-memory size is equal to 1 byte.
|
||||
//
|
||||
// AtomicBool requires hardware support of 1 byte width of atomic operations though, so
|
||||
// that should be fine.
|
||||
//
|
||||
// We still assert here to be safe than sorry.
|
||||
static_assertions::assert_eq_size!(AtomicBool, u8);
|
||||
// SAFETY: `AtomicBool` is represented by an u8 thus will be happy to take any alignment.
|
||||
let attached = base_ptr.add(consumed) as *mut AtomicBool;
|
||||
consumed += 1;
|
||||
|
||||
let consumed = align_up_to(consumed, 64);
|
||||
|
||||
Self {
|
||||
shmem,
|
||||
attached,
|
||||
consumed,
|
||||
candidate_ready_ev,
|
||||
result_ready_ev,
|
||||
worker_ready_ev,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn as_slice(&self) -> &[u8] {
|
||||
unsafe {
|
||||
let base_ptr = self.shmem.as_ptr().add(self.consumed);
|
||||
let remaining = self.shmem.len() - self.consumed;
|
||||
slice::from_raw_parts(base_ptr, remaining)
|
||||
}
|
||||
}
|
||||
|
||||
fn as_slice_mut(&mut self) -> &mut [u8] {
|
||||
unsafe {
|
||||
let base_ptr = self.shmem.as_ptr().add(self.consumed);
|
||||
let remaining = self.shmem.len() - self.consumed;
|
||||
slice::from_raw_parts_mut(base_ptr, remaining)
|
||||
}
|
||||
}
|
||||
|
||||
/// Mark that this workspace has an attached worker already. Returning `true` means that this
|
||||
/// was the first worker attached.
|
||||
fn declare_exclusive_attached(&self) -> bool {
|
||||
unsafe {
|
||||
// If this succeeded then the value was `false`, thus, we managed to attach exclusively.
|
||||
(&*self.attached)
|
||||
.compare_exchange_weak(
|
||||
false,
|
||||
true,
|
||||
std::sync::atomic::Ordering::SeqCst,
|
||||
std::sync::atomic::Ordering::SeqCst,
|
||||
)
|
||||
.is_ok()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn align_up_to(v: usize, alignment: usize) -> usize {
|
||||
((v + alignment - 1) / alignment) * alignment
|
||||
}
|
||||
|
||||
/// Initializes a new or attaches to an exising event.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// This function should be called with the combination of `base_ptr` and `consumed` so that `base_ptr + consumed`
|
||||
/// points on the memory area that is allocated and accessible.
|
||||
///
|
||||
/// This function should be called only once for the same combination of the `base_ptr + consumed` and the mode.
|
||||
/// Furthermore, this function should be called once for initialization.
|
||||
///
|
||||
/// Specifically, `consumed` should not be modified by the caller, it should be passed as is to this function.
|
||||
unsafe fn add_event(base_ptr: *mut u8, consumed: &mut usize, mode: Mode) -> Box<dyn EventImpl> {
|
||||
// SAFETY: there is no safety proof since the documentation doesn't specify the particular constraints
|
||||
// besides requiring the pointer to be valid. AFAICT, the pointer is valid.
|
||||
let ptr = base_ptr.add(*consumed);
|
||||
|
||||
const EXPECTATION: &str =
|
||||
"given that the preconditions were fulfilled, the creation of the event should succeed";
|
||||
let (ev, used_bytes) = match mode {
|
||||
Mode::Initialize => Event::new(ptr, true).expect(EXPECTATION),
|
||||
Mode::Attach => Event::from_existing(ptr).expect(EXPECTATION),
|
||||
};
|
||||
*consumed += used_bytes;
|
||||
ev
|
||||
}
|
||||
|
||||
/// A message received by the worker that specifies a candidate validation work.
|
||||
pub struct WorkItem<'handle> {
|
||||
pub params: &'handle [u8],
|
||||
pub code: &'handle [u8],
|
||||
}
|
||||
|
||||
/// An error that could be returned from [`WorkerHandle::wait_for_work`].
|
||||
#[derive(Debug)]
|
||||
pub enum WaitForWorkErr {
|
||||
/// An error occured during waiting for work. Typically a timeout.
|
||||
Wait(String),
|
||||
/// An error ocurred when trying to decode the validation request from the host.
|
||||
FailedToDecode(String),
|
||||
}
|
||||
|
||||
/// An error that could be returned from [`WorkerHandle::report_result`].
|
||||
#[derive(Debug)]
|
||||
pub enum ReportResultErr {
|
||||
/// An error occured during signalling to the host that the result is ready.
|
||||
Signal(String),
|
||||
}
|
||||
|
||||
/// A worker side handle to a workspace.
|
||||
pub struct WorkerHandle {
|
||||
inner: Inner,
|
||||
}
|
||||
|
||||
impl WorkerHandle {
|
||||
/// Signals to the validation host that this worker is ready to accept new work requests.
|
||||
pub fn signal_ready(&self) -> Result<(), String> {
|
||||
self.inner
|
||||
.worker_ready_ev
|
||||
.set(EventState::Signaled)
|
||||
.map_err(stringify_err)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Waits until a new piece of work. Returns `Err` if the work doesn't come within the given
|
||||
/// timeout.
|
||||
pub fn wait_for_work(&mut self, timeout_secs: u64) -> Result<WorkItem, WaitForWorkErr> {
|
||||
self.inner
|
||||
.candidate_ready_ev
|
||||
.wait(Timeout::Val(Duration::from_secs(timeout_secs)))
|
||||
.map_err(stringify_err)
|
||||
.map_err(WaitForWorkErr::Wait)?;
|
||||
|
||||
let mut cur = self.inner.as_slice();
|
||||
let header = ValidationHeader::decode(&mut cur)
|
||||
.map_err(|e| format!("{:?}", e))
|
||||
.map_err(WaitForWorkErr::FailedToDecode)?;
|
||||
|
||||
let (params, cur) = cur.split_at(header.params_size as usize);
|
||||
let (code, _) = cur.split_at(header.code_size as usize);
|
||||
|
||||
Ok(WorkItem { params, code })
|
||||
}
|
||||
|
||||
/// Report back the result of validation.
|
||||
pub fn report_result(&mut self, result: ValidationResultHeader) -> Result<(), ReportResultErr> {
|
||||
let mut cur = self.inner.as_slice_mut();
|
||||
result.encode_to(&mut cur);
|
||||
self.inner
|
||||
.result_ready_ev
|
||||
.set(EventState::Signaled)
|
||||
.map_err(stringify_err)
|
||||
.map_err(ReportResultErr::Signal)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// An error that could be returned from [`HostHandle::wait_until_ready`].
|
||||
#[derive(Debug)]
|
||||
pub enum WaitUntilReadyErr {
|
||||
/// An error occured during waiting for the signal from the worker.
|
||||
Wait(String),
|
||||
}
|
||||
|
||||
/// An error that could be returned from [`HostHandle::request_validation`].
|
||||
#[derive(Debug)]
|
||||
pub enum RequestValidationErr {
|
||||
/// The code passed exceeds the maximum allowed limit.
|
||||
CodeTooLarge { actual: usize, max: usize },
|
||||
/// The call parameters exceed the maximum allowed limit.
|
||||
ParamsTooLarge { actual: usize, max: usize },
|
||||
/// An error occured during writing either the code or the call params (the inner string specifies which)
|
||||
WriteData(&'static str),
|
||||
/// An error occured during signalling that the request is ready.
|
||||
Signal(String),
|
||||
}
|
||||
|
||||
/// An error that could be returned from [`HostHandle::wait_for_result`]
|
||||
#[derive(Debug)]
|
||||
pub enum WaitForResultErr {
|
||||
/// A error happened during waiting for the signal. Typically a timeout.
|
||||
Wait(String),
|
||||
/// Failed to decode the result header sent by the worker.
|
||||
HeaderDecodeErr(String),
|
||||
}
|
||||
|
||||
/// A worker side handle to a workspace.
|
||||
pub struct HostHandle {
|
||||
inner: Inner,
|
||||
}
|
||||
|
||||
impl fmt::Debug for HostHandle {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "HostHandle")
|
||||
}
|
||||
}
|
||||
|
||||
impl HostHandle {
|
||||
/// Returns the OS specific ID for this workspace.
|
||||
pub fn id(&self) -> &str {
|
||||
self.inner.shmem.get_os_id()
|
||||
}
|
||||
|
||||
/// Wait until the worker is online and ready for accepting validation requests.
|
||||
pub fn wait_until_ready(&self, timeout_secs: u64) -> Result<(), WaitUntilReadyErr> {
|
||||
self.inner
|
||||
.worker_ready_ev
|
||||
.wait(Timeout::Val(Duration::from_secs(timeout_secs)))
|
||||
.map_err(stringify_err)
|
||||
.map_err(WaitUntilReadyErr::Wait)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Request validation with the given code and parameters.
|
||||
pub fn request_validation(
|
||||
&mut self,
|
||||
code: &[u8],
|
||||
params: ValidationParams,
|
||||
) -> Result<(), RequestValidationErr> {
|
||||
if code.len() > MAX_CODE_MEM {
|
||||
return Err(RequestValidationErr::CodeTooLarge {
|
||||
actual: code.len(),
|
||||
max: MAX_CODE_MEM,
|
||||
});
|
||||
}
|
||||
|
||||
let params = params.encode();
|
||||
if params.len() > MAX_PARAMS_MEM {
|
||||
return Err(RequestValidationErr::ParamsTooLarge {
|
||||
actual: params.len(),
|
||||
max: MAX_PARAMS_MEM,
|
||||
});
|
||||
}
|
||||
|
||||
let mut cur = Cursor::new(self.inner.as_slice_mut());
|
||||
ValidationHeader {
|
||||
code_size: code.len() as u64,
|
||||
params_size: params.len() as u64,
|
||||
}
|
||||
.encode_to(&mut cur);
|
||||
cur.write_all(¶ms)
|
||||
.map_err(|_| RequestValidationErr::WriteData("params"))?;
|
||||
cur.write_all(code)
|
||||
.map_err(|_| RequestValidationErr::WriteData("code"))?;
|
||||
|
||||
self.inner
|
||||
.candidate_ready_ev
|
||||
.set(EventState::Signaled)
|
||||
.map_err(stringify_err)
|
||||
.map_err(RequestValidationErr::Signal)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Wait for the validation result from the worker with the given timeout.
|
||||
///
|
||||
/// Returns `Ok` if the response was received within the deadline or error otherwise. An error
|
||||
/// could also occur because of failing decoding the result from the worker. Returning
|
||||
/// `Ok` doesn't mean that the candidate was successfully validated though, for that the client
|
||||
/// needs to inspect the returned validation result header.
|
||||
pub fn wait_for_result(
|
||||
&self,
|
||||
execution_timeout: u64,
|
||||
) -> Result<ValidationResultHeader, WaitForResultErr> {
|
||||
self.inner
|
||||
.result_ready_ev
|
||||
.wait(Timeout::Val(Duration::from_secs(execution_timeout)))
|
||||
.map_err(|e| WaitForResultErr::Wait(format!("{:?}", e)))?;
|
||||
|
||||
let mut cur = self.inner.as_slice();
|
||||
let header = ValidationResultHeader::decode(&mut cur)
|
||||
.map_err(|e| WaitForResultErr::HeaderDecodeErr(format!("{:?}", e)))?;
|
||||
Ok(header)
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new workspace and return a handle to it.
|
||||
pub fn create() -> Result<HostHandle, String> {
|
||||
let shmem = ShmemConf::new()
|
||||
.size(SHARED_WORKSPACE_SIZE)
|
||||
.create()
|
||||
.map_err(|e| format!("Error creating shared memory: {:?}", e))?;
|
||||
|
||||
Ok(HostHandle {
|
||||
inner: Inner::layout(shmem, Mode::Initialize),
|
||||
})
|
||||
}
|
||||
|
||||
/// Open a workspace with the given `id`.
|
||||
///
|
||||
/// You can attach only once to a single workspace.
|
||||
pub fn open(id: &str) -> Result<WorkerHandle, String> {
|
||||
let shmem = ShmemConf::new()
|
||||
.os_id(id)
|
||||
.open()
|
||||
.map_err(|e| format!("Error opening shared memory: {:?}", e))?;
|
||||
|
||||
#[cfg(unix)]
|
||||
unlink_shmem(&id);
|
||||
|
||||
let inner = Inner::layout(shmem, Mode::Attach);
|
||||
if !inner.declare_exclusive_attached() {
|
||||
return Err(format!("The workspace has been already attached to"));
|
||||
}
|
||||
|
||||
return Ok(WorkerHandle { inner });
|
||||
|
||||
#[cfg(unix)]
|
||||
fn unlink_shmem(shmem_id: &str) {
|
||||
// Unlink the shmem. Unlinking it from the filesystem will make it unaccessible for further
|
||||
// opening, however, the kernel will still let the object live until the last reference dies
|
||||
// out.
|
||||
//
|
||||
// There is still a chance that the shm stays on the fs, but that's a highly unlikely case
|
||||
// that we don't address at this time.
|
||||
|
||||
// shared-memory doesn't return file path to the shmem if get_flink_path is called, so we
|
||||
// resort to `shm_unlink`.
|
||||
//
|
||||
// Additionally, even thouygh `fs::remove_file` is said to use `unlink` we still avoid relying on it,
|
||||
// because the stdlib doesn't actually provide any gurantees on what syscalls will be called.
|
||||
// (Not sure, what alternative it has though).
|
||||
unsafe {
|
||||
// must be in a local var in order to be not deallocated.
|
||||
let shmem_id_cstr =
|
||||
std::ffi::CString::new(shmem_id).expect("the shmmem id cannot have NUL in it; qed");
|
||||
|
||||
if libc::shm_unlink(shmem_id_cstr.as_ptr()) == -1 {
|
||||
// failed to remove the shmem file nothing we can do ¯\_(ツ)_/¯
|
||||
log::warn!(
|
||||
target: LOG_TARGET,
|
||||
"failed to remove the shmem with id {}",
|
||||
shmem_id,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use polkadot_core_primitives::OutboundHrmpMessage;
|
||||
|
||||
use crate::primitives::BlockData;
|
||||
|
||||
use super::*;
|
||||
use std::thread;
|
||||
|
||||
#[test]
|
||||
fn wait_until_ready() {
|
||||
let host = create().unwrap();
|
||||
|
||||
let worker_handle = thread::spawn({
|
||||
let id = host.id().to_string();
|
||||
move || {
|
||||
let worker = open(&id).unwrap();
|
||||
worker.signal_ready().unwrap();
|
||||
}
|
||||
});
|
||||
|
||||
host.wait_until_ready(1).unwrap();
|
||||
|
||||
worker_handle.join().unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn wait_until_ready_timeout() {
|
||||
let host = create().unwrap();
|
||||
|
||||
let _worker_handle = thread::spawn({
|
||||
let id = host.id().to_string();
|
||||
move || {
|
||||
let _worker = open(&id).unwrap();
|
||||
}
|
||||
});
|
||||
|
||||
assert!(matches!(
|
||||
host.wait_until_ready(1),
|
||||
Err(WaitUntilReadyErr::Wait(_))
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn open_junk_id() {
|
||||
assert!(open("").is_err());
|
||||
assert!(open("non_existent").is_err());
|
||||
assert!(open("☭").is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn attach_twice() {
|
||||
let host = create().unwrap();
|
||||
|
||||
thread::spawn({
|
||||
let id = host.id().to_string();
|
||||
move || {
|
||||
let _worker1 = open(&id).unwrap();
|
||||
assert!(open(&id).is_err());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn validation_works() {
|
||||
let mut host = create().unwrap();
|
||||
|
||||
let worker_handle = thread::spawn({
|
||||
let id = host.id().to_string();
|
||||
move || {
|
||||
let mut worker = open(&id).unwrap();
|
||||
worker.signal_ready().unwrap();
|
||||
|
||||
let work = worker.wait_for_work(3).unwrap();
|
||||
assert_eq!(work.code, b"\0asm\01\00\00\00");
|
||||
|
||||
worker
|
||||
.report_result(ValidationResultHeader::Ok(ValidationResult {
|
||||
head_data: Default::default(),
|
||||
new_validation_code: None,
|
||||
upward_messages: vec![],
|
||||
horizontal_messages: vec![],
|
||||
processed_downward_messages: 322,
|
||||
hrmp_watermark: 0,
|
||||
}))
|
||||
.unwrap();
|
||||
}
|
||||
});
|
||||
|
||||
host.wait_until_ready(1).unwrap();
|
||||
host.request_validation(
|
||||
b"\0asm\01\00\00\00",
|
||||
ValidationParams {
|
||||
parent_head: Default::default(),
|
||||
block_data: BlockData(b"hello world".to_vec()),
|
||||
relay_parent_number: 228,
|
||||
relay_parent_storage_root: Default::default(),
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
match host.wait_for_result(3).unwrap() {
|
||||
ValidationResultHeader::Ok(r) => {
|
||||
assert_eq!(r.processed_downward_messages, 322);
|
||||
}
|
||||
_ => panic!(),
|
||||
}
|
||||
|
||||
worker_handle.join().unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn works_with_jumbo_sized_params() {
|
||||
let mut host = create().unwrap();
|
||||
|
||||
let jumbo_code = vec![0x42; 16 * 1024 * 104];
|
||||
let fat_pov = vec![0x33; 16 * 1024 * 104];
|
||||
let big_params = ValidationParams {
|
||||
parent_head: Default::default(),
|
||||
block_data: BlockData(fat_pov),
|
||||
relay_parent_number: 228,
|
||||
relay_parent_storage_root: Default::default(),
|
||||
// If modifying please make sure that this has a big size.
|
||||
};
|
||||
let plump_result = ValidationResultHeader::Ok(ValidationResult {
|
||||
head_data: Default::default(),
|
||||
new_validation_code: Some(jumbo_code.clone().into()),
|
||||
processed_downward_messages: 322,
|
||||
hrmp_watermark: 0,
|
||||
// We don't know about the limits here. Just make sure that those are reasonably big.
|
||||
upward_messages: fill(|| vec![0x99; 8 * 1024], 64),
|
||||
horizontal_messages: fill(
|
||||
|| OutboundHrmpMessage {
|
||||
recipient: 1.into(),
|
||||
data: vec![0x11; 8 * 1024],
|
||||
},
|
||||
64,
|
||||
),
|
||||
// If modifying please make sure that this has a big size.
|
||||
});
|
||||
|
||||
let _worker_handle = thread::spawn({
|
||||
let id = host.id().to_string();
|
||||
let jumbo_code = jumbo_code.clone();
|
||||
let big_params = big_params.clone();
|
||||
let plump_result = plump_result.clone();
|
||||
move || {
|
||||
let mut worker = open(&id).unwrap();
|
||||
worker.signal_ready().unwrap();
|
||||
|
||||
let work = worker.wait_for_work(3).unwrap();
|
||||
assert_eq!(work.code, &jumbo_code);
|
||||
assert_eq!(work.params, &big_params.encode());
|
||||
|
||||
worker.report_result(plump_result).unwrap();
|
||||
}
|
||||
});
|
||||
|
||||
host.wait_until_ready(1).unwrap();
|
||||
host.request_validation(&jumbo_code, big_params).unwrap();
|
||||
|
||||
assert_eq!(host.wait_for_result(3).unwrap(), plump_result);
|
||||
|
||||
fn fill<T, F: Fn() -> T>(f: F, times: usize) -> Vec<T> {
|
||||
std::iter::repeat_with(f).take(times).collect()
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -9,6 +9,10 @@ edition = "2018"
|
||||
name = "adder-collator"
|
||||
path = "src/main.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "adder_collator_puppet_worker"
|
||||
path = "bin/puppet_worker.rs"
|
||||
|
||||
[dependencies]
|
||||
parity-scale-codec = { version = "2.0.0", default-features = false, features = ["derive"] }
|
||||
futures = "0.3.12"
|
||||
@@ -28,6 +32,11 @@ sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" }
|
||||
sc-authority-discovery = { git = "https://github.com/paritytech/substrate", branch = "master" }
|
||||
sc-service = { git = "https://github.com/paritytech/substrate", branch = "master" }
|
||||
|
||||
# This one is tricky. Even though it is not used directly by the collator, we still need it for the
|
||||
# `puppet_worker` binary, which is required for the integration test. However, this shouldn't be
|
||||
# a big problem since it is used transitively anyway.
|
||||
polkadot-node-core-pvf = { path = "../../../../node/core/pvf" }
|
||||
|
||||
[dev-dependencies]
|
||||
polkadot-parachain = { path = "../../.." }
|
||||
polkadot-test-service = { path = "../../../../node/test/service" }
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
// Copyright 2021 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
polkadot_node_core_pvf::decl_puppet_worker_main!();
|
||||
@@ -233,7 +233,7 @@ mod tests {
|
||||
use super::*;
|
||||
|
||||
use futures::executor::block_on;
|
||||
use polkadot_parachain::{primitives::ValidationParams, wasm_executor::IsolationStrategy};
|
||||
use polkadot_parachain::{primitives::{ValidationParams, ValidationResult}};
|
||||
use polkadot_primitives::v1::PersistedValidationData;
|
||||
|
||||
#[test]
|
||||
@@ -268,18 +268,19 @@ mod tests {
|
||||
parent_head: HeadData,
|
||||
collation: Collation,
|
||||
) {
|
||||
let ret = polkadot_parachain::wasm_executor::validate_candidate(
|
||||
use polkadot_node_core_pvf::testing::validate_candidate;
|
||||
|
||||
let ret_buf = validate_candidate(
|
||||
collator.validation_code(),
|
||||
ValidationParams {
|
||||
&ValidationParams {
|
||||
parent_head: parent_head.encode().into(),
|
||||
block_data: collation.proof_of_validity.block_data,
|
||||
relay_parent_number: 1,
|
||||
relay_parent_storage_root: Default::default(),
|
||||
},
|
||||
&IsolationStrategy::InProcess,
|
||||
sp_core::testing::TaskExecutor::new(),
|
||||
}.encode(),
|
||||
)
|
||||
.unwrap();
|
||||
let ret = ValidationResult::decode(&mut &ret_buf[..]).unwrap();
|
||||
|
||||
let new_head = HeadData::decode(&mut &ret.head_data.0[..]).unwrap();
|
||||
assert_eq!(
|
||||
|
||||
@@ -17,6 +17,9 @@
|
||||
//! Integration test that ensures that we can build and include parachain
|
||||
//! blocks of the adder parachain.
|
||||
|
||||
const PUPPET_EXE: &str = env!("CARGO_BIN_EXE_adder_collator_puppet_worker");
|
||||
|
||||
// If this test is failing, make sure to run all tests with the `real-overseer` feature being enabled.
|
||||
#[substrate_test_utils::test]
|
||||
async fn collating_using_adder_collator(task_executor: sc_service::TaskExecutor) {
|
||||
use sp_keyring::AccountKeyring::*;
|
||||
@@ -30,7 +33,12 @@ async fn collating_using_adder_collator(task_executor: sc_service::TaskExecutor)
|
||||
let para_id = ParaId::from(100);
|
||||
|
||||
// start alice
|
||||
let alice = polkadot_test_service::run_validator_node(task_executor.clone(), Alice, || {}, vec![]);
|
||||
let alice = polkadot_test_service::run_validator_node(
|
||||
task_executor.clone(),
|
||||
Alice, || {},
|
||||
vec![],
|
||||
Some(PUPPET_EXE.into()),
|
||||
);
|
||||
|
||||
// start bob
|
||||
let bob = polkadot_test_service::run_validator_node(
|
||||
@@ -38,6 +46,7 @@ async fn collating_using_adder_collator(task_executor: sc_service::TaskExecutor)
|
||||
Bob,
|
||||
|| {},
|
||||
vec![alice.addr.clone()],
|
||||
Some(PUPPET_EXE.into()),
|
||||
);
|
||||
|
||||
let collator = test_parachain_adder_collator::Collator::new();
|
||||
|
||||
@@ -1,153 +0,0 @@
|
||||
// Copyright 2017-2020 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Basic parachain that adds a number as part of its state.
|
||||
|
||||
const WORKER_ARGS_TEST: &[&'static str] = &["--nocapture", "validation_worker"];
|
||||
|
||||
use parachain::{
|
||||
primitives::{
|
||||
RelayChainBlockNumber,
|
||||
BlockData as GenericBlockData,
|
||||
HeadData as GenericHeadData,
|
||||
ValidationParams,
|
||||
},
|
||||
wasm_executor::{ValidationPool, IsolationStrategy}
|
||||
};
|
||||
use parity_scale_codec::{Decode, Encode};
|
||||
use adder::{HeadData, BlockData, hash_state};
|
||||
|
||||
fn isolation_strategy() -> IsolationStrategy {
|
||||
IsolationStrategy::ExternalProcessCustomHost {
|
||||
pool: ValidationPool::new(),
|
||||
binary: std::env::current_exe().unwrap(),
|
||||
args: WORKER_ARGS_TEST.iter().map(|x| x.to_string()).collect(),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn execute_good_on_parent_with_inprocess_validation() {
|
||||
let isolation_strategy = IsolationStrategy::InProcess;
|
||||
execute_good_on_parent(isolation_strategy);
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn execute_good_on_parent_with_external_process_validation() {
|
||||
let isolation_strategy = isolation_strategy();
|
||||
execute_good_on_parent(isolation_strategy);
|
||||
}
|
||||
|
||||
fn execute_good_on_parent(isolation_strategy: IsolationStrategy) {
|
||||
let parent_head = HeadData {
|
||||
number: 0,
|
||||
parent_hash: [0; 32],
|
||||
post_state: hash_state(0),
|
||||
};
|
||||
|
||||
let block_data = BlockData {
|
||||
state: 0,
|
||||
add: 512,
|
||||
};
|
||||
|
||||
let ret = parachain::wasm_executor::validate_candidate(
|
||||
adder::wasm_binary_unwrap(),
|
||||
ValidationParams {
|
||||
parent_head: GenericHeadData(parent_head.encode()),
|
||||
block_data: GenericBlockData(block_data.encode()),
|
||||
relay_parent_number: 1,
|
||||
relay_parent_storage_root: Default::default(),
|
||||
},
|
||||
&isolation_strategy,
|
||||
sp_core::testing::TaskExecutor::new(),
|
||||
).unwrap();
|
||||
|
||||
let new_head = HeadData::decode(&mut &ret.head_data.0[..]).unwrap();
|
||||
|
||||
assert_eq!(new_head.number, 1);
|
||||
assert_eq!(new_head.parent_hash, parent_head.hash());
|
||||
assert_eq!(new_head.post_state, hash_state(512));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn execute_good_chain_on_parent() {
|
||||
let mut number = 0;
|
||||
let mut parent_hash = [0; 32];
|
||||
let mut last_state = 0;
|
||||
let isolation_strategy = isolation_strategy();
|
||||
|
||||
for add in 0..10 {
|
||||
let parent_head = HeadData {
|
||||
number,
|
||||
parent_hash,
|
||||
post_state: hash_state(last_state),
|
||||
};
|
||||
|
||||
let block_data = BlockData {
|
||||
state: last_state,
|
||||
add,
|
||||
};
|
||||
|
||||
let ret = parachain::wasm_executor::validate_candidate(
|
||||
adder::wasm_binary_unwrap(),
|
||||
ValidationParams {
|
||||
parent_head: GenericHeadData(parent_head.encode()),
|
||||
block_data: GenericBlockData(block_data.encode()),
|
||||
relay_parent_number: number as RelayChainBlockNumber + 1,
|
||||
relay_parent_storage_root: Default::default(),
|
||||
},
|
||||
&isolation_strategy,
|
||||
sp_core::testing::TaskExecutor::new(),
|
||||
).unwrap();
|
||||
|
||||
let new_head = HeadData::decode(&mut &ret.head_data.0[..]).unwrap();
|
||||
|
||||
assert_eq!(new_head.number, number + 1);
|
||||
assert_eq!(new_head.parent_hash, parent_head.hash());
|
||||
assert_eq!(new_head.post_state, hash_state(last_state + add));
|
||||
|
||||
number += 1;
|
||||
parent_hash = new_head.hash();
|
||||
last_state += add;
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn execute_bad_on_parent() {
|
||||
let isolation_strategy = isolation_strategy();
|
||||
|
||||
let parent_head = HeadData {
|
||||
number: 0,
|
||||
parent_hash: [0; 32],
|
||||
post_state: hash_state(0),
|
||||
};
|
||||
|
||||
let block_data = BlockData {
|
||||
state: 256, // start state is wrong.
|
||||
add: 256,
|
||||
};
|
||||
|
||||
let _ret = parachain::wasm_executor::validate_candidate(
|
||||
adder::wasm_binary_unwrap(),
|
||||
ValidationParams {
|
||||
parent_head: GenericHeadData(parent_head.encode()),
|
||||
block_data: GenericBlockData(block_data.encode()),
|
||||
relay_parent_number: 1,
|
||||
relay_parent_storage_root: Default::default(),
|
||||
},
|
||||
&isolation_strategy,
|
||||
sp_core::testing::TaskExecutor::new(),
|
||||
).unwrap_err();
|
||||
}
|
||||
@@ -1,94 +0,0 @@
|
||||
// Copyright 2019-2020 Parity Technologies (UK) Ltd.
|
||||
// This file is part of Polkadot.
|
||||
|
||||
// Polkadot is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// Polkadot is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Basic parachain that adds a number as part of its state.
|
||||
|
||||
const WORKER_ARGS_TEST: &[&'static str] = &["--nocapture", "validation_worker"];
|
||||
|
||||
use crate::adder;
|
||||
use parachain::{
|
||||
primitives::{BlockData, ValidationParams},
|
||||
wasm_executor::{ValidationError, InvalidCandidate, EXECUTION_TIMEOUT_SEC, IsolationStrategy, ValidationPool},
|
||||
};
|
||||
|
||||
fn isolation_strategy() -> IsolationStrategy {
|
||||
IsolationStrategy::ExternalProcessCustomHost {
|
||||
pool: ValidationPool::new(),
|
||||
binary: std::env::current_exe().unwrap(),
|
||||
args: WORKER_ARGS_TEST.iter().map(|x| x.to_string()).collect(),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn terminates_on_timeout() {
|
||||
let isolation_strategy = isolation_strategy();
|
||||
|
||||
let result = parachain::wasm_executor::validate_candidate(
|
||||
halt::wasm_binary_unwrap(),
|
||||
ValidationParams {
|
||||
block_data: BlockData(Vec::new()),
|
||||
parent_head: Default::default(),
|
||||
relay_parent_number: 1,
|
||||
relay_parent_storage_root: Default::default(),
|
||||
},
|
||||
&isolation_strategy,
|
||||
sp_core::testing::TaskExecutor::new(),
|
||||
);
|
||||
match result {
|
||||
Err(ValidationError::InvalidCandidate(InvalidCandidate::Timeout)) => {},
|
||||
r => panic!("{:?}", r),
|
||||
}
|
||||
|
||||
// check that another parachain can validate normaly
|
||||
adder::execute_good_on_parent_with_external_process_validation();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parallel_execution() {
|
||||
let isolation_strategy = isolation_strategy();
|
||||
let isolation_strategy_clone = isolation_strategy.clone();
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
let thread = std::thread::spawn(move ||
|
||||
parachain::wasm_executor::validate_candidate(
|
||||
halt::wasm_binary_unwrap(),
|
||||
ValidationParams {
|
||||
block_data: BlockData(Vec::new()),
|
||||
parent_head: Default::default(),
|
||||
relay_parent_number: 1,
|
||||
relay_parent_storage_root: Default::default(),
|
||||
},
|
||||
&isolation_strategy,
|
||||
sp_core::testing::TaskExecutor::new(),
|
||||
).ok());
|
||||
let _ = parachain::wasm_executor::validate_candidate(
|
||||
halt::wasm_binary_unwrap(),
|
||||
ValidationParams {
|
||||
block_data: BlockData(Vec::new()),
|
||||
parent_head: Default::default(),
|
||||
relay_parent_storage_root: Default::default(),
|
||||
relay_parent_number: 1,
|
||||
},
|
||||
&isolation_strategy_clone,
|
||||
sp_core::testing::TaskExecutor::new(),
|
||||
);
|
||||
thread.join().unwrap();
|
||||
// total time should be < 2 x EXECUTION_TIMEOUT_SEC
|
||||
assert!(
|
||||
std::time::Instant::now().duration_since(start)
|
||||
< std::time::Duration::from_secs(EXECUTION_TIMEOUT_SEC * 2)
|
||||
);
|
||||
}
|
||||
Reference in New Issue
Block a user