mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-05-31 11:01:01 +00:00
PVF: Fix unshare "no such file or directory" error (#2426)
This commit is contained in:
@@ -29,7 +29,7 @@ test-linux-stable:
|
||||
--locked \
|
||||
--release \
|
||||
--no-fail-fast \
|
||||
--features try-runtime,experimental \
|
||||
--features try-runtime,experimental,ci-only-tests \
|
||||
--partition count:${CI_NODE_INDEX}/${CI_NODE_TOTAL}
|
||||
# Upload tests results to Elasticsearch
|
||||
- echo "Upload test results to Elasticsearch"
|
||||
|
||||
Generated
+1
@@ -12591,6 +12591,7 @@ dependencies = [
|
||||
"rand 0.8.5",
|
||||
"rococo-runtime",
|
||||
"rusty-fork",
|
||||
"sc-sysinfo",
|
||||
"slotmap",
|
||||
"sp-core",
|
||||
"sp-maybe-compressed-blob",
|
||||
|
||||
@@ -54,6 +54,7 @@ halt = { package = "test-parachain-halt", path = "../../../parachain/test-parach
|
||||
[target.'cfg(target_os = "linux")'.dev-dependencies]
|
||||
procfs = "0.16.0"
|
||||
rusty-fork = "0.3.0"
|
||||
sc-sysinfo = { path = "../../../../substrate/client/sysinfo" }
|
||||
|
||||
[[bench]]
|
||||
name = "host_prepare_rococo_runtime"
|
||||
|
||||
@@ -47,7 +47,7 @@ pub mod tests {
|
||||
}
|
||||
|
||||
/// Status of security features on the current system.
|
||||
#[derive(Debug, Clone, Default)]
|
||||
#[derive(Debug, Clone, Default, PartialEq, Eq)]
|
||||
pub struct SecurityStatus {
|
||||
/// Whether the landlock features we use are fully available on this system.
|
||||
pub can_enable_landlock: bool,
|
||||
|
||||
@@ -499,8 +499,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn remove_stale_cache_on_startup() {
|
||||
let cache_dir = crate::worker_intf::tmppath("test-cache").await.unwrap();
|
||||
fs::create_dir_all(&cache_dir).unwrap();
|
||||
let cache_dir = tempfile::Builder::new().prefix("test-cache-").tempdir().unwrap();
|
||||
|
||||
// invalid prefix
|
||||
create_rand_artifact(&cache_dir, "");
|
||||
@@ -529,7 +528,7 @@ mod tests {
|
||||
|
||||
assert_eq!(fs::read_dir(&cache_dir).unwrap().count(), 7);
|
||||
|
||||
let artifacts = Artifacts::new_and_prune(&cache_dir).await;
|
||||
let artifacts = Artifacts::new_and_prune(cache_dir.path()).await;
|
||||
|
||||
assert_eq!(fs::read_dir(&cache_dir).unwrap().count(), 1);
|
||||
assert_eq!(artifacts.len(), 1);
|
||||
|
||||
@@ -278,7 +278,7 @@ where
|
||||
// Cheaply create a hard link to the artifact. The artifact is always at a known location in the
|
||||
// worker cache, and the child can't access any other artifacts or gain any information from the
|
||||
// original filename.
|
||||
let link_path = worker_dir::execute_artifact(&worker_dir.path);
|
||||
let link_path = worker_dir::execute_artifact(worker_dir.path());
|
||||
if let Err(err) = tokio::fs::hard_link(artifact_path, link_path).await {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
@@ -292,7 +292,7 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
let worker_dir_path = worker_dir.path.clone();
|
||||
let worker_dir_path = worker_dir.path().to_owned();
|
||||
let outcome = f(worker_dir).await;
|
||||
|
||||
// Try to clear the worker dir.
|
||||
|
||||
@@ -24,7 +24,7 @@ use crate::{
|
||||
artifacts::{ArtifactId, ArtifactPathId, ArtifactState, Artifacts},
|
||||
execute::{self, PendingExecutionRequest},
|
||||
metrics::Metrics,
|
||||
prepare, security, Priority, ValidationError, LOG_TARGET,
|
||||
prepare, security, Priority, SecurityStatus, ValidationError, LOG_TARGET,
|
||||
};
|
||||
use always_assert::never;
|
||||
use futures::{
|
||||
@@ -70,6 +70,8 @@ pub(crate) type PrecheckResultSender = oneshot::Sender<PrecheckResult>;
|
||||
#[derive(Clone)]
|
||||
pub struct ValidationHost {
|
||||
to_host_tx: mpsc::Sender<ToHost>,
|
||||
/// Available security features, detected by the host during startup.
|
||||
pub security_status: SecurityStatus,
|
||||
}
|
||||
|
||||
impl ValidationHost {
|
||||
@@ -216,7 +218,7 @@ pub async fn start(
|
||||
|
||||
let (to_host_tx, to_host_rx) = mpsc::channel(10);
|
||||
|
||||
let validation_host = ValidationHost { to_host_tx };
|
||||
let validation_host = ValidationHost { to_host_tx, security_status: security_status.clone() };
|
||||
|
||||
let (to_prepare_pool, from_prepare_pool, run_prepare_pool) = prepare::start_pool(
|
||||
metrics.clone(),
|
||||
@@ -978,7 +980,8 @@ pub(crate) mod tests {
|
||||
|
||||
fn host_handle(&mut self) -> ValidationHost {
|
||||
let to_host_tx = self.to_host_tx.take().unwrap();
|
||||
ValidationHost { to_host_tx }
|
||||
let security_status = Default::default();
|
||||
ValidationHost { to_host_tx, security_status }
|
||||
}
|
||||
|
||||
async fn poll_and_recv_result<T>(&mut self, result_rx: oneshot::Receiver<T>) -> T
|
||||
|
||||
@@ -318,7 +318,7 @@ where
|
||||
{
|
||||
// Create the tmp file here so that the child doesn't need any file creation rights. This will
|
||||
// be cleared at the end of this function.
|
||||
let tmp_file = worker_dir::prepare_tmp_artifact(&worker_dir.path);
|
||||
let tmp_file = worker_dir::prepare_tmp_artifact(worker_dir.path());
|
||||
if let Err(err) = tokio::fs::File::create(&tmp_file).await {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
@@ -333,7 +333,7 @@ where
|
||||
}
|
||||
};
|
||||
|
||||
let worker_dir_path = worker_dir.path.clone();
|
||||
let worker_dir_path = worker_dir.path().to_owned();
|
||||
let outcome = f(tmp_file, stream, worker_dir).await;
|
||||
|
||||
// Try to clear the worker dir.
|
||||
|
||||
@@ -28,7 +28,7 @@ const SECURE_MODE_ANNOUNCEMENT: &'static str =
|
||||
|
||||
/// Run checks for supported security features.
|
||||
///
|
||||
/// # Return
|
||||
/// # Returns
|
||||
///
|
||||
/// Returns the set of security features that we were able to enable. If an error occurs while
|
||||
/// enabling a security feature we set the corresponding status to `false`.
|
||||
@@ -158,18 +158,15 @@ async fn check_can_unshare_user_namespace_and_change_root(
|
||||
) -> SecureModeResult {
|
||||
cfg_if::cfg_if! {
|
||||
if #[cfg(target_os = "linux")] {
|
||||
let cache_dir_tempdir =
|
||||
crate::worker_intf::tmppath_in("check-can-unshare", cache_path)
|
||||
.await
|
||||
.map_err(
|
||||
|err|
|
||||
SecureModeError::CannotUnshareUserNamespaceAndChangeRoot(
|
||||
format!("could not create a temporary directory in {:?}: {}", cache_path, err)
|
||||
)
|
||||
)?;
|
||||
let cache_dir_tempdir = tempfile::Builder::new()
|
||||
.prefix("check-can-unshare-")
|
||||
.tempdir_in(cache_path)
|
||||
.map_err(|err| SecureModeError::CannotUnshareUserNamespaceAndChangeRoot(
|
||||
format!("could not create a temporary directory in {:?}: {}", cache_path, err)
|
||||
))?;
|
||||
match tokio::process::Command::new(prepare_worker_program_path)
|
||||
.arg("--check-can-unshare-user-namespace-and-change-root")
|
||||
.arg(cache_dir_tempdir)
|
||||
.arg(cache_dir_tempdir.path())
|
||||
.output()
|
||||
.await
|
||||
{
|
||||
|
||||
@@ -71,6 +71,7 @@ pub async fn spawn_with_program_path(
|
||||
|
||||
with_transient_socket_path(debug_id, |socket_path| {
|
||||
let socket_path = socket_path.to_owned();
|
||||
let worker_dir_path = worker_dir.path().to_owned();
|
||||
|
||||
async move {
|
||||
let listener = UnixListener::bind(&socket_path).map_err(|err| {
|
||||
@@ -91,7 +92,7 @@ pub async fn spawn_with_program_path(
|
||||
&program_path,
|
||||
&extra_args,
|
||||
&socket_path,
|
||||
&worker_dir.path,
|
||||
&worker_dir_path,
|
||||
security_status,
|
||||
)
|
||||
.map_err(|err| {
|
||||
@@ -100,7 +101,7 @@ pub async fn spawn_with_program_path(
|
||||
%debug_id,
|
||||
?program_path,
|
||||
?extra_args,
|
||||
?worker_dir.path,
|
||||
?worker_dir_path,
|
||||
?socket_path,
|
||||
"cannot spawn a worker: {:?}",
|
||||
err,
|
||||
@@ -108,7 +109,6 @@ pub async fn spawn_with_program_path(
|
||||
SpawnErr::ProcessSpawn
|
||||
})?;
|
||||
|
||||
let worker_dir_path = worker_dir.path.clone();
|
||||
futures::select! {
|
||||
accept_result = listener.accept().fuse() => {
|
||||
let (stream, _) = accept_result.map_err(|err| {
|
||||
@@ -150,7 +150,42 @@ where
|
||||
F: FnOnce(&Path) -> Fut,
|
||||
Fut: futures::Future<Output = Result<T, SpawnErr>> + 'static,
|
||||
{
|
||||
let socket_path = tmppath(&format!("pvf-host-{}", debug_id))
|
||||
/// Returns a path under [`std::env::temp_dir`]. The path name will start with the given prefix.
|
||||
///
|
||||
/// There is only a certain number of retries. If exceeded this function will give up and return
|
||||
/// an error.
|
||||
pub async fn tmppath(prefix: &str) -> io::Result<PathBuf> {
|
||||
fn make_tmppath(prefix: &str, dir: &Path) -> PathBuf {
|
||||
use rand::distributions::Alphanumeric;
|
||||
|
||||
const DESCRIMINATOR_LEN: usize = 10;
|
||||
|
||||
let mut buf = Vec::with_capacity(prefix.len() + DESCRIMINATOR_LEN);
|
||||
buf.extend(prefix.as_bytes());
|
||||
buf.extend(rand::thread_rng().sample_iter(&Alphanumeric).take(DESCRIMINATOR_LEN));
|
||||
|
||||
let s = std::str::from_utf8(&buf)
|
||||
.expect("the string is collected from a valid utf-8 sequence; qed");
|
||||
|
||||
let mut path = dir.to_owned();
|
||||
path.push(s);
|
||||
path
|
||||
}
|
||||
|
||||
const NUM_RETRIES: usize = 50;
|
||||
|
||||
let dir = std::env::temp_dir();
|
||||
for _ in 0..NUM_RETRIES {
|
||||
let tmp_path = make_tmppath(prefix, &dir);
|
||||
if !tmp_path.exists() {
|
||||
return Ok(tmp_path)
|
||||
}
|
||||
}
|
||||
|
||||
Err(io::Error::new(io::ErrorKind::Other, "failed to create a temporary path"))
|
||||
}
|
||||
|
||||
let socket_path = tmppath(&format!("pvf-host-{}-", debug_id))
|
||||
.await
|
||||
.map_err(|_| SpawnErr::TmpPath)?;
|
||||
let result = f(&socket_path).await;
|
||||
@@ -162,46 +197,6 @@ where
|
||||
result
|
||||
}
|
||||
|
||||
/// Returns a path under the given `dir`. The path name will start with the given prefix.
|
||||
///
|
||||
/// There is only a certain number of retries. If exceeded this function will give up and return an
|
||||
/// error.
|
||||
pub async fn tmppath_in(prefix: &str, dir: &Path) -> io::Result<PathBuf> {
|
||||
fn make_tmppath(prefix: &str, dir: &Path) -> PathBuf {
|
||||
use rand::distributions::Alphanumeric;
|
||||
|
||||
const DESCRIMINATOR_LEN: usize = 10;
|
||||
|
||||
let mut buf = Vec::with_capacity(prefix.len() + DESCRIMINATOR_LEN);
|
||||
buf.extend(prefix.as_bytes());
|
||||
buf.extend(rand::thread_rng().sample_iter(&Alphanumeric).take(DESCRIMINATOR_LEN));
|
||||
|
||||
let s = std::str::from_utf8(&buf)
|
||||
.expect("the string is collected from a valid utf-8 sequence; qed");
|
||||
|
||||
let mut path = dir.to_owned();
|
||||
path.push(s);
|
||||
path
|
||||
}
|
||||
|
||||
const NUM_RETRIES: usize = 50;
|
||||
|
||||
for _ in 0..NUM_RETRIES {
|
||||
let tmp_path = make_tmppath(prefix, dir);
|
||||
if !tmp_path.exists() {
|
||||
return Ok(tmp_path)
|
||||
}
|
||||
}
|
||||
|
||||
Err(io::Error::new(io::ErrorKind::Other, "failed to create a temporary path"))
|
||||
}
|
||||
|
||||
/// The same as [`tmppath_in`], but uses [`std::env::temp_dir`] as the directory.
|
||||
pub async fn tmppath(prefix: &str) -> io::Result<PathBuf> {
|
||||
let temp_dir = std::env::temp_dir();
|
||||
tmppath_in(prefix, &temp_dir).await
|
||||
}
|
||||
|
||||
/// A struct that represents an idle worker.
|
||||
///
|
||||
/// This struct is supposed to be used as a token that is passed by move into a subroutine that
|
||||
@@ -224,8 +219,6 @@ pub struct IdleWorker {
|
||||
pub enum SpawnErr {
|
||||
/// Cannot obtain a temporary path location.
|
||||
TmpPath,
|
||||
/// An FS error occurred.
|
||||
Fs(String),
|
||||
/// Cannot bind the socket to the given path.
|
||||
Bind,
|
||||
/// An error happened during accepting a connection to the socket.
|
||||
@@ -419,26 +412,22 @@ pub async fn framed_recv(r: &mut (impl AsyncRead + Unpin)) -> io::Result<Vec<u8>
|
||||
/// ```
|
||||
#[derive(Debug)]
|
||||
pub struct WorkerDir {
|
||||
pub path: PathBuf,
|
||||
tempdir: tempfile::TempDir,
|
||||
}
|
||||
|
||||
impl WorkerDir {
|
||||
/// Creates a new, empty worker dir with a random name in the given cache dir.
|
||||
pub async fn new(debug_id: &'static str, cache_dir: &Path) -> Result<Self, SpawnErr> {
|
||||
let prefix = format!("worker-dir-{}-", debug_id);
|
||||
let path = tmppath_in(&prefix, cache_dir).await.map_err(|_| SpawnErr::TmpPath)?;
|
||||
tokio::fs::create_dir(&path)
|
||||
.await
|
||||
.map_err(|err| SpawnErr::Fs(err.to_string()))?;
|
||||
Ok(Self { path })
|
||||
let tempdir = tempfile::Builder::new()
|
||||
.prefix(&prefix)
|
||||
.tempdir_in(cache_dir)
|
||||
.map_err(|_| SpawnErr::TmpPath)?;
|
||||
Ok(Self { tempdir })
|
||||
}
|
||||
}
|
||||
|
||||
// Try to clean up the temporary worker dir at the end of the worker's lifetime. It should be wiped
|
||||
// on startup, but we make a best effort not to leave it around.
|
||||
impl Drop for WorkerDir {
|
||||
fn drop(&mut self) {
|
||||
let _ = std::fs::remove_dir_all(&self.path);
|
||||
pub fn path(&self) -> &Path {
|
||||
self.tempdir.path()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -18,6 +18,8 @@
|
||||
|
||||
use assert_matches::assert_matches;
|
||||
use parity_scale_codec::Encode as _;
|
||||
#[cfg(all(feature = "ci-only-tests", target_os = "linux"))]
|
||||
use polkadot_node_core_pvf::SecurityStatus;
|
||||
use polkadot_node_core_pvf::{
|
||||
start, testing::build_workers_and_get_paths, Config, InvalidCandidate, Metrics, PrepareError,
|
||||
PrepareJobKind, PvfPrepData, ValidationError, ValidationHost, JOB_TIMEOUT_WALL_CLOCK_FACTOR,
|
||||
@@ -122,6 +124,11 @@ impl TestHost {
|
||||
.unwrap();
|
||||
result_rx.await.unwrap()
|
||||
}
|
||||
|
||||
#[cfg(all(feature = "ci-only-tests", target_os = "linux"))]
|
||||
async fn security_status(&self) -> SecurityStatus {
|
||||
self.host.lock().await.security_status.clone()
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -402,3 +409,28 @@ async fn prepare_can_run_serially() {
|
||||
// Prepare a different wasm blob to prevent skipping work.
|
||||
let _stats = host.precheck_pvf(halt::wasm_binary_unwrap(), Default::default()).await.unwrap();
|
||||
}
|
||||
|
||||
// CI machines should be able to enable all the security features.
|
||||
#[cfg(all(feature = "ci-only-tests", target_os = "linux"))]
|
||||
#[tokio::test]
|
||||
async fn all_security_features_work() {
|
||||
// Landlock is only available starting Linux 5.13, and we may be testing on an old kernel.
|
||||
let sysinfo = sc_sysinfo::gather_sysinfo();
|
||||
// The version will look something like "5.15.0-87-generic".
|
||||
let version = sysinfo.linux_kernel.unwrap();
|
||||
let version_split: Vec<&str> = version.split(".").collect();
|
||||
let major: u32 = version_split[0].parse().unwrap();
|
||||
let minor: u32 = version_split[1].parse().unwrap();
|
||||
let can_enable_landlock = if major >= 6 { true } else { minor >= 13 };
|
||||
|
||||
let host = TestHost::new().await;
|
||||
|
||||
assert_eq!(
|
||||
host.security_status().await,
|
||||
SecurityStatus {
|
||||
can_enable_landlock,
|
||||
can_enable_seccomp: true,
|
||||
can_unshare_user_namespace_and_change_root: true,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
@@ -54,8 +54,8 @@ one: reports histogram polkadot_pvf_preparation_time has 0 samples in buckets ["
|
||||
two: reports histogram polkadot_pvf_preparation_time has 0 samples in buckets ["20", "30", "60", "120", "+Inf"] within 10 seconds
|
||||
|
||||
# Check execution time.
|
||||
# There are two different timeout conditions: BACKING_EXECUTION_TIMEOUT(2s) and
|
||||
# APPROVAL_EXECUTION_TIMEOUT(6s). Currently these are not differentiated by metrics
|
||||
# There are two different timeout conditions: DEFAULT_BACKING_EXECUTION_TIMEOUT(2s) and
|
||||
# DEFAULT_APPROVAL_EXECUTION_TIMEOUT(12s). Currently these are not differentiated by metrics
|
||||
# because the metrics are defined in `polkadot-node-core-pvf` which is a level below
|
||||
# the relevant subsystems.
|
||||
# That being said, we will take the simplifying assumption of testing only the
|
||||
|
||||
Reference in New Issue
Block a user