Pvf thiserror (#2958)

resolve #2157 

- [x] fix broken doc links
- [x] fix codec macro typo
https://github.com/paritytech/polkadot-sdk/blob/master/polkadot/node/core/pvf/common/src/error.rs#L81
(see the comment below)
- [x] refactor `ValidationError`, `PrepareError` and related error types
to use `thiserror` crate

## `codec` issue

`codec` macro was mistakenly applied two times to `Kernel` error (so it
was encoded with 10 instead of 11 and the same as `JobDied`). The PR
changes it to 11 because

- it was an initial goal of the code author
- Kernel is less frequent than JobDied so in case of existing error
encoding it is more probable to have 10 as JobDied than Kernel

See https://github.com/paritytech/parity-scale-codec/issues/555

----
polkadot address: 13zCyRG2a1W2ih5SioL8byqmQ6mc8vkgFwQgVzJSdRUUmp46

---------

Co-authored-by: s0me0ne-unkn0wn <48632512+s0me0ne-unkn0wn@users.noreply.github.com>
This commit is contained in:
maksimryndin
2024-01-19 15:10:09 +01:00
committed by GitHub
parent 697c2c392e
commit 47a3faa41e
7 changed files with 43 additions and 69 deletions
+21 -51
View File
@@ -16,7 +16,6 @@
use crate::prepare::{PrepareSuccess, PrepareWorkerSuccess};
use parity_scale_codec::{Decode, Encode};
use std::fmt;
/// Result of PVF preparation from a worker, with checksum of the compiled PVF and stats of the
/// preparation if successful.
@@ -32,35 +31,43 @@ pub type PrecheckResult = Result<(), PrepareError>;
/// An error that occurred during the prepare part of the PVF pipeline.
// Codec indexes are intended to stabilize pre-encoded payloads (see `OOM_PAYLOAD`)
#[derive(Debug, Clone, Encode, Decode)]
#[derive(thiserror::Error, Debug, Clone, Encode, Decode)]
pub enum PrepareError {
/// During the prevalidation stage of preparation an issue was found with the PVF.
#[codec(index = 0)]
#[error("prepare: prevalidation error: {0}")]
Prevalidation(String),
/// Compilation failed for the given PVF.
#[codec(index = 1)]
#[error("prepare: preparation error: {0}")]
Preparation(String),
/// Instantiation of the WASM module instance failed.
#[codec(index = 2)]
#[error("prepare: runtime construction: {0}")]
RuntimeConstruction(String),
/// An unexpected error has occurred in the preparation job.
#[codec(index = 3)]
#[error("prepare: job error: {0}")]
JobError(String),
/// Failed to prepare the PVF due to the time limit.
#[codec(index = 4)]
#[error("prepare: timeout")]
TimedOut,
/// An IO error occurred. This state is reported by either the validation host or by the
/// worker.
#[codec(index = 5)]
#[error("prepare: io error while receiving response: {0}")]
IoErr(String),
/// The temporary file for the artifact could not be created at the given cache path. This
/// state is reported by the validation host (not by the worker).
#[codec(index = 6)]
#[error("prepare: error creating tmp file: {0}")]
CreateTmpFile(String),
/// The response from the worker is received, but the file cannot be renamed (moved) to the
/// final destination location. This state is reported by the validation host (not by the
/// worker).
#[codec(index = 7)]
#[error("prepare: error renaming tmp file ({src:?} -> {dest:?}): {err}")]
RenameTmpFile {
err: String,
// Unfortunately `PathBuf` doesn't implement `Encode`/`Decode`, so we do a fallible
@@ -70,17 +77,21 @@ pub enum PrepareError {
},
/// Memory limit reached
#[codec(index = 8)]
#[error("prepare: out of memory")]
OutOfMemory,
/// The response from the worker is received, but the worker cache could not be cleared. The
/// worker has to be killed to avoid jobs having access to data from other jobs. This state is
/// reported by the validation host (not by the worker).
#[codec(index = 9)]
#[error("prepare: error clearing worker cache: {0}")]
ClearWorkerDir(String),
/// The preparation job process died, due to OOM, a seccomp violation, or some other factor.
JobDied { err: String, job_pid: i32 },
#[codec(index = 10)]
#[error("prepare: prepare job with pid {job_pid} died: {err}")]
JobDied { err: String, job_pid: i32 },
/// Some error occurred when interfacing with the kernel.
#[codec(index = 11)]
#[error("prepare: error interfacing with the kernel: {0}")]
Kernel(String),
}
@@ -109,41 +120,23 @@ impl PrepareError {
}
}
impl fmt::Display for PrepareError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use PrepareError::*;
match self {
Prevalidation(err) => write!(f, "prevalidation: {}", err),
Preparation(err) => write!(f, "preparation: {}", err),
RuntimeConstruction(err) => write!(f, "runtime construction: {}", err),
JobError(err) => write!(f, "panic: {}", err),
TimedOut => write!(f, "prepare: timeout"),
IoErr(err) => write!(f, "prepare: io error while receiving response: {}", err),
JobDied { err, job_pid } =>
write!(f, "prepare: prepare job with pid {job_pid} died: {err}"),
CreateTmpFile(err) => write!(f, "prepare: error creating tmp file: {}", err),
RenameTmpFile { err, src, dest } =>
write!(f, "prepare: error renaming tmp file ({:?} -> {:?}): {}", src, dest, err),
OutOfMemory => write!(f, "prepare: out of memory"),
ClearWorkerDir(err) => write!(f, "prepare: error clearing worker cache: {}", err),
Kernel(err) => write!(f, "prepare: error interfacing with the kernel: {}", err),
}
}
}
/// Some internal error occurred.
///
/// Should only ever be used for validation errors independent of the candidate and PVF, or for
/// errors we ruled out during pre-checking (so preparation errors are fine).
#[derive(Debug, Clone, Encode, Decode)]
#[derive(thiserror::Error, Debug, Clone, Encode, Decode)]
pub enum InternalValidationError {
/// Some communication error occurred with the host.
#[error("validation: some communication error occurred with the host: {0}")]
HostCommunication(String),
/// Host could not create a hard link to the artifact path.
#[error("validation: host could not create a hard link to the artifact path: {0}")]
CouldNotCreateLink(String),
/// Could not find or open compiled artifact file.
#[error("validation: could not find or open compiled artifact file: {0}")]
CouldNotOpenFile(String),
/// Host could not clear the worker cache after a job.
#[error("validation: host could not clear the worker cache ({path:?}) after a job: {err}")]
CouldNotClearWorkerDir {
err: String,
// Unfortunately `PathBuf` doesn't implement `Encode`/`Decode`, so we do a fallible
@@ -151,32 +144,9 @@ pub enum InternalValidationError {
path: Option<String>,
},
/// Some error occurred when interfacing with the kernel.
#[error("validation: error interfacing with the kernel: {0}")]
Kernel(String),
/// Some non-deterministic preparation error occurred.
#[error("validation: prepare: {0}")]
NonDeterministicPrepareError(PrepareError),
}
impl fmt::Display for InternalValidationError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use InternalValidationError::*;
match self {
HostCommunication(err) =>
write!(f, "validation: some communication error occurred with the host: {}", err),
CouldNotCreateLink(err) => write!(
f,
"validation: host could not create a hard link to the artifact path: {}",
err
),
CouldNotOpenFile(err) =>
write!(f, "validation: could not find or open compiled artifact file: {}", err),
CouldNotClearWorkerDir { err, path } => write!(
f,
"validation: host could not clear the worker cache ({:?}) after a job: {}",
path, err
),
Kernel(err) => write!(f, "validation: error interfacing with the kernel: {}", err),
NonDeterministicPrepareError(err) => write!(f, "validation: prepare: {}", err),
}
}
}