mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-16 20:11:03 +00:00
Pvf thiserror (#2958)
resolve #2157

- [x] fix broken doc links
- [x] fix codec macro typo https://github.com/paritytech/polkadot-sdk/blob/master/polkadot/node/core/pvf/common/src/error.rs#L81 (see the comment below)
- [x] refactor `ValidationError`, `PrepareError` and related error types to use the `thiserror` crate

## `codec` issue

The `codec` macro was mistakenly applied two times to the `Kernel` error (so it was encoded with index 10 instead of 11, i.e. the same as `JobDied`). The PR changes it to 11 because

- it was the initial goal of the code author
- `Kernel` is less frequent than `JobDied`, so for an already-encoded error it is more probable that 10 means `JobDied` than `Kernel`

See https://github.com/paritytech/parity-scale-codec/issues/555

----

polkadot address: 13zCyRG2a1W2ih5SioL8byqmQ6mc8vkgFwQgVzJSdRUUmp46

---------

Co-authored-by: s0me0ne-unkn0wn <48632512+s0me0ne-unkn0wn@users.noreply.github.com>
This commit is contained in:
@@ -16,7 +16,6 @@
|
|||||||
|
|
||||||
use crate::prepare::{PrepareSuccess, PrepareWorkerSuccess};
|
use crate::prepare::{PrepareSuccess, PrepareWorkerSuccess};
|
||||||
use parity_scale_codec::{Decode, Encode};
|
use parity_scale_codec::{Decode, Encode};
|
||||||
use std::fmt;
|
|
||||||
|
|
||||||
/// Result of PVF preparation from a worker, with checksum of the compiled PVF and stats of the
|
/// Result of PVF preparation from a worker, with checksum of the compiled PVF and stats of the
|
||||||
/// preparation if successful.
|
/// preparation if successful.
|
||||||
@@ -32,35 +31,43 @@ pub type PrecheckResult = Result<(), PrepareError>;
|
|||||||
|
|
||||||
/// An error that occurred during the prepare part of the PVF pipeline.
|
/// An error that occurred during the prepare part of the PVF pipeline.
|
||||||
// Codec indexes are intended to stabilize pre-encoded payloads (see `OOM_PAYLOAD`)
|
// Codec indexes are intended to stabilize pre-encoded payloads (see `OOM_PAYLOAD`)
|
||||||
#[derive(Debug, Clone, Encode, Decode)]
|
#[derive(thiserror::Error, Debug, Clone, Encode, Decode)]
|
||||||
pub enum PrepareError {
|
pub enum PrepareError {
|
||||||
/// During the prevalidation stage of preparation an issue was found with the PVF.
|
/// During the prevalidation stage of preparation an issue was found with the PVF.
|
||||||
#[codec(index = 0)]
|
#[codec(index = 0)]
|
||||||
|
#[error("prepare: prevalidation error: {0}")]
|
||||||
Prevalidation(String),
|
Prevalidation(String),
|
||||||
/// Compilation failed for the given PVF.
|
/// Compilation failed for the given PVF.
|
||||||
#[codec(index = 1)]
|
#[codec(index = 1)]
|
||||||
|
#[error("prepare: preparation error: {0}")]
|
||||||
Preparation(String),
|
Preparation(String),
|
||||||
/// Instantiation of the WASM module instance failed.
|
/// Instantiation of the WASM module instance failed.
|
||||||
#[codec(index = 2)]
|
#[codec(index = 2)]
|
||||||
|
#[error("prepare: runtime construction: {0}")]
|
||||||
RuntimeConstruction(String),
|
RuntimeConstruction(String),
|
||||||
/// An unexpected error has occurred in the preparation job.
|
/// An unexpected error has occurred in the preparation job.
|
||||||
#[codec(index = 3)]
|
#[codec(index = 3)]
|
||||||
|
#[error("prepare: job error: {0}")]
|
||||||
JobError(String),
|
JobError(String),
|
||||||
/// Failed to prepare the PVF due to the time limit.
|
/// Failed to prepare the PVF due to the time limit.
|
||||||
#[codec(index = 4)]
|
#[codec(index = 4)]
|
||||||
|
#[error("prepare: timeout")]
|
||||||
TimedOut,
|
TimedOut,
|
||||||
/// An IO error occurred. This state is reported by either the validation host or by the
|
/// An IO error occurred. This state is reported by either the validation host or by the
|
||||||
/// worker.
|
/// worker.
|
||||||
#[codec(index = 5)]
|
#[codec(index = 5)]
|
||||||
|
#[error("prepare: io error while receiving response: {0}")]
|
||||||
IoErr(String),
|
IoErr(String),
|
||||||
/// The temporary file for the artifact could not be created at the given cache path. This
|
/// The temporary file for the artifact could not be created at the given cache path. This
|
||||||
/// state is reported by the validation host (not by the worker).
|
/// state is reported by the validation host (not by the worker).
|
||||||
#[codec(index = 6)]
|
#[codec(index = 6)]
|
||||||
|
#[error("prepare: error creating tmp file: {0}")]
|
||||||
CreateTmpFile(String),
|
CreateTmpFile(String),
|
||||||
/// The response from the worker is received, but the file cannot be renamed (moved) to the
|
/// The response from the worker is received, but the file cannot be renamed (moved) to the
|
||||||
/// final destination location. This state is reported by the validation host (not by the
|
/// final destination location. This state is reported by the validation host (not by the
|
||||||
/// worker).
|
/// worker).
|
||||||
#[codec(index = 7)]
|
#[codec(index = 7)]
|
||||||
|
#[error("prepare: error renaming tmp file ({src:?} -> {dest:?}): {err}")]
|
||||||
RenameTmpFile {
|
RenameTmpFile {
|
||||||
err: String,
|
err: String,
|
||||||
// Unfortunately `PathBuf` doesn't implement `Encode`/`Decode`, so we do a fallible
|
// Unfortunately `PathBuf` doesn't implement `Encode`/`Decode`, so we do a fallible
|
||||||
@@ -70,17 +77,21 @@ pub enum PrepareError {
|
|||||||
},
|
},
|
||||||
/// Memory limit reached
|
/// Memory limit reached
|
||||||
#[codec(index = 8)]
|
#[codec(index = 8)]
|
||||||
|
#[error("prepare: out of memory")]
|
||||||
OutOfMemory,
|
OutOfMemory,
|
||||||
/// The response from the worker is received, but the worker cache could not be cleared. The
|
/// The response from the worker is received, but the worker cache could not be cleared. The
|
||||||
/// worker has to be killed to avoid jobs having access to data from other jobs. This state is
|
/// worker has to be killed to avoid jobs having access to data from other jobs. This state is
|
||||||
/// reported by the validation host (not by the worker).
|
/// reported by the validation host (not by the worker).
|
||||||
#[codec(index = 9)]
|
#[codec(index = 9)]
|
||||||
|
#[error("prepare: error clearing worker cache: {0}")]
|
||||||
ClearWorkerDir(String),
|
ClearWorkerDir(String),
|
||||||
/// The preparation job process died, due to OOM, a seccomp violation, or some other factor.
|
/// The preparation job process died, due to OOM, a seccomp violation, or some other factor.
|
||||||
JobDied { err: String, job_pid: i32 },
|
|
||||||
#[codec(index = 10)]
|
#[codec(index = 10)]
|
||||||
|
#[error("prepare: prepare job with pid {job_pid} died: {err}")]
|
||||||
|
JobDied { err: String, job_pid: i32 },
|
||||||
/// Some error occurred when interfacing with the kernel.
|
/// Some error occurred when interfacing with the kernel.
|
||||||
#[codec(index = 11)]
|
#[codec(index = 11)]
|
||||||
|
#[error("prepare: error interfacing with the kernel: {0}")]
|
||||||
Kernel(String),
|
Kernel(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -109,41 +120,23 @@ impl PrepareError {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for PrepareError {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
use PrepareError::*;
|
|
||||||
match self {
|
|
||||||
Prevalidation(err) => write!(f, "prevalidation: {}", err),
|
|
||||||
Preparation(err) => write!(f, "preparation: {}", err),
|
|
||||||
RuntimeConstruction(err) => write!(f, "runtime construction: {}", err),
|
|
||||||
JobError(err) => write!(f, "panic: {}", err),
|
|
||||||
TimedOut => write!(f, "prepare: timeout"),
|
|
||||||
IoErr(err) => write!(f, "prepare: io error while receiving response: {}", err),
|
|
||||||
JobDied { err, job_pid } =>
|
|
||||||
write!(f, "prepare: prepare job with pid {job_pid} died: {err}"),
|
|
||||||
CreateTmpFile(err) => write!(f, "prepare: error creating tmp file: {}", err),
|
|
||||||
RenameTmpFile { err, src, dest } =>
|
|
||||||
write!(f, "prepare: error renaming tmp file ({:?} -> {:?}): {}", src, dest, err),
|
|
||||||
OutOfMemory => write!(f, "prepare: out of memory"),
|
|
||||||
ClearWorkerDir(err) => write!(f, "prepare: error clearing worker cache: {}", err),
|
|
||||||
Kernel(err) => write!(f, "prepare: error interfacing with the kernel: {}", err),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Some internal error occurred.
|
/// Some internal error occurred.
|
||||||
///
|
///
|
||||||
/// Should only ever be used for validation errors independent of the candidate and PVF, or for
|
/// Should only ever be used for validation errors independent of the candidate and PVF, or for
|
||||||
/// errors we ruled out during pre-checking (so preparation errors are fine).
|
/// errors we ruled out during pre-checking (so preparation errors are fine).
|
||||||
#[derive(Debug, Clone, Encode, Decode)]
|
#[derive(thiserror::Error, Debug, Clone, Encode, Decode)]
|
||||||
pub enum InternalValidationError {
|
pub enum InternalValidationError {
|
||||||
/// Some communication error occurred with the host.
|
/// Some communication error occurred with the host.
|
||||||
|
#[error("validation: some communication error occurred with the host: {0}")]
|
||||||
HostCommunication(String),
|
HostCommunication(String),
|
||||||
/// Host could not create a hard link to the artifact path.
|
/// Host could not create a hard link to the artifact path.
|
||||||
|
#[error("validation: host could not create a hard link to the artifact path: {0}")]
|
||||||
CouldNotCreateLink(String),
|
CouldNotCreateLink(String),
|
||||||
/// Could not find or open compiled artifact file.
|
/// Could not find or open compiled artifact file.
|
||||||
|
#[error("validation: could not find or open compiled artifact file: {0}")]
|
||||||
CouldNotOpenFile(String),
|
CouldNotOpenFile(String),
|
||||||
/// Host could not clear the worker cache after a job.
|
/// Host could not clear the worker cache after a job.
|
||||||
|
#[error("validation: host could not clear the worker cache ({path:?}) after a job: {err}")]
|
||||||
CouldNotClearWorkerDir {
|
CouldNotClearWorkerDir {
|
||||||
err: String,
|
err: String,
|
||||||
// Unfortunately `PathBuf` doesn't implement `Encode`/`Decode`, so we do a fallible
|
// Unfortunately `PathBuf` doesn't implement `Encode`/`Decode`, so we do a fallible
|
||||||
@@ -151,32 +144,9 @@ pub enum InternalValidationError {
|
|||||||
path: Option<String>,
|
path: Option<String>,
|
||||||
},
|
},
|
||||||
/// Some error occurred when interfacing with the kernel.
|
/// Some error occurred when interfacing with the kernel.
|
||||||
|
#[error("validation: error interfacing with the kernel: {0}")]
|
||||||
Kernel(String),
|
Kernel(String),
|
||||||
|
|
||||||
/// Some non-deterministic preparation error occurred.
|
/// Some non-deterministic preparation error occurred.
|
||||||
|
#[error("validation: prepare: {0}")]
|
||||||
NonDeterministicPrepareError(PrepareError),
|
NonDeterministicPrepareError(PrepareError),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for InternalValidationError {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
use InternalValidationError::*;
|
|
||||||
match self {
|
|
||||||
HostCommunication(err) =>
|
|
||||||
write!(f, "validation: some communication error occurred with the host: {}", err),
|
|
||||||
CouldNotCreateLink(err) => write!(
|
|
||||||
f,
|
|
||||||
"validation: host could not create a hard link to the artifact path: {}",
|
|
||||||
err
|
|
||||||
),
|
|
||||||
CouldNotOpenFile(err) =>
|
|
||||||
write!(f, "validation: could not find or open compiled artifact file: {}", err),
|
|
||||||
CouldNotClearWorkerDir { err, path } => write!(
|
|
||||||
f,
|
|
||||||
"validation: host could not clear the worker cache ({:?}) after a job: {}",
|
|
||||||
path, err
|
|
||||||
),
|
|
||||||
Kernel(err) => write!(f, "validation: error interfacing with the kernel: {}", err),
|
|
||||||
NonDeterministicPrepareError(err) => write!(f, "validation: prepare: {}", err),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -485,7 +485,7 @@ fn recv_child_response(received_data: &mut io::BufReader<&[u8]>) -> io::Result<J
|
|||||||
JobResult::decode(&mut response_bytes.as_slice()).map_err(|e| {
|
JobResult::decode(&mut response_bytes.as_slice()).map_err(|e| {
|
||||||
io::Error::new(
|
io::Error::new(
|
||||||
io::ErrorKind::Other,
|
io::ErrorKind::Other,
|
||||||
format!("execute pvf recv_child_response: decode error: {:?}", e),
|
format!("execute pvf recv_child_response: decode error: {}", e),
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,7 +17,7 @@
|
|||||||
use polkadot_node_core_pvf_common::error::{InternalValidationError, PrepareError};
|
use polkadot_node_core_pvf_common::error::{InternalValidationError, PrepareError};
|
||||||
|
|
||||||
/// An error raised during validation of the candidate.
|
/// An error raised during validation of the candidate.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(thiserror::Error, Debug, Clone)]
|
||||||
pub enum ValidationError {
|
pub enum ValidationError {
|
||||||
/// Deterministic preparation issue. In practice, most of the problems should be caught by
|
/// Deterministic preparation issue. In practice, most of the problems should be caught by
|
||||||
/// prechecking, so this may be a sign of internal conditions.
|
/// prechecking, so this may be a sign of internal conditions.
|
||||||
@@ -27,35 +27,42 @@ pub enum ValidationError {
|
|||||||
/// pre-checking enabled only valid runtimes should ever get enacted, so we can be
|
/// pre-checking enabled only valid runtimes should ever get enacted, so we can be
|
||||||
/// reasonably sure that this is some local problem on the current node. However, as this
|
/// reasonably sure that this is some local problem on the current node. However, as this
|
||||||
/// particular error *seems* to indicate a deterministic error, we raise a warning.
|
/// particular error *seems* to indicate a deterministic error, we raise a warning.
|
||||||
|
#[error("candidate validation: {0}")]
|
||||||
Preparation(PrepareError),
|
Preparation(PrepareError),
|
||||||
/// The error was raised because the candidate is invalid. Should vote against.
|
/// The error was raised because the candidate is invalid. Should vote against.
|
||||||
Invalid(InvalidCandidate),
|
#[error("candidate validation: {0}")]
|
||||||
|
Invalid(#[from] InvalidCandidate),
|
||||||
/// Possibly transient issue that may resolve after retries. Should vote against when retries
|
/// Possibly transient issue that may resolve after retries. Should vote against when retries
|
||||||
/// fail.
|
/// fail.
|
||||||
PossiblyInvalid(PossiblyInvalidError),
|
#[error("candidate validation: {0}")]
|
||||||
|
PossiblyInvalid(#[from] PossiblyInvalidError),
|
||||||
/// Preparation or execution issue caused by an internal condition. Should not vote against.
|
/// Preparation or execution issue caused by an internal condition. Should not vote against.
|
||||||
Internal(InternalValidationError),
|
#[error("candidate validation: internal: {0}")]
|
||||||
|
Internal(#[from] InternalValidationError),
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A description of an error raised while executing a PVF that can be attributed to the combination
|
/// A description of an error raised while executing a PVF that can be attributed to the combination
|
||||||
/// of the candidate [`polkadot_parachain_primitives::primitives::ValidationParams`] and the PVF.
|
/// of the candidate [`polkadot_parachain_primitives::primitives::ValidationParams`] and the PVF.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(thiserror::Error, Debug, Clone)]
|
||||||
pub enum InvalidCandidate {
|
pub enum InvalidCandidate {
|
||||||
/// The candidate is reported to be invalid by the execution worker. The string contains the
|
/// The candidate is reported to be invalid by the execution worker. The string contains the
|
||||||
/// error message.
|
/// error message.
|
||||||
|
#[error("invalid: worker reported: {0}")]
|
||||||
WorkerReportedInvalid(String),
|
WorkerReportedInvalid(String),
|
||||||
/// PVF execution (compilation is not included) took more time than was allotted.
|
/// PVF execution (compilation is not included) took more time than was allotted.
|
||||||
|
#[error("invalid: hard timeout")]
|
||||||
HardTimeout,
|
HardTimeout,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Possibly transient issue that may resolve after retries.
|
/// Possibly transient issue that may resolve after retries.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(thiserror::Error, Debug, Clone)]
|
||||||
pub enum PossiblyInvalidError {
|
pub enum PossiblyInvalidError {
|
||||||
/// The worker process (not the job) has died during validation of a candidate.
|
/// The worker process (not the job) has died during validation of a candidate.
|
||||||
///
|
///
|
||||||
/// It's unlikely that this is caused by malicious code since workers spawn separate job
|
/// It's unlikely that this is caused by malicious code since workers spawn separate job
|
||||||
/// processes, and those job processes are sandboxed. But, it is possible. We retry in this
|
/// processes, and those job processes are sandboxed. But, it is possible. We retry in this
|
||||||
/// case, and if the error persists, we assume it's caused by the candidate and vote against.
|
/// case, and if the error persists, we assume it's caused by the candidate and vote against.
|
||||||
|
#[error("possibly invalid: ambiguous worker death")]
|
||||||
AmbiguousWorkerDeath,
|
AmbiguousWorkerDeath,
|
||||||
/// The job process (not the worker) has died for one of the following reasons:
|
/// The job process (not the worker) has died for one of the following reasons:
|
||||||
///
|
///
|
||||||
@@ -69,6 +76,7 @@ pub enum PossiblyInvalidError {
|
|||||||
/// (c) Some other reason, perhaps transient or perhaps caused by malicious code.
|
/// (c) Some other reason, perhaps transient or perhaps caused by malicious code.
|
||||||
///
|
///
|
||||||
/// We cannot treat this as an internal error because malicious code may have caused this.
|
/// We cannot treat this as an internal error because malicious code may have caused this.
|
||||||
|
#[error("possibly invalid: ambiguous job death: {0}")]
|
||||||
AmbiguousJobDeath(String),
|
AmbiguousJobDeath(String),
|
||||||
/// An unexpected error occurred in the job process and we can't be sure whether the candidate
|
/// An unexpected error occurred in the job process and we can't be sure whether the candidate
|
||||||
/// is really invalid or some internal glitch occurred. Whenever we are unsure, we can never
|
/// is really invalid or some internal glitch occurred. Whenever we are unsure, we can never
|
||||||
@@ -76,15 +84,10 @@ pub enum PossiblyInvalidError {
|
|||||||
/// issue was due to the candidate, then all validators would abstain, stalling finality on the
|
/// issue was due to the candidate, then all validators would abstain, stalling finality on the
|
||||||
/// chain. So we will first retry the candidate, and if the issue persists we are forced to
|
/// chain. So we will first retry the candidate, and if the issue persists we are forced to
|
||||||
/// vote invalid.
|
/// vote invalid.
|
||||||
|
#[error("possibly invalid: job error: {0}")]
|
||||||
JobError(String),
|
JobError(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<InternalValidationError> for ValidationError {
|
|
||||||
fn from(error: InternalValidationError) -> Self {
|
|
||||||
Self::Internal(error)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<PrepareError> for ValidationError {
|
impl From<PrepareError> for ValidationError {
|
||||||
fn from(error: PrepareError) -> Self {
|
fn from(error: PrepareError) -> Self {
|
||||||
// Here we need to classify the errors into two kinds: deterministic and non-deterministic.
|
// Here we need to classify the errors into two kinds: deterministic and non-deterministic.
|
||||||
|
|||||||
@@ -372,7 +372,7 @@ fn handle_job_finish(
|
|||||||
?artifact_id,
|
?artifact_id,
|
||||||
?worker,
|
?worker,
|
||||||
worker_rip = idle_worker.is_none(),
|
worker_rip = idle_worker.is_none(),
|
||||||
"execution worker concluded, error occurred: {:?}",
|
"execution worker concluded, error occurred: {}",
|
||||||
err
|
err
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -486,7 +486,8 @@ async fn handle_precheck_pvf(
|
|||||||
///
|
///
|
||||||
/// If the prepare job failed previously, we may retry it under certain conditions.
|
/// If the prepare job failed previously, we may retry it under certain conditions.
|
||||||
///
|
///
|
||||||
/// When preparing for execution, we use a more lenient timeout ([`LENIENT_PREPARATION_TIMEOUT`])
|
/// When preparing for execution, we use a more lenient timeout
|
||||||
|
/// ([`DEFAULT_LENIENT_PREPARATION_TIMEOUT`](polkadot_primitives::executor_params::DEFAULT_LENIENT_PREPARATION_TIMEOUT))
|
||||||
/// than when prechecking.
|
/// than when prechecking.
|
||||||
async fn handle_execute_pvf(
|
async fn handle_execute_pvf(
|
||||||
artifacts: &mut Artifacts,
|
artifacts: &mut Artifacts,
|
||||||
|
|||||||
@@ -45,8 +45,8 @@ pub struct FromQueue {
|
|||||||
/// Identifier of an artifact.
|
/// Identifier of an artifact.
|
||||||
pub(crate) artifact_id: ArtifactId,
|
pub(crate) artifact_id: ArtifactId,
|
||||||
/// Outcome of the PVF processing. [`Ok`] indicates that compiled artifact
|
/// Outcome of the PVF processing. [`Ok`] indicates that compiled artifact
|
||||||
/// is successfully stored on disk. Otherwise, an [error](crate::error::PrepareError)
|
/// is successfully stored on disk. Otherwise, an
|
||||||
/// is supplied.
|
/// [error](polkadot_node_core_pvf_common::error::PrepareError) is supplied.
|
||||||
pub(crate) result: PrepareResult,
|
pub(crate) result: PrepareResult,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -174,7 +174,7 @@ pub async fn start_work(
|
|||||||
gum::warn!(
|
gum::warn!(
|
||||||
target: LOG_TARGET,
|
target: LOG_TARGET,
|
||||||
worker_pid = %pid,
|
worker_pid = %pid,
|
||||||
"failed to recv a prepare response: {:?}",
|
"failed to recv a prepare response: {}",
|
||||||
err,
|
err,
|
||||||
);
|
);
|
||||||
Outcome::IoErr(err.to_string())
|
Outcome::IoErr(err.to_string())
|
||||||
|
|||||||
Reference in New Issue
Block a user