mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-05-31 13:21:01 +00:00
Refactor ValidationError (#2406)
This commit is contained in:
@@ -24,8 +24,8 @@
|
|||||||
#![warn(missing_docs)]
|
#![warn(missing_docs)]
|
||||||
|
|
||||||
use polkadot_node_core_pvf::{
|
use polkadot_node_core_pvf::{
|
||||||
InternalValidationError, InvalidCandidate as WasmInvalidCandidate, PrepareError,
|
InternalValidationError, InvalidCandidate as WasmInvalidCandidate, PossiblyInvalidError,
|
||||||
PrepareJobKind, PvfPrepData, ValidationError, ValidationHost,
|
PrepareError, PrepareJobKind, PvfPrepData, ValidationError, ValidationHost,
|
||||||
};
|
};
|
||||||
use polkadot_node_primitives::{
|
use polkadot_node_primitives::{
|
||||||
BlockData, InvalidCandidate, PoV, ValidationResult, POV_BOMB_LIMIT, VALIDATION_CODE_BOMB_LIMIT,
|
BlockData, InvalidCandidate, PoV, ValidationResult, POV_BOMB_LIMIT, VALIDATION_CODE_BOMB_LIMIT,
|
||||||
@@ -642,7 +642,7 @@ async fn validate_candidate_exhaustive(
|
|||||||
}
|
}
|
||||||
|
|
||||||
match result {
|
match result {
|
||||||
Err(ValidationError::InternalError(e)) => {
|
Err(ValidationError::Internal(e)) => {
|
||||||
gum::warn!(
|
gum::warn!(
|
||||||
target: LOG_TARGET,
|
target: LOG_TARGET,
|
||||||
?para_id,
|
?para_id,
|
||||||
@@ -651,34 +651,29 @@ async fn validate_candidate_exhaustive(
|
|||||||
);
|
);
|
||||||
Err(ValidationFailed(e.to_string()))
|
Err(ValidationFailed(e.to_string()))
|
||||||
},
|
},
|
||||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::HardTimeout)) =>
|
Err(ValidationError::Invalid(WasmInvalidCandidate::HardTimeout)) =>
|
||||||
Ok(ValidationResult::Invalid(InvalidCandidate::Timeout)),
|
Ok(ValidationResult::Invalid(InvalidCandidate::Timeout)),
|
||||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::WorkerReportedInvalid(e))) =>
|
Err(ValidationError::Invalid(WasmInvalidCandidate::WorkerReportedInvalid(e))) =>
|
||||||
Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(e))),
|
Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(e))),
|
||||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbiguousWorkerDeath)) =>
|
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousWorkerDeath)) =>
|
||||||
Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(
|
Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(
|
||||||
"ambiguous worker death".to_string(),
|
"ambiguous worker death".to_string(),
|
||||||
))),
|
))),
|
||||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::JobError(err))) =>
|
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::JobError(err))) =>
|
||||||
Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(err))),
|
Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(err))),
|
||||||
|
|
||||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbiguousJobDeath(err))) =>
|
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousJobDeath(err))) =>
|
||||||
Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(format!(
|
Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(format!(
|
||||||
"ambiguous job death: {err}"
|
"ambiguous job death: {err}"
|
||||||
)))),
|
)))),
|
||||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::PrepareError(e))) => {
|
Err(ValidationError::Preparation(e)) => {
|
||||||
// In principle if preparation of the `WASM` fails, the current candidate can not be the
|
|
||||||
// reason for that. So we can't say whether it is invalid or not. In addition, with
|
|
||||||
// pre-checking enabled only valid runtimes should ever get enacted, so we can be
|
|
||||||
// reasonably sure that this is some local problem on the current node. However, as this
|
|
||||||
// particular error *seems* to indicate a deterministic error, we raise a warning.
|
|
||||||
gum::warn!(
|
gum::warn!(
|
||||||
target: LOG_TARGET,
|
target: LOG_TARGET,
|
||||||
?para_id,
|
?para_id,
|
||||||
?e,
|
?e,
|
||||||
"Deterministic error occurred during preparation (should have been ruled out by pre-checking phase)",
|
"Deterministic error occurred during preparation (should have been ruled out by pre-checking phase)",
|
||||||
);
|
);
|
||||||
Err(ValidationFailed(e))
|
Err(ValidationFailed(e.to_string()))
|
||||||
},
|
},
|
||||||
Ok(res) =>
|
Ok(res) =>
|
||||||
if res.head_data.hash() != candidate_receipt.descriptor.para_head {
|
if res.head_data.hash() != candidate_receipt.descriptor.para_head {
|
||||||
@@ -762,16 +757,31 @@ trait ValidationBackend {
|
|||||||
}
|
}
|
||||||
|
|
||||||
match validation_result {
|
match validation_result {
|
||||||
Err(ValidationError::InvalidCandidate(
|
Err(ValidationError::PossiblyInvalid(
|
||||||
WasmInvalidCandidate::AmbiguousWorkerDeath |
|
PossiblyInvalidError::AmbiguousWorkerDeath |
|
||||||
WasmInvalidCandidate::AmbiguousJobDeath(_),
|
PossiblyInvalidError::AmbiguousJobDeath(_),
|
||||||
)) if num_death_retries_left > 0 => num_death_retries_left -= 1,
|
)) =>
|
||||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::JobError(_)))
|
if num_death_retries_left > 0 {
|
||||||
if num_job_error_retries_left > 0 =>
|
num_death_retries_left -= 1;
|
||||||
num_job_error_retries_left -= 1,
|
} else {
|
||||||
Err(ValidationError::InternalError(_)) if num_internal_retries_left > 0 =>
|
break;
|
||||||
num_internal_retries_left -= 1,
|
},
|
||||||
_ => break,
|
|
||||||
|
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::JobError(_))) =>
|
||||||
|
if num_job_error_retries_left > 0 {
|
||||||
|
num_job_error_retries_left -= 1;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
},
|
||||||
|
|
||||||
|
Err(ValidationError::Internal(_)) =>
|
||||||
|
if num_internal_retries_left > 0 {
|
||||||
|
num_internal_retries_left -= 1;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
},
|
||||||
|
|
||||||
|
Ok(_) | Err(ValidationError::Invalid(_) | ValidationError::Preparation(_)) => break,
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we got a possibly transient error, retry once after a brief delay, on the
|
// If we got a possibly transient error, retry once after a brief delay, on the
|
||||||
|
|||||||
@@ -480,9 +480,9 @@ fn candidate_validation_bad_return_is_invalid() {
|
|||||||
let candidate_receipt = CandidateReceipt { descriptor, commitments_hash: Hash::zero() };
|
let candidate_receipt = CandidateReceipt { descriptor, commitments_hash: Hash::zero() };
|
||||||
|
|
||||||
let v = executor::block_on(validate_candidate_exhaustive(
|
let v = executor::block_on(validate_candidate_exhaustive(
|
||||||
MockValidateCandidateBackend::with_hardcoded_result(Err(
|
MockValidateCandidateBackend::with_hardcoded_result(Err(ValidationError::Invalid(
|
||||||
ValidationError::InvalidCandidate(WasmInvalidCandidate::HardTimeout),
|
WasmInvalidCandidate::HardTimeout,
|
||||||
)),
|
))),
|
||||||
validation_data,
|
validation_data,
|
||||||
validation_code,
|
validation_code,
|
||||||
candidate_receipt,
|
candidate_receipt,
|
||||||
@@ -561,7 +561,7 @@ fn candidate_validation_one_ambiguous_error_is_valid() {
|
|||||||
|
|
||||||
let v = executor::block_on(validate_candidate_exhaustive(
|
let v = executor::block_on(validate_candidate_exhaustive(
|
||||||
MockValidateCandidateBackend::with_hardcoded_result_list(vec![
|
MockValidateCandidateBackend::with_hardcoded_result_list(vec![
|
||||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbiguousWorkerDeath)),
|
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousWorkerDeath)),
|
||||||
Ok(validation_result),
|
Ok(validation_result),
|
||||||
]),
|
]),
|
||||||
validation_data.clone(),
|
validation_data.clone(),
|
||||||
@@ -602,8 +602,8 @@ fn candidate_validation_multiple_ambiguous_errors_is_invalid() {
|
|||||||
|
|
||||||
let v = executor::block_on(validate_candidate_exhaustive(
|
let v = executor::block_on(validate_candidate_exhaustive(
|
||||||
MockValidateCandidateBackend::with_hardcoded_result_list(vec![
|
MockValidateCandidateBackend::with_hardcoded_result_list(vec![
|
||||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbiguousWorkerDeath)),
|
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousWorkerDeath)),
|
||||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbiguousWorkerDeath)),
|
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousWorkerDeath)),
|
||||||
]),
|
]),
|
||||||
validation_data,
|
validation_data,
|
||||||
validation_code,
|
validation_code,
|
||||||
@@ -626,7 +626,7 @@ fn candidate_validation_retry_internal_errors() {
|
|||||||
vec![
|
vec![
|
||||||
Err(InternalValidationError::HostCommunication("foo".into()).into()),
|
Err(InternalValidationError::HostCommunication("foo".into()).into()),
|
||||||
// Throw an AJD error, we should still retry again.
|
// Throw an AJD error, we should still retry again.
|
||||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbiguousJobDeath(
|
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousJobDeath(
|
||||||
"baz".into(),
|
"baz".into(),
|
||||||
))),
|
))),
|
||||||
// Throw another internal error.
|
// Throw another internal error.
|
||||||
@@ -644,7 +644,7 @@ fn candidate_validation_dont_retry_internal_errors() {
|
|||||||
vec![
|
vec![
|
||||||
Err(InternalValidationError::HostCommunication("foo".into()).into()),
|
Err(InternalValidationError::HostCommunication("foo".into()).into()),
|
||||||
// Throw an AWD error, we should still retry again.
|
// Throw an AWD error, we should still retry again.
|
||||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbiguousWorkerDeath)),
|
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousWorkerDeath)),
|
||||||
// Throw another internal error.
|
// Throw another internal error.
|
||||||
Err(InternalValidationError::HostCommunication("bar".into()).into()),
|
Err(InternalValidationError::HostCommunication("bar".into()).into()),
|
||||||
],
|
],
|
||||||
@@ -659,11 +659,11 @@ fn candidate_validation_retry_panic_errors() {
|
|||||||
let v = candidate_validation_retry_on_error_helper(
|
let v = candidate_validation_retry_on_error_helper(
|
||||||
PvfExecKind::Approval,
|
PvfExecKind::Approval,
|
||||||
vec![
|
vec![
|
||||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::JobError("foo".into()))),
|
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::JobError("foo".into()))),
|
||||||
// Throw an AWD error, we should still retry again.
|
// Throw an AWD error, we should still retry again.
|
||||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbiguousWorkerDeath)),
|
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousWorkerDeath)),
|
||||||
// Throw another panic error.
|
// Throw another panic error.
|
||||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::JobError("bar".into()))),
|
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::JobError("bar".into()))),
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -676,11 +676,11 @@ fn candidate_validation_dont_retry_panic_errors() {
|
|||||||
let v = candidate_validation_retry_on_error_helper(
|
let v = candidate_validation_retry_on_error_helper(
|
||||||
PvfExecKind::Backing,
|
PvfExecKind::Backing,
|
||||||
vec![
|
vec![
|
||||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::JobError("foo".into()))),
|
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::JobError("foo".into()))),
|
||||||
// Throw an AWD error, we should still retry again.
|
// Throw an AWD error, we should still retry again.
|
||||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbiguousWorkerDeath)),
|
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousWorkerDeath)),
|
||||||
// Throw another panic error.
|
// Throw another panic error.
|
||||||
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::JobError("bar".into()))),
|
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::JobError("bar".into()))),
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -758,9 +758,9 @@ fn candidate_validation_timeout_is_internal_error() {
|
|||||||
let candidate_receipt = CandidateReceipt { descriptor, commitments_hash: Hash::zero() };
|
let candidate_receipt = CandidateReceipt { descriptor, commitments_hash: Hash::zero() };
|
||||||
|
|
||||||
let v = executor::block_on(validate_candidate_exhaustive(
|
let v = executor::block_on(validate_candidate_exhaustive(
|
||||||
MockValidateCandidateBackend::with_hardcoded_result(Err(
|
MockValidateCandidateBackend::with_hardcoded_result(Err(ValidationError::Invalid(
|
||||||
ValidationError::InvalidCandidate(WasmInvalidCandidate::HardTimeout),
|
WasmInvalidCandidate::HardTimeout,
|
||||||
)),
|
))),
|
||||||
validation_data,
|
validation_data,
|
||||||
validation_code,
|
validation_code,
|
||||||
candidate_receipt,
|
candidate_receipt,
|
||||||
@@ -852,9 +852,9 @@ fn candidate_validation_code_mismatch_is_invalid() {
|
|||||||
let (_ctx, _ctx_handle) = test_helpers::make_subsystem_context::<AllMessages, _>(pool.clone());
|
let (_ctx, _ctx_handle) = test_helpers::make_subsystem_context::<AllMessages, _>(pool.clone());
|
||||||
|
|
||||||
let v = executor::block_on(validate_candidate_exhaustive(
|
let v = executor::block_on(validate_candidate_exhaustive(
|
||||||
MockValidateCandidateBackend::with_hardcoded_result(Err(
|
MockValidateCandidateBackend::with_hardcoded_result(Err(ValidationError::Invalid(
|
||||||
ValidationError::InvalidCandidate(WasmInvalidCandidate::HardTimeout),
|
WasmInvalidCandidate::HardTimeout,
|
||||||
)),
|
))),
|
||||||
validation_data,
|
validation_data,
|
||||||
validation_code,
|
validation_code,
|
||||||
candidate_receipt,
|
candidate_receipt,
|
||||||
|
|||||||
@@ -18,8 +18,7 @@
|
|||||||
|
|
||||||
use criterion::{criterion_group, criterion_main, BatchSize, Criterion, SamplingMode};
|
use criterion::{criterion_group, criterion_main, BatchSize, Criterion, SamplingMode};
|
||||||
use polkadot_node_core_pvf::{
|
use polkadot_node_core_pvf::{
|
||||||
start, testing, Config, Metrics, PrepareError, PrepareJobKind, PrepareStats, PvfPrepData,
|
start, testing, Config, Metrics, PrepareError, PrepareJobKind, PvfPrepData, ValidationHost,
|
||||||
ValidationHost,
|
|
||||||
};
|
};
|
||||||
use polkadot_primitives::ExecutorParams;
|
use polkadot_primitives::ExecutorParams;
|
||||||
use rococo_runtime::WASM_BINARY;
|
use rococo_runtime::WASM_BINARY;
|
||||||
|
|||||||
@@ -140,8 +140,7 @@ pub unsafe fn create_runtime_from_artifact_bytes(
|
|||||||
executor_params: &ExecutorParams,
|
executor_params: &ExecutorParams,
|
||||||
) -> Result<WasmtimeRuntime, WasmError> {
|
) -> Result<WasmtimeRuntime, WasmError> {
|
||||||
let mut config = DEFAULT_CONFIG.clone();
|
let mut config = DEFAULT_CONFIG.clone();
|
||||||
config.semantics =
|
config.semantics = params_to_wasmtime_semantics(executor_params);
|
||||||
params_to_wasmtime_semantics(executor_params).map_err(|err| WasmError::Other(err))?;
|
|
||||||
|
|
||||||
sc_executor_wasmtime::create_runtime_from_artifact_bytes::<HostFunctions>(
|
sc_executor_wasmtime::create_runtime_from_artifact_bytes::<HostFunctions>(
|
||||||
compiled_artifact_blob,
|
compiled_artifact_blob,
|
||||||
@@ -149,13 +148,12 @@ pub unsafe fn create_runtime_from_artifact_bytes(
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn params_to_wasmtime_semantics(par: &ExecutorParams) -> Result<Semantics, String> {
|
pub fn params_to_wasmtime_semantics(par: &ExecutorParams) -> Semantics {
|
||||||
let mut sem = DEFAULT_CONFIG.semantics.clone();
|
let mut sem = DEFAULT_CONFIG.semantics.clone();
|
||||||
let mut stack_limit = if let Some(stack_limit) = sem.deterministic_stack_limit.clone() {
|
let mut stack_limit = sem
|
||||||
stack_limit
|
.deterministic_stack_limit
|
||||||
} else {
|
.expect("There is a comment to not change the default stack limit; it should always be available; qed")
|
||||||
return Err("No default stack limit set".to_owned())
|
.clone();
|
||||||
};
|
|
||||||
|
|
||||||
for p in par.iter() {
|
for p in par.iter() {
|
||||||
match p {
|
match p {
|
||||||
@@ -172,7 +170,7 @@ pub fn params_to_wasmtime_semantics(par: &ExecutorParams) -> Result<Semantics, S
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
sem.deterministic_stack_limit = Some(stack_limit);
|
sem.deterministic_stack_limit = Some(stack_limit);
|
||||||
Ok(sem)
|
sem
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Runs the prevalidation on the given code. Returns a [`RuntimeBlob`] if it succeeds.
|
/// Runs the prevalidation on the given code. Returns a [`RuntimeBlob`] if it succeeds.
|
||||||
@@ -191,8 +189,7 @@ pub fn prepare(
|
|||||||
blob: RuntimeBlob,
|
blob: RuntimeBlob,
|
||||||
executor_params: &ExecutorParams,
|
executor_params: &ExecutorParams,
|
||||||
) -> Result<Vec<u8>, sc_executor_common::error::WasmError> {
|
) -> Result<Vec<u8>, sc_executor_common::error::WasmError> {
|
||||||
let semantics = params_to_wasmtime_semantics(executor_params)
|
let semantics = params_to_wasmtime_semantics(executor_params);
|
||||||
.map_err(|e| sc_executor_common::error::WasmError::Other(e))?;
|
|
||||||
sc_executor_wasmtime::prepare_runtime_artifact(blob, &semantics)
|
sc_executor_wasmtime::prepare_runtime_artifact(blob, &semantics)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -19,31 +19,44 @@ use polkadot_node_core_pvf_common::error::{InternalValidationError, PrepareError
|
|||||||
/// A error raised during validation of the candidate.
|
/// A error raised during validation of the candidate.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub enum ValidationError {
|
pub enum ValidationError {
|
||||||
/// The error was raised because the candidate is invalid.
|
/// Deterministic preparation issue. In practice, most of the problems should be caught by
|
||||||
|
/// prechecking, so this may be a sign of internal conditions.
|
||||||
///
|
///
|
||||||
/// Whenever we are unsure if the error was due to the candidate or not, we must vote invalid.
|
/// In principle if preparation of the `WASM` fails, the current candidate cannot be the
|
||||||
InvalidCandidate(InvalidCandidate),
|
/// reason for that. So we can't say whether it is invalid or not. In addition, with
|
||||||
/// Some internal error occurred.
|
/// pre-checking enabled only valid runtimes should ever get enacted, so we can be
|
||||||
InternalError(InternalValidationError),
|
/// reasonably sure that this is some local problem on the current node. However, as this
|
||||||
|
/// particular error *seems* to indicate a deterministic error, we raise a warning.
|
||||||
|
Preparation(PrepareError),
|
||||||
|
/// The error was raised because the candidate is invalid. Should vote against.
|
||||||
|
Invalid(InvalidCandidate),
|
||||||
|
/// Possibly transient issue that may resolve after retries. Should vote against when retries
|
||||||
|
/// fail.
|
||||||
|
PossiblyInvalid(PossiblyInvalidError),
|
||||||
|
/// Preparation or execution issue caused by an internal condition. Should not vote against.
|
||||||
|
Internal(InternalValidationError),
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A description of an error raised during executing a PVF and can be attributed to the combination
|
/// A description of an error raised during executing a PVF and can be attributed to the combination
|
||||||
/// of the candidate [`polkadot_parachain_primitives::primitives::ValidationParams`] and the PVF.
|
/// of the candidate [`polkadot_parachain_primitives::primitives::ValidationParams`] and the PVF.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub enum InvalidCandidate {
|
pub enum InvalidCandidate {
|
||||||
/// PVF preparation ended up with a deterministic error.
|
|
||||||
PrepareError(String),
|
|
||||||
/// The candidate is reported to be invalid by the execution worker. The string contains the
|
/// The candidate is reported to be invalid by the execution worker. The string contains the
|
||||||
/// error message.
|
/// error message.
|
||||||
WorkerReportedInvalid(String),
|
WorkerReportedInvalid(String),
|
||||||
|
/// PVF execution (compilation is not included) took more time than was allotted.
|
||||||
|
HardTimeout,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Possibly transient issue that may resolve after retries.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub enum PossiblyInvalidError {
|
||||||
/// The worker process (not the job) has died during validation of a candidate.
|
/// The worker process (not the job) has died during validation of a candidate.
|
||||||
///
|
///
|
||||||
/// It's unlikely that this is caused by malicious code since workers spawn separate job
|
/// It's unlikely that this is caused by malicious code since workers spawn separate job
|
||||||
/// processes, and those job processes are sandboxed. But, it is possible. We retry in this
|
/// processes, and those job processes are sandboxed. But, it is possible. We retry in this
|
||||||
/// case, and if the error persists, we assume it's caused by the candidate and vote against.
|
/// case, and if the error persists, we assume it's caused by the candidate and vote against.
|
||||||
AmbiguousWorkerDeath,
|
AmbiguousWorkerDeath,
|
||||||
/// PVF execution (compilation is not included) took more time than was allotted.
|
|
||||||
HardTimeout,
|
|
||||||
/// The job process (not the worker) has died for one of the following reasons:
|
/// The job process (not the worker) has died for one of the following reasons:
|
||||||
///
|
///
|
||||||
/// (a) A seccomp violation occurred, most likely due to an attempt by malicious code to
|
/// (a) A seccomp violation occurred, most likely due to an attempt by malicious code to
|
||||||
@@ -68,7 +81,7 @@ pub enum InvalidCandidate {
|
|||||||
|
|
||||||
impl From<InternalValidationError> for ValidationError {
|
impl From<InternalValidationError> for ValidationError {
|
||||||
fn from(error: InternalValidationError) -> Self {
|
fn from(error: InternalValidationError) -> Self {
|
||||||
Self::InternalError(error)
|
Self::Internal(error)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -77,9 +90,9 @@ impl From<PrepareError> for ValidationError {
|
|||||||
// Here we need to classify the errors into two errors: deterministic and non-deterministic.
|
// Here we need to classify the errors into two errors: deterministic and non-deterministic.
|
||||||
// See [`PrepareError::is_deterministic`].
|
// See [`PrepareError::is_deterministic`].
|
||||||
if error.is_deterministic() {
|
if error.is_deterministic() {
|
||||||
Self::InvalidCandidate(InvalidCandidate::PrepareError(error.to_string()))
|
Self::Preparation(error)
|
||||||
} else {
|
} else {
|
||||||
Self::InternalError(InternalValidationError::NonDeterministicPrepareError(error))
|
Self::Internal(InternalValidationError::NonDeterministicPrepareError(error))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ use crate::{
|
|||||||
host::ResultSender,
|
host::ResultSender,
|
||||||
metrics::Metrics,
|
metrics::Metrics,
|
||||||
worker_intf::{IdleWorker, WorkerHandle},
|
worker_intf::{IdleWorker, WorkerHandle},
|
||||||
InvalidCandidate, ValidationError, LOG_TARGET,
|
InvalidCandidate, PossiblyInvalidError, ValidationError, LOG_TARGET,
|
||||||
};
|
};
|
||||||
use futures::{
|
use futures::{
|
||||||
channel::mpsc,
|
channel::mpsc,
|
||||||
@@ -342,27 +342,27 @@ fn handle_job_finish(
|
|||||||
},
|
},
|
||||||
Outcome::InvalidCandidate { err, idle_worker } => (
|
Outcome::InvalidCandidate { err, idle_worker } => (
|
||||||
Some(idle_worker),
|
Some(idle_worker),
|
||||||
Err(ValidationError::InvalidCandidate(InvalidCandidate::WorkerReportedInvalid(err))),
|
Err(ValidationError::Invalid(InvalidCandidate::WorkerReportedInvalid(err))),
|
||||||
None,
|
None,
|
||||||
),
|
),
|
||||||
Outcome::InternalError { err } => (None, Err(ValidationError::InternalError(err)), None),
|
Outcome::InternalError { err } => (None, Err(ValidationError::Internal(err)), None),
|
||||||
// Either the worker or the job timed out. Kill the worker in either case. Treated as
|
// Either the worker or the job timed out. Kill the worker in either case. Treated as
|
||||||
// definitely-invalid, because if we timed out, there's no time left for a retry.
|
// definitely-invalid, because if we timed out, there's no time left for a retry.
|
||||||
Outcome::HardTimeout =>
|
Outcome::HardTimeout =>
|
||||||
(None, Err(ValidationError::InvalidCandidate(InvalidCandidate::HardTimeout)), None),
|
(None, Err(ValidationError::Invalid(InvalidCandidate::HardTimeout)), None),
|
||||||
// "Maybe invalid" errors (will retry).
|
// "Maybe invalid" errors (will retry).
|
||||||
Outcome::WorkerIntfErr => (
|
Outcome::WorkerIntfErr => (
|
||||||
None,
|
None,
|
||||||
Err(ValidationError::InvalidCandidate(InvalidCandidate::AmbiguousWorkerDeath)),
|
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousWorkerDeath)),
|
||||||
None,
|
None,
|
||||||
),
|
),
|
||||||
Outcome::JobDied { err } => (
|
Outcome::JobDied { err } => (
|
||||||
None,
|
None,
|
||||||
Err(ValidationError::InvalidCandidate(InvalidCandidate::AmbiguousJobDeath(err))),
|
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousJobDeath(err))),
|
||||||
None,
|
None,
|
||||||
),
|
),
|
||||||
Outcome::JobError { err } =>
|
Outcome::JobError { err } =>
|
||||||
(None, Err(ValidationError::InvalidCandidate(InvalidCandidate::JobError(err))), None),
|
(None, Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::JobError(err))), None),
|
||||||
};
|
};
|
||||||
|
|
||||||
queue.metrics.execute_finished();
|
queue.metrics.execute_finished();
|
||||||
|
|||||||
@@ -873,7 +873,7 @@ fn pulse_every(interval: std::time::Duration) -> impl futures::Stream<Item = ()>
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub(crate) mod tests {
|
pub(crate) mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::InvalidCandidate;
|
use crate::PossiblyInvalidError;
|
||||||
use assert_matches::assert_matches;
|
use assert_matches::assert_matches;
|
||||||
use futures::future::BoxFuture;
|
use futures::future::BoxFuture;
|
||||||
use polkadot_node_core_pvf_common::{
|
use polkadot_node_core_pvf_common::{
|
||||||
@@ -1211,27 +1211,27 @@ pub(crate) mod tests {
|
|||||||
);
|
);
|
||||||
|
|
||||||
result_tx_pvf_1_1
|
result_tx_pvf_1_1
|
||||||
.send(Err(ValidationError::InvalidCandidate(InvalidCandidate::AmbiguousWorkerDeath)))
|
.send(Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousWorkerDeath)))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert_matches!(
|
assert_matches!(
|
||||||
result_rx_pvf_1_1.now_or_never().unwrap().unwrap(),
|
result_rx_pvf_1_1.now_or_never().unwrap().unwrap(),
|
||||||
Err(ValidationError::InvalidCandidate(InvalidCandidate::AmbiguousWorkerDeath))
|
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousWorkerDeath))
|
||||||
);
|
);
|
||||||
|
|
||||||
result_tx_pvf_1_2
|
result_tx_pvf_1_2
|
||||||
.send(Err(ValidationError::InvalidCandidate(InvalidCandidate::AmbiguousWorkerDeath)))
|
.send(Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousWorkerDeath)))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert_matches!(
|
assert_matches!(
|
||||||
result_rx_pvf_1_2.now_or_never().unwrap().unwrap(),
|
result_rx_pvf_1_2.now_or_never().unwrap().unwrap(),
|
||||||
Err(ValidationError::InvalidCandidate(InvalidCandidate::AmbiguousWorkerDeath))
|
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousWorkerDeath))
|
||||||
);
|
);
|
||||||
|
|
||||||
result_tx_pvf_2
|
result_tx_pvf_2
|
||||||
.send(Err(ValidationError::InvalidCandidate(InvalidCandidate::AmbiguousWorkerDeath)))
|
.send(Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousWorkerDeath)))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert_matches!(
|
assert_matches!(
|
||||||
result_rx_pvf_2.now_or_never().unwrap().unwrap(),
|
result_rx_pvf_2.now_or_never().unwrap().unwrap(),
|
||||||
Err(ValidationError::InvalidCandidate(InvalidCandidate::AmbiguousWorkerDeath))
|
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousWorkerDeath))
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1337,7 +1337,7 @@ pub(crate) mod tests {
|
|||||||
assert_matches!(result_rx.now_or_never().unwrap().unwrap(), Err(PrepareError::TimedOut));
|
assert_matches!(result_rx.now_or_never().unwrap().unwrap(), Err(PrepareError::TimedOut));
|
||||||
assert_matches!(
|
assert_matches!(
|
||||||
result_rx_execute.now_or_never().unwrap().unwrap(),
|
result_rx_execute.now_or_never().unwrap().unwrap(),
|
||||||
Err(ValidationError::InternalError(_))
|
Err(ValidationError::Internal(_))
|
||||||
);
|
);
|
||||||
|
|
||||||
// Reversed case: first send multiple precheck requests, then ask for an execution.
|
// Reversed case: first send multiple precheck requests, then ask for an execution.
|
||||||
@@ -1479,7 +1479,7 @@ pub(crate) mod tests {
|
|||||||
|
|
||||||
// The result should contain the error.
|
// The result should contain the error.
|
||||||
let result = test.poll_and_recv_result(result_rx).await;
|
let result = test.poll_and_recv_result(result_rx).await;
|
||||||
assert_matches!(result, Err(ValidationError::InternalError(_)));
|
assert_matches!(result, Err(ValidationError::Internal(_)));
|
||||||
|
|
||||||
// Submit another execute request. We shouldn't try to prepare again, yet.
|
// Submit another execute request. We shouldn't try to prepare again, yet.
|
||||||
let (result_tx_2, result_rx_2) = oneshot::channel();
|
let (result_tx_2, result_rx_2) = oneshot::channel();
|
||||||
@@ -1498,7 +1498,7 @@ pub(crate) mod tests {
|
|||||||
|
|
||||||
// The result should contain the original error.
|
// The result should contain the original error.
|
||||||
let result = test.poll_and_recv_result(result_rx_2).await;
|
let result = test.poll_and_recv_result(result_rx_2).await;
|
||||||
assert_matches!(result, Err(ValidationError::InternalError(_)));
|
assert_matches!(result, Err(ValidationError::Internal(_)));
|
||||||
|
|
||||||
// Pause for enough time to reset the cooldown for this failed prepare request.
|
// Pause for enough time to reset the cooldown for this failed prepare request.
|
||||||
futures_timer::Delay::new(PREPARE_FAILURE_COOLDOWN).await;
|
futures_timer::Delay::new(PREPARE_FAILURE_COOLDOWN).await;
|
||||||
@@ -1538,11 +1538,11 @@ pub(crate) mod tests {
|
|||||||
// Send an error for the execution here, just so we can check the result receiver is still
|
// Send an error for the execution here, just so we can check the result receiver is still
|
||||||
// alive.
|
// alive.
|
||||||
result_tx_3
|
result_tx_3
|
||||||
.send(Err(ValidationError::InvalidCandidate(InvalidCandidate::AmbiguousWorkerDeath)))
|
.send(Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousWorkerDeath)))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert_matches!(
|
assert_matches!(
|
||||||
result_rx_3.now_or_never().unwrap().unwrap(),
|
result_rx_3.now_or_never().unwrap().unwrap(),
|
||||||
Err(ValidationError::InvalidCandidate(InvalidCandidate::AmbiguousWorkerDeath))
|
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousWorkerDeath))
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1581,10 +1581,7 @@ pub(crate) mod tests {
|
|||||||
|
|
||||||
// The result should contain the error.
|
// The result should contain the error.
|
||||||
let result = test.poll_and_recv_result(result_rx).await;
|
let result = test.poll_and_recv_result(result_rx).await;
|
||||||
assert_matches!(
|
assert_matches!(result, Err(ValidationError::Preparation(_)));
|
||||||
result,
|
|
||||||
Err(ValidationError::InvalidCandidate(InvalidCandidate::PrepareError(_)))
|
|
||||||
);
|
|
||||||
|
|
||||||
// Submit another execute request.
|
// Submit another execute request.
|
||||||
let (result_tx_2, result_rx_2) = oneshot::channel();
|
let (result_tx_2, result_rx_2) = oneshot::channel();
|
||||||
@@ -1603,10 +1600,7 @@ pub(crate) mod tests {
|
|||||||
|
|
||||||
// The result should contain the original error.
|
// The result should contain the original error.
|
||||||
let result = test.poll_and_recv_result(result_rx_2).await;
|
let result = test.poll_and_recv_result(result_rx_2).await;
|
||||||
assert_matches!(
|
assert_matches!(result, Err(ValidationError::Preparation(_)));
|
||||||
result,
|
|
||||||
Err(ValidationError::InvalidCandidate(InvalidCandidate::PrepareError(_)))
|
|
||||||
);
|
|
||||||
|
|
||||||
// Pause for enough time to reset the cooldown for this failed prepare request.
|
// Pause for enough time to reset the cooldown for this failed prepare request.
|
||||||
futures_timer::Delay::new(PREPARE_FAILURE_COOLDOWN).await;
|
futures_timer::Delay::new(PREPARE_FAILURE_COOLDOWN).await;
|
||||||
@@ -1628,10 +1622,7 @@ pub(crate) mod tests {
|
|||||||
|
|
||||||
// The result should still contain the original error.
|
// The result should still contain the original error.
|
||||||
let result = test.poll_and_recv_result(result_rx_3).await;
|
let result = test.poll_and_recv_result(result_rx_3).await;
|
||||||
assert_matches!(
|
assert_matches!(result, Err(ValidationError::Preparation(_)));
|
||||||
result,
|
|
||||||
Err(ValidationError::InvalidCandidate(InvalidCandidate::PrepareError(_)))
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test that multiple heads-up requests trigger preparation retries if the first one failed.
|
// Test that multiple heads-up requests trigger preparation retries if the first one failed.
|
||||||
|
|||||||
@@ -103,7 +103,7 @@ mod worker_intf;
|
|||||||
#[cfg(feature = "test-utils")]
|
#[cfg(feature = "test-utils")]
|
||||||
pub mod testing;
|
pub mod testing;
|
||||||
|
|
||||||
pub use error::{InvalidCandidate, ValidationError};
|
pub use error::{InvalidCandidate, PossiblyInvalidError, ValidationError};
|
||||||
pub use host::{start, Config, ValidationHost, EXECUTE_BINARY_NAME, PREPARE_BINARY_NAME};
|
pub use host::{start, Config, ValidationHost, EXECUTE_BINARY_NAME, PREPARE_BINARY_NAME};
|
||||||
pub use metrics::Metrics;
|
pub use metrics::Metrics;
|
||||||
pub use priority::Priority;
|
pub use priority::Priority;
|
||||||
|
|||||||
@@ -162,7 +162,7 @@ async fn execute_job_terminates_on_timeout() {
|
|||||||
.await;
|
.await;
|
||||||
|
|
||||||
match result {
|
match result {
|
||||||
Err(ValidationError::InvalidCandidate(InvalidCandidate::HardTimeout)) => {},
|
Err(ValidationError::Invalid(InvalidCandidate::HardTimeout)) => {},
|
||||||
r => panic!("{:?}", r),
|
r => panic!("{:?}", r),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -202,8 +202,8 @@ async fn ensure_parallel_execution() {
|
|||||||
assert_matches!(
|
assert_matches!(
|
||||||
(res1, res2),
|
(res1, res2),
|
||||||
(
|
(
|
||||||
Err(ValidationError::InvalidCandidate(InvalidCandidate::HardTimeout)),
|
Err(ValidationError::Invalid(InvalidCandidate::HardTimeout)),
|
||||||
Err(ValidationError::InvalidCandidate(InvalidCandidate::HardTimeout))
|
Err(ValidationError::Invalid(InvalidCandidate::HardTimeout))
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -344,7 +344,7 @@ async fn deleting_prepared_artifact_does_not_dispute() {
|
|||||||
.await;
|
.await;
|
||||||
|
|
||||||
match result {
|
match result {
|
||||||
Err(ValidationError::InvalidCandidate(InvalidCandidate::HardTimeout)) => {},
|
Err(ValidationError::Invalid(InvalidCandidate::HardTimeout)) => {},
|
||||||
r => panic!("{:?}", r),
|
r => panic!("{:?}", r),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,7 +19,9 @@
|
|||||||
|
|
||||||
use super::TestHost;
|
use super::TestHost;
|
||||||
use assert_matches::assert_matches;
|
use assert_matches::assert_matches;
|
||||||
use polkadot_node_core_pvf::{InvalidCandidate, PrepareError, ValidationError};
|
use polkadot_node_core_pvf::{
|
||||||
|
InvalidCandidate, PossiblyInvalidError, PrepareError, ValidationError,
|
||||||
|
};
|
||||||
use polkadot_parachain_primitives::primitives::{BlockData, ValidationParams};
|
use polkadot_parachain_primitives::primitives::{BlockData, ValidationParams};
|
||||||
use procfs::process;
|
use procfs::process;
|
||||||
use rusty_fork::rusty_fork_test;
|
use rusty_fork::rusty_fork_test;
|
||||||
@@ -151,7 +153,7 @@ rusty_fork_test! {
|
|||||||
|
|
||||||
assert_matches!(
|
assert_matches!(
|
||||||
result,
|
result,
|
||||||
Err(ValidationError::InvalidCandidate(InvalidCandidate::HardTimeout))
|
Err(ValidationError::Invalid(InvalidCandidate::HardTimeout))
|
||||||
);
|
);
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -217,7 +219,7 @@ rusty_fork_test! {
|
|||||||
|
|
||||||
assert_matches!(
|
assert_matches!(
|
||||||
result,
|
result,
|
||||||
Err(ValidationError::InvalidCandidate(InvalidCandidate::AmbiguousWorkerDeath))
|
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousWorkerDeath))
|
||||||
);
|
);
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -288,7 +290,7 @@ rusty_fork_test! {
|
|||||||
// Note that we get a more specific error if the job died than if the whole worker died.
|
// Note that we get a more specific error if the job died than if the whole worker died.
|
||||||
assert_matches!(
|
assert_matches!(
|
||||||
result,
|
result,
|
||||||
Err(ValidationError::InvalidCandidate(InvalidCandidate::AmbiguousJobDeath(err)))
|
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousJobDeath(err)))
|
||||||
if err == "received signal: SIGKILL"
|
if err == "received signal: SIGKILL"
|
||||||
);
|
);
|
||||||
})
|
})
|
||||||
|
|||||||
Reference in New Issue
Block a user