remove retry from backers on failed candidate validation (#2182)

Hey guys, as discussed I've changed the name to a more general one
`PvfExecKind`, is this good or too general?
Creating this as a draft, I still have to fix the tests.

Closes #1585

Kusama address: FkB6QEo8VnV3oifugNj5NeVG3Mvq1zFbrUu4P5YwRoe5mQN

---------

Co-authored-by: command-bot <>
Co-authored-by: Marcin S <marcin@realemail.net>
This commit is contained in:
jserrat
2023-11-20 11:00:19 +00:00
committed by GitHub
parent b35300c377
commit ede4a36262
18 changed files with 276 additions and 248 deletions
@@ -54,7 +54,7 @@ use polkadot_node_subsystem_util::{
};
use polkadot_primitives::{
ApprovalVote, BlockNumber, CandidateHash, CandidateIndex, CandidateReceipt, DisputeStatement,
ExecutorParams, GroupIndex, Hash, PvfExecTimeoutKind, SessionIndex, SessionInfo,
ExecutorParams, GroupIndex, Hash, PvfExecKind, SessionIndex, SessionInfo,
ValidDisputeStatementKind, ValidatorId, ValidatorIndex, ValidatorPair, ValidatorSignature,
};
use sc_keystore::LocalKeystore;
@@ -2867,7 +2867,7 @@ async fn launch_approval<Context>(
candidate_receipt: candidate.clone(),
pov: available_data.pov,
executor_params,
exec_timeout_kind: PvfExecTimeoutKind::Approval,
exec_kind: PvfExecKind::Approval,
response_sender: val_tx,
})
.await;
@@ -2705,10 +2705,10 @@ async fn handle_double_assignment_import(
assert_matches!(
overseer_recv(virtual_overseer).await,
AllMessages::CandidateValidation(CandidateValidationMessage::ValidateFromExhaustive {
exec_timeout_kind,
exec_kind,
response_sender,
..
}) if exec_timeout_kind == PvfExecTimeoutKind::Approval => {
}) if exec_kind == PvfExecKind::Approval => {
response_sender.send(Ok(ValidationResult::Valid(Default::default(), Default::default())))
.unwrap();
}
+2 -2
View File
@@ -106,7 +106,7 @@ use polkadot_node_subsystem_util::{
use polkadot_primitives::{
BackedCandidate, CandidateCommitments, CandidateHash, CandidateReceipt,
CommittedCandidateReceipt, CoreIndex, CoreState, ExecutorParams, Hash, Id as ParaId,
PersistedValidationData, PvfExecTimeoutKind, SigningContext, ValidationCode, ValidatorId,
PersistedValidationData, PvfExecKind, SigningContext, ValidationCode, ValidatorId,
ValidatorIndex, ValidatorSignature, ValidityAttestation,
};
use sp_keystore::KeystorePtr;
@@ -566,7 +566,7 @@ async fn request_candidate_validation(
candidate_receipt,
pov,
executor_params,
exec_timeout_kind: PvfExecTimeoutKind::Backing,
exec_kind: PvfExecKind::Backing,
response_sender: tx,
})
.await;
+19 -19
View File
@@ -33,7 +33,7 @@ use polkadot_node_subsystem::{
};
use polkadot_node_subsystem_test_helpers as test_helpers;
use polkadot_primitives::{
CandidateDescriptor, GroupRotationInfo, HeadData, PersistedValidationData, PvfExecTimeoutKind,
CandidateDescriptor, GroupRotationInfo, HeadData, PersistedValidationData, PvfExecKind,
ScheduledCore, SessionIndex, LEGACY_MIN_BACKING_VOTES,
};
use sp_application_crypto::AppCrypto;
@@ -344,14 +344,14 @@ async fn assert_validate_from_exhaustive(
validation_data,
validation_code,
candidate_receipt,
exec_timeout_kind,
exec_kind,
response_sender,
..
},
) if validation_data == *assert_pvd &&
validation_code == *assert_validation_code &&
*pov == *assert_pov && &candidate_receipt.descriptor == assert_candidate.descriptor() &&
exec_timeout_kind == PvfExecTimeoutKind::Backing &&
exec_kind == PvfExecKind::Backing &&
candidate_receipt.commitments_hash == assert_candidate.commitments.hash() =>
{
response_sender.send(Ok(ValidationResult::Valid(
@@ -550,14 +550,14 @@ fn backing_works() {
validation_code,
candidate_receipt,
pov,
exec_timeout_kind,
exec_kind,
response_sender,
..
},
) if validation_data == pvd_ab &&
validation_code == validation_code_ab &&
*pov == pov_ab && &candidate_receipt.descriptor == candidate_a.descriptor() &&
exec_timeout_kind == PvfExecTimeoutKind::Backing &&
exec_kind == PvfExecKind::Backing &&
candidate_receipt.commitments_hash == candidate_a_commitments_hash =>
{
response_sender.send(Ok(
@@ -729,14 +729,14 @@ fn backing_works_while_validation_ongoing() {
validation_code,
candidate_receipt,
pov,
exec_timeout_kind,
exec_kind,
response_sender,
..
},
) if validation_data == pvd_abc &&
validation_code == validation_code_abc &&
*pov == pov_abc && &candidate_receipt.descriptor == candidate_a.descriptor() &&
exec_timeout_kind == PvfExecTimeoutKind::Backing &&
exec_kind == PvfExecKind::Backing &&
candidate_a_commitments_hash == candidate_receipt.commitments_hash =>
{
// we never validate the candidate. our local node
@@ -890,14 +890,14 @@ fn backing_misbehavior_works() {
validation_code,
candidate_receipt,
pov,
exec_timeout_kind,
exec_kind,
response_sender,
..
},
) if validation_data == pvd_a &&
validation_code == validation_code_a &&
*pov == pov_a && &candidate_receipt.descriptor == candidate_a.descriptor() &&
exec_timeout_kind == PvfExecTimeoutKind::Backing &&
exec_kind == PvfExecKind::Backing &&
candidate_a_commitments_hash == candidate_receipt.commitments_hash =>
{
response_sender.send(Ok(
@@ -1057,14 +1057,14 @@ fn backing_dont_second_invalid() {
validation_code,
candidate_receipt,
pov,
exec_timeout_kind,
exec_kind,
response_sender,
..
},
) if validation_data == pvd_a &&
validation_code == validation_code_a &&
*pov == pov_block_a && &candidate_receipt.descriptor == candidate_a.descriptor() &&
exec_timeout_kind == PvfExecTimeoutKind::Backing &&
exec_kind == PvfExecKind::Backing &&
candidate_a.commitments.hash() == candidate_receipt.commitments_hash =>
{
response_sender.send(Ok(ValidationResult::Invalid(InvalidCandidate::BadReturn))).unwrap();
@@ -1097,14 +1097,14 @@ fn backing_dont_second_invalid() {
validation_code,
candidate_receipt,
pov,
exec_timeout_kind,
exec_kind,
response_sender,
..
},
) if validation_data == pvd_b &&
validation_code == validation_code_b &&
*pov == pov_block_b && &candidate_receipt.descriptor == candidate_b.descriptor() &&
exec_timeout_kind == PvfExecTimeoutKind::Backing &&
exec_kind == PvfExecKind::Backing &&
candidate_b.commitments.hash() == candidate_receipt.commitments_hash =>
{
response_sender.send(Ok(
@@ -1224,14 +1224,14 @@ fn backing_second_after_first_fails_works() {
validation_code,
candidate_receipt,
pov,
exec_timeout_kind,
exec_kind,
response_sender,
..
},
) if validation_data == pvd_a &&
validation_code == validation_code_a &&
*pov == pov_a && &candidate_receipt.descriptor == candidate.descriptor() &&
exec_timeout_kind == PvfExecTimeoutKind::Backing &&
exec_kind == PvfExecKind::Backing &&
candidate.commitments.hash() == candidate_receipt.commitments_hash =>
{
response_sender.send(Ok(ValidationResult::Invalid(InvalidCandidate::BadReturn))).unwrap();
@@ -1368,14 +1368,14 @@ fn backing_works_after_failed_validation() {
validation_code,
candidate_receipt,
pov,
exec_timeout_kind,
exec_kind,
response_sender,
..
},
) if validation_data == pvd_a &&
validation_code == validation_code_a &&
*pov == pov_a && &candidate_receipt.descriptor == candidate.descriptor() &&
exec_timeout_kind == PvfExecTimeoutKind::Backing &&
exec_kind == PvfExecKind::Backing &&
candidate.commitments.hash() == candidate_receipt.commitments_hash =>
{
response_sender.send(Err(ValidationFailed("Internal test error".into()))).unwrap();
@@ -1634,13 +1634,13 @@ fn retry_works() {
validation_code,
candidate_receipt,
pov,
exec_timeout_kind,
exec_kind,
..
},
) if validation_data == pvd_a &&
validation_code == validation_code_a &&
*pov == pov_a && &candidate_receipt.descriptor == candidate.descriptor() &&
exec_timeout_kind == PvfExecTimeoutKind::Backing &&
exec_kind == PvfExecKind::Backing &&
candidate.commitments.hash() == candidate_receipt.commitments_hash
);
virtual_overseer
@@ -232,14 +232,14 @@ async fn assert_validate_seconded_candidate(
validation_code,
candidate_receipt,
pov,
exec_timeout_kind,
exec_kind,
response_sender,
..
}) if &validation_data == assert_pvd &&
&validation_code == assert_validation_code &&
&*pov == assert_pov &&
&candidate_receipt.descriptor == candidate.descriptor() &&
exec_timeout_kind == PvfExecTimeoutKind::Backing &&
exec_kind == PvfExecKind::Backing &&
candidate.commitments.hash() == candidate_receipt.commitments_hash =>
{
response_sender.send(Ok(ValidationResult::Valid(
@@ -49,8 +49,8 @@ use polkadot_primitives::{
DEFAULT_LENIENT_PREPARATION_TIMEOUT, DEFAULT_PRECHECK_PREPARATION_TIMEOUT,
},
CandidateCommitments, CandidateDescriptor, CandidateReceipt, ExecutorParams, Hash,
OccupiedCoreAssumption, PersistedValidationData, PvfExecTimeoutKind, PvfPrepTimeoutKind,
ValidationCode, ValidationCodeHash,
OccupiedCoreAssumption, PersistedValidationData, PvfExecKind, PvfPrepKind, ValidationCode,
ValidationCodeHash,
};
use parity_scale_codec::Encode;
@@ -73,12 +73,6 @@ mod tests;
/// Log target passed to the `gum` logging macros in this module.
const LOG_TARGET: &str = "parachain::candidate-validation";
/// The amount of time to wait before retrying after a retry-able backing validation error. We use a
/// lower value for the backing case, to fit within the lower backing timeout.
#[cfg(not(test))]
const PVF_BACKING_EXECUTION_RETRY_DELAY: Duration = Duration::from_millis(500);
#[cfg(test)]
const PVF_BACKING_EXECUTION_RETRY_DELAY: Duration = Duration::from_millis(200);
/// The amount of time to wait before retrying after a retry-able approval validation error. We use
/// a higher value for the approval case since we have more time, and if we wait longer it is more
/// likely that transient conditions will resolve.
@@ -163,7 +157,7 @@ async fn run<Context>(
candidate_receipt,
pov,
executor_params,
exec_timeout_kind,
exec_kind,
response_sender,
..
} => {
@@ -180,7 +174,7 @@ async fn run<Context>(
candidate_receipt,
pov,
executor_params,
exec_timeout_kind,
exec_kind,
&metrics,
)
.await;
@@ -198,7 +192,7 @@ async fn run<Context>(
candidate_receipt,
pov,
executor_params,
exec_timeout_kind,
exec_kind,
response_sender,
..
} => {
@@ -215,7 +209,7 @@ async fn run<Context>(
candidate_receipt,
pov,
executor_params,
exec_timeout_kind,
exec_kind,
&metrics,
)
.await;
@@ -357,7 +351,7 @@ where
return PreCheckOutcome::Invalid
};
let timeout = pvf_prep_timeout(&executor_params, PvfPrepTimeoutKind::Precheck);
let timeout = pvf_prep_timeout(&executor_params, PvfPrepKind::Precheck);
let pvf = match sp_maybe_compressed_blob::decompress(
&validation_code.0,
@@ -501,7 +495,7 @@ async fn validate_from_chain_state<Sender>(
candidate_receipt: CandidateReceipt,
pov: Arc<PoV>,
executor_params: ExecutorParams,
exec_timeout_kind: PvfExecTimeoutKind,
exec_kind: PvfExecKind,
metrics: &Metrics,
) -> Result<ValidationResult, ValidationFailed>
where
@@ -521,7 +515,7 @@ where
candidate_receipt.clone(),
pov,
executor_params,
exec_timeout_kind,
exec_kind,
metrics,
)
.await;
@@ -557,7 +551,7 @@ async fn validate_candidate_exhaustive(
candidate_receipt: CandidateReceipt,
pov: Arc<PoV>,
executor_params: ExecutorParams,
exec_timeout_kind: PvfExecTimeoutKind,
exec_kind: PvfExecKind,
metrics: &Metrics,
) -> Result<ValidationResult, ValidationFailed> {
let _timer = metrics.time_validate_candidate_exhaustive();
@@ -616,15 +610,32 @@ async fn validate_candidate_exhaustive(
relay_parent_storage_root: persisted_validation_data.relay_parent_storage_root,
};
let result = validation_backend
.validate_candidate_with_retry(
raw_validation_code.to_vec(),
pvf_exec_timeout(&executor_params, exec_timeout_kind),
exec_timeout_kind,
params,
executor_params,
)
.await;
let result = match exec_kind {
// Retry is disabled to reduce the chance of nondeterministic blocks getting backed and
// honest backers getting slashed.
PvfExecKind::Backing => {
let prep_timeout = pvf_prep_timeout(&executor_params, PvfPrepKind::Prepare);
let exec_timeout = pvf_exec_timeout(&executor_params, exec_kind);
let pvf = PvfPrepData::from_code(
raw_validation_code.to_vec(),
executor_params,
prep_timeout,
PrepareJobKind::Compilation,
);
validation_backend.validate_candidate(pvf, exec_timeout, params.encode()).await
},
PvfExecKind::Approval =>
validation_backend
.validate_candidate_with_retry(
raw_validation_code.to_vec(),
pvf_exec_timeout(&executor_params, exec_kind),
params,
executor_params,
PVF_APPROVAL_EXECUTION_RETRY_DELAY,
)
.await,
};
if let Err(ref error) = result {
gum::info!(target: LOG_TARGET, ?para_id, ?error, "Failed to validate candidate");
@@ -709,8 +720,8 @@ trait ValidationBackend {
encoded_params: Vec<u8>,
) -> Result<WasmValidationResult, ValidationError>;
/// Tries executing a PVF. Will retry once if an error is encountered that may have been
/// transient.
/// Tries executing a PVF for the approval subsystem. Will retry once if an error is encountered
/// that may have been transient.
///
/// NOTE: Should retry only on errors that are a result of execution itself, and not of
/// preparation.
@@ -718,11 +729,11 @@ trait ValidationBackend {
&mut self,
raw_validation_code: Vec<u8>,
exec_timeout: Duration,
exec_timeout_kind: PvfExecTimeoutKind,
params: ValidationParams,
executor_params: ExecutorParams,
retry_delay: Duration,
) -> Result<WasmValidationResult, ValidationError> {
let prep_timeout = pvf_prep_timeout(&executor_params, PvfPrepTimeoutKind::Lenient);
let prep_timeout = pvf_prep_timeout(&executor_params, PvfPrepKind::Prepare);
// Construct the PVF a single time, since it is an expensive operation. Cloning it is cheap.
let pvf = PvfPrepData::from_code(
raw_validation_code,
@@ -740,11 +751,6 @@ trait ValidationBackend {
return validation_result
}
let retry_delay = match exec_timeout_kind {
PvfExecTimeoutKind::Backing => PVF_BACKING_EXECUTION_RETRY_DELAY,
PvfExecTimeoutKind::Approval => PVF_APPROVAL_EXECUTION_RETRY_DELAY,
};
// Allow limited retries for each kind of error.
let mut num_death_retries_left = 1;
let mut num_job_error_retries_left = 1;
@@ -867,22 +873,41 @@ fn perform_basic_checks(
Ok(())
}
fn pvf_prep_timeout(executor_params: &ExecutorParams, kind: PvfPrepTimeoutKind) -> Duration {
/// Determines the timeout for PVF preparation: the value from the executor parameters if one is
/// set, otherwise the default for the given kind.
///
/// Precheck
/// The time period after which the preparation worker is considered
/// unresponsive and will be killed.
///
/// Prepare
/// The time period after which the preparation worker is considered
/// unresponsive and will be killed.
fn pvf_prep_timeout(executor_params: &ExecutorParams, kind: PvfPrepKind) -> Duration {
if let Some(timeout) = executor_params.pvf_prep_timeout(kind) {
return timeout
}
match kind {
PvfPrepTimeoutKind::Precheck => DEFAULT_PRECHECK_PREPARATION_TIMEOUT,
PvfPrepTimeoutKind::Lenient => DEFAULT_LENIENT_PREPARATION_TIMEOUT,
PvfPrepKind::Precheck => DEFAULT_PRECHECK_PREPARATION_TIMEOUT,
PvfPrepKind::Prepare => DEFAULT_LENIENT_PREPARATION_TIMEOUT,
}
}
fn pvf_exec_timeout(executor_params: &ExecutorParams, kind: PvfExecTimeoutKind) -> Duration {
/// Determines the timeout for PVF execution: the value from the executor parameters if one is
/// set, otherwise the default for the given kind.
///
/// Backing subsystem
/// The amount of time to spend on execution during backing.
///
/// Approval subsystem
/// The amount of time to spend on execution during approval or disputes.
/// This should be much longer than the backing execution timeout to ensure that in the
/// absence of extremely large disparities between hardware, blocks that pass backing are
/// considered executable by approval checkers or dispute participants.
fn pvf_exec_timeout(executor_params: &ExecutorParams, kind: PvfExecKind) -> Duration {
if let Some(timeout) = executor_params.pvf_exec_timeout(kind) {
return timeout
}
match kind {
PvfExecTimeoutKind::Backing => DEFAULT_BACKING_EXECUTION_TIMEOUT,
PvfExecTimeoutKind::Approval => DEFAULT_APPROVAL_EXECUTION_TIMEOUT,
PvfExecKind::Backing => DEFAULT_BACKING_EXECUTION_TIMEOUT,
PvfExecKind::Approval => DEFAULT_APPROVAL_EXECUTION_TIMEOUT,
}
}
@@ -436,7 +436,7 @@ fn candidate_validation_ok_is_ok() {
candidate_receipt,
Arc::new(pov),
ExecutorParams::default(),
PvfExecTimeoutKind::Backing,
PvfExecKind::Backing,
&Default::default(),
))
.unwrap();
@@ -488,7 +488,7 @@ fn candidate_validation_bad_return_is_invalid() {
candidate_receipt,
Arc::new(pov),
ExecutorParams::default(),
PvfExecTimeoutKind::Backing,
PvfExecKind::Backing,
&Default::default(),
))
.unwrap();
@@ -496,6 +496,33 @@ fn candidate_validation_bad_return_is_invalid() {
assert_matches!(v, ValidationResult::Invalid(InvalidCandidate::Timeout));
}
/// Test helper: builds a descriptor via `make_valid_candidate_descriptor` for the given PoV,
/// validation code and persisted validation data, asserts that it passes
/// `perform_basic_checks`, and returns it.
///
/// `head_data_hash` fills the first of the two trailing hash arguments; the second is always
/// `dummy_hash()`, which is what the tests passed before this helper was extracted from them.
fn perform_basic_checks_on_valid_candidate(
    pov: &PoV,
    validation_code: &ValidationCode,
    validation_data: &PersistedValidationData,
    head_data_hash: Hash,
) -> CandidateDescriptor {
    let descriptor = make_valid_candidate_descriptor(
        ParaId::from(1_u32),
        dummy_hash(),
        validation_data.hash(),
        pov.hash(),
        validation_code.hash(),
        head_data_hash,
        // Keep this independent of `head_data_hash`, as in the original inlined fixtures.
        dummy_hash(),
        Sr25519Keyring::Alice,
    );

    // `pov` is already a reference, so it is passed through without re-borrowing.
    let check = perform_basic_checks(
        &descriptor,
        validation_data.max_pov_size,
        pov,
        &validation_code.hash(),
    );
    assert!(check.is_ok(), "basic checks must pass for a well-formed candidate descriptor");

    descriptor
}
// Test that we vote valid if we get `AmbiguousWorkerDeath`, retry, and then succeed.
#[test]
fn candidate_validation_one_ambiguous_error_is_valid() {
@@ -505,24 +532,12 @@ fn candidate_validation_one_ambiguous_error_is_valid() {
let head_data = HeadData(vec![1, 1, 1]);
let validation_code = ValidationCode(vec![2; 16]);
let descriptor = make_valid_candidate_descriptor(
ParaId::from(1_u32),
dummy_hash(),
validation_data.hash(),
pov.hash(),
validation_code.hash(),
head_data.hash(),
dummy_hash(),
Sr25519Keyring::Alice,
);
let check = perform_basic_checks(
&descriptor,
validation_data.max_pov_size,
let descriptor = perform_basic_checks_on_valid_candidate(
&pov,
&validation_code.hash(),
&validation_code,
&validation_data,
head_data.hash(),
);
assert!(check.is_ok());
let validation_result = WasmValidationResult {
head_data,
@@ -554,7 +569,7 @@ fn candidate_validation_one_ambiguous_error_is_valid() {
candidate_receipt,
Arc::new(pov),
ExecutorParams::default(),
PvfExecTimeoutKind::Backing,
PvfExecKind::Approval,
&Default::default(),
))
.unwrap();
@@ -576,24 +591,12 @@ fn candidate_validation_multiple_ambiguous_errors_is_invalid() {
let pov = PoV { block_data: BlockData(vec![1; 32]) };
let validation_code = ValidationCode(vec![2; 16]);
let descriptor = make_valid_candidate_descriptor(
ParaId::from(1_u32),
dummy_hash(),
validation_data.hash(),
pov.hash(),
validation_code.hash(),
dummy_hash(),
dummy_hash(),
Sr25519Keyring::Alice,
);
let check = perform_basic_checks(
&descriptor,
validation_data.max_pov_size,
let descriptor = perform_basic_checks_on_valid_candidate(
&pov,
&validation_code.hash(),
&validation_code,
&validation_data,
dummy_hash(),
);
assert!(check.is_ok());
let candidate_receipt = CandidateReceipt { descriptor, commitments_hash: Hash::zero() };
@@ -607,7 +610,7 @@ fn candidate_validation_multiple_ambiguous_errors_is_invalid() {
candidate_receipt,
Arc::new(pov),
ExecutorParams::default(),
PvfExecTimeoutKind::Backing,
PvfExecKind::Approval,
&Default::default(),
))
.unwrap();
@@ -615,58 +618,79 @@ fn candidate_validation_multiple_ambiguous_errors_is_invalid() {
assert_matches!(v, ValidationResult::Invalid(InvalidCandidate::ExecutionError(_)));
}
// Test that we retry on internal errors.
// Test that we retry for approval on internal errors.
#[test]
fn candidate_validation_retry_internal_errors() {
let validation_data = PersistedValidationData { max_pov_size: 1024, ..Default::default() };
let pov = PoV { block_data: BlockData(vec![1; 32]) };
let validation_code = ValidationCode(vec![2; 16]);
let descriptor = make_valid_candidate_descriptor(
ParaId::from(1_u32),
dummy_hash(),
validation_data.hash(),
pov.hash(),
validation_code.hash(),
dummy_hash(),
dummy_hash(),
Sr25519Keyring::Alice,
let v = candidate_validation_retry_on_error_helper(
PvfExecKind::Approval,
vec![
Err(InternalValidationError::HostCommunication("foo".into()).into()),
// Throw an AJD error, we should still retry again.
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbiguousJobDeath(
"baz".into(),
))),
// Throw another internal error.
Err(InternalValidationError::HostCommunication("bar".into()).into()),
],
);
assert_matches!(v, Err(ValidationFailed(s)) if s.contains("bar"));
}
let check = perform_basic_checks(
&descriptor,
validation_data.max_pov_size,
&pov,
&validation_code.hash(),
);
assert!(check.is_ok());
let candidate_receipt = CandidateReceipt { descriptor, commitments_hash: Hash::zero() };
let v = executor::block_on(validate_candidate_exhaustive(
MockValidateCandidateBackend::with_hardcoded_result_list(vec![
// Test that we don't retry for backing on internal errors.
#[test]
fn candidate_validation_dont_retry_internal_errors() {
let v = candidate_validation_retry_on_error_helper(
PvfExecKind::Backing,
vec![
Err(InternalValidationError::HostCommunication("foo".into()).into()),
// Throw an AWD error, we should still retry again.
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbiguousWorkerDeath)),
// Throw another internal error.
Err(InternalValidationError::HostCommunication("bar".into()).into()),
]),
validation_data,
validation_code,
candidate_receipt,
Arc::new(pov),
ExecutorParams::default(),
PvfExecTimeoutKind::Backing,
&Default::default(),
));
],
);
assert_matches!(v, Err(ValidationFailed(s)) if s.contains("bar"));
assert_matches!(v, Err(ValidationFailed(s)) if s.contains("foo"));
}
// Test that we retry on panic errors.
// Approval validation retries after a panic (`JobError`): the reported execution error must be
// the last mocked one ("bar"), not the first.
#[test]
fn candidate_validation_retry_panic_errors() {
    let mocked_results = vec![
        Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::JobError("foo".into()))),
        // An AWD error in between must not prevent a further retry.
        Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbiguousWorkerDeath)),
        // The second panic error is the one expected in the final result.
        Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::JobError("bar".into()))),
    ];
    let outcome =
        candidate_validation_retry_on_error_helper(PvfExecKind::Approval, mocked_results);

    assert_matches!(
        outcome,
        Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(s))) if s == "bar"
    );
}
// Backing validation does not retry after a panic (`JobError`): the reported execution error
// must be the very first mocked one ("foo").
#[test]
fn candidate_validation_dont_retry_panic_errors() {
    let mocked_results = vec![
        Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::JobError("foo".into()))),
        // The entries below would only surface if a retry happened, which backing must not do.
        Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbiguousWorkerDeath)),
        Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::JobError("bar".into()))),
    ];
    let outcome =
        candidate_validation_retry_on_error_helper(PvfExecKind::Backing, mocked_results);

    assert_matches!(
        outcome,
        Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(s))) if s == "foo"
    );
}
fn candidate_validation_retry_on_error_helper(
exec_kind: PvfExecKind,
mock_errors: Vec<Result<WasmValidationResult, ValidationError>>,
) -> Result<ValidationResult, ValidationFailed> {
let validation_data = PersistedValidationData { max_pov_size: 1024, ..Default::default() };
let pov = PoV { block_data: BlockData(vec![1; 32]) };
@@ -693,26 +717,16 @@ fn candidate_validation_retry_panic_errors() {
let candidate_receipt = CandidateReceipt { descriptor, commitments_hash: Hash::zero() };
let v = executor::block_on(validate_candidate_exhaustive(
MockValidateCandidateBackend::with_hardcoded_result_list(vec![
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::JobError("foo".into()))),
// Throw an AJD error, we should still retry again.
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbiguousJobDeath(
"baz".into(),
))),
// Throw another panic error.
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::JobError("bar".into()))),
]),
return executor::block_on(validate_candidate_exhaustive(
MockValidateCandidateBackend::with_hardcoded_result_list(mock_errors),
validation_data,
validation_code,
candidate_receipt,
Arc::new(pov),
ExecutorParams::default(),
PvfExecTimeoutKind::Backing,
exec_kind,
&Default::default(),
));
assert_matches!(v, Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(s))) if s == "bar".to_string());
}
#[test]
@@ -752,7 +766,7 @@ fn candidate_validation_timeout_is_internal_error() {
candidate_receipt,
Arc::new(pov),
ExecutorParams::default(),
PvfExecTimeoutKind::Backing,
PvfExecKind::Backing,
&Default::default(),
));
@@ -797,7 +811,7 @@ fn candidate_validation_commitment_hash_mismatch_is_invalid() {
candidate_receipt,
Arc::new(pov),
ExecutorParams::default(),
PvfExecTimeoutKind::Backing,
PvfExecKind::Backing,
&Default::default(),
))
.unwrap();
@@ -846,7 +860,7 @@ fn candidate_validation_code_mismatch_is_invalid() {
candidate_receipt,
Arc::new(pov),
ExecutorParams::default(),
PvfExecTimeoutKind::Backing,
PvfExecKind::Backing,
&Default::default(),
))
.unwrap();
@@ -903,7 +917,7 @@ fn compressed_code_works() {
candidate_receipt,
Arc::new(pov),
ExecutorParams::default(),
PvfExecTimeoutKind::Backing,
PvfExecKind::Backing,
&Default::default(),
));
@@ -954,7 +968,7 @@ fn code_decompression_failure_is_error() {
candidate_receipt,
Arc::new(pov),
ExecutorParams::default(),
PvfExecTimeoutKind::Backing,
PvfExecKind::Backing,
&Default::default(),
));
@@ -1006,7 +1020,7 @@ fn pov_decompression_failure_is_invalid() {
candidate_receipt,
Arc::new(pov),
ExecutorParams::default(),
PvfExecTimeoutKind::Backing,
PvfExecKind::Backing,
&Default::default(),
));
@@ -32,7 +32,7 @@ use polkadot_node_subsystem::{
};
use polkadot_node_subsystem_util::runtime::get_validation_code_by_hash;
use polkadot_primitives::{
BlockNumber, CandidateHash, CandidateReceipt, Hash, PvfExecTimeoutKind, SessionIndex,
BlockNumber, CandidateHash, CandidateReceipt, Hash, PvfExecKind, SessionIndex,
};
use crate::LOG_TARGET;
@@ -386,7 +386,7 @@ async fn participate(
candidate_receipt: req.candidate_receipt().clone(),
pov: available_data.pov,
executor_params: req.executor_params(),
exec_timeout_kind: PvfExecTimeoutKind::Approval,
exec_kind: PvfExecKind::Approval,
response_sender: validation_tx,
})
.await;
@@ -115,8 +115,8 @@ pub async fn participation_full_happy_path(
assert_matches!(
ctx_handle.recv().await,
AllMessages::CandidateValidation(
CandidateValidationMessage::ValidateFromExhaustive { candidate_receipt, exec_timeout_kind, response_sender, .. }
) if exec_timeout_kind == PvfExecTimeoutKind::Approval => {
CandidateValidationMessage::ValidateFromExhaustive { candidate_receipt, exec_kind, response_sender, .. }
) if exec_kind == PvfExecKind::Approval => {
if expected_commitments_hash != candidate_receipt.commitments_hash {
response_sender.send(Ok(ValidationResult::Invalid(InvalidCandidate::CommitmentsHashMismatch))).unwrap();
} else {
@@ -450,8 +450,8 @@ fn cast_invalid_vote_if_validation_fails_or_is_invalid() {
assert_matches!(
ctx_handle.recv().await,
AllMessages::CandidateValidation(
CandidateValidationMessage::ValidateFromExhaustive { exec_timeout_kind, response_sender, .. }
) if exec_timeout_kind == PvfExecTimeoutKind::Approval => {
CandidateValidationMessage::ValidateFromExhaustive { exec_kind, response_sender, .. }
) if exec_kind == PvfExecKind::Approval => {
response_sender.send(Ok(ValidationResult::Invalid(InvalidCandidate::Timeout))).unwrap();
},
"overseer did not receive candidate validation message",
@@ -487,8 +487,8 @@ fn cast_invalid_vote_if_commitments_dont_match() {
assert_matches!(
ctx_handle.recv().await,
AllMessages::CandidateValidation(
CandidateValidationMessage::ValidateFromExhaustive { exec_timeout_kind, response_sender, .. }
) if exec_timeout_kind == PvfExecTimeoutKind::Approval => {
CandidateValidationMessage::ValidateFromExhaustive { exec_kind, response_sender, .. }
) if exec_kind == PvfExecKind::Approval => {
response_sender.send(Ok(ValidationResult::Invalid(InvalidCandidate::CommitmentsHashMismatch))).unwrap();
},
"overseer did not receive candidate validation message",
@@ -524,8 +524,8 @@ fn cast_valid_vote_if_validation_passes() {
assert_matches!(
ctx_handle.recv().await,
AllMessages::CandidateValidation(
CandidateValidationMessage::ValidateFromExhaustive { exec_timeout_kind, response_sender, .. }
) if exec_timeout_kind == PvfExecTimeoutKind::Approval => {
CandidateValidationMessage::ValidateFromExhaustive { exec_kind, response_sender, .. }
) if exec_kind == PvfExecKind::Approval => {
response_sender.send(Ok(ValidationResult::Valid(dummy_candidate_commitments(None), PersistedValidationData::default()))).unwrap();
},
"overseer did not receive candidate validation message",
+18 -18
View File
@@ -30,7 +30,7 @@ use polkadot_node_subsystem::{
use polkadot_primitives::{
CandidateCommitments, CandidateDescriptor, CandidateReceipt, PersistedValidationData,
PvfExecTimeoutKind,
PvfExecKind,
};
use futures::channel::oneshot;
@@ -90,10 +90,10 @@ impl FakeCandidateValidation {
}
}
fn should_misbehave(&self, timeout: PvfExecTimeoutKind) -> bool {
fn should_misbehave(&self, timeout: PvfExecKind) -> bool {
match timeout {
PvfExecTimeoutKind::Backing => self.includes_backing(),
PvfExecTimeoutKind::Approval => self.includes_approval(),
PvfExecKind::Backing => self.includes_backing(),
PvfExecKind::Approval => self.includes_approval(),
}
}
}
@@ -279,13 +279,13 @@ where
candidate_receipt,
pov,
executor_params,
exec_timeout_kind,
exec_kind,
response_sender,
..
},
} => {
match self.fake_validation {
x if x.misbehaves_valid() && x.should_misbehave(exec_timeout_kind) => {
x if x.misbehaves_valid() && x.should_misbehave(exec_kind) => {
// Behave normally if the `PoV` is not known to be malicious.
if pov.block_data.0.as_slice() != MALICIOUS_POV {
return Some(FromOrchestra::Communication {
@@ -295,7 +295,7 @@ where
candidate_receipt,
pov,
executor_params,
exec_timeout_kind,
exec_kind,
response_sender,
},
})
@@ -333,14 +333,14 @@ where
candidate_receipt,
pov,
executor_params,
exec_timeout_kind,
exec_kind,
response_sender,
},
})
},
}
},
x if x.misbehaves_invalid() && x.should_misbehave(exec_timeout_kind) => {
x if x.misbehaves_invalid() && x.should_misbehave(exec_kind) => {
// Set the validation result to invalid with probability `p` and trigger a
// dispute
let behave_maliciously = self.distribution.sample(&mut rand::thread_rng());
@@ -373,7 +373,7 @@ where
candidate_receipt,
pov,
executor_params,
exec_timeout_kind,
exec_kind,
response_sender,
},
})
@@ -388,7 +388,7 @@ where
candidate_receipt,
pov,
executor_params,
exec_timeout_kind,
exec_kind,
response_sender,
},
}),
@@ -401,13 +401,13 @@ where
candidate_receipt,
pov,
executor_params,
exec_timeout_kind,
exec_kind,
response_sender,
..
},
} => {
match self.fake_validation {
x if x.misbehaves_valid() && x.should_misbehave(exec_timeout_kind) => {
x if x.misbehaves_valid() && x.should_misbehave(exec_kind) => {
// Behave normally if the `PoV` is not known to be malicious.
if pov.block_data.0.as_slice() != MALICIOUS_POV {
return Some(FromOrchestra::Communication {
@@ -415,7 +415,7 @@ where
candidate_receipt,
pov,
executor_params,
exec_timeout_kind,
exec_kind,
response_sender,
},
})
@@ -445,13 +445,13 @@ where
candidate_receipt,
pov,
executor_params,
exec_timeout_kind,
exec_kind,
response_sender,
},
}),
}
},
x if x.misbehaves_invalid() && x.should_misbehave(exec_timeout_kind) => {
x if x.misbehaves_invalid() && x.should_misbehave(exec_kind) => {
// Maliciously set the validation result to invalid for a valid candidate
// with probability `p`
let behave_maliciously = self.distribution.sample(&mut rand::thread_rng());
@@ -479,7 +479,7 @@ where
candidate_receipt,
pov,
executor_params,
exec_timeout_kind,
exec_kind,
response_sender,
},
})
@@ -491,7 +491,7 @@ where
candidate_receipt,
pov,
executor_params,
exec_timeout_kind,
exec_kind,
response_sender,
},
}),
@@ -32,7 +32,7 @@ use polkadot_overseer::{
gen::{FromOrchestra, SpawnedSubsystem},
HeadSupportsParachains, SubsystemError,
};
use polkadot_primitives::{CandidateReceipt, Hash, PvfExecTimeoutKind};
use polkadot_primitives::{CandidateReceipt, Hash, PvfExecKind};
struct AlwaysSupportsParachains;
@@ -77,7 +77,7 @@ impl Subsystem1 {
candidate_receipt,
pov: PoV { block_data: BlockData(Vec::new()) }.into(),
executor_params: Default::default(),
exec_timeout_kind: PvfExecTimeoutKind::Backing,
exec_kind: PvfExecKind::Backing,
response_sender: tx,
};
ctx.send_message(msg).await;
+3 -3
View File
@@ -30,7 +30,7 @@ use polkadot_node_subsystem_types::messages::{
};
use polkadot_primitives::{
CandidateHash, CandidateReceipt, CollatorPair, Id as ParaId, InvalidDisputeStatementKind,
PvfExecTimeoutKind, SessionIndex, ValidDisputeStatementKind, ValidatorIndex,
PvfExecKind, SessionIndex, ValidDisputeStatementKind, ValidatorIndex,
};
use crate::{
@@ -106,7 +106,7 @@ where
candidate_receipt,
pov: PoV { block_data: BlockData(Vec::new()) }.into(),
executor_params: Default::default(),
exec_timeout_kind: PvfExecTimeoutKind::Backing,
exec_kind: PvfExecKind::Backing,
response_sender: tx,
})
.await;
@@ -804,7 +804,7 @@ fn test_candidate_validation_msg() -> CandidateValidationMessage {
candidate_receipt,
pov,
executor_params: Default::default(),
exec_timeout_kind: PvfExecTimeoutKind::Backing,
exec_kind: PvfExecKind::Backing,
response_sender,
}
}
@@ -47,7 +47,7 @@ use polkadot_primitives::{
CommittedCandidateReceipt, CoreState, DisputeState, ExecutorParams, GroupIndex,
GroupRotationInfo, Hash, Header as BlockHeader, Id as ParaId, InboundDownwardMessage,
InboundHrmpMessage, MultiDisputeStatementSet, OccupiedCoreAssumption, PersistedValidationData,
PvfCheckStatement, PvfExecTimeoutKind, SessionIndex, SessionInfo, SignedAvailabilityBitfield,
PvfCheckStatement, PvfExecKind, SessionIndex, SessionInfo, SignedAvailabilityBitfield,
SignedAvailabilityBitfields, ValidationCode, ValidationCodeHash, ValidatorId, ValidatorIndex,
ValidatorSignature,
};
@@ -150,8 +150,8 @@ pub enum CandidateValidationMessage {
pov: Arc<PoV>,
/// Session's executor parameters
executor_params: ExecutorParams,
/// Execution timeout kind (backing/approvals)
exec_timeout_kind: PvfExecTimeoutKind,
/// Execution kind, used for timeouts and retries (backing/approvals)
exec_kind: PvfExecKind,
/// The sending side of the response channel
response_sender: oneshot::Sender<Result<ValidationResult, ValidationFailed>>,
},
@@ -175,8 +175,8 @@ pub enum CandidateValidationMessage {
pov: Arc<PoV>,
/// Session's executor parameters
executor_params: ExecutorParams,
/// Execution timeout kind (backing/approvals)
exec_timeout_kind: PvfExecTimeoutKind,
/// Execution kind, used for timeouts and retries (backing/approvals)
exec_kind: PvfExecKind,
/// The sending side of the response channel
response_sender: oneshot::Sender<Result<ValidationResult, ValidationFailed>>,
},