remove retry from backers on failed candidate validation (#2182)

Hey guys, as discussed, I've changed the name to a more general one,
`PvfExecKind`. Is this good, or too general?
I'm creating this as a draft because I still have to fix the tests.

Closes #1585

Kusama address: FkB6QEo8VnV3oifugNj5NeVG3Mvq1zFbrUu4P5YwRoe5mQN

---------

Co-authored-by: command-bot <>
Co-authored-by: Marcin S <marcin@realemail.net>
This commit is contained in:
jserrat
2023-11-20 11:00:19 +00:00
committed by GitHub
parent b35300c377
commit ede4a36262
18 changed files with 276 additions and 248 deletions
@@ -54,7 +54,7 @@ use polkadot_node_subsystem_util::{
}; };
use polkadot_primitives::{ use polkadot_primitives::{
ApprovalVote, BlockNumber, CandidateHash, CandidateIndex, CandidateReceipt, DisputeStatement, ApprovalVote, BlockNumber, CandidateHash, CandidateIndex, CandidateReceipt, DisputeStatement,
ExecutorParams, GroupIndex, Hash, PvfExecTimeoutKind, SessionIndex, SessionInfo, ExecutorParams, GroupIndex, Hash, PvfExecKind, SessionIndex, SessionInfo,
ValidDisputeStatementKind, ValidatorId, ValidatorIndex, ValidatorPair, ValidatorSignature, ValidDisputeStatementKind, ValidatorId, ValidatorIndex, ValidatorPair, ValidatorSignature,
}; };
use sc_keystore::LocalKeystore; use sc_keystore::LocalKeystore;
@@ -2867,7 +2867,7 @@ async fn launch_approval<Context>(
candidate_receipt: candidate.clone(), candidate_receipt: candidate.clone(),
pov: available_data.pov, pov: available_data.pov,
executor_params, executor_params,
exec_timeout_kind: PvfExecTimeoutKind::Approval, exec_kind: PvfExecKind::Approval,
response_sender: val_tx, response_sender: val_tx,
}) })
.await; .await;
@@ -2705,10 +2705,10 @@ async fn handle_double_assignment_import(
assert_matches!( assert_matches!(
overseer_recv(virtual_overseer).await, overseer_recv(virtual_overseer).await,
AllMessages::CandidateValidation(CandidateValidationMessage::ValidateFromExhaustive { AllMessages::CandidateValidation(CandidateValidationMessage::ValidateFromExhaustive {
exec_timeout_kind, exec_kind,
response_sender, response_sender,
.. ..
}) if exec_timeout_kind == PvfExecTimeoutKind::Approval => { }) if exec_kind == PvfExecKind::Approval => {
response_sender.send(Ok(ValidationResult::Valid(Default::default(), Default::default()))) response_sender.send(Ok(ValidationResult::Valid(Default::default(), Default::default())))
.unwrap(); .unwrap();
} }
+2 -2
View File
@@ -106,7 +106,7 @@ use polkadot_node_subsystem_util::{
use polkadot_primitives::{ use polkadot_primitives::{
BackedCandidate, CandidateCommitments, CandidateHash, CandidateReceipt, BackedCandidate, CandidateCommitments, CandidateHash, CandidateReceipt,
CommittedCandidateReceipt, CoreIndex, CoreState, ExecutorParams, Hash, Id as ParaId, CommittedCandidateReceipt, CoreIndex, CoreState, ExecutorParams, Hash, Id as ParaId,
PersistedValidationData, PvfExecTimeoutKind, SigningContext, ValidationCode, ValidatorId, PersistedValidationData, PvfExecKind, SigningContext, ValidationCode, ValidatorId,
ValidatorIndex, ValidatorSignature, ValidityAttestation, ValidatorIndex, ValidatorSignature, ValidityAttestation,
}; };
use sp_keystore::KeystorePtr; use sp_keystore::KeystorePtr;
@@ -566,7 +566,7 @@ async fn request_candidate_validation(
candidate_receipt, candidate_receipt,
pov, pov,
executor_params, executor_params,
exec_timeout_kind: PvfExecTimeoutKind::Backing, exec_kind: PvfExecKind::Backing,
response_sender: tx, response_sender: tx,
}) })
.await; .await;
+19 -19
View File
@@ -33,7 +33,7 @@ use polkadot_node_subsystem::{
}; };
use polkadot_node_subsystem_test_helpers as test_helpers; use polkadot_node_subsystem_test_helpers as test_helpers;
use polkadot_primitives::{ use polkadot_primitives::{
CandidateDescriptor, GroupRotationInfo, HeadData, PersistedValidationData, PvfExecTimeoutKind, CandidateDescriptor, GroupRotationInfo, HeadData, PersistedValidationData, PvfExecKind,
ScheduledCore, SessionIndex, LEGACY_MIN_BACKING_VOTES, ScheduledCore, SessionIndex, LEGACY_MIN_BACKING_VOTES,
}; };
use sp_application_crypto::AppCrypto; use sp_application_crypto::AppCrypto;
@@ -344,14 +344,14 @@ async fn assert_validate_from_exhaustive(
validation_data, validation_data,
validation_code, validation_code,
candidate_receipt, candidate_receipt,
exec_timeout_kind, exec_kind,
response_sender, response_sender,
.. ..
}, },
) if validation_data == *assert_pvd && ) if validation_data == *assert_pvd &&
validation_code == *assert_validation_code && validation_code == *assert_validation_code &&
*pov == *assert_pov && &candidate_receipt.descriptor == assert_candidate.descriptor() && *pov == *assert_pov && &candidate_receipt.descriptor == assert_candidate.descriptor() &&
exec_timeout_kind == PvfExecTimeoutKind::Backing && exec_kind == PvfExecKind::Backing &&
candidate_receipt.commitments_hash == assert_candidate.commitments.hash() => candidate_receipt.commitments_hash == assert_candidate.commitments.hash() =>
{ {
response_sender.send(Ok(ValidationResult::Valid( response_sender.send(Ok(ValidationResult::Valid(
@@ -550,14 +550,14 @@ fn backing_works() {
validation_code, validation_code,
candidate_receipt, candidate_receipt,
pov, pov,
exec_timeout_kind, exec_kind,
response_sender, response_sender,
.. ..
}, },
) if validation_data == pvd_ab && ) if validation_data == pvd_ab &&
validation_code == validation_code_ab && validation_code == validation_code_ab &&
*pov == pov_ab && &candidate_receipt.descriptor == candidate_a.descriptor() && *pov == pov_ab && &candidate_receipt.descriptor == candidate_a.descriptor() &&
exec_timeout_kind == PvfExecTimeoutKind::Backing && exec_kind == PvfExecKind::Backing &&
candidate_receipt.commitments_hash == candidate_a_commitments_hash => candidate_receipt.commitments_hash == candidate_a_commitments_hash =>
{ {
response_sender.send(Ok( response_sender.send(Ok(
@@ -729,14 +729,14 @@ fn backing_works_while_validation_ongoing() {
validation_code, validation_code,
candidate_receipt, candidate_receipt,
pov, pov,
exec_timeout_kind, exec_kind,
response_sender, response_sender,
.. ..
}, },
) if validation_data == pvd_abc && ) if validation_data == pvd_abc &&
validation_code == validation_code_abc && validation_code == validation_code_abc &&
*pov == pov_abc && &candidate_receipt.descriptor == candidate_a.descriptor() && *pov == pov_abc && &candidate_receipt.descriptor == candidate_a.descriptor() &&
exec_timeout_kind == PvfExecTimeoutKind::Backing && exec_kind == PvfExecKind::Backing &&
candidate_a_commitments_hash == candidate_receipt.commitments_hash => candidate_a_commitments_hash == candidate_receipt.commitments_hash =>
{ {
// we never validate the candidate. our local node // we never validate the candidate. our local node
@@ -890,14 +890,14 @@ fn backing_misbehavior_works() {
validation_code, validation_code,
candidate_receipt, candidate_receipt,
pov, pov,
exec_timeout_kind, exec_kind,
response_sender, response_sender,
.. ..
}, },
) if validation_data == pvd_a && ) if validation_data == pvd_a &&
validation_code == validation_code_a && validation_code == validation_code_a &&
*pov == pov_a && &candidate_receipt.descriptor == candidate_a.descriptor() && *pov == pov_a && &candidate_receipt.descriptor == candidate_a.descriptor() &&
exec_timeout_kind == PvfExecTimeoutKind::Backing && exec_kind == PvfExecKind::Backing &&
candidate_a_commitments_hash == candidate_receipt.commitments_hash => candidate_a_commitments_hash == candidate_receipt.commitments_hash =>
{ {
response_sender.send(Ok( response_sender.send(Ok(
@@ -1057,14 +1057,14 @@ fn backing_dont_second_invalid() {
validation_code, validation_code,
candidate_receipt, candidate_receipt,
pov, pov,
exec_timeout_kind, exec_kind,
response_sender, response_sender,
.. ..
}, },
) if validation_data == pvd_a && ) if validation_data == pvd_a &&
validation_code == validation_code_a && validation_code == validation_code_a &&
*pov == pov_block_a && &candidate_receipt.descriptor == candidate_a.descriptor() && *pov == pov_block_a && &candidate_receipt.descriptor == candidate_a.descriptor() &&
exec_timeout_kind == PvfExecTimeoutKind::Backing && exec_kind == PvfExecKind::Backing &&
candidate_a.commitments.hash() == candidate_receipt.commitments_hash => candidate_a.commitments.hash() == candidate_receipt.commitments_hash =>
{ {
response_sender.send(Ok(ValidationResult::Invalid(InvalidCandidate::BadReturn))).unwrap(); response_sender.send(Ok(ValidationResult::Invalid(InvalidCandidate::BadReturn))).unwrap();
@@ -1097,14 +1097,14 @@ fn backing_dont_second_invalid() {
validation_code, validation_code,
candidate_receipt, candidate_receipt,
pov, pov,
exec_timeout_kind, exec_kind,
response_sender, response_sender,
.. ..
}, },
) if validation_data == pvd_b && ) if validation_data == pvd_b &&
validation_code == validation_code_b && validation_code == validation_code_b &&
*pov == pov_block_b && &candidate_receipt.descriptor == candidate_b.descriptor() && *pov == pov_block_b && &candidate_receipt.descriptor == candidate_b.descriptor() &&
exec_timeout_kind == PvfExecTimeoutKind::Backing && exec_kind == PvfExecKind::Backing &&
candidate_b.commitments.hash() == candidate_receipt.commitments_hash => candidate_b.commitments.hash() == candidate_receipt.commitments_hash =>
{ {
response_sender.send(Ok( response_sender.send(Ok(
@@ -1224,14 +1224,14 @@ fn backing_second_after_first_fails_works() {
validation_code, validation_code,
candidate_receipt, candidate_receipt,
pov, pov,
exec_timeout_kind, exec_kind,
response_sender, response_sender,
.. ..
}, },
) if validation_data == pvd_a && ) if validation_data == pvd_a &&
validation_code == validation_code_a && validation_code == validation_code_a &&
*pov == pov_a && &candidate_receipt.descriptor == candidate.descriptor() && *pov == pov_a && &candidate_receipt.descriptor == candidate.descriptor() &&
exec_timeout_kind == PvfExecTimeoutKind::Backing && exec_kind == PvfExecKind::Backing &&
candidate.commitments.hash() == candidate_receipt.commitments_hash => candidate.commitments.hash() == candidate_receipt.commitments_hash =>
{ {
response_sender.send(Ok(ValidationResult::Invalid(InvalidCandidate::BadReturn))).unwrap(); response_sender.send(Ok(ValidationResult::Invalid(InvalidCandidate::BadReturn))).unwrap();
@@ -1368,14 +1368,14 @@ fn backing_works_after_failed_validation() {
validation_code, validation_code,
candidate_receipt, candidate_receipt,
pov, pov,
exec_timeout_kind, exec_kind,
response_sender, response_sender,
.. ..
}, },
) if validation_data == pvd_a && ) if validation_data == pvd_a &&
validation_code == validation_code_a && validation_code == validation_code_a &&
*pov == pov_a && &candidate_receipt.descriptor == candidate.descriptor() && *pov == pov_a && &candidate_receipt.descriptor == candidate.descriptor() &&
exec_timeout_kind == PvfExecTimeoutKind::Backing && exec_kind == PvfExecKind::Backing &&
candidate.commitments.hash() == candidate_receipt.commitments_hash => candidate.commitments.hash() == candidate_receipt.commitments_hash =>
{ {
response_sender.send(Err(ValidationFailed("Internal test error".into()))).unwrap(); response_sender.send(Err(ValidationFailed("Internal test error".into()))).unwrap();
@@ -1634,13 +1634,13 @@ fn retry_works() {
validation_code, validation_code,
candidate_receipt, candidate_receipt,
pov, pov,
exec_timeout_kind, exec_kind,
.. ..
}, },
) if validation_data == pvd_a && ) if validation_data == pvd_a &&
validation_code == validation_code_a && validation_code == validation_code_a &&
*pov == pov_a && &candidate_receipt.descriptor == candidate.descriptor() && *pov == pov_a && &candidate_receipt.descriptor == candidate.descriptor() &&
exec_timeout_kind == PvfExecTimeoutKind::Backing && exec_kind == PvfExecKind::Backing &&
candidate.commitments.hash() == candidate_receipt.commitments_hash candidate.commitments.hash() == candidate_receipt.commitments_hash
); );
virtual_overseer virtual_overseer
@@ -232,14 +232,14 @@ async fn assert_validate_seconded_candidate(
validation_code, validation_code,
candidate_receipt, candidate_receipt,
pov, pov,
exec_timeout_kind, exec_kind,
response_sender, response_sender,
.. ..
}) if &validation_data == assert_pvd && }) if &validation_data == assert_pvd &&
&validation_code == assert_validation_code && &validation_code == assert_validation_code &&
&*pov == assert_pov && &*pov == assert_pov &&
&candidate_receipt.descriptor == candidate.descriptor() && &candidate_receipt.descriptor == candidate.descriptor() &&
exec_timeout_kind == PvfExecTimeoutKind::Backing && exec_kind == PvfExecKind::Backing &&
candidate.commitments.hash() == candidate_receipt.commitments_hash => candidate.commitments.hash() == candidate_receipt.commitments_hash =>
{ {
response_sender.send(Ok(ValidationResult::Valid( response_sender.send(Ok(ValidationResult::Valid(
@@ -49,8 +49,8 @@ use polkadot_primitives::{
DEFAULT_LENIENT_PREPARATION_TIMEOUT, DEFAULT_PRECHECK_PREPARATION_TIMEOUT, DEFAULT_LENIENT_PREPARATION_TIMEOUT, DEFAULT_PRECHECK_PREPARATION_TIMEOUT,
}, },
CandidateCommitments, CandidateDescriptor, CandidateReceipt, ExecutorParams, Hash, CandidateCommitments, CandidateDescriptor, CandidateReceipt, ExecutorParams, Hash,
OccupiedCoreAssumption, PersistedValidationData, PvfExecTimeoutKind, PvfPrepTimeoutKind, OccupiedCoreAssumption, PersistedValidationData, PvfExecKind, PvfPrepKind, ValidationCode,
ValidationCode, ValidationCodeHash, ValidationCodeHash,
}; };
use parity_scale_codec::Encode; use parity_scale_codec::Encode;
@@ -73,12 +73,6 @@ mod tests;
const LOG_TARGET: &'static str = "parachain::candidate-validation"; const LOG_TARGET: &'static str = "parachain::candidate-validation";
/// The amount of time to wait before retrying after a retry-able backing validation error. We use a
/// lower value for the backing case, to fit within the lower backing timeout.
#[cfg(not(test))]
const PVF_BACKING_EXECUTION_RETRY_DELAY: Duration = Duration::from_millis(500);
#[cfg(test)]
const PVF_BACKING_EXECUTION_RETRY_DELAY: Duration = Duration::from_millis(200);
/// The amount of time to wait before retrying after a retry-able approval validation error. We use /// The amount of time to wait before retrying after a retry-able approval validation error. We use
/// a higher value for the approval case since we have more time, and if we wait longer it is more /// a higher value for the approval case since we have more time, and if we wait longer it is more
/// likely that transient conditions will resolve. /// likely that transient conditions will resolve.
@@ -163,7 +157,7 @@ async fn run<Context>(
candidate_receipt, candidate_receipt,
pov, pov,
executor_params, executor_params,
exec_timeout_kind, exec_kind,
response_sender, response_sender,
.. ..
} => { } => {
@@ -180,7 +174,7 @@ async fn run<Context>(
candidate_receipt, candidate_receipt,
pov, pov,
executor_params, executor_params,
exec_timeout_kind, exec_kind,
&metrics, &metrics,
) )
.await; .await;
@@ -198,7 +192,7 @@ async fn run<Context>(
candidate_receipt, candidate_receipt,
pov, pov,
executor_params, executor_params,
exec_timeout_kind, exec_kind,
response_sender, response_sender,
.. ..
} => { } => {
@@ -215,7 +209,7 @@ async fn run<Context>(
candidate_receipt, candidate_receipt,
pov, pov,
executor_params, executor_params,
exec_timeout_kind, exec_kind,
&metrics, &metrics,
) )
.await; .await;
@@ -357,7 +351,7 @@ where
return PreCheckOutcome::Invalid return PreCheckOutcome::Invalid
}; };
let timeout = pvf_prep_timeout(&executor_params, PvfPrepTimeoutKind::Precheck); let timeout = pvf_prep_timeout(&executor_params, PvfPrepKind::Precheck);
let pvf = match sp_maybe_compressed_blob::decompress( let pvf = match sp_maybe_compressed_blob::decompress(
&validation_code.0, &validation_code.0,
@@ -501,7 +495,7 @@ async fn validate_from_chain_state<Sender>(
candidate_receipt: CandidateReceipt, candidate_receipt: CandidateReceipt,
pov: Arc<PoV>, pov: Arc<PoV>,
executor_params: ExecutorParams, executor_params: ExecutorParams,
exec_timeout_kind: PvfExecTimeoutKind, exec_kind: PvfExecKind,
metrics: &Metrics, metrics: &Metrics,
) -> Result<ValidationResult, ValidationFailed> ) -> Result<ValidationResult, ValidationFailed>
where where
@@ -521,7 +515,7 @@ where
candidate_receipt.clone(), candidate_receipt.clone(),
pov, pov,
executor_params, executor_params,
exec_timeout_kind, exec_kind,
metrics, metrics,
) )
.await; .await;
@@ -557,7 +551,7 @@ async fn validate_candidate_exhaustive(
candidate_receipt: CandidateReceipt, candidate_receipt: CandidateReceipt,
pov: Arc<PoV>, pov: Arc<PoV>,
executor_params: ExecutorParams, executor_params: ExecutorParams,
exec_timeout_kind: PvfExecTimeoutKind, exec_kind: PvfExecKind,
metrics: &Metrics, metrics: &Metrics,
) -> Result<ValidationResult, ValidationFailed> { ) -> Result<ValidationResult, ValidationFailed> {
let _timer = metrics.time_validate_candidate_exhaustive(); let _timer = metrics.time_validate_candidate_exhaustive();
@@ -616,15 +610,32 @@ async fn validate_candidate_exhaustive(
relay_parent_storage_root: persisted_validation_data.relay_parent_storage_root, relay_parent_storage_root: persisted_validation_data.relay_parent_storage_root,
}; };
let result = validation_backend let result = match exec_kind {
// Retry is disabled to reduce the chance of nondeterministic blocks getting backed and
// honest backers getting slashed.
PvfExecKind::Backing => {
let prep_timeout = pvf_prep_timeout(&executor_params, PvfPrepKind::Prepare);
let exec_timeout = pvf_exec_timeout(&executor_params, exec_kind);
let pvf = PvfPrepData::from_code(
raw_validation_code.to_vec(),
executor_params,
prep_timeout,
PrepareJobKind::Compilation,
);
validation_backend.validate_candidate(pvf, exec_timeout, params.encode()).await
},
PvfExecKind::Approval =>
validation_backend
.validate_candidate_with_retry( .validate_candidate_with_retry(
raw_validation_code.to_vec(), raw_validation_code.to_vec(),
pvf_exec_timeout(&executor_params, exec_timeout_kind), pvf_exec_timeout(&executor_params, exec_kind),
exec_timeout_kind,
params, params,
executor_params, executor_params,
PVF_APPROVAL_EXECUTION_RETRY_DELAY,
) )
.await; .await,
};
if let Err(ref error) = result { if let Err(ref error) = result {
gum::info!(target: LOG_TARGET, ?para_id, ?error, "Failed to validate candidate"); gum::info!(target: LOG_TARGET, ?para_id, ?error, "Failed to validate candidate");
@@ -709,8 +720,8 @@ trait ValidationBackend {
encoded_params: Vec<u8>, encoded_params: Vec<u8>,
) -> Result<WasmValidationResult, ValidationError>; ) -> Result<WasmValidationResult, ValidationError>;
/// Tries executing a PVF. Will retry once if an error is encountered that may have been /// Tries executing a PVF for the approval subsystem. Will retry once if an error is encountered
/// transient. /// that may have been transient.
/// ///
/// NOTE: Should retry only on errors that are a result of execution itself, and not of /// NOTE: Should retry only on errors that are a result of execution itself, and not of
/// preparation. /// preparation.
@@ -718,11 +729,11 @@ trait ValidationBackend {
&mut self, &mut self,
raw_validation_code: Vec<u8>, raw_validation_code: Vec<u8>,
exec_timeout: Duration, exec_timeout: Duration,
exec_timeout_kind: PvfExecTimeoutKind,
params: ValidationParams, params: ValidationParams,
executor_params: ExecutorParams, executor_params: ExecutorParams,
retry_delay: Duration,
) -> Result<WasmValidationResult, ValidationError> { ) -> Result<WasmValidationResult, ValidationError> {
let prep_timeout = pvf_prep_timeout(&executor_params, PvfPrepTimeoutKind::Lenient); let prep_timeout = pvf_prep_timeout(&executor_params, PvfPrepKind::Prepare);
// Construct the PVF a single time, since it is an expensive operation. Cloning it is cheap. // Construct the PVF a single time, since it is an expensive operation. Cloning it is cheap.
let pvf = PvfPrepData::from_code( let pvf = PvfPrepData::from_code(
raw_validation_code, raw_validation_code,
@@ -740,11 +751,6 @@ trait ValidationBackend {
return validation_result return validation_result
} }
let retry_delay = match exec_timeout_kind {
PvfExecTimeoutKind::Backing => PVF_BACKING_EXECUTION_RETRY_DELAY,
PvfExecTimeoutKind::Approval => PVF_APPROVAL_EXECUTION_RETRY_DELAY,
};
// Allow limited retries for each kind of error. // Allow limited retries for each kind of error.
let mut num_death_retries_left = 1; let mut num_death_retries_left = 1;
let mut num_job_error_retries_left = 1; let mut num_job_error_retries_left = 1;
@@ -867,22 +873,41 @@ fn perform_basic_checks(
Ok(()) Ok(())
} }
fn pvf_prep_timeout(executor_params: &ExecutorParams, kind: PvfPrepTimeoutKind) -> Duration { /// Determines the timeout for PVF preparation.
///
/// Precheck
/// The time period after which the preparation worker is considered
/// unresponsive and will be killed while prechecking a PVF.
///
/// Prepare
/// The time period after which the preparation worker is considered
/// unresponsive and will be killed while preparing a PVF (uses the lenient default).
fn pvf_prep_timeout(executor_params: &ExecutorParams, kind: PvfPrepKind) -> Duration {
if let Some(timeout) = executor_params.pvf_prep_timeout(kind) { if let Some(timeout) = executor_params.pvf_prep_timeout(kind) {
return timeout return timeout
} }
match kind { match kind {
PvfPrepTimeoutKind::Precheck => DEFAULT_PRECHECK_PREPARATION_TIMEOUT, PvfPrepKind::Precheck => DEFAULT_PRECHECK_PREPARATION_TIMEOUT,
PvfPrepTimeoutKind::Lenient => DEFAULT_LENIENT_PREPARATION_TIMEOUT, PvfPrepKind::Prepare => DEFAULT_LENIENT_PREPARATION_TIMEOUT,
} }
} }
fn pvf_exec_timeout(executor_params: &ExecutorParams, kind: PvfExecTimeoutKind) -> Duration { /// Determines the timeout for PVF execution.
///
/// Backing subsystem
/// The amount of time to spend on execution during backing.
///
/// Approval subsystem
/// The amount of time to spend on execution during approval or disputes.
/// This should be much longer than the backing execution timeout to ensure that, in the
/// absence of extremely large disparities between hardware, blocks that pass backing are
/// considered executable by approval checkers or dispute participants.
fn pvf_exec_timeout(executor_params: &ExecutorParams, kind: PvfExecKind) -> Duration {
if let Some(timeout) = executor_params.pvf_exec_timeout(kind) { if let Some(timeout) = executor_params.pvf_exec_timeout(kind) {
return timeout return timeout
} }
match kind { match kind {
PvfExecTimeoutKind::Backing => DEFAULT_BACKING_EXECUTION_TIMEOUT, PvfExecKind::Backing => DEFAULT_BACKING_EXECUTION_TIMEOUT,
PvfExecTimeoutKind::Approval => DEFAULT_APPROVAL_EXECUTION_TIMEOUT, PvfExecKind::Approval => DEFAULT_APPROVAL_EXECUTION_TIMEOUT,
} }
} }
@@ -436,7 +436,7 @@ fn candidate_validation_ok_is_ok() {
candidate_receipt, candidate_receipt,
Arc::new(pov), Arc::new(pov),
ExecutorParams::default(), ExecutorParams::default(),
PvfExecTimeoutKind::Backing, PvfExecKind::Backing,
&Default::default(), &Default::default(),
)) ))
.unwrap(); .unwrap();
@@ -488,7 +488,7 @@ fn candidate_validation_bad_return_is_invalid() {
candidate_receipt, candidate_receipt,
Arc::new(pov), Arc::new(pov),
ExecutorParams::default(), ExecutorParams::default(),
PvfExecTimeoutKind::Backing, PvfExecKind::Backing,
&Default::default(), &Default::default(),
)) ))
.unwrap(); .unwrap();
@@ -496,6 +496,33 @@ fn candidate_validation_bad_return_is_invalid() {
assert_matches!(v, ValidationResult::Invalid(InvalidCandidate::Timeout)); assert_matches!(v, ValidationResult::Invalid(InvalidCandidate::Timeout));
} }
/// Test helper: builds a candidate descriptor from the given inputs, asserts that it passes
/// `perform_basic_checks`, and returns it for use in a `CandidateReceipt`.
///
/// The descriptor is always created for `ParaId` 1 with a dummy relay parent and the `Alice`
/// keyring. `head_data_hash` is passed for both trailing hash arguments of
/// `make_valid_candidate_descriptor` (presumably para head and erasure root; confirm against
/// that helper's signature).
///
/// # Panics
///
/// Panics if the freshly built descriptor does not pass the basic checks.
fn perform_basic_checks_on_valid_candidate(
    pov: &PoV,
    validation_code: &ValidationCode,
    validation_data: &PersistedValidationData,
    head_data_hash: Hash,
) -> CandidateDescriptor {
    let descriptor = make_valid_candidate_descriptor(
        ParaId::from(1_u32),
        dummy_hash(),
        validation_data.hash(),
        pov.hash(),
        validation_code.hash(),
        head_data_hash,
        head_data_hash,
        Sr25519Keyring::Alice,
    );

    // `pov` is already a reference; pass it through instead of borrowing it a second time.
    let check = perform_basic_checks(
        &descriptor,
        validation_data.max_pov_size,
        pov,
        &validation_code.hash(),
    );
    assert!(
        check.is_ok(),
        "a descriptor built from the same PoV, code and validation data must pass basic checks"
    );

    descriptor
}
// Test that we vote valid if we get `AmbiguousWorkerDeath`, retry, and then succeed. // Test that we vote valid if we get `AmbiguousWorkerDeath`, retry, and then succeed.
#[test] #[test]
fn candidate_validation_one_ambiguous_error_is_valid() { fn candidate_validation_one_ambiguous_error_is_valid() {
@@ -505,24 +532,12 @@ fn candidate_validation_one_ambiguous_error_is_valid() {
let head_data = HeadData(vec![1, 1, 1]); let head_data = HeadData(vec![1, 1, 1]);
let validation_code = ValidationCode(vec![2; 16]); let validation_code = ValidationCode(vec![2; 16]);
let descriptor = make_valid_candidate_descriptor( let descriptor = perform_basic_checks_on_valid_candidate(
ParaId::from(1_u32),
dummy_hash(),
validation_data.hash(),
pov.hash(),
validation_code.hash(),
head_data.hash(),
dummy_hash(),
Sr25519Keyring::Alice,
);
let check = perform_basic_checks(
&descriptor,
validation_data.max_pov_size,
&pov, &pov,
&validation_code.hash(), &validation_code,
&validation_data,
head_data.hash(),
); );
assert!(check.is_ok());
let validation_result = WasmValidationResult { let validation_result = WasmValidationResult {
head_data, head_data,
@@ -554,7 +569,7 @@ fn candidate_validation_one_ambiguous_error_is_valid() {
candidate_receipt, candidate_receipt,
Arc::new(pov), Arc::new(pov),
ExecutorParams::default(), ExecutorParams::default(),
PvfExecTimeoutKind::Backing, PvfExecKind::Approval,
&Default::default(), &Default::default(),
)) ))
.unwrap(); .unwrap();
@@ -576,24 +591,12 @@ fn candidate_validation_multiple_ambiguous_errors_is_invalid() {
let pov = PoV { block_data: BlockData(vec![1; 32]) }; let pov = PoV { block_data: BlockData(vec![1; 32]) };
let validation_code = ValidationCode(vec![2; 16]); let validation_code = ValidationCode(vec![2; 16]);
let descriptor = make_valid_candidate_descriptor( let descriptor = perform_basic_checks_on_valid_candidate(
ParaId::from(1_u32),
dummy_hash(),
validation_data.hash(),
pov.hash(),
validation_code.hash(),
dummy_hash(),
dummy_hash(),
Sr25519Keyring::Alice,
);
let check = perform_basic_checks(
&descriptor,
validation_data.max_pov_size,
&pov, &pov,
&validation_code.hash(), &validation_code,
&validation_data,
dummy_hash(),
); );
assert!(check.is_ok());
let candidate_receipt = CandidateReceipt { descriptor, commitments_hash: Hash::zero() }; let candidate_receipt = CandidateReceipt { descriptor, commitments_hash: Hash::zero() };
@@ -607,7 +610,7 @@ fn candidate_validation_multiple_ambiguous_errors_is_invalid() {
candidate_receipt, candidate_receipt,
Arc::new(pov), Arc::new(pov),
ExecutorParams::default(), ExecutorParams::default(),
PvfExecTimeoutKind::Backing, PvfExecKind::Approval,
&Default::default(), &Default::default(),
)) ))
.unwrap(); .unwrap();
@@ -615,58 +618,79 @@ fn candidate_validation_multiple_ambiguous_errors_is_invalid() {
assert_matches!(v, ValidationResult::Invalid(InvalidCandidate::ExecutionError(_))); assert_matches!(v, ValidationResult::Invalid(InvalidCandidate::ExecutionError(_)));
} }
// Test that we retry on internal errors. // Test that we retry for approval on internal errors.
#[test] #[test]
fn candidate_validation_retry_internal_errors() { fn candidate_validation_retry_internal_errors() {
let validation_data = PersistedValidationData { max_pov_size: 1024, ..Default::default() }; let v = candidate_validation_retry_on_error_helper(
PvfExecKind::Approval,
let pov = PoV { block_data: BlockData(vec![1; 32]) }; vec![
let validation_code = ValidationCode(vec![2; 16]); Err(InternalValidationError::HostCommunication("foo".into()).into()),
// Throw an AJD error, we should still retry again.
let descriptor = make_valid_candidate_descriptor( Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbiguousJobDeath(
ParaId::from(1_u32), "baz".into(),
dummy_hash(), ))),
validation_data.hash(), // Throw another internal error.
pov.hash(), Err(InternalValidationError::HostCommunication("bar".into()).into()),
validation_code.hash(), ],
dummy_hash(),
dummy_hash(),
Sr25519Keyring::Alice,
); );
assert_matches!(v, Err(ValidationFailed(s)) if s.contains("bar"));
}
let check = perform_basic_checks( // Test that we don't retry for backing on internal errors.
&descriptor, #[test]
validation_data.max_pov_size, fn candidate_validation_dont_retry_internal_errors() {
&pov, let v = candidate_validation_retry_on_error_helper(
&validation_code.hash(), PvfExecKind::Backing,
); vec![
assert!(check.is_ok());
let candidate_receipt = CandidateReceipt { descriptor, commitments_hash: Hash::zero() };
let v = executor::block_on(validate_candidate_exhaustive(
MockValidateCandidateBackend::with_hardcoded_result_list(vec![
Err(InternalValidationError::HostCommunication("foo".into()).into()), Err(InternalValidationError::HostCommunication("foo".into()).into()),
// Throw an AWD error, we should still retry again. // Throw an AWD error, we should still retry again.
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbiguousWorkerDeath)), Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbiguousWorkerDeath)),
// Throw another internal error. // Throw another internal error.
Err(InternalValidationError::HostCommunication("bar".into()).into()), Err(InternalValidationError::HostCommunication("bar".into()).into()),
]), ],
validation_data, );
validation_code,
candidate_receipt,
Arc::new(pov),
ExecutorParams::default(),
PvfExecTimeoutKind::Backing,
&Default::default(),
));
assert_matches!(v, Err(ValidationFailed(s)) if s.contains("bar")); assert_matches!(v, Err(ValidationFailed(s)) if s.contains("foo"));
} }
// Test that we retry on panic errors. // Test that we retry for approval on panic errors.
#[test] #[test]
fn candidate_validation_retry_panic_errors() { fn candidate_validation_retry_panic_errors() {
let v = candidate_validation_retry_on_error_helper(
PvfExecKind::Approval,
vec![
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::JobError("foo".into()))),
// Throw an AWD error, we should still retry again.
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbiguousWorkerDeath)),
// Throw another panic error.
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::JobError("bar".into()))),
],
);
assert_matches!(v, Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(s))) if s == "bar".to_string());
}
// Test that we don't retry for backing on panic errors: the very first error reported by the
// backend must be the final result.
#[test]
fn candidate_validation_dont_retry_panic_errors() {
    let v = candidate_validation_retry_on_error_helper(
        PvfExecKind::Backing,
        vec![
            // This first panic error is what backing is expected to surface.
            Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::JobError("foo".into()))),
            // The entries below would only be observed if a retry happened; the assertion
            // on "foo" further down fails if any of them ends up as the result.
            Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbiguousWorkerDeath)),
            Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::JobError("bar".into()))),
        ],
    );

    // Compare against the literal directly; no need to allocate a `String` for the check.
    assert_matches!(
        v,
        Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(s))) if s == "foo"
    );
}
fn candidate_validation_retry_on_error_helper(
exec_kind: PvfExecKind,
mock_errors: Vec<Result<WasmValidationResult, ValidationError>>,
) -> Result<ValidationResult, ValidationFailed> {
let validation_data = PersistedValidationData { max_pov_size: 1024, ..Default::default() }; let validation_data = PersistedValidationData { max_pov_size: 1024, ..Default::default() };
let pov = PoV { block_data: BlockData(vec![1; 32]) }; let pov = PoV { block_data: BlockData(vec![1; 32]) };
@@ -693,26 +717,16 @@ fn candidate_validation_retry_panic_errors() {
let candidate_receipt = CandidateReceipt { descriptor, commitments_hash: Hash::zero() }; let candidate_receipt = CandidateReceipt { descriptor, commitments_hash: Hash::zero() };
let v = executor::block_on(validate_candidate_exhaustive( return executor::block_on(validate_candidate_exhaustive(
MockValidateCandidateBackend::with_hardcoded_result_list(vec![ MockValidateCandidateBackend::with_hardcoded_result_list(mock_errors),
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::JobError("foo".into()))),
// Throw an AJD error, we should still retry again.
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbiguousJobDeath(
"baz".into(),
))),
// Throw another panic error.
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::JobError("bar".into()))),
]),
validation_data, validation_data,
validation_code, validation_code,
candidate_receipt, candidate_receipt,
Arc::new(pov), Arc::new(pov),
ExecutorParams::default(), ExecutorParams::default(),
PvfExecTimeoutKind::Backing, exec_kind,
&Default::default(), &Default::default(),
)); ));
assert_matches!(v, Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(s))) if s == "bar".to_string());
} }
#[test] #[test]
@@ -752,7 +766,7 @@ fn candidate_validation_timeout_is_internal_error() {
candidate_receipt, candidate_receipt,
Arc::new(pov), Arc::new(pov),
ExecutorParams::default(), ExecutorParams::default(),
PvfExecTimeoutKind::Backing, PvfExecKind::Backing,
&Default::default(), &Default::default(),
)); ));
@@ -797,7 +811,7 @@ fn candidate_validation_commitment_hash_mismatch_is_invalid() {
candidate_receipt, candidate_receipt,
Arc::new(pov), Arc::new(pov),
ExecutorParams::default(), ExecutorParams::default(),
PvfExecTimeoutKind::Backing, PvfExecKind::Backing,
&Default::default(), &Default::default(),
)) ))
.unwrap(); .unwrap();
@@ -846,7 +860,7 @@ fn candidate_validation_code_mismatch_is_invalid() {
candidate_receipt, candidate_receipt,
Arc::new(pov), Arc::new(pov),
ExecutorParams::default(), ExecutorParams::default(),
PvfExecTimeoutKind::Backing, PvfExecKind::Backing,
&Default::default(), &Default::default(),
)) ))
.unwrap(); .unwrap();
@@ -903,7 +917,7 @@ fn compressed_code_works() {
candidate_receipt, candidate_receipt,
Arc::new(pov), Arc::new(pov),
ExecutorParams::default(), ExecutorParams::default(),
PvfExecTimeoutKind::Backing, PvfExecKind::Backing,
&Default::default(), &Default::default(),
)); ));
@@ -954,7 +968,7 @@ fn code_decompression_failure_is_error() {
candidate_receipt, candidate_receipt,
Arc::new(pov), Arc::new(pov),
ExecutorParams::default(), ExecutorParams::default(),
PvfExecTimeoutKind::Backing, PvfExecKind::Backing,
&Default::default(), &Default::default(),
)); ));
@@ -1006,7 +1020,7 @@ fn pov_decompression_failure_is_invalid() {
candidate_receipt, candidate_receipt,
Arc::new(pov), Arc::new(pov),
ExecutorParams::default(), ExecutorParams::default(),
PvfExecTimeoutKind::Backing, PvfExecKind::Backing,
&Default::default(), &Default::default(),
)); ));
@@ -32,7 +32,7 @@ use polkadot_node_subsystem::{
}; };
use polkadot_node_subsystem_util::runtime::get_validation_code_by_hash; use polkadot_node_subsystem_util::runtime::get_validation_code_by_hash;
use polkadot_primitives::{ use polkadot_primitives::{
BlockNumber, CandidateHash, CandidateReceipt, Hash, PvfExecTimeoutKind, SessionIndex, BlockNumber, CandidateHash, CandidateReceipt, Hash, PvfExecKind, SessionIndex,
}; };
use crate::LOG_TARGET; use crate::LOG_TARGET;
@@ -386,7 +386,7 @@ async fn participate(
candidate_receipt: req.candidate_receipt().clone(), candidate_receipt: req.candidate_receipt().clone(),
pov: available_data.pov, pov: available_data.pov,
executor_params: req.executor_params(), executor_params: req.executor_params(),
exec_timeout_kind: PvfExecTimeoutKind::Approval, exec_kind: PvfExecKind::Approval,
response_sender: validation_tx, response_sender: validation_tx,
}) })
.await; .await;
@@ -115,8 +115,8 @@ pub async fn participation_full_happy_path(
assert_matches!( assert_matches!(
ctx_handle.recv().await, ctx_handle.recv().await,
AllMessages::CandidateValidation( AllMessages::CandidateValidation(
CandidateValidationMessage::ValidateFromExhaustive { candidate_receipt, exec_timeout_kind, response_sender, .. } CandidateValidationMessage::ValidateFromExhaustive { candidate_receipt, exec_kind, response_sender, .. }
) if exec_timeout_kind == PvfExecTimeoutKind::Approval => { ) if exec_kind == PvfExecKind::Approval => {
if expected_commitments_hash != candidate_receipt.commitments_hash { if expected_commitments_hash != candidate_receipt.commitments_hash {
response_sender.send(Ok(ValidationResult::Invalid(InvalidCandidate::CommitmentsHashMismatch))).unwrap(); response_sender.send(Ok(ValidationResult::Invalid(InvalidCandidate::CommitmentsHashMismatch))).unwrap();
} else { } else {
@@ -450,8 +450,8 @@ fn cast_invalid_vote_if_validation_fails_or_is_invalid() {
assert_matches!( assert_matches!(
ctx_handle.recv().await, ctx_handle.recv().await,
AllMessages::CandidateValidation( AllMessages::CandidateValidation(
CandidateValidationMessage::ValidateFromExhaustive { exec_timeout_kind, response_sender, .. } CandidateValidationMessage::ValidateFromExhaustive { exec_kind, response_sender, .. }
) if exec_timeout_kind == PvfExecTimeoutKind::Approval => { ) if exec_kind == PvfExecKind::Approval => {
response_sender.send(Ok(ValidationResult::Invalid(InvalidCandidate::Timeout))).unwrap(); response_sender.send(Ok(ValidationResult::Invalid(InvalidCandidate::Timeout))).unwrap();
}, },
"overseer did not receive candidate validation message", "overseer did not receive candidate validation message",
@@ -487,8 +487,8 @@ fn cast_invalid_vote_if_commitments_dont_match() {
assert_matches!( assert_matches!(
ctx_handle.recv().await, ctx_handle.recv().await,
AllMessages::CandidateValidation( AllMessages::CandidateValidation(
CandidateValidationMessage::ValidateFromExhaustive { exec_timeout_kind, response_sender, .. } CandidateValidationMessage::ValidateFromExhaustive { exec_kind, response_sender, .. }
) if exec_timeout_kind == PvfExecTimeoutKind::Approval => { ) if exec_kind == PvfExecKind::Approval => {
response_sender.send(Ok(ValidationResult::Invalid(InvalidCandidate::CommitmentsHashMismatch))).unwrap(); response_sender.send(Ok(ValidationResult::Invalid(InvalidCandidate::CommitmentsHashMismatch))).unwrap();
}, },
"overseer did not receive candidate validation message", "overseer did not receive candidate validation message",
@@ -524,8 +524,8 @@ fn cast_valid_vote_if_validation_passes() {
assert_matches!( assert_matches!(
ctx_handle.recv().await, ctx_handle.recv().await,
AllMessages::CandidateValidation( AllMessages::CandidateValidation(
CandidateValidationMessage::ValidateFromExhaustive { exec_timeout_kind, response_sender, .. } CandidateValidationMessage::ValidateFromExhaustive { exec_kind, response_sender, .. }
) if exec_timeout_kind == PvfExecTimeoutKind::Approval => { ) if exec_kind == PvfExecKind::Approval => {
response_sender.send(Ok(ValidationResult::Valid(dummy_candidate_commitments(None), PersistedValidationData::default()))).unwrap(); response_sender.send(Ok(ValidationResult::Valid(dummy_candidate_commitments(None), PersistedValidationData::default()))).unwrap();
}, },
"overseer did not receive candidate validation message", "overseer did not receive candidate validation message",
+18 -18
View File
@@ -30,7 +30,7 @@ use polkadot_node_subsystem::{
use polkadot_primitives::{ use polkadot_primitives::{
CandidateCommitments, CandidateDescriptor, CandidateReceipt, PersistedValidationData, CandidateCommitments, CandidateDescriptor, CandidateReceipt, PersistedValidationData,
PvfExecTimeoutKind, PvfExecKind,
}; };
use futures::channel::oneshot; use futures::channel::oneshot;
@@ -90,10 +90,10 @@ impl FakeCandidateValidation {
} }
} }
fn should_misbehave(&self, timeout: PvfExecTimeoutKind) -> bool { fn should_misbehave(&self, timeout: PvfExecKind) -> bool {
match timeout { match timeout {
PvfExecTimeoutKind::Backing => self.includes_backing(), PvfExecKind::Backing => self.includes_backing(),
PvfExecTimeoutKind::Approval => self.includes_approval(), PvfExecKind::Approval => self.includes_approval(),
} }
} }
} }
@@ -279,13 +279,13 @@ where
candidate_receipt, candidate_receipt,
pov, pov,
executor_params, executor_params,
exec_timeout_kind, exec_kind,
response_sender, response_sender,
.. ..
}, },
} => { } => {
match self.fake_validation { match self.fake_validation {
x if x.misbehaves_valid() && x.should_misbehave(exec_timeout_kind) => { x if x.misbehaves_valid() && x.should_misbehave(exec_kind) => {
// Behave normally if the `PoV` is not known to be malicious. // Behave normally if the `PoV` is not known to be malicious.
if pov.block_data.0.as_slice() != MALICIOUS_POV { if pov.block_data.0.as_slice() != MALICIOUS_POV {
return Some(FromOrchestra::Communication { return Some(FromOrchestra::Communication {
@@ -295,7 +295,7 @@ where
candidate_receipt, candidate_receipt,
pov, pov,
executor_params, executor_params,
exec_timeout_kind, exec_kind,
response_sender, response_sender,
}, },
}) })
@@ -333,14 +333,14 @@ where
candidate_receipt, candidate_receipt,
pov, pov,
executor_params, executor_params,
exec_timeout_kind, exec_kind,
response_sender, response_sender,
}, },
}) })
}, },
} }
}, },
x if x.misbehaves_invalid() && x.should_misbehave(exec_timeout_kind) => { x if x.misbehaves_invalid() && x.should_misbehave(exec_kind) => {
// Set the validation result to invalid with probability `p` and trigger a // Set the validation result to invalid with probability `p` and trigger a
// dispute // dispute
let behave_maliciously = self.distribution.sample(&mut rand::thread_rng()); let behave_maliciously = self.distribution.sample(&mut rand::thread_rng());
@@ -373,7 +373,7 @@ where
candidate_receipt, candidate_receipt,
pov, pov,
executor_params, executor_params,
exec_timeout_kind, exec_kind,
response_sender, response_sender,
}, },
}) })
@@ -388,7 +388,7 @@ where
candidate_receipt, candidate_receipt,
pov, pov,
executor_params, executor_params,
exec_timeout_kind, exec_kind,
response_sender, response_sender,
}, },
}), }),
@@ -401,13 +401,13 @@ where
candidate_receipt, candidate_receipt,
pov, pov,
executor_params, executor_params,
exec_timeout_kind, exec_kind,
response_sender, response_sender,
.. ..
}, },
} => { } => {
match self.fake_validation { match self.fake_validation {
x if x.misbehaves_valid() && x.should_misbehave(exec_timeout_kind) => { x if x.misbehaves_valid() && x.should_misbehave(exec_kind) => {
// Behave normally if the `PoV` is not known to be malicious. // Behave normally if the `PoV` is not known to be malicious.
if pov.block_data.0.as_slice() != MALICIOUS_POV { if pov.block_data.0.as_slice() != MALICIOUS_POV {
return Some(FromOrchestra::Communication { return Some(FromOrchestra::Communication {
@@ -415,7 +415,7 @@ where
candidate_receipt, candidate_receipt,
pov, pov,
executor_params, executor_params,
exec_timeout_kind, exec_kind,
response_sender, response_sender,
}, },
}) })
@@ -445,13 +445,13 @@ where
candidate_receipt, candidate_receipt,
pov, pov,
executor_params, executor_params,
exec_timeout_kind, exec_kind,
response_sender, response_sender,
}, },
}), }),
} }
}, },
x if x.misbehaves_invalid() && x.should_misbehave(exec_timeout_kind) => { x if x.misbehaves_invalid() && x.should_misbehave(exec_kind) => {
// Maliciously set the validation result to invalid for a valid candidate // Maliciously set the validation result to invalid for a valid candidate
// with probability `p` // with probability `p`
let behave_maliciously = self.distribution.sample(&mut rand::thread_rng()); let behave_maliciously = self.distribution.sample(&mut rand::thread_rng());
@@ -479,7 +479,7 @@ where
candidate_receipt, candidate_receipt,
pov, pov,
executor_params, executor_params,
exec_timeout_kind, exec_kind,
response_sender, response_sender,
}, },
}) })
@@ -491,7 +491,7 @@ where
candidate_receipt, candidate_receipt,
pov, pov,
executor_params, executor_params,
exec_timeout_kind, exec_kind,
response_sender, response_sender,
}, },
}), }),
@@ -32,7 +32,7 @@ use polkadot_overseer::{
gen::{FromOrchestra, SpawnedSubsystem}, gen::{FromOrchestra, SpawnedSubsystem},
HeadSupportsParachains, SubsystemError, HeadSupportsParachains, SubsystemError,
}; };
use polkadot_primitives::{CandidateReceipt, Hash, PvfExecTimeoutKind}; use polkadot_primitives::{CandidateReceipt, Hash, PvfExecKind};
struct AlwaysSupportsParachains; struct AlwaysSupportsParachains;
@@ -77,7 +77,7 @@ impl Subsystem1 {
candidate_receipt, candidate_receipt,
pov: PoV { block_data: BlockData(Vec::new()) }.into(), pov: PoV { block_data: BlockData(Vec::new()) }.into(),
executor_params: Default::default(), executor_params: Default::default(),
exec_timeout_kind: PvfExecTimeoutKind::Backing, exec_kind: PvfExecKind::Backing,
response_sender: tx, response_sender: tx,
}; };
ctx.send_message(msg).await; ctx.send_message(msg).await;
+3 -3
View File
@@ -30,7 +30,7 @@ use polkadot_node_subsystem_types::messages::{
}; };
use polkadot_primitives::{ use polkadot_primitives::{
CandidateHash, CandidateReceipt, CollatorPair, Id as ParaId, InvalidDisputeStatementKind, CandidateHash, CandidateReceipt, CollatorPair, Id as ParaId, InvalidDisputeStatementKind,
PvfExecTimeoutKind, SessionIndex, ValidDisputeStatementKind, ValidatorIndex, PvfExecKind, SessionIndex, ValidDisputeStatementKind, ValidatorIndex,
}; };
use crate::{ use crate::{
@@ -106,7 +106,7 @@ where
candidate_receipt, candidate_receipt,
pov: PoV { block_data: BlockData(Vec::new()) }.into(), pov: PoV { block_data: BlockData(Vec::new()) }.into(),
executor_params: Default::default(), executor_params: Default::default(),
exec_timeout_kind: PvfExecTimeoutKind::Backing, exec_kind: PvfExecKind::Backing,
response_sender: tx, response_sender: tx,
}) })
.await; .await;
@@ -804,7 +804,7 @@ fn test_candidate_validation_msg() -> CandidateValidationMessage {
candidate_receipt, candidate_receipt,
pov, pov,
executor_params: Default::default(), executor_params: Default::default(),
exec_timeout_kind: PvfExecTimeoutKind::Backing, exec_kind: PvfExecKind::Backing,
response_sender, response_sender,
} }
} }
@@ -47,7 +47,7 @@ use polkadot_primitives::{
CommittedCandidateReceipt, CoreState, DisputeState, ExecutorParams, GroupIndex, CommittedCandidateReceipt, CoreState, DisputeState, ExecutorParams, GroupIndex,
GroupRotationInfo, Hash, Header as BlockHeader, Id as ParaId, InboundDownwardMessage, GroupRotationInfo, Hash, Header as BlockHeader, Id as ParaId, InboundDownwardMessage,
InboundHrmpMessage, MultiDisputeStatementSet, OccupiedCoreAssumption, PersistedValidationData, InboundHrmpMessage, MultiDisputeStatementSet, OccupiedCoreAssumption, PersistedValidationData,
PvfCheckStatement, PvfExecTimeoutKind, SessionIndex, SessionInfo, SignedAvailabilityBitfield, PvfCheckStatement, PvfExecKind, SessionIndex, SessionInfo, SignedAvailabilityBitfield,
SignedAvailabilityBitfields, ValidationCode, ValidationCodeHash, ValidatorId, ValidatorIndex, SignedAvailabilityBitfields, ValidationCode, ValidationCodeHash, ValidatorId, ValidatorIndex,
ValidatorSignature, ValidatorSignature,
}; };
@@ -150,8 +150,8 @@ pub enum CandidateValidationMessage {
pov: Arc<PoV>, pov: Arc<PoV>,
/// Session's executor parameters /// Session's executor parameters
executor_params: ExecutorParams, executor_params: ExecutorParams,
/// Execution timeout kind (backing/approvals) /// Execution kind, used for timeouts and retries (backing/approvals)
exec_timeout_kind: PvfExecTimeoutKind, exec_kind: PvfExecKind,
/// The sending side of the response channel /// The sending side of the response channel
response_sender: oneshot::Sender<Result<ValidationResult, ValidationFailed>>, response_sender: oneshot::Sender<Result<ValidationResult, ValidationFailed>>,
}, },
@@ -175,8 +175,8 @@ pub enum CandidateValidationMessage {
pov: Arc<PoV>, pov: Arc<PoV>,
/// Session's executor parameters /// Session's executor parameters
executor_params: ExecutorParams, executor_params: ExecutorParams,
/// Execution timeout kind (backing/approvals) /// Execution kind, used for timeouts and retries (backing/approvals)
exec_timeout_kind: PvfExecTimeoutKind, exec_kind: PvfExecKind,
/// The sending side of the response channel /// The sending side of the response channel
response_sender: oneshot::Sender<Result<ValidationResult, ValidationFailed>>, response_sender: oneshot::Sender<Result<ValidationResult, ValidationFailed>>,
}, },
+5 -5
View File
@@ -48,11 +48,11 @@ pub use v6::{
HorizontalMessages, HrmpChannelId, Id, InboundDownwardMessage, InboundHrmpMessage, IndexedVec, HorizontalMessages, HrmpChannelId, Id, InboundDownwardMessage, InboundHrmpMessage, IndexedVec,
InherentData, InvalidDisputeStatementKind, Moment, MultiDisputeStatementSet, Nonce, InherentData, InvalidDisputeStatementKind, Moment, MultiDisputeStatementSet, Nonce,
OccupiedCore, OccupiedCoreAssumption, OutboundHrmpMessage, ParathreadClaim, ParathreadEntry, OccupiedCore, OccupiedCoreAssumption, OutboundHrmpMessage, ParathreadClaim, ParathreadEntry,
PersistedValidationData, PvfCheckStatement, PvfExecTimeoutKind, PvfPrepTimeoutKind, PersistedValidationData, PvfCheckStatement, PvfExecKind, PvfPrepKind, RuntimeMetricLabel,
RuntimeMetricLabel, RuntimeMetricLabelValue, RuntimeMetricLabelValues, RuntimeMetricLabels, RuntimeMetricLabelValue, RuntimeMetricLabelValues, RuntimeMetricLabels, RuntimeMetricOp,
RuntimeMetricOp, RuntimeMetricUpdate, ScheduledCore, ScrapedOnChainVotes, SessionIndex, RuntimeMetricUpdate, ScheduledCore, ScrapedOnChainVotes, SessionIndex, SessionInfo, Signature,
SessionInfo, Signature, Signed, SignedAvailabilityBitfield, SignedAvailabilityBitfields, Signed, SignedAvailabilityBitfield, SignedAvailabilityBitfields, SignedStatement,
SignedStatement, SigningContext, Slot, UncheckedSigned, UncheckedSignedAvailabilityBitfield, SigningContext, Slot, UncheckedSigned, UncheckedSignedAvailabilityBitfield,
UncheckedSignedAvailabilityBitfields, UncheckedSignedStatement, UpgradeGoAhead, UncheckedSignedAvailabilityBitfields, UncheckedSignedStatement, UpgradeGoAhead,
UpgradeRestriction, UpwardMessage, ValidDisputeStatementKind, ValidationCode, UpgradeRestriction, UpwardMessage, ValidDisputeStatementKind, ValidationCode,
ValidationCodeHash, ValidatorId, ValidatorIndex, ValidatorSignature, ValidityAttestation, ValidationCodeHash, ValidatorId, ValidatorIndex, ValidatorSignature, ValidityAttestation,
+16 -23
View File
@@ -21,7 +21,7 @@
//! by the first element of the vector). Decoding to a usable semantics structure is //! by the first element of the vector). Decoding to a usable semantics structure is
//! done in `polkadot-node-core-pvf`. //! done in `polkadot-node-core-pvf`.
use crate::{BlakeTwo256, HashT as _, PvfExecTimeoutKind, PvfPrepTimeoutKind}; use crate::{BlakeTwo256, HashT as _, PvfExecKind, PvfPrepKind};
use parity_scale_codec::{Decode, Encode}; use parity_scale_codec::{Decode, Encode};
use polkadot_core_primitives::Hash; use polkadot_core_primitives::Hash;
use scale_info::TypeInfo; use scale_info::TypeInfo;
@@ -45,7 +45,7 @@ pub const PRECHECK_MEM_MAX_LO: u64 = 256 * 1024 * 1024;
pub const PRECHECK_MEM_MAX_HI: u64 = 16 * 1024 * 1024 * 1024; pub const PRECHECK_MEM_MAX_HI: u64 = 16 * 1024 * 1024 * 1024;
// Default PVF timeouts. Must never be changed! Use executor environment parameters to adjust them. // Default PVF timeouts. Must never be changed! Use executor environment parameters to adjust them.
// See also `PvfPrepTimeoutKind` and `PvfExecTimeoutKind` docs. // See also `PvfPrepKind` and `PvfExecKind` docs.
/// Default PVF preparation timeout for prechecking requests. /// Default PVF preparation timeout for prechecking requests.
pub const DEFAULT_PRECHECK_PREPARATION_TIMEOUT: Duration = Duration::from_secs(60); pub const DEFAULT_PRECHECK_PREPARATION_TIMEOUT: Duration = Duration::from_secs(60);
@@ -99,12 +99,12 @@ pub enum ExecutorParam {
/// Always ensure that `precheck_timeout` < `lenient_timeout`. /// Always ensure that `precheck_timeout` < `lenient_timeout`.
/// When absent, the default values will be used. /// When absent, the default values will be used.
#[codec(index = 5)] #[codec(index = 5)]
PvfPrepTimeout(PvfPrepTimeoutKind, u64), PvfPrepTimeout(PvfPrepKind, u64),
/// PVF execution timeouts, in milliseconds. /// PVF execution timeouts, in milliseconds.
/// Always ensure that `backing_timeout` < `approval_timeout`. /// Always ensure that `backing_timeout` < `approval_timeout`.
/// When absent, the default values will be used. /// When absent, the default values will be used.
#[codec(index = 6)] #[codec(index = 6)]
PvfExecTimeout(PvfExecTimeoutKind, u64), PvfExecTimeout(PvfExecKind, u64),
/// Enables WASM bulk memory proposal /// Enables WASM bulk memory proposal
#[codec(index = 7)] #[codec(index = 7)]
WasmExtBulkMemory, WasmExtBulkMemory,
@@ -174,7 +174,7 @@ impl ExecutorParams {
} }
/// Returns a PVF preparation timeout, if any /// Returns a PVF preparation timeout, if any
pub fn pvf_prep_timeout(&self, kind: PvfPrepTimeoutKind) -> Option<Duration> { pub fn pvf_prep_timeout(&self, kind: PvfPrepKind) -> Option<Duration> {
for param in &self.0 { for param in &self.0 {
if let ExecutorParam::PvfPrepTimeout(k, timeout) = param { if let ExecutorParam::PvfPrepTimeout(k, timeout) = param {
if kind == *k { if kind == *k {
@@ -186,7 +186,7 @@ impl ExecutorParams {
} }
/// Returns a PVF execution timeout, if any /// Returns a PVF execution timeout, if any
pub fn pvf_exec_timeout(&self, kind: PvfExecTimeoutKind) -> Option<Duration> { pub fn pvf_exec_timeout(&self, kind: PvfExecKind) -> Option<Duration> {
for param in &self.0 { for param in &self.0 {
if let ExecutorParam::PvfExecTimeout(k, timeout) = param { if let ExecutorParam::PvfExecTimeout(k, timeout) = param {
if kind == *k { if kind == *k {
@@ -242,12 +242,12 @@ impl ExecutorParams {
StackNativeMax(_) => "StackNativeMax", StackNativeMax(_) => "StackNativeMax",
PrecheckingMaxMemory(_) => "PrecheckingMaxMemory", PrecheckingMaxMemory(_) => "PrecheckingMaxMemory",
PvfPrepTimeout(kind, _) => match kind { PvfPrepTimeout(kind, _) => match kind {
PvfPrepTimeoutKind::Precheck => "PvfPrepTimeoutKind::Precheck", PvfPrepKind::Precheck => "PvfPrepKind::Precheck",
PvfPrepTimeoutKind::Lenient => "PvfPrepTimeoutKind::Lenient", PvfPrepKind::Prepare => "PvfPrepKind::Prepare",
}, },
PvfExecTimeout(kind, _) => match kind { PvfExecTimeout(kind, _) => match kind {
PvfExecTimeoutKind::Backing => "PvfExecTimeoutKind::Backing", PvfExecKind::Backing => "PvfExecKind::Backing",
PvfExecTimeoutKind::Approval => "PvfExecTimeoutKind::Approval", PvfExecKind::Approval => "PvfExecKind::Approval",
}, },
WasmExtBulkMemory => "WasmExtBulkMemory", WasmExtBulkMemory => "WasmExtBulkMemory",
}; };
@@ -297,30 +297,23 @@ impl ExecutorParams {
} }
if let (Some(precheck), Some(lenient)) = ( if let (Some(precheck), Some(lenient)) = (
seen.get("PvfPrepTimeoutKind::Precheck") seen.get("PvfPrepKind::Precheck")
.or(Some(&DEFAULT_PRECHECK_PREPARATION_TIMEOUT_MS)), .or(Some(&DEFAULT_PRECHECK_PREPARATION_TIMEOUT_MS)),
seen.get("PvfPrepTimeoutKind::Lenient") seen.get("PvfPrepKind::Prepare")
.or(Some(&DEFAULT_LENIENT_PREPARATION_TIMEOUT_MS)), .or(Some(&DEFAULT_LENIENT_PREPARATION_TIMEOUT_MS)),
) { ) {
if *precheck >= *lenient { if *precheck >= *lenient {
return Err(IncompatibleValues( return Err(IncompatibleValues("PvfPrepKind::Precheck", "PvfPrepKind::Prepare"))
"PvfPrepTimeoutKind::Precheck",
"PvfPrepTimeoutKind::Lenient",
))
} }
} }
if let (Some(backing), Some(approval)) = ( if let (Some(backing), Some(approval)) = (
seen.get("PvfExecTimeoutKind::Backing") seen.get("PvfExecKind::Backing").or(Some(&DEFAULT_BACKING_EXECUTION_TIMEOUT_MS)),
.or(Some(&DEFAULT_BACKING_EXECUTION_TIMEOUT_MS)), seen.get("PvfExecKind::Approval")
seen.get("PvfExecTimeoutKind::Approval")
.or(Some(&DEFAULT_APPROVAL_EXECUTION_TIMEOUT_MS)), .or(Some(&DEFAULT_APPROVAL_EXECUTION_TIMEOUT_MS)),
) { ) {
if *backing >= *approval { if *backing >= *approval {
return Err(IncompatibleValues( return Err(IncompatibleValues("PvfExecKind::Backing", "PvfExecKind::Approval"))
"PvfExecTimeoutKind::Backing",
"PvfExecTimeoutKind::Approval",
))
} }
} }
+9 -17
View File
@@ -1781,30 +1781,22 @@ impl<T: Encode> WellKnownKey<T> {
} }
} }
/// Type discriminator for PVF preparation timeouts /// Type discriminator for PVF preparation.
#[derive(Encode, Decode, TypeInfo, Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] #[derive(Encode, Decode, TypeInfo, Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum PvfPrepTimeoutKind { pub enum PvfPrepKind {
/// For prechecking requests, the time period after which the preparation worker is considered /// For prechecking requests.
/// unresponsive and will be killed.
Precheck, Precheck,
/// For execution and heads-up requests, the time period after which the preparation worker is /// For execution and heads-up requests.
/// considered unresponsive and will be killed. More lenient than the timeout for prechecking Prepare,
/// to prevent honest validators from timing out on valid PVFs.
Lenient,
} }
/// Type discriminator for PVF execution timeouts /// Type discriminator for PVF execution.
#[derive(Encode, Decode, TypeInfo, Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] #[derive(Encode, Decode, TypeInfo, Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum PvfExecTimeoutKind { pub enum PvfExecKind {
/// The amount of time to spend on execution during backing. /// For backing requests.
Backing, Backing,
/// For approval and dispute requests.
/// The amount of time to spend on execution during approval or disputes.
///
/// This should be much longer than the backing execution timeout to ensure that in the
/// absence of extremely large disparities between hardware, blocks that pass backing are
/// considered executable by approval checkers or dispute participants.
Approval, Approval,
} }
@@ -31,6 +31,10 @@ hopefully resolve. We use a more brief delay here (1 second as opposed to 15
minutes for preparation (see above)), because a successful execution must happen minutes for preparation (see above)), because a successful execution must happen
in a short amount of time. in a short amount of time.
If execution fails during the backing phase, we do not retry, so as to avoid
supporting nondeterministic candidates. This reduces the chance of nondeterministic blocks
getting backed and of honest backers getting slashed.
We currently know of the following specific cases that will lead to a retried We currently know of the following specific cases that will lead to a retried
execution request: execution request:
@@ -17,7 +17,7 @@
use crate::configuration::*; use crate::configuration::*;
use frame_benchmarking::{benchmarks, BenchmarkError, BenchmarkResult}; use frame_benchmarking::{benchmarks, BenchmarkError, BenchmarkResult};
use frame_system::RawOrigin; use frame_system::RawOrigin;
use primitives::{ExecutorParam, ExecutorParams, PvfExecTimeoutKind, PvfPrepTimeoutKind}; use primitives::{ExecutorParam, ExecutorParams, PvfExecKind, PvfPrepKind};
use sp_runtime::traits::One; use sp_runtime::traits::One;
benchmarks! { benchmarks! {
@@ -41,10 +41,10 @@ benchmarks! {
ExecutorParam::StackNativeMax(256 * 1024 * 1024), ExecutorParam::StackNativeMax(256 * 1024 * 1024),
ExecutorParam::WasmExtBulkMemory, ExecutorParam::WasmExtBulkMemory,
ExecutorParam::PrecheckingMaxMemory(2 * 1024 * 1024 * 1024), ExecutorParam::PrecheckingMaxMemory(2 * 1024 * 1024 * 1024),
ExecutorParam::PvfPrepTimeout(PvfPrepTimeoutKind::Precheck, 60_000), ExecutorParam::PvfPrepTimeout(PvfPrepKind::Precheck, 60_000),
ExecutorParam::PvfPrepTimeout(PvfPrepTimeoutKind::Lenient, 360_000), ExecutorParam::PvfPrepTimeout(PvfPrepKind::Prepare, 360_000),
ExecutorParam::PvfExecTimeout(PvfExecTimeoutKind::Backing, 2_000), ExecutorParam::PvfExecTimeout(PvfExecKind::Backing, 2_000),
ExecutorParam::PvfExecTimeout(PvfExecTimeoutKind::Approval, 12_000), ExecutorParam::PvfExecTimeout(PvfExecKind::Approval, 12_000),
][..])) ][..]))
set_config_with_perbill {}: set_on_demand_fee_variability(RawOrigin::Root, Perbill::from_percent(100)) set_config_with_perbill {}: set_on_demand_fee_variability(RawOrigin::Root, Perbill::from_percent(100))