Retry failed PVF execution (AmbiguousWorkerDeath) (#6235)

* Fix a couple of typos

* Retry failed PVF execution

PVF execution that fails due to AmbiguousWorkerDeath should be retried once.
This should reduce the occurrence of failures due to transient conditions.

Closes #6195

* Address a couple of nits

* Write tests; refactor (add `validate_candidate_with_retry`)

* Update node/core/candidate-validation/src/lib.rs

Co-authored-by: Andronik <write@reusable.software>

Co-authored-by: eskimor <eskimor@users.noreply.github.com>
Co-authored-by: Andronik <write@reusable.software>
This commit is contained in:
Marcin S
2022-11-08 15:36:36 -05:00
committed by GitHub
parent c261353380
commit 6d7f33e612
5 changed files with 185 additions and 31 deletions
@@ -345,12 +345,19 @@ fn check_does_not_match() {
}
struct MockValidateCandidateBackend {
result: Result<WasmValidationResult, ValidationError>,
result_list: Vec<Result<WasmValidationResult, ValidationError>>,
num_times_called: usize,
}
impl MockValidateCandidateBackend {
fn with_hardcoded_result(result: Result<WasmValidationResult, ValidationError>) -> Self {
Self { result }
Self { result_list: vec![result], num_times_called: 0 }
}
fn with_hardcoded_result_list(
result_list: Vec<Result<WasmValidationResult, ValidationError>>,
) -> Self {
Self { result_list, num_times_called: 0 }
}
}
@@ -358,11 +365,16 @@ impl MockValidateCandidateBackend {
impl ValidationBackend for MockValidateCandidateBackend {
async fn validate_candidate(
&mut self,
_raw_validation_code: Vec<u8>,
_pvf: Pvf,
_timeout: Duration,
_params: ValidationParams,
_encoded_params: Vec<u8>,
) -> Result<WasmValidationResult, ValidationError> {
self.result.clone()
// This is expected to panic if called more times than expected, indicating an error in the
// test.
let result = self.result_list[self.num_times_called].clone();
self.num_times_called += 1;
result
}
async fn precheck_pvf(&mut self, _pvf: Pvf) -> Result<(), PrepareError> {
@@ -468,7 +480,7 @@ fn candidate_validation_bad_return_is_invalid() {
let v = executor::block_on(validate_candidate_exhaustive(
MockValidateCandidateBackend::with_hardcoded_result(Err(
ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbiguousWorkerDeath),
ValidationError::InvalidCandidate(WasmInvalidCandidate::HardTimeout),
)),
validation_data,
validation_code,
@@ -479,6 +491,122 @@ fn candidate_validation_bad_return_is_invalid() {
))
.unwrap();
assert_matches!(v, ValidationResult::Invalid(InvalidCandidate::Timeout));
}
#[test]
fn candidate_validation_one_ambiguous_error_is_valid() {
let validation_data = PersistedValidationData { max_pov_size: 1024, ..Default::default() };
let pov = PoV { block_data: BlockData(vec![1; 32]) };
let head_data = HeadData(vec![1, 1, 1]);
let validation_code = ValidationCode(vec![2; 16]);
let descriptor = make_valid_candidate_descriptor(
ParaId::from(1_u32),
dummy_hash(),
validation_data.hash(),
pov.hash(),
validation_code.hash(),
head_data.hash(),
dummy_hash(),
Sr25519Keyring::Alice,
);
let check = perform_basic_checks(
&descriptor,
validation_data.max_pov_size,
&pov,
&validation_code.hash(),
);
assert!(check.is_ok());
let validation_result = WasmValidationResult {
head_data,
new_validation_code: Some(vec![2, 2, 2].into()),
upward_messages: Vec::new(),
horizontal_messages: Vec::new(),
processed_downward_messages: 0,
hrmp_watermark: 0,
};
let commitments = CandidateCommitments {
head_data: validation_result.head_data.clone(),
upward_messages: validation_result.upward_messages.clone(),
horizontal_messages: validation_result.horizontal_messages.clone(),
new_validation_code: validation_result.new_validation_code.clone(),
processed_downward_messages: validation_result.processed_downward_messages,
hrmp_watermark: validation_result.hrmp_watermark,
};
let candidate_receipt = CandidateReceipt { descriptor, commitments_hash: commitments.hash() };
let v = executor::block_on(validate_candidate_exhaustive(
MockValidateCandidateBackend::with_hardcoded_result_list(vec![
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbiguousWorkerDeath)),
Ok(validation_result),
]),
validation_data.clone(),
validation_code,
candidate_receipt,
Arc::new(pov),
Duration::from_secs(0),
&Default::default(),
))
.unwrap();
assert_matches!(v, ValidationResult::Valid(outputs, used_validation_data) => {
assert_eq!(outputs.head_data, HeadData(vec![1, 1, 1]));
assert_eq!(outputs.upward_messages, Vec::<UpwardMessage>::new());
assert_eq!(outputs.horizontal_messages, Vec::new());
assert_eq!(outputs.new_validation_code, Some(vec![2, 2, 2].into()));
assert_eq!(outputs.hrmp_watermark, 0);
assert_eq!(used_validation_data, validation_data);
});
}
#[test]
fn candidate_validation_multiple_ambiguous_errors_is_invalid() {
let validation_data = PersistedValidationData { max_pov_size: 1024, ..Default::default() };
let pov = PoV { block_data: BlockData(vec![1; 32]) };
let validation_code = ValidationCode(vec![2; 16]);
let descriptor = make_valid_candidate_descriptor(
ParaId::from(1_u32),
dummy_hash(),
validation_data.hash(),
pov.hash(),
validation_code.hash(),
dummy_hash(),
dummy_hash(),
Sr25519Keyring::Alice,
);
let check = perform_basic_checks(
&descriptor,
validation_data.max_pov_size,
&pov,
&validation_code.hash(),
);
assert!(check.is_ok());
let candidate_receipt = CandidateReceipt { descriptor, commitments_hash: Hash::zero() };
let v = executor::block_on(validate_candidate_exhaustive(
MockValidateCandidateBackend::with_hardcoded_result_list(vec![
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbiguousWorkerDeath)),
Err(ValidationError::InvalidCandidate(WasmInvalidCandidate::AmbiguousWorkerDeath)),
]),
validation_data,
validation_code,
candidate_receipt,
Arc::new(pov),
Duration::from_secs(0),
&Default::default(),
))
.unwrap();
assert_matches!(v, ValidationResult::Invalid(InvalidCandidate::ExecutionError(_)));
}
@@ -779,9 +907,9 @@ impl MockPreCheckBackend {
impl ValidationBackend for MockPreCheckBackend {
async fn validate_candidate(
&mut self,
_raw_validation_code: Vec<u8>,
_pvf: Pvf,
_timeout: Duration,
_params: ValidationParams,
_encoded_params: Vec<u8>,
) -> Result<WasmValidationResult, ValidationError> {
unreachable!()
}