mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-15 13:51:11 +00:00
PVF: re-preparing artifact on failed runtime construction (#3187)
Resolves https://github.com/paritytech/polkadot-sdk/issues/3139

- [x] use a distinguishable error for `execute_artifact`
- [x] remove artifact in case of a `RuntimeConstruction` error during the execution
- [x] augment the `validate_candidate_with_retry` of `ValidationBackend` with the case of retriable `RuntimeConstruction` error during the execution
- [x] update the book (https://paritytech.github.io/polkadot-sdk/book/node/utility/pvf-host-and-workers.html#retrying-execution-requests)
- [x] add a test
- [x] run zombienet tests

---------

Co-authored-by: s0me0ne-unkn0wn <48632512+s0me0ne-unkn0wn@users.noreply.github.com>
This commit is contained in:
@@ -695,6 +695,8 @@ async fn validate_candidate_exhaustive(
|
||||
))),
|
||||
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::JobError(err))) =>
|
||||
Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(err))),
|
||||
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::RuntimeConstruction(err))) =>
|
||||
Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(err))),
|
||||
|
||||
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousJobDeath(err))) =>
|
||||
Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(format!(
|
||||
@@ -780,40 +782,50 @@ trait ValidationBackend {
|
||||
return validation_result
|
||||
}
|
||||
|
||||
// Spends one retry from the given counter variable, or exits the loop that
// encloses the macro invocation when the counter has already reached zero.
// `$counter` must name a mutable integer variable in scope at the call site.
macro_rules! break_if_no_retries_left {
|
||||
($counter:ident) => {
|
||||
if $counter > 0 {
|
||||
// A retry is still available: consume it and let the caller carry on.
$counter -= 1;
|
||||
} else {
|
||||
// No retries left for this kind of error: leave the enclosing loop.
break
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// Allow limited retries for each kind of error.
|
||||
let mut num_death_retries_left = 1;
|
||||
let mut num_job_error_retries_left = 1;
|
||||
let mut num_internal_retries_left = 1;
|
||||
let mut num_runtime_construction_retries_left = 1;
|
||||
loop {
|
||||
// Stop retrying if we exceeded the timeout.
|
||||
if total_time_start.elapsed() + retry_delay > exec_timeout {
|
||||
break
|
||||
}
|
||||
|
||||
let mut retry_immediately = false;
|
||||
match validation_result {
|
||||
Err(ValidationError::PossiblyInvalid(
|
||||
PossiblyInvalidError::AmbiguousWorkerDeath |
|
||||
PossiblyInvalidError::AmbiguousJobDeath(_),
|
||||
)) =>
|
||||
if num_death_retries_left > 0 {
|
||||
num_death_retries_left -= 1;
|
||||
} else {
|
||||
break
|
||||
},
|
||||
)) => break_if_no_retries_left!(num_death_retries_left),
|
||||
|
||||
Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::JobError(_))) =>
|
||||
if num_job_error_retries_left > 0 {
|
||||
num_job_error_retries_left -= 1;
|
||||
} else {
|
||||
break
|
||||
},
|
||||
break_if_no_retries_left!(num_job_error_retries_left),
|
||||
|
||||
Err(ValidationError::Internal(_)) =>
|
||||
if num_internal_retries_left > 0 {
|
||||
num_internal_retries_left -= 1;
|
||||
} else {
|
||||
break
|
||||
},
|
||||
break_if_no_retries_left!(num_internal_retries_left),
|
||||
|
||||
Err(ValidationError::PossiblyInvalid(
|
||||
PossiblyInvalidError::RuntimeConstruction(_),
|
||||
)) => {
|
||||
break_if_no_retries_left!(num_runtime_construction_retries_left);
|
||||
self.precheck_pvf(pvf.clone()).await?;
|
||||
// In this case the error is deterministic,
|
||||
// and a retry forces the ValidationBackend
|
||||
// to re-prepare the artifact, so
|
||||
// there is no need to wait before the retry.
|
||||
retry_immediately = true;
|
||||
},
|
||||
|
||||
Ok(_) | Err(ValidationError::Invalid(_) | ValidationError::Preparation(_)) => break,
|
||||
}
|
||||
@@ -821,8 +833,11 @@ trait ValidationBackend {
|
||||
// If we got a possibly transient error, retry once after a brief delay, on the
|
||||
// assumption that the conditions that caused this error may have resolved on their own.
|
||||
{
|
||||
// Wait a brief delay before retrying.
|
||||
futures_timer::Delay::new(retry_delay).await;
|
||||
// Many transient errors need a little time to pass, so wait briefly
|
||||
// to give the error a chance to resolve on its own.
|
||||
if !retry_immediately {
|
||||
futures_timer::Delay::new(retry_delay).await;
|
||||
}
|
||||
|
||||
let new_timeout = exec_timeout.saturating_sub(total_time_start.elapsed());
|
||||
|
||||
|
||||
Reference in New Issue
Block a user