mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-06-11 17:41:08 +00:00
Log PVF retries (#6504)
This commit is contained in:
@@ -604,6 +604,7 @@ async fn validate_candidate_exhaustive(
|
||||
|
||||
#[async_trait]
|
||||
trait ValidationBackend {
|
||||
/// Tries executing a PVF a single time (no retries).
|
||||
async fn validate_candidate(
|
||||
&mut self,
|
||||
pvf: Pvf,
|
||||
@@ -611,6 +612,8 @@ trait ValidationBackend {
|
||||
encoded_params: Vec<u8>,
|
||||
) -> Result<WasmValidationResult, ValidationError>;
|
||||
|
||||
/// Tries executing a PVF. Will retry once if an error is encountered that may have been
|
||||
/// transient.
|
||||
async fn validate_candidate_with_retry(
|
||||
&mut self,
|
||||
raw_validation_code: Vec<u8>,
|
||||
@@ -620,7 +623,7 @@ trait ValidationBackend {
|
||||
// Construct the PVF a single time, since it is an expensive operation. Cloning it is cheap.
|
||||
let pvf = Pvf::from_code(raw_validation_code);
|
||||
|
||||
let validation_result =
|
||||
let mut validation_result =
|
||||
self.validate_candidate(pvf.clone(), timeout, params.encode()).await;
|
||||
|
||||
// If we get an AmbiguousWorkerDeath error, retry once after a brief delay, on the
|
||||
@@ -630,12 +633,19 @@ trait ValidationBackend {
|
||||
{
|
||||
// Wait a brief delay before retrying.
|
||||
futures_timer::Delay::new(PVF_EXECUTION_RETRY_DELAY).await;
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?pvf,
|
||||
"Re-trying failed candidate validation due to AmbiguousWorkerDeath."
|
||||
);
|
||||
|
||||
// Encode the params again when re-trying. We expect the retry case to be relatively
|
||||
// rare, and we want to avoid unconditionally cloning data.
|
||||
self.validate_candidate(pvf, timeout, params.encode()).await
|
||||
} else {
|
||||
validation_result
|
||||
validation_result = self.validate_candidate(pvf, timeout, params.encode()).await;
|
||||
}
|
||||
|
||||
validation_result
|
||||
}
|
||||
|
||||
async fn precheck_pvf(&mut self, pvf: Pvf) -> Result<Duration, PrepareError>;
|
||||
|
||||
@@ -525,6 +525,16 @@ async fn handle_execute_pvf(
|
||||
},
|
||||
ArtifactState::FailedToProcess { last_time_failed, num_failures, error } => {
|
||||
if can_retry_prepare_after_failure(*last_time_failed, *num_failures, error) {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?pvf,
|
||||
?artifact_id,
|
||||
?last_time_failed,
|
||||
%num_failures,
|
||||
%error,
|
||||
"handle_execute_pvf: Re-trying failed PVF preparation."
|
||||
);
|
||||
|
||||
// If we are allowed to retry the failed prepare job, change the state to
|
||||
// Preparing and re-queue this job.
|
||||
*state = ArtifactState::Preparing {
|
||||
@@ -585,6 +595,16 @@ async fn handle_heads_up(
|
||||
},
|
||||
ArtifactState::FailedToProcess { last_time_failed, num_failures, error } => {
|
||||
if can_retry_prepare_after_failure(*last_time_failed, *num_failures, error) {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
?active_pvf,
|
||||
?artifact_id,
|
||||
?last_time_failed,
|
||||
%num_failures,
|
||||
%error,
|
||||
"handle_heads_up: Re-trying failed PVF preparation."
|
||||
);
|
||||
|
||||
// If we are allowed to retry the failed prepare job, change the state to
|
||||
// Preparing and re-queue this job.
|
||||
*state = ArtifactState::Preparing {
|
||||
@@ -1393,7 +1413,7 @@ mod tests {
|
||||
}
|
||||
|
||||
// Test that multiple execution requests don't trigger preparation retries if the first one
|
||||
// failed due to reproducible error (e.g. Prevalidation).
|
||||
// failed due to a reproducible error (e.g. Prevalidation).
|
||||
#[async_std::test]
|
||||
async fn test_execute_prepare_no_retry() {
|
||||
let mut test = Builder::default().build();
|
||||
|
||||
Reference in New Issue
Block a user