pre-checking: Reject failed PVFs (#6492)

* pre-checking: Reject failed PVFs

* paras: immediately reject any PVF that cannot reach a supermajority

* Make the `quorum` reject condition a bit more clear semantically

* Add comment

* Update implementer's guide

* Update a link

Not related to the rest of the PR, but I randomly noticed and fixed this.

* Update runtime/parachains/src/paras/tests.rs

Co-authored-by: s0me0ne-unkn0wn <48632512+s0me0ne-unkn0wn@users.noreply.github.com>

* Remove unneeded loop

* Log PVF retries using `info!`

* Change retry logs to `warn!` and add preparation failure log

* Log PVF execution failure

* Clarify why we reject failed PVFs

* Fix PVF reject runtime benchmarks

Co-authored-by: s0me0ne-unkn0wn <48632512+s0me0ne-unkn0wn@users.noreply.github.com>
This commit is contained in:
Marcin S
2023-01-12 04:24:42 -05:00
committed by GitHub
parent ae74d33a93
commit 2efa3bab98
11 changed files with 126 additions and 74 deletions
+30 -14
View File
@@ -223,32 +223,48 @@ fn handle_job_finish(
artifact_id: ArtifactId,
result_tx: ResultSender,
) {
let (idle_worker, result) = match outcome {
Outcome::Ok { result_descriptor, duration: _, idle_worker } => {
let (idle_worker, result, duration) = match outcome {
Outcome::Ok { result_descriptor, duration, idle_worker } => {
// TODO: propagate the soft timeout
(Some(idle_worker), Ok(result_descriptor))
(Some(idle_worker), Ok(result_descriptor), Some(duration))
},
Outcome::InvalidCandidate { err, idle_worker } => (
Some(idle_worker),
Err(ValidationError::InvalidCandidate(InvalidCandidate::WorkerReportedError(err))),
None,
),
Outcome::InternalError { err, idle_worker } =>
(Some(idle_worker), Err(ValidationError::InternalError(err))),
(Some(idle_worker), Err(ValidationError::InternalError(err)), None),
Outcome::HardTimeout =>
(None, Err(ValidationError::InvalidCandidate(InvalidCandidate::HardTimeout))),
Outcome::IoErr =>
(None, Err(ValidationError::InvalidCandidate(InvalidCandidate::AmbiguousWorkerDeath))),
(None, Err(ValidationError::InvalidCandidate(InvalidCandidate::HardTimeout)), None),
Outcome::IoErr => (
None,
Err(ValidationError::InvalidCandidate(InvalidCandidate::AmbiguousWorkerDeath)),
None,
),
};
queue.metrics.execute_finished();
gum::debug!(
target: LOG_TARGET,
validation_code_hash = ?artifact_id.code_hash,
?worker,
worker_rip = idle_worker.is_none(),
"execute worker concluded",
);
if let Err(ref err) = result {
gum::warn!(
target: LOG_TARGET,
?artifact_id,
?worker,
worker_rip = idle_worker.is_none(),
"execution worker concluded, error occurred: {:?}",
err
);
} else {
gum::debug!(
target: LOG_TARGET,
?artifact_id,
?worker,
worker_rip = idle_worker.is_none(),
?duration,
"execute worker concluded successfully",
);
}
// First we send the result. It may fail due to the other end of the channel being dropped,
// that's legitimate and we don't treat that as an error.