Move PVF timeouts to executor environment parameters (#6823)

* Move PVF timeouts to executor environment parameters

* Typo

Co-authored-by: Marcin S. <marcin@realemail.net>

* Fix comments

* Change handle_import_statements to FatalResult (#6820)

* Changing dispute db errors to fatal

* fmt

* Change node-key for bootnodes (#6772)

* Additional tracing in `provisioner`, `vote_selection` and `dispute-coordinator` (#6775)

* Additional tracing in `provisioner`, `vote_selection`

* Add `fetched_onchain_disputes` metric to provisioner

* Some tracelines in dispute-coordinator

TODO: cherry pick this in the initial branch!!!

* Remove spammy logs

* Remove some trace lines

* Rename and fix things

* Fix comments

* Typo

* Minor fixes

* Add codec indexes; Remove macro

---------

Co-authored-by: Marcin S. <marcin@realemail.net>
Co-authored-by: Bradley Olson <34992650+BradleyOlson64@users.noreply.github.com>
Co-authored-by: Petr Mensik <petr.mensik1@gmail.com>
Co-authored-by: Tsvetomir Dimitrov <tsvetomir@parity.io>
This commit is contained in:
s0me0ne-unkn0wn
2023-03-08 23:43:51 +01:00
committed by GitHub
parent 1c2215a75a
commit 03d4af104f
25 changed files with 359 additions and 415 deletions
+69 -119
View File
@@ -25,7 +25,7 @@ use crate::{
error::PrepareError,
execute,
metrics::Metrics,
prepare, PrepareResult, Priority, PvfWithExecutorParams, ValidationError, LOG_TARGET,
prepare, PrepareResult, Priority, PvfPrepData, ValidationError, LOG_TARGET,
};
use always_assert::never;
use futures::{
@@ -40,17 +40,6 @@ use std::{
time::{Duration, SystemTime},
};
/// For prechecking requests, the time period after which the preparation worker is considered
/// unresponsive and will be killed.
// NOTE: If you change this make sure to fix the buckets of `pvf_preparation_time` metric.
pub const PRECHECK_PREPARATION_TIMEOUT: Duration = Duration::from_secs(60);
/// For execution and heads-up requests, the time period after which the preparation worker is
/// considered unresponsive and will be killed. More lenient than the timeout for prechecking to
/// prevent honest validators from timing out on valid PVFs.
// NOTE: If you change this make sure to fix the buckets of `pvf_preparation_time` metric.
pub const LENIENT_PREPARATION_TIMEOUT: Duration = Duration::from_secs(360);
/// The time period after which a failed preparation artifact is considered ready to be retried.
/// Note that we will only retry if another request comes in after this cooldown has passed.
#[cfg(not(test))]
@@ -84,11 +73,11 @@ impl ValidationHost {
/// Returns an error if the request cannot be sent to the validation host, i.e. if it shut down.
pub async fn precheck_pvf(
&mut self,
pvf_with_params: PvfWithExecutorParams,
pvf: PvfPrepData,
result_tx: PrepareResultSender,
) -> Result<(), String> {
self.to_host_tx
.send(ToHost::PrecheckPvf { pvf_with_params, result_tx })
.send(ToHost::PrecheckPvf { pvf, result_tx })
.await
.map_err(|_| "the inner loop hung up".to_string())
}
@@ -102,16 +91,16 @@ impl ValidationHost {
/// Returns an error if the request cannot be sent to the validation host, i.e. if it shut down.
pub async fn execute_pvf(
&mut self,
pvf_with_params: PvfWithExecutorParams,
execution_timeout: Duration,
pvf: PvfPrepData,
exec_timeout: Duration,
params: Vec<u8>,
priority: Priority,
result_tx: ResultSender,
) -> Result<(), String> {
self.to_host_tx
.send(ToHost::ExecutePvf(ExecutePvfInputs {
pvf_with_params,
execution_timeout,
pvf,
exec_timeout,
params,
priority,
result_tx,
@@ -126,10 +115,7 @@ impl ValidationHost {
/// situations this function should return immediately.
///
/// Returns an error if the request cannot be sent to the validation host, i.e. if it shut down.
pub async fn heads_up(
&mut self,
active_pvfs: Vec<PvfWithExecutorParams>,
) -> Result<(), String> {
pub async fn heads_up(&mut self, active_pvfs: Vec<PvfPrepData>) -> Result<(), String> {
self.to_host_tx
.send(ToHost::HeadsUp { active_pvfs })
.await
@@ -138,14 +124,14 @@ impl ValidationHost {
}
enum ToHost {
PrecheckPvf { pvf_with_params: PvfWithExecutorParams, result_tx: PrepareResultSender },
PrecheckPvf { pvf: PvfPrepData, result_tx: PrepareResultSender },
ExecutePvf(ExecutePvfInputs),
HeadsUp { active_pvfs: Vec<PvfWithExecutorParams> },
HeadsUp { active_pvfs: Vec<PvfPrepData> },
}
struct ExecutePvfInputs {
pvf_with_params: PvfWithExecutorParams,
execution_timeout: Duration,
pvf: PvfPrepData,
exec_timeout: Duration,
params: Vec<u8>,
priority: Priority,
result_tx: ResultSender,
@@ -267,7 +253,7 @@ pub fn start(config: Config, metrics: Metrics) -> (ValidationHost, impl Future<O
/// to the given result sender.
#[derive(Debug)]
struct PendingExecutionRequest {
execution_timeout: Duration,
exec_timeout: Duration,
params: Vec<u8>,
executor_params: ExecutorParams,
result_tx: ResultSender,
@@ -282,13 +268,13 @@ impl AwaitingPrepare {
fn add(
&mut self,
artifact_id: ArtifactId,
execution_timeout: Duration,
exec_timeout: Duration,
params: Vec<u8>,
executor_params: ExecutorParams,
result_tx: ResultSender,
) {
self.0.entry(artifact_id).or_default().push(PendingExecutionRequest {
execution_timeout,
exec_timeout,
params,
executor_params,
result_tx,
@@ -427,8 +413,8 @@ async fn handle_to_host(
to_host: ToHost,
) -> Result<(), Fatal> {
match to_host {
ToHost::PrecheckPvf { pvf_with_params, result_tx } => {
handle_precheck_pvf(artifacts, prepare_queue, pvf_with_params, result_tx).await?;
ToHost::PrecheckPvf { pvf, result_tx } => {
handle_precheck_pvf(artifacts, prepare_queue, pvf, result_tx).await?;
},
ToHost::ExecutePvf(inputs) => {
handle_execute_pvf(
@@ -450,16 +436,17 @@ async fn handle_to_host(
/// Handles PVF prechecking requests.
///
/// This tries to prepare the PVF by compiling the WASM blob within a given timeout ([`PRECHECK_PREPARATION_TIMEOUT`]).
/// This tries to prepare the PVF by compiling the WASM blob within a timeout set in
/// `PvfPrepData`.
///
/// If the prepare job failed previously, we may retry it under certain conditions.
async fn handle_precheck_pvf(
artifacts: &mut Artifacts,
prepare_queue: &mut mpsc::Sender<prepare::ToQueue>,
pvf_with_params: PvfWithExecutorParams,
pvf: PvfPrepData,
result_sender: PrepareResultSender,
) -> Result<(), Fatal> {
let artifact_id = pvf_with_params.as_artifact_id();
let artifact_id = pvf.as_artifact_id();
if let Some(state) = artifacts.artifact_state_mut(&artifact_id) {
match state {
@@ -477,15 +464,8 @@ async fn handle_precheck_pvf(
}
} else {
artifacts.insert_preparing(artifact_id, vec![result_sender]);
send_prepare(
prepare_queue,
prepare::ToQueue::Enqueue {
priority: Priority::Normal,
pvf_with_params,
preparation_timeout: PRECHECK_PREPARATION_TIMEOUT,
},
)
.await?;
send_prepare(prepare_queue, prepare::ToQueue::Enqueue { priority: Priority::Normal, pvf })
.await?;
}
Ok(())
}
@@ -507,9 +487,8 @@ async fn handle_execute_pvf(
awaiting_prepare: &mut AwaitingPrepare,
inputs: ExecutePvfInputs,
) -> Result<(), Fatal> {
let ExecutePvfInputs { pvf_with_params, execution_timeout, params, priority, result_tx } =
inputs;
let artifact_id = pvf_with_params.as_artifact_id();
let ExecutePvfInputs { pvf, exec_timeout, params, priority, result_tx } = inputs;
let artifact_id = pvf.as_artifact_id();
if let Some(state) = artifacts.artifact_state_mut(&artifact_id) {
match state {
@@ -521,9 +500,9 @@ async fn handle_execute_pvf(
execute_queue,
execute::ToQueue::Enqueue {
artifact: ArtifactPathId::new(artifact_id, cache_path),
execution_timeout,
exec_timeout,
params,
executor_params: (*pvf_with_params.executor_params()).clone(),
executor_params: (*pvf.executor_params()).clone(),
result_tx,
},
)
@@ -532,9 +511,9 @@ async fn handle_execute_pvf(
ArtifactState::Preparing { .. } => {
awaiting_prepare.add(
artifact_id,
execution_timeout,
exec_timeout,
params,
(*pvf_with_params.executor_params()).clone(),
(*pvf.executor_params()).clone(),
result_tx,
);
},
@@ -542,7 +521,7 @@ async fn handle_execute_pvf(
if can_retry_prepare_after_failure(*last_time_failed, *num_failures, error) {
gum::warn!(
target: LOG_TARGET,
?pvf_with_params,
?pvf,
?artifact_id,
?last_time_failed,
%num_failures,
@@ -556,22 +535,15 @@ async fn handle_execute_pvf(
waiting_for_response: Vec::new(),
num_failures: *num_failures,
};
let executor_params = (*pvf_with_params.executor_params()).clone();
send_prepare(
prepare_queue,
prepare::ToQueue::Enqueue {
priority,
pvf_with_params,
preparation_timeout: LENIENT_PREPARATION_TIMEOUT,
},
)
.await?;
let executor_params = (*pvf.executor_params()).clone();
send_prepare(prepare_queue, prepare::ToQueue::Enqueue { priority, pvf })
.await?;
// Add an execution request that will wait to run after this prepare job has
// finished.
awaiting_prepare.add(
artifact_id,
execution_timeout,
exec_timeout,
params,
executor_params,
result_tx,
@@ -584,20 +556,12 @@ async fn handle_execute_pvf(
} else {
// Artifact is unknown: register it and enqueue a job with the corresponding priority and
// PVF.
let executor_params = (*pvf_with_params.executor_params()).clone();
let executor_params = (*pvf.executor_params()).clone();
artifacts.insert_preparing(artifact_id.clone(), Vec::new());
send_prepare(
prepare_queue,
prepare::ToQueue::Enqueue {
priority,
pvf_with_params,
preparation_timeout: LENIENT_PREPARATION_TIMEOUT,
},
)
.await?;
send_prepare(prepare_queue, prepare::ToQueue::Enqueue { priority, pvf }).await?;
// Add an execution request that will wait to run after this prepare job has finished.
awaiting_prepare.add(artifact_id, execution_timeout, params, executor_params, result_tx);
awaiting_prepare.add(artifact_id, exec_timeout, params, executor_params, result_tx);
}
Ok(())
@@ -606,7 +570,7 @@ async fn handle_execute_pvf(
async fn handle_heads_up(
artifacts: &mut Artifacts,
prepare_queue: &mut mpsc::Sender<prepare::ToQueue>,
active_pvfs: Vec<PvfWithExecutorParams>,
active_pvfs: Vec<PvfPrepData>,
) -> Result<(), Fatal> {
let now = SystemTime::now();
@@ -642,8 +606,7 @@ async fn handle_heads_up(
prepare_queue,
prepare::ToQueue::Enqueue {
priority: Priority::Normal,
pvf_with_params: active_pvf,
preparation_timeout: LENIENT_PREPARATION_TIMEOUT,
pvf: active_pvf,
},
)
.await?;
@@ -656,11 +619,7 @@ async fn handle_heads_up(
send_prepare(
prepare_queue,
prepare::ToQueue::Enqueue {
priority: Priority::Normal,
pvf_with_params: active_pvf,
preparation_timeout: LENIENT_PREPARATION_TIMEOUT,
},
prepare::ToQueue::Enqueue { priority: Priority::Normal, pvf: active_pvf },
)
.await?;
}
@@ -722,7 +681,7 @@ async fn handle_prepare_done(
// It's finally time to dispatch all the execution requests that were waiting for this artifact
// to be prepared.
let pending_requests = awaiting_prepare.take(&artifact_id);
for PendingExecutionRequest { execution_timeout, params, executor_params, result_tx } in
for PendingExecutionRequest { exec_timeout, params, executor_params, result_tx } in
pending_requests
{
if result_tx.is_canceled() {
@@ -741,7 +700,7 @@ async fn handle_prepare_done(
execute_queue,
execute::ToQueue::Enqueue {
artifact: ArtifactPathId::new(artifact_id.clone(), cache_path),
execution_timeout,
exec_timeout,
params,
executor_params,
result_tx,
@@ -858,13 +817,14 @@ fn pulse_every(interval: std::time::Duration) -> impl futures::Stream<Item = ()>
}
#[cfg(test)]
mod tests {
pub(crate) mod tests {
use super::*;
use crate::{prepare::PrepareStats, InvalidCandidate, PrepareError};
use assert_matches::assert_matches;
use futures::future::BoxFuture;
const TEST_EXECUTION_TIMEOUT: Duration = Duration::from_secs(3);
pub(crate) const TEST_PREPARATION_TIMEOUT: Duration = Duration::from_secs(30);
#[tokio::test]
async fn pulse_test() {
@@ -882,7 +842,7 @@ mod tests {
/// Creates a new PVF whose artifact ID can be uniquely identified by the given number.
fn artifact_id(descriminator: u32) -> ArtifactId {
PvfWithExecutorParams::from_discriminator(descriminator).as_artifact_id()
PvfPrepData::from_discriminator(descriminator).as_artifact_id()
}
fn artifact_path(descriminator: u32) -> PathBuf {
@@ -1091,7 +1051,7 @@ mod tests {
let mut test = builder.build();
let mut host = test.host_handle();
host.heads_up(vec![PvfWithExecutorParams::from_discriminator(1)]).await.unwrap();
host.heads_up(vec![PvfPrepData::from_discriminator(1)]).await.unwrap();
let to_sweeper_rx = &mut test.to_sweeper_rx;
run_until(
@@ -1105,7 +1065,7 @@ mod tests {
// Extend TTL for the first artifact and make sure we don't receive another file removal
// request.
host.heads_up(vec![PvfWithExecutorParams::from_discriminator(1)]).await.unwrap();
host.heads_up(vec![PvfPrepData::from_discriminator(1)]).await.unwrap();
test.poll_ensure_to_sweeper_is_empty().await;
}
@@ -1116,7 +1076,7 @@ mod tests {
let (result_tx, result_rx_pvf_1_1) = oneshot::channel();
host.execute_pvf(
PvfWithExecutorParams::from_discriminator(1),
PvfPrepData::from_discriminator(1),
TEST_EXECUTION_TIMEOUT,
b"pvf1".to_vec(),
Priority::Normal,
@@ -1127,7 +1087,7 @@ mod tests {
let (result_tx, result_rx_pvf_1_2) = oneshot::channel();
host.execute_pvf(
PvfWithExecutorParams::from_discriminator(1),
PvfPrepData::from_discriminator(1),
TEST_EXECUTION_TIMEOUT,
b"pvf1".to_vec(),
Priority::Critical,
@@ -1138,7 +1098,7 @@ mod tests {
let (result_tx, result_rx_pvf_2) = oneshot::channel();
host.execute_pvf(
PvfWithExecutorParams::from_discriminator(2),
PvfPrepData::from_discriminator(2),
TEST_EXECUTION_TIMEOUT,
b"pvf2".to_vec(),
Priority::Normal,
@@ -1216,9 +1176,7 @@ mod tests {
// First, test a simple precheck request.
let (result_tx, result_rx) = oneshot::channel();
host.precheck_pvf(PvfWithExecutorParams::from_discriminator(1), result_tx)
.await
.unwrap();
host.precheck_pvf(PvfPrepData::from_discriminator(1), result_tx).await.unwrap();
// The queue received the prepare request.
assert_matches!(
@@ -1242,9 +1200,7 @@ mod tests {
let mut precheck_receivers = Vec::new();
for _ in 0..3 {
let (result_tx, result_rx) = oneshot::channel();
host.precheck_pvf(PvfWithExecutorParams::from_discriminator(2), result_tx)
.await
.unwrap();
host.precheck_pvf(PvfPrepData::from_discriminator(2), result_tx).await.unwrap();
precheck_receivers.push(result_rx);
}
// Received prepare request.
@@ -1279,7 +1235,7 @@ mod tests {
// Send PVF for the execution and request the prechecking for it.
let (result_tx, result_rx_execute) = oneshot::channel();
host.execute_pvf(
PvfWithExecutorParams::from_discriminator(1),
PvfPrepData::from_discriminator(1),
TEST_EXECUTION_TIMEOUT,
b"pvf2".to_vec(),
Priority::Critical,
@@ -1294,9 +1250,7 @@ mod tests {
);
let (result_tx, result_rx) = oneshot::channel();
host.precheck_pvf(PvfWithExecutorParams::from_discriminator(1), result_tx)
.await
.unwrap();
host.precheck_pvf(PvfPrepData::from_discriminator(1), result_tx).await.unwrap();
// Suppose the preparation failed, the execution queue is empty and both
// "clients" receive their results.
@@ -1318,15 +1272,13 @@ mod tests {
let mut precheck_receivers = Vec::new();
for _ in 0..3 {
let (result_tx, result_rx) = oneshot::channel();
host.precheck_pvf(PvfWithExecutorParams::from_discriminator(2), result_tx)
.await
.unwrap();
host.precheck_pvf(PvfPrepData::from_discriminator(2), result_tx).await.unwrap();
precheck_receivers.push(result_rx);
}
let (result_tx, _result_rx_execute) = oneshot::channel();
host.execute_pvf(
PvfWithExecutorParams::from_discriminator(2),
PvfPrepData::from_discriminator(2),
TEST_EXECUTION_TIMEOUT,
b"pvf2".to_vec(),
Priority::Critical,
@@ -1366,9 +1318,7 @@ mod tests {
// Submit a precheck request that fails.
let (result_tx, result_rx) = oneshot::channel();
host.precheck_pvf(PvfWithExecutorParams::from_discriminator(1), result_tx)
.await
.unwrap();
host.precheck_pvf(PvfPrepData::from_discriminator(1), result_tx).await.unwrap();
// The queue received the prepare request.
assert_matches!(
@@ -1390,7 +1340,7 @@ mod tests {
// Submit another precheck request.
let (result_tx_2, result_rx_2) = oneshot::channel();
host.precheck_pvf(PvfWithExecutorParams::from_discriminator(1), result_tx_2)
host.precheck_pvf(PvfPrepData::from_discriminator(1), result_tx_2)
.await
.unwrap();
@@ -1406,7 +1356,7 @@ mod tests {
// Submit another precheck request.
let (result_tx_3, result_rx_3) = oneshot::channel();
host.precheck_pvf(PvfWithExecutorParams::from_discriminator(1), result_tx_3)
host.precheck_pvf(PvfPrepData::from_discriminator(1), result_tx_3)
.await
.unwrap();
@@ -1428,7 +1378,7 @@ mod tests {
// Submit an execute request that fails.
let (result_tx, result_rx) = oneshot::channel();
host.execute_pvf(
PvfWithExecutorParams::from_discriminator(1),
PvfPrepData::from_discriminator(1),
TEST_EXECUTION_TIMEOUT,
b"pvf".to_vec(),
Priority::Critical,
@@ -1458,7 +1408,7 @@ mod tests {
// Submit another execute request. We shouldn't try to prepare again, yet.
let (result_tx_2, result_rx_2) = oneshot::channel();
host.execute_pvf(
PvfWithExecutorParams::from_discriminator(1),
PvfPrepData::from_discriminator(1),
TEST_EXECUTION_TIMEOUT,
b"pvf".to_vec(),
Priority::Critical,
@@ -1480,7 +1430,7 @@ mod tests {
// Submit another execute request.
let (result_tx_3, result_rx_3) = oneshot::channel();
host.execute_pvf(
PvfWithExecutorParams::from_discriminator(1),
PvfPrepData::from_discriminator(1),
TEST_EXECUTION_TIMEOUT,
b"pvf".to_vec(),
Priority::Critical,
@@ -1530,7 +1480,7 @@ mod tests {
// Submit an execute request that fails.
let (result_tx, result_rx) = oneshot::channel();
host.execute_pvf(
PvfWithExecutorParams::from_discriminator(1),
PvfPrepData::from_discriminator(1),
TEST_EXECUTION_TIMEOUT,
b"pvf".to_vec(),
Priority::Critical,
@@ -1563,7 +1513,7 @@ mod tests {
// Submit another execute request.
let (result_tx_2, result_rx_2) = oneshot::channel();
host.execute_pvf(
PvfWithExecutorParams::from_discriminator(1),
PvfPrepData::from_discriminator(1),
TEST_EXECUTION_TIMEOUT,
b"pvf".to_vec(),
Priority::Critical,
@@ -1588,7 +1538,7 @@ mod tests {
// Submit another execute request.
let (result_tx_3, result_rx_3) = oneshot::channel();
host.execute_pvf(
PvfWithExecutorParams::from_discriminator(1),
PvfPrepData::from_discriminator(1),
TEST_EXECUTION_TIMEOUT,
b"pvf".to_vec(),
Priority::Critical,
@@ -1615,7 +1565,7 @@ mod tests {
let mut host = test.host_handle();
// Submit a heads-up request that fails.
host.heads_up(vec![PvfWithExecutorParams::from_discriminator(1)]).await.unwrap();
host.heads_up(vec![PvfPrepData::from_discriminator(1)]).await.unwrap();
// The queue received the prepare request.
assert_matches!(
@@ -1632,7 +1582,7 @@ mod tests {
.unwrap();
// Submit another heads-up request.
host.heads_up(vec![PvfWithExecutorParams::from_discriminator(1)]).await.unwrap();
host.heads_up(vec![PvfPrepData::from_discriminator(1)]).await.unwrap();
// Assert the prepare queue is empty.
test.poll_ensure_to_prepare_queue_is_empty().await;
@@ -1641,7 +1591,7 @@ mod tests {
futures_timer::Delay::new(PREPARE_FAILURE_COOLDOWN).await;
// Submit another heads-up request.
host.heads_up(vec![PvfWithExecutorParams::from_discriminator(1)]).await.unwrap();
host.heads_up(vec![PvfPrepData::from_discriminator(1)]).await.unwrap();
// Assert the prepare queue contains the request.
assert_matches!(
@@ -1657,7 +1607,7 @@ mod tests {
let (result_tx, result_rx) = oneshot::channel();
host.execute_pvf(
PvfWithExecutorParams::from_discriminator(1),
PvfPrepData::from_discriminator(1),
TEST_EXECUTION_TIMEOUT,
b"pvf1".to_vec(),
Priority::Normal,