Move PVF timeouts to executor environment parameters (#6823)

* Move PVF timeouts to executor environment parameters

* Typo

Co-authored-by: Marcin S. <marcin@realemail.net>

* Fix comments

* Change handle_import_statements to FatalResult (#6820)

* Changing dispute db errors to fatal

* fmt

* Change node-key for bootnodes (#6772)

* Additional tracing in `provisioner`, `vote_selection` and `dispute-coordinator` (#6775)

* Additional tracing in `provisioner`, `vote_selection`

* Add `fetched_onchain_disputes` metric to provisioner

* Some tracelines in dispute-coordinator

TODO: cherry-pick this into the initial branch!

* Remove spammy logs

* Remove some trace lines

* Rename and fix things

* Fix comments

* Typo

* Minor fixes

* Add codec indexes; Remove macro

---------

Co-authored-by: Marcin S. <marcin@realemail.net>
Co-authored-by: Bradley Olson <34992650+BradleyOlson64@users.noreply.github.com>
Co-authored-by: Petr Mensik <petr.mensik1@gmail.com>
Co-authored-by: Tsvetomir Dimitrov <tsvetomir@parity.io>
This commit is contained in:
s0me0ne-unkn0wn
2023-03-08 23:43:51 +01:00
committed by GitHub
parent 1c2215a75a
commit 03d4af104f
25 changed files with 359 additions and 415 deletions
+6 -21
View File
@@ -18,7 +18,7 @@ use super::worker::{self, Outcome};
use crate::{
error::{PrepareError, PrepareResult},
metrics::Metrics,
pvf::PvfWithExecutorParams,
pvf::PvfPrepData,
worker_common::{IdleWorker, WorkerHandle},
LOG_TARGET,
};
@@ -65,12 +65,7 @@ pub enum ToPool {
///
/// In either case, the worker is considered busy and no further `StartWork` messages should be
/// sent until either a `Concluded` or a `Rip` message is received.
StartWork {
worker: Worker,
pvf_with_params: PvfWithExecutorParams,
artifact_path: PathBuf,
preparation_timeout: Duration,
},
StartWork { worker: Worker, pvf: PvfPrepData, artifact_path: PathBuf },
}
/// A message sent from pool to its client.
@@ -214,7 +209,7 @@ fn handle_to_pool(
metrics.prepare_worker().on_begin_spawn();
mux.push(spawn_worker_task(program_path.to_owned(), spawn_timeout).boxed());
},
ToPool::StartWork { worker, pvf_with_params, artifact_path, preparation_timeout } => {
ToPool::StartWork { worker, pvf, artifact_path } => {
if let Some(data) = spawned.get_mut(worker) {
if let Some(idle) = data.idle.take() {
let preparation_timer = metrics.time_preparation();
@@ -223,10 +218,9 @@ fn handle_to_pool(
metrics.clone(),
worker,
idle,
pvf_with_params,
pvf,
cache_path.to_owned(),
artifact_path,
preparation_timeout,
preparation_timer,
)
.boxed(),
@@ -272,21 +266,12 @@ async fn start_work_task<Timer>(
metrics: Metrics,
worker: Worker,
idle: IdleWorker,
pvf_with_params: PvfWithExecutorParams,
pvf: PvfPrepData,
cache_path: PathBuf,
artifact_path: PathBuf,
preparation_timeout: Duration,
_preparation_timer: Option<Timer>,
) -> PoolEvent {
let outcome = worker::start_work(
&metrics,
idle,
pvf_with_params,
&cache_path,
artifact_path,
preparation_timeout,
)
.await;
let outcome = worker::start_work(&metrics, idle, pvf, &cache_path, artifact_path).await;
PoolEvent::StartWork(worker, outcome)
}
+33 -85
View File
@@ -18,17 +18,18 @@
use super::pool::{self, Worker};
use crate::{
artifacts::ArtifactId, metrics::Metrics, PrepareResult, Priority, PvfWithExecutorParams,
LOG_TARGET,
artifacts::ArtifactId, metrics::Metrics, PrepareResult, Priority, PvfPrepData, LOG_TARGET,
};
use always_assert::{always, never};
use futures::{channel::mpsc, stream::StreamExt as _, Future, SinkExt};
use std::{
collections::{HashMap, VecDeque},
path::PathBuf,
time::Duration,
};
#[cfg(test)]
use std::time::Duration;
/// A request to the queue.
#[derive(Debug)]
pub enum ToQueue {
@@ -36,11 +37,7 @@ pub enum ToQueue {
///
/// Note that it is incorrect to enqueue the same PVF again without first receiving the
/// [`FromQueue`] response.
Enqueue {
priority: Priority,
pvf_with_params: PvfWithExecutorParams,
preparation_timeout: Duration,
},
Enqueue { priority: Priority, pvf: PvfPrepData },
}
/// A response from queue.
@@ -85,9 +82,7 @@ slotmap::new_key_type! { pub struct Job; }
struct JobData {
/// The priority of this job. Can be bumped.
priority: Priority,
pvf_with_params: PvfWithExecutorParams,
/// The timeout for the preparation job.
preparation_timeout: Duration,
pvf: PvfPrepData,
worker: Option<Worker>,
}
@@ -215,8 +210,8 @@ impl Queue {
async fn handle_to_queue(queue: &mut Queue, to_queue: ToQueue) -> Result<(), Fatal> {
match to_queue {
ToQueue::Enqueue { priority, pvf_with_params, preparation_timeout } => {
handle_enqueue(queue, priority, pvf_with_params, preparation_timeout).await?;
ToQueue::Enqueue { priority, pvf } => {
handle_enqueue(queue, priority, pvf).await?;
},
}
Ok(())
@@ -225,19 +220,18 @@ async fn handle_to_queue(queue: &mut Queue, to_queue: ToQueue) -> Result<(), Fat
async fn handle_enqueue(
queue: &mut Queue,
priority: Priority,
pvf_with_params: PvfWithExecutorParams,
preparation_timeout: Duration,
pvf: PvfPrepData,
) -> Result<(), Fatal> {
gum::debug!(
target: LOG_TARGET,
validation_code_hash = ?pvf_with_params.code_hash(),
validation_code_hash = ?pvf.code_hash(),
?priority,
?preparation_timeout,
preparation_timeout = ?pvf.prep_timeout,
"PVF is enqueued for preparation.",
);
queue.metrics.prepare_enqueued();
let artifact_id = pvf_with_params.as_artifact_id();
let artifact_id = pvf.as_artifact_id();
if never!(
queue.artifact_id_to_job.contains_key(&artifact_id),
"second Enqueue sent for a known artifact"
@@ -254,10 +248,7 @@ async fn handle_enqueue(
return Ok(())
}
let job =
queue
.jobs
.insert(JobData { priority, pvf_with_params, preparation_timeout, worker: None });
let job = queue.jobs.insert(JobData { priority, pvf, worker: None });
queue.artifact_id_to_job.insert(artifact_id, job);
if let Some(available) = find_idle_worker(queue) {
@@ -348,7 +339,7 @@ async fn handle_worker_concluded(
// this can't be None;
// qed.
let job_data = never_none!(queue.jobs.remove(job));
let artifact_id = job_data.pvf_with_params.as_artifact_id();
let artifact_id = job_data.pvf.as_artifact_id();
queue.artifact_id_to_job.remove(&artifact_id);
@@ -434,7 +425,7 @@ async fn spawn_extra_worker(queue: &mut Queue, critical: bool) -> Result<(), Fat
async fn assign(queue: &mut Queue, worker: Worker, job: Job) -> Result<(), Fatal> {
let job_data = &mut queue.jobs[job];
let artifact_id = job_data.pvf_with_params.as_artifact_id();
let artifact_id = job_data.pvf.as_artifact_id();
let artifact_path = artifact_id.path(&queue.cache_path);
job_data.worker = Some(worker);
@@ -443,12 +434,7 @@ async fn assign(queue: &mut Queue, worker: Worker, job: Job) -> Result<(), Fatal
send_pool(
&mut queue.to_pool_tx,
pool::ToPool::StartWork {
worker,
pvf_with_params: job_data.pvf_with_params.clone(),
artifact_path,
preparation_timeout: job_data.preparation_timeout,
},
pool::ToPool::StartWork { worker, pvf: job_data.pvf.clone(), artifact_path },
)
.await?;
@@ -503,9 +489,7 @@ pub fn start(
mod tests {
use super::*;
use crate::{
error::PrepareError,
host::{LENIENT_PREPARATION_TIMEOUT, PRECHECK_PREPARATION_TIMEOUT},
prepare::PrepareStats,
error::PrepareError, host::tests::TEST_PREPARATION_TIMEOUT, prepare::PrepareStats,
};
use assert_matches::assert_matches;
use futures::{future::BoxFuture, FutureExt};
@@ -513,8 +497,8 @@ mod tests {
use std::task::Poll;
/// Creates a new PVF whose artifact id can be uniquely identified by the given number.
fn pvf_with_params(descriminator: u32) -> PvfWithExecutorParams {
PvfWithExecutorParams::from_discriminator(descriminator)
fn pvf(discriminator: u32) -> PvfPrepData {
PvfPrepData::from_discriminator(discriminator)
}
async fn run_until<R>(
@@ -621,11 +605,7 @@ mod tests {
async fn properly_concludes() {
let mut test = Test::new(2, 2);
test.send_queue(ToQueue::Enqueue {
priority: Priority::Normal,
pvf_with_params: pvf_with_params(1),
preparation_timeout: PRECHECK_PREPARATION_TIMEOUT,
});
test.send_queue(ToQueue::Enqueue { priority: Priority::Normal, pvf: pvf(1) });
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
let w = test.workers.insert(());
@@ -636,10 +616,7 @@ mod tests {
result: Ok(PrepareStats::default()),
});
assert_eq!(
test.poll_and_recv_from_queue().await.artifact_id,
pvf_with_params(1).as_artifact_id()
);
assert_eq!(test.poll_and_recv_from_queue().await.artifact_id, pvf(1).as_artifact_id());
}
#[tokio::test]
@@ -647,22 +624,12 @@ mod tests {
let mut test = Test::new(2, 3);
let priority = Priority::Normal;
let preparation_timeout = PRECHECK_PREPARATION_TIMEOUT;
test.send_queue(ToQueue::Enqueue {
priority,
pvf_with_params: PvfWithExecutorParams::from_discriminator(1),
preparation_timeout,
});
test.send_queue(ToQueue::Enqueue {
priority,
pvf_with_params: PvfWithExecutorParams::from_discriminator(2),
preparation_timeout,
});
test.send_queue(ToQueue::Enqueue { priority, pvf: PvfPrepData::from_discriminator(1) });
test.send_queue(ToQueue::Enqueue { priority, pvf: PvfPrepData::from_discriminator(2) });
// Start a non-precheck preparation for this one.
test.send_queue(ToQueue::Enqueue {
priority,
pvf_with_params: PvfWithExecutorParams::from_discriminator(3),
preparation_timeout: LENIENT_PREPARATION_TIMEOUT,
pvf: PvfPrepData::from_discriminator_and_timeout(3, TEST_PREPARATION_TIMEOUT * 3),
});
// Receive only two spawns.
@@ -690,8 +657,7 @@ mod tests {
// Enqueue a critical job.
test.send_queue(ToQueue::Enqueue {
priority: Priority::Critical,
pvf_with_params: PvfWithExecutorParams::from_discriminator(4),
preparation_timeout,
pvf: PvfPrepData::from_discriminator(4),
});
// 2 out of 2 are working, but there is a critical job incoming. That means that spawning
@@ -702,12 +668,10 @@ mod tests {
#[tokio::test]
async fn cull_unwanted() {
let mut test = Test::new(1, 2);
let preparation_timeout = PRECHECK_PREPARATION_TIMEOUT;
test.send_queue(ToQueue::Enqueue {
priority: Priority::Normal,
pvf_with_params: PvfWithExecutorParams::from_discriminator(1),
preparation_timeout,
pvf: PvfPrepData::from_discriminator(1),
});
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
let w1 = test.workers.insert(());
@@ -717,8 +681,7 @@ mod tests {
// Enqueue a critical job, which warrants spawning over the soft limit.
test.send_queue(ToQueue::Enqueue {
priority: Priority::Critical,
pvf_with_params: PvfWithExecutorParams::from_discriminator(2),
preparation_timeout,
pvf: PvfPrepData::from_discriminator(2),
});
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
@@ -742,22 +705,12 @@ mod tests {
let mut test = Test::new(2, 2);
let priority = Priority::Normal;
let preparation_timeout = PRECHECK_PREPARATION_TIMEOUT;
test.send_queue(ToQueue::Enqueue {
priority,
pvf_with_params: PvfWithExecutorParams::from_discriminator(1),
preparation_timeout,
});
test.send_queue(ToQueue::Enqueue {
priority,
pvf_with_params: PvfWithExecutorParams::from_discriminator(2),
preparation_timeout,
});
test.send_queue(ToQueue::Enqueue { priority, pvf: PvfPrepData::from_discriminator(1) });
test.send_queue(ToQueue::Enqueue { priority, pvf: PvfPrepData::from_discriminator(2) });
// Start a non-precheck preparation for this one.
test.send_queue(ToQueue::Enqueue {
priority,
pvf_with_params: PvfWithExecutorParams::from_discriminator(3),
preparation_timeout: LENIENT_PREPARATION_TIMEOUT,
pvf: PvfPrepData::from_discriminator_and_timeout(3, TEST_PREPARATION_TIMEOUT * 3),
});
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
@@ -782,10 +735,7 @@ mod tests {
// Since there is still work, the queue requested one extra worker to spawn to handle the
// remaining enqueued work items.
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
assert_eq!(
test.poll_and_recv_from_queue().await.artifact_id,
pvf_with_params(1).as_artifact_id()
);
assert_eq!(test.poll_and_recv_from_queue().await.artifact_id, pvf(1).as_artifact_id());
}
#[tokio::test]
@@ -794,8 +744,7 @@ mod tests {
test.send_queue(ToQueue::Enqueue {
priority: Priority::Normal,
pvf_with_params: PvfWithExecutorParams::from_discriminator(1),
preparation_timeout: PRECHECK_PREPARATION_TIMEOUT,
pvf: PvfPrepData::from_discriminator(1),
});
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
@@ -819,8 +768,7 @@ mod tests {
test.send_queue(ToQueue::Enqueue {
priority: Priority::Normal,
pvf_with_params: PvfWithExecutorParams::from_discriminator(1),
preparation_timeout: PRECHECK_PREPARATION_TIMEOUT,
pvf: PvfPrepData::from_discriminator(1),
});
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
+21 -36
View File
@@ -24,7 +24,7 @@ use crate::{
error::{PrepareError, PrepareResult},
metrics::Metrics,
prepare::PrepareStats,
pvf::PvfWithExecutorParams,
pvf::PvfPrepData,
worker_common::{
bytes_to_path, cpu_time_monitor_loop, framed_recv, framed_send, path_to_bytes,
spawn_with_program_path, tmpfile_in, worker_event_loop, IdleWorker, SpawnErr, WorkerHandle,
@@ -84,10 +84,9 @@ pub enum Outcome {
pub async fn start_work(
metrics: &Metrics,
worker: IdleWorker,
pvf_with_params: PvfWithExecutorParams,
pvf: PvfPrepData,
cache_path: &Path,
artifact_path: PathBuf,
preparation_timeout: Duration,
) -> Outcome {
let IdleWorker { stream, pid } = worker;
@@ -99,9 +98,8 @@ pub async fn start_work(
);
with_tmp_file(stream, pid, cache_path, |tmp_file, mut stream| async move {
if let Err(err) =
send_request(&mut stream, pvf_with_params, &tmp_file, preparation_timeout).await
{
let preparation_timeout = pvf.prep_timeout;
if let Err(err) = send_request(&mut stream, pvf, &tmp_file).await {
gum::warn!(
target: LOG_TARGET,
worker_pid = %pid,
@@ -273,27 +271,22 @@ where
async fn send_request(
stream: &mut UnixStream,
pvf_with_params: PvfWithExecutorParams,
pvf: PvfPrepData,
tmp_file: &Path,
preparation_timeout: Duration,
) -> io::Result<()> {
framed_send(stream, &pvf_with_params.encode()).await?;
framed_send(stream, &pvf.encode()).await?;
framed_send(stream, path_to_bytes(tmp_file)).await?;
framed_send(stream, &preparation_timeout.encode()).await?;
Ok(())
}
async fn recv_request(
stream: &mut UnixStream,
) -> io::Result<(PvfWithExecutorParams, PathBuf, Duration)> {
let pvf_with_params = framed_recv(stream).await?;
let pvf_with_params =
PvfWithExecutorParams::decode(&mut &pvf_with_params[..]).map_err(|e| {
io::Error::new(
io::ErrorKind::Other,
format!("prepare pvf recv_request: failed to decode PvfWithExecutorParams: {}", e),
)
})?;
async fn recv_request(stream: &mut UnixStream) -> io::Result<(PvfPrepData, PathBuf)> {
let pvf = framed_recv(stream).await?;
let pvf = PvfPrepData::decode(&mut &pvf[..]).map_err(|e| {
io::Error::new(
io::ErrorKind::Other,
format!("prepare pvf recv_request: failed to decode PvfPrepData: {}", e),
)
})?;
let tmp_file = framed_recv(stream).await?;
let tmp_file = bytes_to_path(&tmp_file).ok_or_else(|| {
io::Error::new(
@@ -301,14 +294,7 @@ async fn recv_request(
"prepare pvf recv_request: non utf-8 artifact path".to_string(),
)
})?;
let preparation_timeout = framed_recv(stream).await?;
let preparation_timeout = Duration::decode(&mut &preparation_timeout[..]).map_err(|e| {
io::Error::new(
io::ErrorKind::Other,
format!("prepare pvf recv_request: failed to decode duration: {:?}", e),
)
})?;
Ok((pvf_with_params, tmp_file, preparation_timeout))
Ok((pvf, tmp_file))
}
async fn send_response(stream: &mut UnixStream, result: PrepareResult) -> io::Result<()> {
@@ -360,7 +346,7 @@ pub fn worker_entrypoint(socket_path: &str) {
worker_event_loop("prepare", socket_path, |rt_handle, mut stream| async move {
loop {
let worker_pid = std::process::id();
let (pvf_with_params, dest, preparation_timeout) = recv_request(&mut stream).await?;
let (pvf, dest) = recv_request(&mut stream).await?;
gum::debug!(
target: LOG_TARGET,
%worker_pid,
@@ -368,6 +354,7 @@ pub fn worker_entrypoint(socket_path: &str) {
);
let cpu_time_start = ProcessTime::now();
let preparation_timeout = pvf.prep_timeout;
// Run the memory tracker.
#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
@@ -385,7 +372,7 @@ pub fn worker_entrypoint(socket_path: &str) {
// Spawn another thread for preparation.
let prepare_fut = rt_handle
.spawn_blocking(move || {
let result = prepare_artifact(pvf_with_params);
let result = prepare_artifact(pvf);
// Get the `ru_maxrss` stat. If supported, call getrusage for the thread.
#[cfg(target_os = "linux")]
@@ -467,16 +454,14 @@ pub fn worker_entrypoint(socket_path: &str) {
});
}
fn prepare_artifact(
pvf_with_params: PvfWithExecutorParams,
) -> Result<CompiledArtifact, PrepareError> {
fn prepare_artifact(pvf: PvfPrepData) -> Result<CompiledArtifact, PrepareError> {
panic::catch_unwind(|| {
let blob = match crate::executor_intf::prevalidate(&pvf_with_params.code()) {
let blob = match crate::executor_intf::prevalidate(&pvf.code()) {
Err(err) => return Err(PrepareError::Prevalidation(format!("{:?}", err))),
Ok(b) => b,
};
match crate::executor_intf::prepare(blob, &pvf_with_params.executor_params()) {
match crate::executor_intf::prepare(blob, &pvf.executor_params()) {
Ok(compiled_artifact) => Ok(CompiledArtifact::new(compiled_artifact)),
Err(err) => Err(PrepareError::Preparation(format!("{:?}", err))),
}