mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-05-31 09:51:02 +00:00
Use CPU clock timeout for PVF jobs (#6282)
* Put in skeleton logic for CPU-time-preparation Still needed: - Flesh out logic - Refactor some spots - Tests * Continue filling in logic for prepare worker CPU time changes * Fix compiler errors * Update lenience factor * Fix some clippy lints for PVF module * Fix compilation errors * Address some review comments * Add logging * Add another log * Address some review comments; change Mutex to AtomicBool * Refactor handling response bytes * Add CPU clock timeout logic for execute jobs * Properly handle AtomicBool flag * Use `Ordering::Relaxed` * Refactor thread coordination logic * Fix bug * Add some timing information to execute tests * Add section about the mitigation to the IG * minor: Change more `Ordering`s to `Relaxed` * candidate-validation: Fix build errors
This commit is contained in:
@@ -364,16 +364,14 @@ async fn handle_worker_concluded(
|
||||
// the pool up to the hard cap.
|
||||
spawn_extra_worker(queue, false).await?;
|
||||
}
|
||||
} else if queue.limits.should_cull(queue.workers.len() + queue.spawn_inflight) {
|
||||
// We no longer need services of this worker. Kill it.
|
||||
queue.workers.remove(worker);
|
||||
send_pool(&mut queue.to_pool_tx, pool::ToPool::Kill(worker)).await?;
|
||||
} else {
|
||||
if queue.limits.should_cull(queue.workers.len() + queue.spawn_inflight) {
|
||||
// We no longer need services of this worker. Kill it.
|
||||
queue.workers.remove(worker);
|
||||
send_pool(&mut queue.to_pool_tx, pool::ToPool::Kill(worker)).await?;
|
||||
} else {
|
||||
// see if there are more work available and schedule it.
|
||||
if let Some(job) = queue.unscheduled.next() {
|
||||
assign(queue, worker, job).await?;
|
||||
}
|
||||
// see if there are more work available and schedule it.
|
||||
if let Some(job) = queue.unscheduled.next() {
|
||||
assign(queue, worker, job).await?;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -618,7 +616,11 @@ mod tests {
|
||||
|
||||
let w = test.workers.insert(());
|
||||
test.send_from_pool(pool::FromPool::Spawned(w));
|
||||
test.send_from_pool(pool::FromPool::Concluded { worker: w, rip: false, result: Ok(()) });
|
||||
test.send_from_pool(pool::FromPool::Concluded {
|
||||
worker: w,
|
||||
rip: false,
|
||||
result: Ok(Duration::default()),
|
||||
});
|
||||
|
||||
assert_eq!(test.poll_and_recv_from_queue().await.artifact_id, pvf(1).as_artifact_id());
|
||||
}
|
||||
@@ -647,7 +649,11 @@ mod tests {
|
||||
assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });
|
||||
assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });
|
||||
|
||||
test.send_from_pool(pool::FromPool::Concluded { worker: w1, rip: false, result: Ok(()) });
|
||||
test.send_from_pool(pool::FromPool::Concluded {
|
||||
worker: w1,
|
||||
rip: false,
|
||||
result: Ok(Duration::default()),
|
||||
});
|
||||
|
||||
assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });
|
||||
|
||||
@@ -693,7 +699,11 @@ mod tests {
|
||||
// That's a bit silly in this context, but in production there will be an entire pool up
|
||||
// to the `soft_capacity` of workers and it doesn't matter which one to cull. Either way,
|
||||
// we just check that edge case of an edge case works.
|
||||
test.send_from_pool(pool::FromPool::Concluded { worker: w1, rip: false, result: Ok(()) });
|
||||
test.send_from_pool(pool::FromPool::Concluded {
|
||||
worker: w1,
|
||||
rip: false,
|
||||
result: Ok(Duration::default()),
|
||||
});
|
||||
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Kill(w1));
|
||||
}
|
||||
|
||||
@@ -719,7 +729,11 @@ mod tests {
|
||||
assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. });
|
||||
|
||||
// Conclude worker 1 and rip it.
|
||||
test.send_from_pool(pool::FromPool::Concluded { worker: w1, rip: true, result: Ok(()) });
|
||||
test.send_from_pool(pool::FromPool::Concluded {
|
||||
worker: w1,
|
||||
rip: true,
|
||||
result: Ok(Duration::default()),
|
||||
});
|
||||
|
||||
// Since there is still work, the queue requested one extra worker to spawn to handle the
|
||||
// remaining enqueued work items.
|
||||
|
||||
@@ -18,8 +18,9 @@ use crate::{
|
||||
artifacts::CompiledArtifact,
|
||||
error::{PrepareError, PrepareResult},
|
||||
worker_common::{
|
||||
bytes_to_path, framed_recv, framed_send, path_to_bytes, spawn_with_program_path,
|
||||
tmpfile_in, worker_event_loop, IdleWorker, SpawnErr, WorkerHandle,
|
||||
bytes_to_path, cpu_time_monitor_loop, framed_recv, framed_send, path_to_bytes,
|
||||
spawn_with_program_path, tmpfile_in, worker_event_loop, IdleWorker, JobKind, SpawnErr,
|
||||
WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR,
|
||||
},
|
||||
LOG_TARGET,
|
||||
};
|
||||
@@ -27,10 +28,20 @@ use async_std::{
|
||||
io,
|
||||
os::unix::net::UnixStream,
|
||||
path::{Path, PathBuf},
|
||||
task,
|
||||
};
|
||||
use cpu_time::ProcessTime;
|
||||
use parity_scale_codec::{Decode, Encode};
|
||||
use sp_core::hexdisplay::HexDisplay;
|
||||
use std::{panic, sync::Arc, time::Duration};
|
||||
use std::{
|
||||
panic,
|
||||
sync::{
|
||||
atomic::{AtomicBool, Ordering},
|
||||
Arc,
|
||||
},
|
||||
thread,
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
/// Spawns a new worker with the given program path that acts as the worker and the spawn timeout.
|
||||
///
|
||||
@@ -58,6 +69,13 @@ pub enum Outcome {
|
||||
DidNotMakeIt,
|
||||
}
|
||||
|
||||
/// How waiting for the prepare worker's response ended, as seen from the host.
#[derive(Debug)]
|
||||
enum Selected {
|
||||
/// A `PrepareResult` was decoded from the worker's response.
Done(PrepareResult),
|
||||
/// Receiving or decoding the response, or renaming the artifact, failed.
IoErr,
|
||||
/// The host-side wait timed out, or the reported CPU time exceeded the preparation timeout.
Deadline,
|
||||
}
|
||||
|
||||
/// Given the idle token of a worker and parameters of work, communicates with the worker and
|
||||
/// returns the outcome.
|
||||
pub async fn start_work(
|
||||
@@ -77,7 +95,7 @@ pub async fn start_work(
|
||||
);
|
||||
|
||||
with_tmp_file(pid, cache_path, |tmp_file| async move {
|
||||
if let Err(err) = send_request(&mut stream, code, &tmp_file).await {
|
||||
if let Err(err) = send_request(&mut stream, code, &tmp_file, preparation_timeout).await {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
@@ -88,78 +106,52 @@ pub async fn start_work(
|
||||
}
|
||||
|
||||
// Wait for the result from the worker, keeping in mind that there may be a timeout, the
|
||||
// worker may get killed, or something along these lines.
|
||||
// worker may get killed, or something along these lines. In that case we should propagate
|
||||
// the error to the pool.
|
||||
//
|
||||
// In that case we should propagate the error to the pool.
|
||||
// We use a generous timeout here. This is in addition to the one in the child process, in
|
||||
// case the child stalls. We have a wall clock timeout here in the host, but a CPU timeout
|
||||
// in the child. We want to use CPU time because it varies less than wall clock time under
|
||||
// load, but the CPU resources of the child can only be measured from the parent after the
|
||||
// child process terminates.
|
||||
let timeout = preparation_timeout * JOB_TIMEOUT_WALL_CLOCK_FACTOR;
|
||||
let result = async_std::future::timeout(timeout, framed_recv(&mut stream)).await;
|
||||
|
||||
#[derive(Debug)]
|
||||
enum Selected {
|
||||
Done(PrepareResult),
|
||||
IoErr,
|
||||
Deadline,
|
||||
}
|
||||
|
||||
let selected =
|
||||
match async_std::future::timeout(preparation_timeout, framed_recv(&mut stream)).await {
|
||||
Ok(Ok(response_bytes)) => {
|
||||
// Received bytes from worker within the time limit.
|
||||
// By convention we expect encoded `PrepareResult`.
|
||||
if let Ok(result) = PrepareResult::decode(&mut response_bytes.as_slice()) {
|
||||
if result.is_ok() {
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"promoting WIP artifact {} to {}",
|
||||
tmp_file.display(),
|
||||
artifact_path.display(),
|
||||
);
|
||||
|
||||
async_std::fs::rename(&tmp_file, &artifact_path)
|
||||
.await
|
||||
.map(|_| Selected::Done(result))
|
||||
.unwrap_or_else(|err| {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"failed to rename the artifact from {} to {}: {:?}",
|
||||
tmp_file.display(),
|
||||
artifact_path.display(),
|
||||
err,
|
||||
);
|
||||
Selected::IoErr
|
||||
})
|
||||
} else {
|
||||
Selected::Done(result)
|
||||
}
|
||||
} else {
|
||||
// We received invalid bytes from the worker.
|
||||
let bound_bytes = &response_bytes[..response_bytes.len().min(4)];
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"received unexpected response from the prepare worker: {}",
|
||||
HexDisplay::from(&bound_bytes),
|
||||
);
|
||||
Selected::IoErr
|
||||
}
|
||||
},
|
||||
Ok(Err(err)) => {
|
||||
// Communication error within the time limit.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"failed to recv a prepare response: {:?}",
|
||||
err,
|
||||
);
|
||||
Selected::IoErr
|
||||
},
|
||||
Err(_) => {
|
||||
// Timed out.
|
||||
Selected::Deadline
|
||||
},
|
||||
};
|
||||
let selected = match result {
|
||||
// Received bytes from worker within the time limit.
|
||||
Ok(Ok(response_bytes)) =>
|
||||
handle_response_bytes(
|
||||
response_bytes,
|
||||
pid,
|
||||
tmp_file,
|
||||
artifact_path,
|
||||
preparation_timeout,
|
||||
)
|
||||
.await,
|
||||
Ok(Err(err)) => {
|
||||
// Communication error within the time limit.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"failed to recv a prepare response: {:?}",
|
||||
err,
|
||||
);
|
||||
Selected::IoErr
|
||||
},
|
||||
Err(_) => {
|
||||
// Timed out here on the host.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"did not recv a prepare response within the time limit",
|
||||
);
|
||||
Selected::Deadline
|
||||
},
|
||||
};
|
||||
|
||||
match selected {
|
||||
// Timed out on the child. This should already be logged by the child.
|
||||
Selected::Done(Err(PrepareError::TimedOut)) => Outcome::TimedOut,
|
||||
Selected::Done(result) =>
|
||||
Outcome::Concluded { worker: IdleWorker { stream, pid }, result },
|
||||
Selected::Deadline => Outcome::TimedOut,
|
||||
@@ -169,6 +161,76 @@ pub async fn start_work(
|
||||
.await
|
||||
}
|
||||
|
||||
/// Handles the case where we successfully received response bytes on the host from the child.
///
/// Decodes `response_bytes` as a `PrepareResult` and maps it to a `Selected` value:
/// - `Selected::IoErr` if the bytes do not decode, or if renaming `tmp_file` to `artifact_path`
///   fails;
/// - `Selected::Done(result)` with the worker's error unchanged if the worker reported an `Err`;
/// - `Selected::Deadline` if the reported CPU time is greater than `preparation_timeout`;
/// - `Selected::Done(result)` once `tmp_file` has been renamed to `artifact_path`.
///
/// `pid` is only used to tag the log messages emitted here.
|
||||
async fn handle_response_bytes(
|
||||
response_bytes: Vec<u8>,
|
||||
pid: u32,
|
||||
tmp_file: PathBuf,
|
||||
artifact_path: PathBuf,
|
||||
preparation_timeout: Duration,
|
||||
) -> Selected {
|
||||
// By convention we expect encoded `PrepareResult`.
|
||||
let result = match PrepareResult::decode(&mut response_bytes.as_slice()) {
|
||||
Ok(result) => result,
|
||||
|
||||
Err(_) => {
|
||||
// We received invalid bytes from the worker.
|
||||
// Log at most the first four bytes of the unexpected payload.
let bound_bytes = &response_bytes[..response_bytes.len().min(4)];
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"received unexpected response from the prepare worker: {}",
|
||||
HexDisplay::from(&bound_bytes),
|
||||
);
|
||||
return Selected::IoErr
|
||||
},
|
||||
};
|
||||
// `Ok` carries the CPU time reported by the worker; an `Err` from the worker is passed
// through to the caller as-is.
let cpu_time_elapsed = match result {
|
||||
Ok(result) => result,
|
||||
Err(_) => return Selected::Done(result),
|
||||
};
|
||||
|
||||
if cpu_time_elapsed > preparation_timeout {
|
||||
// The job didn't complete within the timeout.
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"prepare job took {}ms cpu time, exceeded preparation timeout {}ms. Clearing WIP artifact {}",
|
||||
cpu_time_elapsed.as_millis(),
|
||||
preparation_timeout.as_millis(),
|
||||
tmp_file.display(),
|
||||
);
|
||||
|
||||
// Return a timeout error.
|
||||
//
|
||||
// NOTE: The artifact exists, but is located in a temporary file which
|
||||
// will be cleared by `with_tmp_file`.
|
||||
return Selected::Deadline
|
||||
}
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"promoting WIP artifact {} to {}",
|
||||
tmp_file.display(),
|
||||
artifact_path.display(),
|
||||
);
|
||||
|
||||
// Move the finished artifact to its final location; a failed rename is logged and reported
// as `Selected::IoErr`.
async_std::fs::rename(&tmp_file, &artifact_path)
|
||||
.await
|
||||
.map(|_| Selected::Done(result))
|
||||
.unwrap_or_else(|err| {
|
||||
gum::warn!(
|
||||
target: LOG_TARGET,
|
||||
worker_pid = %pid,
|
||||
"failed to rename the artifact from {} to {}: {:?}",
|
||||
tmp_file.display(),
|
||||
artifact_path.display(),
|
||||
err,
|
||||
);
|
||||
Selected::IoErr
|
||||
})
|
||||
}
|
||||
|
||||
/// Create a temporary file for an artifact at the given cache path and execute the given
|
||||
/// future/closure passing the file path in.
|
||||
///
|
||||
@@ -218,13 +280,15 @@ async fn send_request(
|
||||
stream: &mut UnixStream,
|
||||
code: Arc<Vec<u8>>,
|
||||
tmp_file: &Path,
|
||||
preparation_timeout: Duration,
|
||||
) -> io::Result<()> {
|
||||
framed_send(stream, &code).await?;
|
||||
framed_send(stream, path_to_bytes(tmp_file)).await?;
|
||||
framed_send(stream, &preparation_timeout.encode()).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn recv_request(stream: &mut UnixStream) -> io::Result<(Vec<u8>, PathBuf)> {
|
||||
async fn recv_request(stream: &mut UnixStream) -> io::Result<(Vec<u8>, PathBuf, Duration)> {
|
||||
let code = framed_recv(stream).await?;
|
||||
let tmp_file = framed_recv(stream).await?;
|
||||
let tmp_file = bytes_to_path(&tmp_file).ok_or_else(|| {
|
||||
@@ -233,7 +297,14 @@ async fn recv_request(stream: &mut UnixStream) -> io::Result<(Vec<u8>, PathBuf)>
|
||||
"prepare pvf recv_request: non utf-8 artifact path".to_string(),
|
||||
)
|
||||
})?;
|
||||
Ok((code, tmp_file))
|
||||
let preparation_timeout = framed_recv(stream).await?;
|
||||
let preparation_timeout = Duration::decode(&mut &preparation_timeout[..]).map_err(|_| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
"prepare pvf recv_request: failed to decode duration".to_string(),
|
||||
)
|
||||
})?;
|
||||
Ok((code, tmp_file, preparation_timeout))
|
||||
}
|
||||
|
||||
/// The entrypoint that the spawned prepare worker should start with. The `socket_path` specifies
|
||||
@@ -241,7 +312,7 @@ async fn recv_request(stream: &mut UnixStream) -> io::Result<(Vec<u8>, PathBuf)>
|
||||
pub fn worker_entrypoint(socket_path: &str) {
|
||||
worker_event_loop("prepare", socket_path, |mut stream| async move {
|
||||
loop {
|
||||
let (code, dest) = recv_request(&mut stream).await?;
|
||||
let (code, dest, preparation_timeout) = recv_request(&mut stream).await?;
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
@@ -249,18 +320,54 @@ pub fn worker_entrypoint(socket_path: &str) {
|
||||
"worker: preparing artifact",
|
||||
);
|
||||
|
||||
let result = match prepare_artifact(&code) {
|
||||
// Create a lock flag. We set it when either thread finishes.
|
||||
let lock = Arc::new(AtomicBool::new(false));
|
||||
let cpu_time_start = ProcessTime::now();
|
||||
|
||||
// Spawn a new thread that runs the CPU time monitor. Continuously wakes up from
|
||||
// sleeping and then either sleeps for the remaining CPU time, or kills the process if
|
||||
// we exceed the CPU timeout.
|
||||
let (stream_2, cpu_time_start_2, preparation_timeout_2, lock_2) =
|
||||
(stream.clone(), cpu_time_start, preparation_timeout, lock.clone());
|
||||
let handle =
|
||||
thread::Builder::new().name("CPU time monitor".into()).spawn(move || {
|
||||
task::block_on(async {
|
||||
cpu_time_monitor_loop(
|
||||
JobKind::Prepare,
|
||||
stream_2,
|
||||
cpu_time_start_2,
|
||||
preparation_timeout_2,
|
||||
lock_2,
|
||||
)
|
||||
.await;
|
||||
})
|
||||
})?;
|
||||
|
||||
// Prepares the artifact in a separate thread.
|
||||
let result = match prepare_artifact(&code).await {
|
||||
Err(err) => {
|
||||
// Serialized error will be written into the socket.
|
||||
Err(err)
|
||||
},
|
||||
Ok(compiled_artifact) => {
|
||||
let cpu_time_elapsed = cpu_time_start.elapsed();
|
||||
|
||||
let lock_result =
|
||||
lock.compare_exchange(false, true, Ordering::Relaxed, Ordering::Relaxed);
|
||||
if lock_result.is_err() {
|
||||
// The other thread is still sending an error response over the socket. Wait on it and
|
||||
// return.
|
||||
let _ = handle.join();
|
||||
// Monitor thread detected timeout and likely already terminated the
|
||||
// process, nothing to do.
|
||||
continue
|
||||
}
|
||||
|
||||
// Write the serialized artifact into a temp file.
|
||||
// PVF host only keeps artifacts statuses in its memory,
|
||||
// successfully compiled code gets stored on the disk (and
|
||||
// consequently deserialized by execute-workers). The prepare
|
||||
// worker is only required to send an empty `Ok` to the pool
|
||||
// to indicate the success.
|
||||
//
|
||||
// PVF host only keeps artifacts statuses in its memory, successfully compiled code gets stored
|
||||
// on the disk (and consequently deserialized by execute-workers). The prepare worker is only
|
||||
// required to send `Ok` to the pool to indicate the success.
|
||||
|
||||
gum::debug!(
|
||||
target: LOG_TARGET,
|
||||
@@ -270,7 +377,7 @@ pub fn worker_entrypoint(socket_path: &str) {
|
||||
);
|
||||
async_std::fs::write(&dest, &compiled_artifact).await?;
|
||||
|
||||
Ok(())
|
||||
Ok(cpu_time_elapsed)
|
||||
},
|
||||
};
|
||||
|
||||
@@ -279,7 +386,7 @@ pub fn worker_entrypoint(socket_path: &str) {
|
||||
});
|
||||
}
|
||||
|
||||
fn prepare_artifact(code: &[u8]) -> Result<CompiledArtifact, PrepareError> {
|
||||
async fn prepare_artifact(code: &[u8]) -> Result<CompiledArtifact, PrepareError> {
|
||||
panic::catch_unwind(|| {
|
||||
let blob = match crate::executor_intf::prevalidate(code) {
|
||||
Err(err) => return Err(PrepareError::Prevalidation(format!("{:?}", err))),
|
||||
|
||||
Reference in New Issue
Block a user