PVF: Refactor workers into separate crates, remove host dependency (#7253)

* PVF: Refactor workers into separate crates, remove host dependency

* Fix compile error

* Remove some leftover code

* Fix compile errors

* Update Cargo.lock

* Remove worker main.rs files

I accidentally copied these from the other PR. This PR isn't intended to
introduce standalone workers yet.

* Address review comments

* cargo fmt

* Update a couple of comments

* Update log targets
This commit is contained in:
Marcin S
2023-05-25 16:29:13 -04:00
committed by GitHub
parent 4146c26f3c
commit 8782dde411
50 changed files with 777 additions and 519 deletions
-33
View File
@@ -28,36 +28,3 @@ mod worker_intf;
pub use pool::start as start_pool;
pub use queue::{start as start_queue, FromQueue, ToQueue};
use parity_scale_codec::{Decode, Encode};
/// Preparation statistics, including the CPU time and memory taken.
///
/// Bundles the two measurements recorded for a single preparation job. The type derives
/// `Encode`/`Decode` (imported from `parity_scale_codec`), and `Default`, which the queue tests
/// use to build a placeholder successful result.
#[derive(Debug, Clone, Default, Encode, Decode)]
pub struct PrepareStats {
/// The CPU time that elapsed for the preparation job.
// NOTE(review): presumably CPU time rather than wall-clock time, as the name says — confirm
// against the code that fills this in.
pub cpu_time_elapsed: std::time::Duration,
/// The observed memory statistics for the preparation job.
///
/// See [`MemoryStats`]; which of its fields exist depends on the target OS and enabled features.
pub memory_stats: MemoryStats,
}
/// Helper struct to contain all the memory stats, including `MemoryAllocationStats` and, if
/// supported by the OS, `ru_maxrss`.
///
/// Both fields are conditionally compiled. On a target that is not Linux and is built without
/// the `jemalloc-allocator` feature, this struct therefore has no fields at all.
#[derive(Clone, Debug, Default, Encode, Decode)]
pub struct MemoryStats {
/// Memory stats from `tikv_jemalloc_ctl`.
///
/// Only present on Linux or when the `jemalloc-allocator` feature is enabled.
// NOTE(review): `None` presumably means the allocator stats were not collected — confirm
// against the code that populates this field.
#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
pub memory_tracker_stats: Option<MemoryAllocationStats>,
/// `ru_maxrss` from `getrusage`. `None` if an error occurred.
///
/// Only present on Linux.
// NOTE(review): the unit is whatever `getrusage` reports for `ru_maxrss` on the target
// (presumably kilobytes on Linux) — confirm before comparing with the byte-valued fields of
// `MemoryAllocationStats`.
#[cfg(target_os = "linux")]
pub max_rss: Option<i64>,
}
/// Statistics of collected memory metrics.
///
/// Only compiled on Linux or when the `jemalloc-allocator` feature is enabled, which is the same
/// `cfg` gate used on the `MemoryStats::memory_tracker_stats` field that holds this type.
#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
#[derive(Clone, Debug, Default, Encode, Decode)]
pub struct MemoryAllocationStats {
/// Total resident memory, in bytes.
pub resident: u64,
/// Total allocated memory, in bytes.
pub allocated: u64,
}
+5 -3
View File
@@ -16,16 +16,18 @@
use super::worker_intf::{self, Outcome};
use crate::{
error::{PrepareError, PrepareResult},
metrics::Metrics,
pvf::PvfPrepData,
worker_common::{IdleWorker, WorkerHandle},
worker_intf::{IdleWorker, WorkerHandle},
LOG_TARGET,
};
use always_assert::never;
use futures::{
channel::mpsc, future::BoxFuture, stream::FuturesUnordered, Future, FutureExt, StreamExt,
};
use polkadot_node_core_pvf_common::{
error::{PrepareError, PrepareResult},
pvf::PvfPrepData,
};
use slotmap::HopSlotMap;
use std::{
fmt,
+15 -11
View File
@@ -17,11 +17,10 @@
//! A queue that handles requests for PVF preparation.
use super::pool::{self, Worker};
use crate::{
artifacts::ArtifactId, metrics::Metrics, PrepareResult, Priority, PvfPrepData, LOG_TARGET,
};
use crate::{artifacts::ArtifactId, metrics::Metrics, Priority, LOG_TARGET};
use always_assert::{always, never};
use futures::{channel::mpsc, stream::StreamExt as _, Future, SinkExt};
use polkadot_node_core_pvf_common::{error::PrepareResult, pvf::PvfPrepData};
use std::{
collections::{HashMap, VecDeque},
path::PathBuf,
@@ -231,7 +230,7 @@ async fn handle_enqueue(
);
queue.metrics.prepare_enqueued();
let artifact_id = pvf.as_artifact_id();
let artifact_id = ArtifactId::from_pvf_prep_data(&pvf);
if never!(
queue.artifact_id_to_job.contains_key(&artifact_id),
"second Enqueue sent for a known artifact"
@@ -339,7 +338,7 @@ async fn handle_worker_concluded(
// this can't be None;
// qed.
let job_data = never_none!(queue.jobs.remove(job));
let artifact_id = job_data.pvf.as_artifact_id();
let artifact_id = ArtifactId::from_pvf_prep_data(&job_data.pvf);
queue.artifact_id_to_job.remove(&artifact_id);
@@ -425,7 +424,7 @@ async fn spawn_extra_worker(queue: &mut Queue, critical: bool) -> Result<(), Fat
async fn assign(queue: &mut Queue, worker: Worker, job: Job) -> Result<(), Fatal> {
let job_data = &mut queue.jobs[job];
let artifact_id = job_data.pvf.as_artifact_id();
let artifact_id = ArtifactId::from_pvf_prep_data(&job_data.pvf);
let artifact_path = artifact_id.path(&queue.cache_path);
job_data.worker = Some(worker);
@@ -488,11 +487,10 @@ pub fn start(
#[cfg(test)]
mod tests {
use super::*;
use crate::{
error::PrepareError, host::tests::TEST_PREPARATION_TIMEOUT, prepare::PrepareStats,
};
use crate::host::tests::TEST_PREPARATION_TIMEOUT;
use assert_matches::assert_matches;
use futures::{future::BoxFuture, FutureExt};
use polkadot_node_core_pvf_common::{error::PrepareError, prepare::PrepareStats};
use slotmap::SlotMap;
use std::task::Poll;
@@ -616,7 +614,10 @@ mod tests {
result: Ok(PrepareStats::default()),
});
assert_eq!(test.poll_and_recv_from_queue().await.artifact_id, pvf(1).as_artifact_id());
assert_eq!(
test.poll_and_recv_from_queue().await.artifact_id,
ArtifactId::from_pvf_prep_data(&pvf(1))
);
}
#[tokio::test]
@@ -735,7 +736,10 @@ mod tests {
// Since there is still work, the queue requested one extra worker to spawn to handle the
// remaining enqueued work items.
assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Spawn);
assert_eq!(test.poll_and_recv_from_queue().await.artifact_id, pvf(1).as_artifact_id());
assert_eq!(
test.poll_and_recv_from_queue().await.artifact_id,
ArtifactId::from_pvf_prep_data(&pvf(1))
);
}
#[tokio::test]
@@ -17,17 +17,20 @@
//! Host interface to the prepare worker.
use crate::{
error::{PrepareError, PrepareResult},
metrics::Metrics,
prepare::PrepareStats,
pvf::PvfPrepData,
worker_common::{
framed_recv, framed_send, path_to_bytes, spawn_with_program_path, tmpfile_in, IdleWorker,
SpawnErr, WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR,
worker_intf::{
path_to_bytes, spawn_with_program_path, tmpfile_in, IdleWorker, SpawnErr, WorkerHandle,
JOB_TIMEOUT_WALL_CLOCK_FACTOR,
},
LOG_TARGET,
};
use parity_scale_codec::{Decode, Encode};
use polkadot_node_core_pvf_common::{
error::{PrepareError, PrepareResult},
framed_recv, framed_send,
prepare::PrepareStats,
pvf::PvfPrepData,
};
use sp_core::hexdisplay::HexDisplay;
use std::{