Introduce metrics into PVF validation host (#3603)

This commit is contained in:
Sergei Shulepov
2021-08-20 11:50:47 +02:00
committed by GitHub
parent 8792d0e407
commit ad0e42537d
13 changed files with 325 additions and 24 deletions
+43 -7
View File
@@ -16,6 +16,7 @@
use super::worker::{self, Outcome};
use crate::{
metrics::Metrics,
worker_common::{IdleWorker, WorkerHandle},
LOG_TARGET,
};
@@ -111,6 +112,7 @@ struct Pool {
from_pool: mpsc::UnboundedSender<FromPool>,
spawned: HopSlotMap<Worker, WorkerData>,
mux: Mux,
metrics: Metrics,
}
/// A fatal error that warrants stopping the event loop of the pool.
@@ -125,6 +127,7 @@ async fn run(
mut from_pool,
mut spawned,
mut mux,
metrics,
}: Pool,
) {
macro_rules! break_if_fatal {
@@ -143,6 +146,7 @@ async fn run(
to_pool = to_pool.next() => {
let to_pool = break_if_fatal!(to_pool.ok_or(Fatal));
handle_to_pool(
&metrics,
&program_path,
&cache_path,
spawn_timeout,
@@ -151,14 +155,17 @@ async fn run(
to_pool,
)
}
ev = mux.select_next_some() => break_if_fatal!(handle_mux(&mut from_pool, &mut spawned, ev)),
ev = mux.select_next_some() => {
break_if_fatal!(handle_mux(&metrics, &mut from_pool, &mut spawned, ev))
}
}
break_if_fatal!(purge_dead(&mut from_pool, &mut spawned).await);
break_if_fatal!(purge_dead(&metrics, &mut from_pool, &mut spawned).await);
}
}
async fn purge_dead(
metrics: &Metrics,
from_pool: &mut mpsc::UnboundedSender<FromPool>,
spawned: &mut HopSlotMap<Worker, WorkerData>,
) -> Result<(), Fatal> {
@@ -177,7 +184,7 @@ async fn purge_dead(
}
}
for w in to_remove {
if spawned.remove(w).is_some() {
if attempt_retire(metrics, spawned, w) {
reply(from_pool, FromPool::Rip(w))?;
}
}
@@ -185,6 +192,7 @@ async fn purge_dead(
}
fn handle_to_pool(
metrics: &Metrics,
program_path: &Path,
cache_path: &Path,
spawn_timeout: Duration,
@@ -195,11 +203,13 @@ fn handle_to_pool(
match to_pool {
ToPool::Spawn => {
tracing::debug!(target: LOG_TARGET, "spawning a new prepare worker");
metrics.prepare_worker().on_begin_spawn();
mux.push(spawn_worker_task(program_path.to_owned(), spawn_timeout).boxed());
},
ToPool::StartWork { worker, code, artifact_path, background_priority } => {
if let Some(data) = spawned.get_mut(worker) {
if let Some(idle) = data.idle.take() {
let preparation_timer = metrics.time_preparation();
mux.push(
start_work_task(
worker,
@@ -208,6 +218,7 @@ fn handle_to_pool(
cache_path.to_owned(),
artifact_path,
background_priority,
preparation_timer,
)
.boxed(),
);
@@ -227,7 +238,7 @@ fn handle_to_pool(
ToPool::Kill(worker) => {
tracing::debug!(target: LOG_TARGET, ?worker, "killing prepare worker");
// It may be absent if it was already removed by `purge_dead`.
let _ = spawned.remove(worker);
let _ = attempt_retire(metrics, spawned, worker);
},
ToPool::BumpPriority(worker) =>
if let Some(data) = spawned.get(worker) {
@@ -252,13 +263,14 @@ async fn spawn_worker_task(program_path: PathBuf, spawn_timeout: Duration) -> Po
}
}
async fn start_work_task(
async fn start_work_task<Timer>(
worker: Worker,
idle: IdleWorker,
code: Arc<Vec<u8>>,
cache_path: PathBuf,
artifact_path: PathBuf,
background_priority: bool,
_preparation_timer: Option<Timer>,
) -> PoolEvent {
let outcome =
worker::start_work(idle, code, &cache_path, artifact_path, background_priority).await;
@@ -266,12 +278,15 @@ async fn start_work_task(
}
fn handle_mux(
metrics: &Metrics,
from_pool: &mut mpsc::UnboundedSender<FromPool>,
spawned: &mut HopSlotMap<Worker, WorkerData>,
event: PoolEvent,
) -> Result<(), Fatal> {
match event {
PoolEvent::Spawn(idle, handle) => {
metrics.prepare_worker().on_spawned();
let worker = spawned.insert(WorkerData { idle: Some(idle), handle });
reply(from_pool, FromPool::Spawned(worker))?;
@@ -300,14 +315,14 @@ fn handle_mux(
Ok(())
},
Outcome::Unreachable => {
if spawned.remove(worker).is_some() {
if attempt_retire(metrics, spawned, worker) {
reply(from_pool, FromPool::Rip(worker))?;
}
Ok(())
},
Outcome::DidntMakeIt => {
if spawned.remove(worker).is_some() {
if attempt_retire(metrics, spawned, worker) {
reply(from_pool, FromPool::Concluded(worker, true))?;
}
@@ -322,8 +337,28 @@ fn reply(from_pool: &mut mpsc::UnboundedSender<FromPool>, m: FromPool) -> Result
from_pool.unbounded_send(m).map_err(|_| Fatal)
}
/// Retires `worker` by removing its entry from the `spawned` registry, if it is there.
///
/// Removing the entry drops it, which in turn kills the worker process.
///
/// Returns `true` if the worker was registered and has been removed (and thus killed), and
/// `false` if there was no such worker in the registry.
///
/// This function also takes care of bumping the retired-workers metric: it is reported once
/// for every successful removal and never for an absent worker.
fn attempt_retire(
    metrics: &Metrics,
    spawned: &mut HopSlotMap<Worker, WorkerData>,
    worker: Worker,
) -> bool {
    // The removed entry (if any) is dropped at the end of this statement, i.e. before the
    // metric below is touched.
    let was_registered = spawned.remove(worker).is_some();
    if was_registered {
        metrics.prepare_worker().on_retired();
    }
    was_registered
}
/// Spins up the pool and returns the future that should be polled to make the pool functional.
pub fn start(
metrics: Metrics,
program_path: PathBuf,
cache_path: PathBuf,
spawn_timeout: Duration,
@@ -332,6 +367,7 @@ pub fn start(
let (from_pool_tx, from_pool_rx) = mpsc::unbounded();
let run = run(Pool {
metrics,
program_path,
cache_path,
spawn_timeout,
+11 -1
View File
@@ -17,7 +17,7 @@
//! A queue that handles requests for PVF preparation.
use super::pool::{self, Worker};
use crate::{artifacts::ArtifactId, Priority, Pvf, LOG_TARGET};
use crate::{artifacts::ArtifactId, metrics::Metrics, Priority, Pvf, LOG_TARGET};
use always_assert::{always, never};
use async_std::path::PathBuf;
use futures::{channel::mpsc, stream::StreamExt as _, Future, SinkExt};
@@ -127,6 +127,8 @@ impl Unscheduled {
}
struct Queue {
metrics: Metrics,
to_queue_rx: mpsc::Receiver<ToQueue>,
from_queue_tx: mpsc::UnboundedSender<FromQueue>,
@@ -155,6 +157,7 @@ struct Fatal;
impl Queue {
fn new(
metrics: Metrics,
soft_capacity: usize,
hard_capacity: usize,
cache_path: PathBuf,
@@ -164,6 +167,7 @@ impl Queue {
from_pool_rx: mpsc::UnboundedReceiver<pool::FromPool>,
) -> Self {
Self {
metrics,
to_queue_rx,
from_queue_tx,
to_pool_tx,
@@ -218,6 +222,7 @@ async fn handle_enqueue(queue: &mut Queue, priority: Priority, pvf: Pvf) -> Resu
?priority,
"PVF is enqueued for preparation.",
);
queue.metrics.prepare_enqueued();
let artifact_id = pvf.as_artifact_id();
if never!(
@@ -316,6 +321,8 @@ async fn handle_worker_concluded(
worker: Worker,
rip: bool,
) -> Result<(), Fatal> {
queue.metrics.prepare_concluded();
macro_rules! never_none {
($expr:expr) => {
match $expr {
@@ -486,6 +493,7 @@ async fn send_pool(
/// Spins up the queue and returns the future that should be polled to make the queue functional.
pub fn start(
metrics: Metrics,
soft_capacity: usize,
hard_capacity: usize,
cache_path: PathBuf,
@@ -496,6 +504,7 @@ pub fn start(
let (from_queue_tx, from_queue_rx) = mpsc::unbounded();
let run = Queue::new(
metrics,
soft_capacity,
hard_capacity,
cache_path,
@@ -565,6 +574,7 @@ mod tests {
let workers: SlotMap<Worker, ()> = SlotMap::with_key();
let (to_queue_tx, from_queue_rx, run) = start(
Metrics::default(),
soft_capacity,
hard_capacity,
tempdir.path().to_owned().into(),