mirror of
https://github.com/pezkuwichain/pezkuwi-subxt.git
synced 2026-05-31 11:01:01 +00:00
Introduce metrics into PVF validation host (#3603)
This commit is contained in:
@@ -16,6 +16,7 @@
|
||||
|
||||
use super::worker::{self, Outcome};
|
||||
use crate::{
|
||||
metrics::Metrics,
|
||||
worker_common::{IdleWorker, WorkerHandle},
|
||||
LOG_TARGET,
|
||||
};
|
||||
@@ -111,6 +112,7 @@ struct Pool {
|
||||
from_pool: mpsc::UnboundedSender<FromPool>,
|
||||
spawned: HopSlotMap<Worker, WorkerData>,
|
||||
mux: Mux,
|
||||
metrics: Metrics,
|
||||
}
|
||||
|
||||
/// A fatal error that warrants stopping the event loop of the pool.
|
||||
@@ -125,6 +127,7 @@ async fn run(
|
||||
mut from_pool,
|
||||
mut spawned,
|
||||
mut mux,
|
||||
metrics,
|
||||
}: Pool,
|
||||
) {
|
||||
macro_rules! break_if_fatal {
|
||||
@@ -143,6 +146,7 @@ async fn run(
|
||||
to_pool = to_pool.next() => {
|
||||
let to_pool = break_if_fatal!(to_pool.ok_or(Fatal));
|
||||
handle_to_pool(
|
||||
&metrics,
|
||||
&program_path,
|
||||
&cache_path,
|
||||
spawn_timeout,
|
||||
@@ -151,14 +155,17 @@ async fn run(
|
||||
to_pool,
|
||||
)
|
||||
}
|
||||
ev = mux.select_next_some() => break_if_fatal!(handle_mux(&mut from_pool, &mut spawned, ev)),
|
||||
ev = mux.select_next_some() => {
|
||||
break_if_fatal!(handle_mux(&metrics, &mut from_pool, &mut spawned, ev))
|
||||
}
|
||||
}
|
||||
|
||||
break_if_fatal!(purge_dead(&mut from_pool, &mut spawned).await);
|
||||
break_if_fatal!(purge_dead(&metrics, &mut from_pool, &mut spawned).await);
|
||||
}
|
||||
}
|
||||
|
||||
async fn purge_dead(
|
||||
metrics: &Metrics,
|
||||
from_pool: &mut mpsc::UnboundedSender<FromPool>,
|
||||
spawned: &mut HopSlotMap<Worker, WorkerData>,
|
||||
) -> Result<(), Fatal> {
|
||||
@@ -177,7 +184,7 @@ async fn purge_dead(
|
||||
}
|
||||
}
|
||||
for w in to_remove {
|
||||
if spawned.remove(w).is_some() {
|
||||
if attempt_retire(metrics, spawned, w) {
|
||||
reply(from_pool, FromPool::Rip(w))?;
|
||||
}
|
||||
}
|
||||
@@ -185,6 +192,7 @@ async fn purge_dead(
|
||||
}
|
||||
|
||||
fn handle_to_pool(
|
||||
metrics: &Metrics,
|
||||
program_path: &Path,
|
||||
cache_path: &Path,
|
||||
spawn_timeout: Duration,
|
||||
@@ -195,11 +203,13 @@ fn handle_to_pool(
|
||||
match to_pool {
|
||||
ToPool::Spawn => {
|
||||
tracing::debug!(target: LOG_TARGET, "spawning a new prepare worker");
|
||||
metrics.prepare_worker().on_begin_spawn();
|
||||
mux.push(spawn_worker_task(program_path.to_owned(), spawn_timeout).boxed());
|
||||
},
|
||||
ToPool::StartWork { worker, code, artifact_path, background_priority } => {
|
||||
if let Some(data) = spawned.get_mut(worker) {
|
||||
if let Some(idle) = data.idle.take() {
|
||||
let preparation_timer = metrics.time_preparation();
|
||||
mux.push(
|
||||
start_work_task(
|
||||
worker,
|
||||
@@ -208,6 +218,7 @@ fn handle_to_pool(
|
||||
cache_path.to_owned(),
|
||||
artifact_path,
|
||||
background_priority,
|
||||
preparation_timer,
|
||||
)
|
||||
.boxed(),
|
||||
);
|
||||
@@ -227,7 +238,7 @@ fn handle_to_pool(
|
||||
ToPool::Kill(worker) => {
|
||||
tracing::debug!(target: LOG_TARGET, ?worker, "killing prepare worker");
|
||||
// It may be absent if it was already removed by `purge_dead`.
|
||||
let _ = spawned.remove(worker);
|
||||
let _ = attempt_retire(metrics, spawned, worker);
|
||||
},
|
||||
ToPool::BumpPriority(worker) =>
|
||||
if let Some(data) = spawned.get(worker) {
|
||||
@@ -252,13 +263,14 @@ async fn spawn_worker_task(program_path: PathBuf, spawn_timeout: Duration) -> Po
|
||||
}
|
||||
}
|
||||
|
||||
async fn start_work_task(
|
||||
async fn start_work_task<Timer>(
|
||||
worker: Worker,
|
||||
idle: IdleWorker,
|
||||
code: Arc<Vec<u8>>,
|
||||
cache_path: PathBuf,
|
||||
artifact_path: PathBuf,
|
||||
background_priority: bool,
|
||||
_preparation_timer: Option<Timer>,
|
||||
) -> PoolEvent {
|
||||
let outcome =
|
||||
worker::start_work(idle, code, &cache_path, artifact_path, background_priority).await;
|
||||
@@ -266,12 +278,15 @@ async fn start_work_task(
|
||||
}
|
||||
|
||||
fn handle_mux(
|
||||
metrics: &Metrics,
|
||||
from_pool: &mut mpsc::UnboundedSender<FromPool>,
|
||||
spawned: &mut HopSlotMap<Worker, WorkerData>,
|
||||
event: PoolEvent,
|
||||
) -> Result<(), Fatal> {
|
||||
match event {
|
||||
PoolEvent::Spawn(idle, handle) => {
|
||||
metrics.prepare_worker().on_spawned();
|
||||
|
||||
let worker = spawned.insert(WorkerData { idle: Some(idle), handle });
|
||||
|
||||
reply(from_pool, FromPool::Spawned(worker))?;
|
||||
@@ -300,14 +315,14 @@ fn handle_mux(
|
||||
Ok(())
|
||||
},
|
||||
Outcome::Unreachable => {
|
||||
if spawned.remove(worker).is_some() {
|
||||
if attempt_retire(metrics, spawned, worker) {
|
||||
reply(from_pool, FromPool::Rip(worker))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
},
|
||||
Outcome::DidntMakeIt => {
|
||||
if spawned.remove(worker).is_some() {
|
||||
if attempt_retire(metrics, spawned, worker) {
|
||||
reply(from_pool, FromPool::Concluded(worker, true))?;
|
||||
}
|
||||
|
||||
@@ -322,8 +337,28 @@ fn reply(from_pool: &mut mpsc::UnboundedSender<FromPool>, m: FromPool) -> Result
|
||||
from_pool.unbounded_send(m).map_err(|_| Fatal)
|
||||
}
|
||||
|
||||
/// Removes the given worker from the registry if it is there. This will lead to dropping and hence
|
||||
/// to killing the worker process.
|
||||
///
|
||||
/// Returns `true` if the worker exists and was removed and the process was killed.
|
||||
///
|
||||
/// This function takes care of counting the retired workers metric.
|
||||
fn attempt_retire(
|
||||
metrics: &Metrics,
|
||||
spawned: &mut HopSlotMap<Worker, WorkerData>,
|
||||
worker: Worker,
|
||||
) -> bool {
|
||||
if spawned.remove(worker).is_some() {
|
||||
metrics.prepare_worker().on_retired();
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Spins up the pool and returns the future that should be polled to make the pool functional.
|
||||
pub fn start(
|
||||
metrics: Metrics,
|
||||
program_path: PathBuf,
|
||||
cache_path: PathBuf,
|
||||
spawn_timeout: Duration,
|
||||
@@ -332,6 +367,7 @@ pub fn start(
|
||||
let (from_pool_tx, from_pool_rx) = mpsc::unbounded();
|
||||
|
||||
let run = run(Pool {
|
||||
metrics,
|
||||
program_path,
|
||||
cache_path,
|
||||
spawn_timeout,
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
//! A queue that handles requests for PVF preparation.
|
||||
|
||||
use super::pool::{self, Worker};
|
||||
use crate::{artifacts::ArtifactId, Priority, Pvf, LOG_TARGET};
|
||||
use crate::{artifacts::ArtifactId, metrics::Metrics, Priority, Pvf, LOG_TARGET};
|
||||
use always_assert::{always, never};
|
||||
use async_std::path::PathBuf;
|
||||
use futures::{channel::mpsc, stream::StreamExt as _, Future, SinkExt};
|
||||
@@ -127,6 +127,8 @@ impl Unscheduled {
|
||||
}
|
||||
|
||||
struct Queue {
|
||||
metrics: Metrics,
|
||||
|
||||
to_queue_rx: mpsc::Receiver<ToQueue>,
|
||||
from_queue_tx: mpsc::UnboundedSender<FromQueue>,
|
||||
|
||||
@@ -155,6 +157,7 @@ struct Fatal;
|
||||
|
||||
impl Queue {
|
||||
fn new(
|
||||
metrics: Metrics,
|
||||
soft_capacity: usize,
|
||||
hard_capacity: usize,
|
||||
cache_path: PathBuf,
|
||||
@@ -164,6 +167,7 @@ impl Queue {
|
||||
from_pool_rx: mpsc::UnboundedReceiver<pool::FromPool>,
|
||||
) -> Self {
|
||||
Self {
|
||||
metrics,
|
||||
to_queue_rx,
|
||||
from_queue_tx,
|
||||
to_pool_tx,
|
||||
@@ -218,6 +222,7 @@ async fn handle_enqueue(queue: &mut Queue, priority: Priority, pvf: Pvf) -> Resu
|
||||
?priority,
|
||||
"PVF is enqueued for preparation.",
|
||||
);
|
||||
queue.metrics.prepare_enqueued();
|
||||
|
||||
let artifact_id = pvf.as_artifact_id();
|
||||
if never!(
|
||||
@@ -316,6 +321,8 @@ async fn handle_worker_concluded(
|
||||
worker: Worker,
|
||||
rip: bool,
|
||||
) -> Result<(), Fatal> {
|
||||
queue.metrics.prepare_concluded();
|
||||
|
||||
macro_rules! never_none {
|
||||
($expr:expr) => {
|
||||
match $expr {
|
||||
@@ -486,6 +493,7 @@ async fn send_pool(
|
||||
|
||||
/// Spins up the queue and returns the future that should be polled to make the queue functional.
|
||||
pub fn start(
|
||||
metrics: Metrics,
|
||||
soft_capacity: usize,
|
||||
hard_capacity: usize,
|
||||
cache_path: PathBuf,
|
||||
@@ -496,6 +504,7 @@ pub fn start(
|
||||
let (from_queue_tx, from_queue_rx) = mpsc::unbounded();
|
||||
|
||||
let run = Queue::new(
|
||||
metrics,
|
||||
soft_capacity,
|
||||
hard_capacity,
|
||||
cache_path,
|
||||
@@ -565,6 +574,7 @@ mod tests {
|
||||
let workers: SlotMap<Worker, ()> = SlotMap::with_key();
|
||||
|
||||
let (to_queue_tx, from_queue_rx, run) = start(
|
||||
Metrics::default(),
|
||||
soft_capacity,
|
||||
hard_capacity,
|
||||
tempdir.path().to_owned().into(),
|
||||
|
||||
Reference in New Issue
Block a user