Replace async-std with tokio in PVF subsystem (#6419)

* Replace async-std with tokio in PVF subsystem

* Rework workers to use `select!` instead of a mutex

The improvement in code readability is more important than the thread overhead.

* Remove unnecessary `fuse`

* Add explanation for `expect()`

* Update node/core/pvf/src/worker_common.rs

Co-authored-by: Bastian Köcher <info@kchr.de>

* Update node/core/pvf/src/worker_common.rs

Co-authored-by: Bastian Köcher <info@kchr.de>

* Address some review comments

* Shut down tokio runtime

* Run cargo fmt

* Add a small note about retries

* Fix up merge

* Rework `cpu_time_monitor_loop` to return when other thread finishes

* Add error string to PrepareError::IoErr variant

* Log when artifacts fail to prepare

* Fix `cpu_time_monitor_loop`; fix test

* Fix text

* Fix a couple of potential minor data races.

The first data race was due to logging in the CPU monitor thread even if the
job (in the other thread) had already finished. The job can technically finish
before or after the log.

It might be best to move this log to the `select!`s, where we are guaranteed
to have chosen the timed-out branch, although there would be a bit of
duplication.

Also, it was possible for the CPU monitor thread to complete before we executed
`finished_tx.send` in the other thread, which would trigger an error because the
receiver had already been dropped. And right now, such a spurious error from
`send` would be returned even if the job otherwise succeeded.

* Update Cargo.lock

Co-authored-by: Bastian Köcher <info@kchr.de>
This commit is contained in:
Marcin S
2023-01-10 04:51:13 -05:00
committed by GitHub
parent 166b921912
commit 44fd95661c
18 changed files with 298 additions and 472 deletions
+29 -20
View File
@@ -28,7 +28,6 @@ use crate::{
prepare, PrepareResult, Priority, Pvf, ValidationError, LOG_TARGET,
};
use always_assert::never;
use async_std::path::{Path, PathBuf};
use futures::{
channel::{mpsc, oneshot},
Future, FutureExt, SinkExt, StreamExt,
@@ -36,6 +35,7 @@ use futures::{
use polkadot_parachain::primitives::ValidationResult;
use std::{
collections::HashMap,
path::{Path, PathBuf},
time::{Duration, SystemTime},
};
@@ -171,7 +171,7 @@ pub struct Config {
impl Config {
/// Create a new instance of the configuration.
pub fn new(cache_path: std::path::PathBuf, program_path: std::path::PathBuf) -> Self {
// Do not contaminate the other parts of the codebase with the types from `async_std`.
// Do not contaminate the other parts of the codebase with the types from `tokio`.
let cache_path = PathBuf::from(cache_path);
let program_path = PathBuf::from(program_path);
@@ -723,10 +723,19 @@ async fn handle_prepare_done(
*state = match result {
Ok(cpu_time_elapsed) =>
ArtifactState::Prepared { last_time_needed: SystemTime::now(), cpu_time_elapsed },
Err(error) => ArtifactState::FailedToProcess {
last_time_failed: SystemTime::now(),
num_failures: *num_failures + 1,
error,
Err(error) => {
gum::debug!(
target: LOG_TARGET,
artifact_id = ?artifact_id,
num_failures = ?num_failures,
"Failed to process artifact: {}",
error
);
ArtifactState::FailedToProcess {
last_time_failed: SystemTime::now(),
num_failures: *num_failures + 1,
error,
}
},
};
@@ -778,7 +787,7 @@ async fn sweeper_task(mut sweeper_rx: mpsc::Receiver<PathBuf>) {
match sweeper_rx.next().await {
None => break,
Some(condemned) => {
let result = async_std::fs::remove_file(&condemned).await;
let result = tokio::fs::remove_file(&condemned).await;
gum::trace!(
target: LOG_TARGET,
?result,
@@ -827,7 +836,7 @@ mod tests {
const TEST_EXECUTION_TIMEOUT: Duration = Duration::from_secs(3);
#[async_std::test]
#[tokio::test]
async fn pulse_test() {
let pulse = pulse_every(Duration::from_millis(100));
futures::pin_mut!(pulse);
@@ -1017,19 +1026,19 @@ mod tests {
}
}
#[async_std::test]
#[tokio::test]
async fn shutdown_on_handle_drop() {
let test = Builder::default().build();
let join_handle = async_std::task::spawn(test.run);
let join_handle = tokio::task::spawn(test.run);
// Dropping the handle will lead to conclusion of the read part and thus will make the event
// loop to stop, which in turn will resolve the join handle.
drop(test.to_host_tx);
join_handle.await;
join_handle.await.unwrap();
}
#[async_std::test]
#[tokio::test]
async fn pruning() {
let mock_now = SystemTime::now() - Duration::from_millis(1000);
@@ -1059,7 +1068,7 @@ mod tests {
test.poll_ensure_to_sweeper_is_empty().await;
}
#[async_std::test]
#[tokio::test]
async fn execute_pvf_requests() {
let mut test = Builder::default().build();
let mut host = test.host_handle();
@@ -1159,7 +1168,7 @@ mod tests {
);
}
#[async_std::test]
#[tokio::test]
async fn precheck_pvf() {
let mut test = Builder::default().build();
let mut host = test.host_handle();
@@ -1214,7 +1223,7 @@ mod tests {
}
}
#[async_std::test]
#[tokio::test]
async fn test_prepare_done() {
let mut test = Builder::default().build();
let mut host = test.host_handle();
@@ -1301,7 +1310,7 @@ mod tests {
// Test that multiple prechecking requests do not trigger preparation retries if the first one
// failed.
#[async_std::test]
#[tokio::test]
async fn test_precheck_prepare_retry() {
let mut test = Builder::default().build();
let mut host = test.host_handle();
@@ -1344,7 +1353,7 @@ mod tests {
// Test that multiple execution requests trigger preparation retries if the first one failed due
// to a potentially non-reproducible error.
#[async_std::test]
#[tokio::test]
async fn test_execute_prepare_retry() {
let mut test = Builder::default().build();
let mut host = test.host_handle();
@@ -1414,7 +1423,7 @@ mod tests {
// Test that multiple execution requests don't trigger preparation retries if the first one
// failed due to a reproducible error (e.g. Prevalidation).
#[async_std::test]
#[tokio::test]
async fn test_execute_prepare_no_retry() {
let mut test = Builder::default().build();
let mut host = test.host_handle();
@@ -1480,7 +1489,7 @@ mod tests {
}
// Test that multiple heads-up requests trigger preparation retries if the first one failed.
#[async_std::test]
#[tokio::test]
async fn test_heads_up_prepare_retry() {
let mut test = Builder::default().build();
let mut host = test.host_handle();
@@ -1521,7 +1530,7 @@ mod tests {
);
}
#[async_std::test]
#[tokio::test]
async fn cancellation() {
let mut test = Builder::default().build();
let mut host = test.host_handle();