Use CPU clock timeout for PVF jobs (#6282)

* Put in skeleton logic for CPU-time-preparation

Still needed:
- Flesh out logic
- Refactor some spots
- Tests

* Continue filling in logic for prepare worker CPU time changes

* Fix compiler errors

* Update lenience factor

* Fix some clippy lints for PVF module

* Fix compilation errors

* Address some review comments

* Add logging

* Add another log

* Address some review comments; change Mutex to AtomicBool

* Refactor handling response bytes

* Add CPU clock timeout logic for execute jobs

* Properly handle AtomicBool flag

* Use `Ordering::Relaxed`

* Refactor thread coordination logic

* Fix bug

* Add some timing information to execute tests

* Add section about the mitigation to the IG

* minor: Change more `Ordering`s to `Relaxed`

* candidate-validation: Fix build errors
This commit is contained in:
Marcin S
2022-11-30 07:17:31 -05:00
committed by GitHub
parent c61860e9be
commit 28a4e90912
17 changed files with 536 additions and 170 deletions
+2 -2
View File
@@ -23,7 +23,7 @@ use polkadot_parachain::primitives::{
};
#[async_std::test]
async fn execute_good_on_parent() {
async fn execute_good_block_on_parent() {
let parent_head = HeadData { number: 0, parent_hash: [0; 32], post_state: hash_state(0) };
let block_data = BlockData { state: 0, add: 512 };
@@ -89,7 +89,7 @@ async fn execute_good_chain_on_parent() {
}
#[async_std::test]
async fn execute_bad_on_parent() {
async fn execute_bad_block_on_parent() {
let parent_head = HeadData { number: 0, parent_hash: [0; 32], post_state: hash_state(0) };
let block_data = BlockData {
+20 -4
View File
@@ -101,6 +101,7 @@ async fn terminates_on_timeout() {
#[async_std::test]
async fn parallel_execution() {
// Run some jobs that do not complete, thus timing out.
let host = TestHost::new();
let execute_pvf_future_1 = host.validate_candidate(
halt::wasm_binary_unwrap(),
@@ -124,11 +125,14 @@ async fn parallel_execution() {
let start = std::time::Instant::now();
let (_, _) = futures::join!(execute_pvf_future_1, execute_pvf_future_2);
// total time should be < 2 x EXECUTION_TIMEOUT_SEC
const EXECUTION_TIMEOUT_SEC: u64 = 3;
// Total time should be < 2 x TEST_EXECUTION_TIMEOUT (two workers run in parallel).
let duration = std::time::Instant::now().duration_since(start);
let max_duration = 2 * TEST_EXECUTION_TIMEOUT;
assert!(
std::time::Instant::now().duration_since(start) <
std::time::Duration::from_secs(EXECUTION_TIMEOUT_SEC * 2)
duration < max_duration,
"Expected duration {}ms to be less than {}ms",
duration.as_millis(),
max_duration.as_millis()
);
}
@@ -141,6 +145,7 @@ async fn execute_queue_doesnt_stall_if_workers_died() {
// Here we spawn 9 validation jobs (`0u8..=8` is inclusive) for the `halt` PVF and share those
// between 5 workers. The first five jobs should time out and their workers should be killed.
// For the remaining 4 jobs a new batch of workers should be spun up.
let start = std::time::Instant::now();
futures::future::join_all((0u8..=8).map(|_| {
host.validate_candidate(
halt::wasm_binary_unwrap(),
@@ -153,4 +158,15 @@ async fn execute_queue_doesnt_stall_if_workers_died() {
)
}))
.await;
// Total time should be >= 2 x TEST_EXECUTION_TIMEOUT (two separate sets of workers that should
// both time out).
let duration = std::time::Instant::now().duration_since(start);
let max_duration = 2 * TEST_EXECUTION_TIMEOUT;
assert!(
duration >= max_duration,
"Expected duration {}ms to be greater than or equal to {}ms",
duration.as_millis(),
max_duration.as_millis()
);
}