Try to fix flaky temp-base-path-work test (#13505)

* Try to fix flaky `temp-base-path-work` test

The test is most of the time failing when checking if the database path was deleted. The assumption
is that it takes a little bit more time by the OS to actually clean up the temp path under high
load. The pr tries to fix this by checking multiple times if the path was deleted. Besides that it
also ensures that the tests that require the benchmark feature don't fail when compiled without the feature.

* ".git/.scripts/commands/fmt/fmt.sh"

* Capture signals earlier

* Rewrite tests to let them having one big timeout

* Remove unneeded dep

* Update bin/node/cli/tests/common.rs

Co-authored-by: Koute <koute@users.noreply.github.com>

* Review feedback

* Update bin/node/cli/tests/common.rs

Co-authored-by: Anton <anton.kalyaev@gmail.com>

---------

Co-authored-by: command-bot <>
Co-authored-by: Koute <koute@users.noreply.github.com>
Co-authored-by: Anton <anton.kalyaev@gmail.com>
This commit is contained in:
Bastian Köcher
2023-03-16 12:24:20 +01:00
committed by GitHub
parent 3708b156d9
commit 3e73b7557e
8 changed files with 274 additions and 266 deletions
+62 -74
View File
@@ -20,54 +20,26 @@
use assert_cmd::cargo::cargo_bin;
use nix::{
sys::signal::{kill, Signal::SIGINT},
sys::signal::{kill, Signal, Signal::SIGINT},
unistd::Pid,
};
use node_primitives::{Hash, Header};
use regex::Regex;
use std::{
io::{BufRead, BufReader, Read},
ops::{Deref, DerefMut},
path::Path,
process::{self, Child, Command, ExitStatus},
path::{Path, PathBuf},
process::{self, Child, Command},
time::Duration,
};
use tokio::time::timeout;
/// Wait for the given `child` the given number of `secs`.
///
/// Returns the `Some(exit status)` or `None` if the process did not finish in the given time.
pub fn wait_for(child: &mut Child, secs: u64) -> Result<ExitStatus, ()> {
let result = wait_timeout::ChildExt::wait_timeout(child, Duration::from_secs(5.min(secs)))
.map_err(|_| ())?;
if let Some(exit_status) = result {
Ok(exit_status)
} else {
if secs > 5 {
eprintln!("Child process taking over 5 seconds to exit gracefully");
let result = wait_timeout::ChildExt::wait_timeout(child, Duration::from_secs(secs - 5))
.map_err(|_| ())?;
if let Some(exit_status) = result {
return Ok(exit_status)
}
}
eprintln!("Took too long to exit (> {} seconds). Killing...", secs);
let _ = child.kill();
child.wait().unwrap();
Err(())
}
}
/// Wait for at least n blocks to be finalized within a specified time.
pub async fn wait_n_finalized_blocks(
n: usize,
timeout_secs: u64,
url: &str,
) -> Result<(), tokio::time::error::Elapsed> {
timeout(Duration::from_secs(timeout_secs), wait_n_finalized_blocks_from(n, url)).await
/// Run the given `future` and panic if the `timeout` is hit.
pub async fn run_with_timeout(timeout: Duration, future: impl futures::Future<Output = ()>) {
tokio::time::timeout(timeout, future).await.expect("Hit timeout");
}
/// Wait for at least n blocks to be finalized from a specified node
pub async fn wait_n_finalized_blocks_from(n: usize, url: &str) {
pub async fn wait_n_finalized_blocks(n: usize, url: &str) {
use substrate_rpc_client::{ws_client, ChainApi};
let mut built_blocks = std::collections::HashSet::new();
@@ -87,47 +59,55 @@ pub async fn wait_n_finalized_blocks_from(n: usize, url: &str) {
/// Run the node for a while (3 blocks)
pub async fn run_node_for_a_while(base_path: &Path, args: &[&str]) {
let mut cmd = Command::new(cargo_bin("substrate"))
.stdout(process::Stdio::piped())
.stderr(process::Stdio::piped())
.args(args)
.arg("-d")
.arg(base_path)
.spawn()
.unwrap();
run_with_timeout(Duration::from_secs(60 * 10), async move {
let mut cmd = Command::new(cargo_bin("substrate"))
.stdout(process::Stdio::piped())
.stderr(process::Stdio::piped())
.args(args)
.arg("-d")
.arg(base_path)
.spawn()
.unwrap();
let stderr = cmd.stderr.take().unwrap();
let stderr = cmd.stderr.take().unwrap();
let mut child = KillChildOnDrop(cmd);
let mut child = KillChildOnDrop(cmd);
let (ws_url, _) = find_ws_url_from_output(stderr);
let ws_url = extract_info_from_output(stderr).0.ws_url;
// Let it produce some blocks.
let _ = wait_n_finalized_blocks(3, 30, &ws_url).await;
// Let it produce some blocks.
wait_n_finalized_blocks(3, &ws_url).await;
assert!(child.try_wait().unwrap().is_none(), "the process should still be running");
child.assert_still_running();
// Stop the process
kill(Pid::from_raw(child.id().try_into().unwrap()), SIGINT).unwrap();
assert!(wait_for(&mut child, 40).map(|x| x.success()).unwrap());
}
/// Run the node asserting that it fails with an error
pub fn run_node_assert_fail(base_path: &Path, args: &[&str]) {
let mut cmd = Command::new(cargo_bin("substrate"));
let mut child = KillChildOnDrop(cmd.args(args).arg("-d").arg(base_path).spawn().unwrap());
// Let it produce some blocks, but it should die within 10 seconds.
assert_ne!(
wait_timeout::ChildExt::wait_timeout(&mut *child, Duration::from_secs(10)).unwrap(),
None,
"the process should not be running anymore"
);
// Stop the process
child.stop();
})
.await
}
pub struct KillChildOnDrop(pub Child);
impl KillChildOnDrop {
/// Stop the child and wait until it is finished.
///
/// Asserts if the exit status isn't success.
pub fn stop(&mut self) {
self.stop_with_signal(SIGINT);
}
/// Same as [`Self::stop`] but takes the `signal` that is sent to stop the child.
pub fn stop_with_signal(&mut self, signal: Signal) {
kill(Pid::from_raw(self.id().try_into().unwrap()), signal).unwrap();
assert!(self.wait().unwrap().success());
}
/// Asserts that the child is still running.
pub fn assert_still_running(&mut self) {
assert!(self.try_wait().unwrap().is_none(), "the process should still be running");
}
}
impl Drop for KillChildOnDrop {
fn drop(&mut self) {
let _ = self.0.kill();
@@ -148,18 +128,22 @@ impl DerefMut for KillChildOnDrop {
}
}
/// Read the WS address from the output.
/// Information extracted from a running node.
pub struct NodeInfo {
pub ws_url: String,
pub db_path: PathBuf,
}
/// Extract [`NodeInfo`] from a running node by parsing its output.
///
/// This is hack to get the actual binded sockaddr because
/// substrate assigns a random port if the specified port was already binded.
pub fn find_ws_url_from_output(read: impl Read + Send) -> (String, String) {
/// Returns the [`NodeInfo`] and all the read data.
pub fn extract_info_from_output(read: impl Read + Send) -> (NodeInfo, String) {
let mut data = String::new();
let ws_url = BufReader::new(read)
.lines()
.find_map(|line| {
let line =
line.expect("failed to obtain next line from stdout for WS address discovery");
let line = line.expect("failed to obtain next line while extracting node info");
data.push_str(&line);
data.push_str("\n");
@@ -176,5 +160,9 @@ pub fn find_ws_url_from_output(read: impl Read + Send) -> (String, String) {
panic!("We should get a WebSocket address")
});
(ws_url, data)
// Database path is printed before the ws url!
let re = Regex::new(r"Database: .+ at (\S+)").unwrap();
let db_path = PathBuf::from(re.captures(data.as_str()).unwrap().get(1).unwrap().as_str());
(NodeInfo { ws_url, db_path }, data)
}