[Fix] flaky node tests (#1534)

* [Fix] flaky node tests

* fix tokio ver

* fix errors/warnings

* fix more errors

* some fixe

* fix err

* fix arg order for the base path to work properly

* comments

* remove extra dependencies and waiting for blocks

* fix errors

* bump sleep
This commit is contained in:
Roman Useinov
2022-08-10 19:27:09 +02:00
committed by GitHub
parent fa9fabc9db
commit f85a56b0f7
7 changed files with 348 additions and 337 deletions
+180 -178
View File
File diff suppressed because it is too large Load Diff
+2
View File
@@ -92,6 +92,8 @@ substrate-build-script-utils = { git = "https://github.com/paritytech/substrate"
assert_cmd = "2.0" assert_cmd = "2.0"
nix = "0.24" nix = "0.24"
tempfile = "3.3.0" tempfile = "3.3.0"
tokio = { version = "1.19.2", features = ["macros", "time", "parking_lot"] }
wait-timeout = "0.2"
[features] [features]
default = [] default = []
+108 -14
View File
@@ -14,25 +14,119 @@
// You should have received a copy of the GNU General Public License // You should have received a copy of the GNU General Public License
// along with Substrate. If not, see <http://www.gnu.org/licenses/>. // along with Substrate. If not, see <http://www.gnu.org/licenses/>.
use std::{ #![cfg(unix)]
process::{Child, ExitStatus},
thread,
time::Duration,
};
/// Wait for the given `child` the given ammount of `secs`. use assert_cmd::cargo::cargo_bin;
use nix::{
sys::signal::{kill, Signal},
unistd::Pid,
};
use std::{
io::{BufRead, BufReader, Read},
ops::{Deref, DerefMut},
path::Path,
process::{self, Child, Command, ExitStatus},
};
use tokio::time::{sleep, Duration};
/// Wait for the given `child` the given number of `secs`.
/// ///
/// Returns the `Some(exit status)` or `None` if the process did not finish in the given time. /// Returns the `Some(exit status)` or `None` if the process did not finish in the given time.
pub fn wait_for(child: &mut Child, secs: usize) -> Option<ExitStatus> { pub fn wait_for(child: &mut Child, secs: u64) -> Result<ExitStatus, ()> {
for _ in 0..secs { let result = wait_timeout::ChildExt::wait_timeout(child, Duration::from_secs(5.min(secs)))
match child.try_wait().unwrap() { .map_err(|_| ())?;
Some(status) => return Some(status), if let Some(exit_status) = result {
None => thread::sleep(Duration::from_secs(1)), Ok(exit_status)
} else {
if secs > 5 {
eprintln!("Child process taking over 5 seconds to exit gracefully");
let result = wait_timeout::ChildExt::wait_timeout(child, Duration::from_secs(secs - 5))
.map_err(|_| ())?;
if let Some(exit_status) = result {
return Ok(exit_status)
} }
} }
eprintln!("Took to long to exit. Killing..."); eprintln!("Took too long to exit (> {} seconds). Killing...", secs);
let _ = child.kill(); let _ = child.kill();
child.wait().unwrap(); child.wait().unwrap();
Err(())
None }
}
/// Run the node for a while (till the RPC is up + 30 secs)
/// TODO: needs to be revisited to hit the RPC
pub async fn run_node_for_a_while(base_path: &Path, args: &[&str], signal: Signal) {
let mut cmd = Command::new(cargo_bin("polkadot-parachain"))
.stdout(process::Stdio::piped())
.stderr(process::Stdio::piped())
.arg("-d")
.arg(base_path)
.args(args)
.spawn()
.unwrap();
let stderr = cmd.stderr.take().unwrap();
let mut child = KillChildOnDrop(cmd);
// TODO: use this instead of the timeout going forward?
let (_, _) = find_ws_url_from_output(stderr);
// TODO: Revisit this to find a better approach for collators
sleep(Duration::from_secs(120)).await;
assert!(child.try_wait().unwrap().is_none(), "the process should still be running");
// Stop the process
kill(Pid::from_raw(child.id().try_into().unwrap()), signal).unwrap();
assert!(wait_for(&mut child, 40).map(|x| x.success()).unwrap());
}
pub struct KillChildOnDrop(pub Child);
impl Drop for KillChildOnDrop {
fn drop(&mut self) {
let _ = self.0.kill();
}
}
impl Deref for KillChildOnDrop {
type Target = Child;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for KillChildOnDrop {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
/// Read the WS address from the output.
///
/// This is hack to get the actual bound sockaddr because
/// substrate assigns a random port if the specified port was already bound.
pub fn find_ws_url_from_output(read: impl Read + Send) -> (String, String) {
let mut data = String::new();
let ws_url = BufReader::new(read)
.lines()
.find_map(|line| {
let line =
line.expect("failed to obtain next line from stdout for WS address discovery");
data.push_str(&line);
// does the line contain our port (we expect this specific output from substrate).
let sock_addr = match line.split_once("Running JSON-RPC WS server: addr=") {
None => return None,
Some((_, after)) => after.split_once(",").unwrap().0,
};
Some(format!("ws://{}", sock_addr))
})
.expect("We should get a WebSocket address");
(ws_url, data)
} }
@@ -14,50 +14,26 @@
// You should have received a copy of the GNU General Public License // You should have received a copy of the GNU General Public License
// along with Substrate. If not, see <http://www.gnu.org/licenses/>. // along with Substrate. If not, see <http://www.gnu.org/licenses/>.
use assert_cmd::cargo::cargo_bin; use tempfile::tempdir;
use std::{convert::TryInto, fs, process::Command, thread, time::Duration};
mod common; mod common;
#[test] #[tokio::test]
#[cfg(unix)] #[cfg(unix)]
#[ignore] #[ignore]
fn polkadot_argument_parsing() { async fn polkadot_argument_parsing() {
use nix::{ use nix::sys::signal::Signal::{SIGINT, SIGTERM};
sys::signal::{ let base_dir = tempdir().expect("could not create a temp dir");
kill,
Signal::{self, SIGINT, SIGTERM},
},
unistd::Pid,
};
fn run_command_and_kill(signal: Signal) { let args = &[
let _ = fs::remove_dir_all("polkadot_argument_parsing");
let mut cmd = Command::new(cargo_bin("polkadot-parachain"))
.args(&[
"-d",
"polkadot_argument_parsing",
"--", "--",
"--dev", "--dev",
"--bootnodes", "--bootnodes",
"/ip4/127.0.0.1/tcp/30333/p2p/Qmbx43psh7LVkrYTRXisUpzCubbgYojkejzAgj5mteDnxy", "/ip4/127.0.0.1/tcp/30333/p2p/Qmbx43psh7LVkrYTRXisUpzCubbgYojkejzAgj5mteDnxy",
"--bootnodes", "--bootnodes",
"/ip4/127.0.0.1/tcp/50500/p2p/Qma6SpS7tzfCrhtgEVKR9Uhjmuv55ovC3kY6y6rPBxpWde", "/ip4/127.0.0.1/tcp/50500/p2p/Qma6SpS7tzfCrhtgEVKR9Uhjmuv55ovC3kY6y6rPBxpWde",
]) ];
.spawn()
.unwrap();
thread::sleep(Duration::from_secs(20)); common::run_node_for_a_while(base_dir.path(), args, SIGINT).await;
assert!(cmd.try_wait().unwrap().is_none(), "the process should still be running"); common::run_node_for_a_while(base_dir.path(), args, SIGTERM).await;
kill(Pid::from_raw(cmd.id().try_into().unwrap()), signal).unwrap();
assert_eq!(
common::wait_for(&mut cmd, 30).map(|x| x.success()),
Some(true),
"the process must exit gracefully after signal {}",
signal,
);
}
run_command_and_kill(SIGINT);
run_command_and_kill(SIGTERM);
} }
@@ -14,41 +14,20 @@
// You should have received a copy of the GNU General Public License // You should have received a copy of the GNU General Public License
// along with Substrate. If not, see <http://www.gnu.org/licenses/>. // along with Substrate. If not, see <http://www.gnu.org/licenses/>.
use assert_cmd::cargo::cargo_bin; use tempfile::tempdir;
use std::{convert::TryInto, fs, process::Command, thread, time::Duration};
mod common; mod common;
#[test] #[tokio::test]
#[cfg(unix)] #[cfg(unix)]
#[ignore] #[ignore]
fn interrupt_polkadot_mdns_issue_test() { async fn interrupt_polkadot_mdns_issue_test() {
use nix::{ use nix::sys::signal::Signal::{SIGINT, SIGTERM};
sys::signal::{
kill,
Signal::{self, SIGINT, SIGTERM},
},
unistd::Pid,
};
fn run_command_and_kill(signal: Signal) { let base_dir = tempdir().expect("could not create a temp dir");
let _ = fs::remove_dir_all("interrupt_polkadot_mdns_issue_test");
let mut cmd = Command::new(cargo_bin("polkadot-parachain"))
.args(&["-d", "interrupt_polkadot_mdns_issue_test", "--", "--dev"])
.spawn()
.unwrap();
thread::sleep(Duration::from_secs(20)); let args = &["--", "--dev"];
assert!(cmd.try_wait().unwrap().is_none(), "the process should still be running");
kill(Pid::from_raw(cmd.id().try_into().unwrap()), signal).unwrap();
assert_eq!(
common::wait_for(&mut cmd, 30).map(|x| x.success()),
Some(true),
"the process must exit gracefully after signal {}",
signal,
);
}
run_command_and_kill(SIGINT); common::run_node_for_a_while(base_dir.path(), args, SIGINT).await;
run_command_and_kill(SIGTERM); common::run_node_for_a_while(base_dir.path(), args, SIGTERM).await;
} }
@@ -15,59 +15,38 @@
// along with Substrate. If not, see <http://www.gnu.org/licenses/>. // along with Substrate. If not, see <http://www.gnu.org/licenses/>.
use assert_cmd::cargo::cargo_bin; use assert_cmd::cargo::cargo_bin;
use std::{convert::TryInto, process::Command, thread, time::Duration}; use nix::sys::signal::SIGINT;
use std::process::Command;
use tempfile::tempdir;
mod common; mod common;
#[test] #[tokio::test]
#[cfg(unix)] #[cfg(unix)]
#[ignore] #[ignore]
fn purge_chain_works() { async fn purge_chain_works() {
fn run_node_and_stop() -> tempfile::TempDir {
use nix::{
sys::signal::{kill, Signal::SIGINT},
unistd::Pid,
};
let base_path = tempfile::tempdir().unwrap();
let mut cmd = Command::new(cargo_bin("polkadot-parachain"))
.args(&["-d"])
.arg(base_path.path())
.args(&["--", "--dev"])
.spawn()
.unwrap();
// Let it produce some blocks.
thread::sleep(Duration::from_secs(30));
assert!(cmd.try_wait().unwrap().is_none(), "the process should still be running");
// Stop the process
kill(Pid::from_raw(cmd.id().try_into().unwrap()), SIGINT).unwrap();
assert!(common::wait_for(&mut cmd, 30).map(|x| x.success()).unwrap_or_default());
base_path
}
// Check that both databases are deleted // Check that both databases are deleted
{
let base_path = run_node_and_stop();
assert!(base_path.path().join("chains/local_testnet/db/full").exists()); let base_dir = tempdir().expect("could not create a temp dir");
assert!(base_path.path().join("polkadot/chains/dev/db/full").exists());
let args = &["--", "--dev"];
common::run_node_for_a_while(base_dir.path(), args, SIGINT).await;
assert!(base_dir.path().join("chains/local_testnet/db/full").exists());
assert!(base_dir.path().join("polkadot/chains/dev/db/full").exists());
let status = Command::new(cargo_bin("polkadot-parachain")) let status = Command::new(cargo_bin("polkadot-parachain"))
.args(&["purge-chain", "-d"]) .args(&["purge-chain", "-d"])
.arg(base_path.path()) .arg(base_dir.path())
.arg("-y") .arg("-y")
.status() .status()
.unwrap(); .unwrap();
assert!(status.success()); assert!(status.success());
// Make sure that the `parachain_local_testnet` chain folder exists, but the `db` is deleted. // Make sure that the `parachain_local_testnet` chain folder exists, but the `db` is deleted.
assert!(base_path.path().join("chains/local_testnet").exists()); assert!(base_dir.path().join("chains/local_testnet").exists());
assert!(!base_path.path().join("chains/local_testnet/db/full").exists()); assert!(!base_dir.path().join("chains/local_testnet/db/full").exists());
// assert!(base_path.path().join("polkadot/chains/dev").exists()); // assert!(base_path.path().join("polkadot/chains/dev").exists());
// assert!(!base_path.path().join("polkadot/chains/dev/db").exists()); // assert!(!base_path.path().join("polkadot/chains/dev/db").exists());
}
} }
@@ -14,41 +14,20 @@
// You should have received a copy of the GNU General Public License // You should have received a copy of the GNU General Public License
// along with Substrate. If not, see <http://www.gnu.org/licenses/>. // along with Substrate. If not, see <http://www.gnu.org/licenses/>.
use assert_cmd::cargo::cargo_bin; use tempfile::tempdir;
use std::{convert::TryInto, fs, process::Command, thread, time::Duration};
mod common; mod common;
#[test] #[tokio::test]
#[cfg(unix)] #[cfg(unix)]
#[ignore] #[ignore]
fn running_the_node_works_and_can_be_interrupted() { async fn running_the_node_works_and_can_be_interrupted() {
use nix::{ use nix::sys::signal::Signal::{SIGINT, SIGTERM};
sys::signal::{
kill,
Signal::{self, SIGINT, SIGTERM},
},
unistd::Pid,
};
fn run_command_and_kill(signal: Signal) { let base_dir = tempdir().expect("could not create a temp dir");
let _ = fs::remove_dir_all("interrupt_test");
let mut cmd = Command::new(cargo_bin("polkadot-parachain"))
.args(&["-d", "interrupt_test", "--", "--dev"])
.spawn()
.unwrap();
thread::sleep(Duration::from_secs(30)); let args = &["--", "--dev"];
assert!(cmd.try_wait().unwrap().is_none(), "the process should still be running");
kill(Pid::from_raw(cmd.id().try_into().unwrap()), signal).unwrap();
assert_eq!(
common::wait_for(&mut cmd, 30).map(|x| x.success()),
Some(true),
"the process must exit gracefully after signal {}",
signal,
);
}
run_command_and_kill(SIGINT); common::run_node_for_a_while(base_dir.path(), args, SIGINT).await;
run_command_and_kill(SIGTERM); common::run_node_for_a_while(base_dir.path(), args, SIGTERM).await;
} }