[Fix] flaky node tests (#1534)

* [Fix] flaky node tests

* fix tokio ver

* fix errors/warnings

* fix more errors

* some fixe

* fix err

* fix arg order for the base path to work properly

* comments

* remove extra dependencies and waiting for blocks

* fix errors

* bump sleep
This commit is contained in:
Roman Useinov
2022-08-10 19:27:09 +02:00
committed by GitHub
parent 530a0aaf0c
commit b0f2816b81
7 changed files with 348 additions and 337 deletions
Generated
+180 -178
View File
File diff suppressed because it is too large Load Diff
+2
View File
@@ -92,6 +92,8 @@ substrate-build-script-utils = { git = "https://github.com/paritytech/substrate"
assert_cmd = "2.0"
nix = "0.24"
tempfile = "3.3.0"
tokio = { version = "1.19.2", features = ["macros", "time", "parking_lot"] }
wait-timeout = "0.2"
[features]
default = []
+108 -14
View File
@@ -14,25 +14,119 @@
// You should have received a copy of the GNU General Public License
// along with Substrate. If not, see <http://www.gnu.org/licenses/>.
use std::{
process::{Child, ExitStatus},
thread,
time::Duration,
};
#![cfg(unix)]
/// Wait for the given `child` the given ammount of `secs`.
use assert_cmd::cargo::cargo_bin;
use nix::{
sys::signal::{kill, Signal},
unistd::Pid,
};
use std::{
io::{BufRead, BufReader, Read},
ops::{Deref, DerefMut},
path::Path,
process::{self, Child, Command, ExitStatus},
};
use tokio::time::{sleep, Duration};
/// Wait for the given `child` the given number of `secs`.
///
/// Returns the `Some(exit status)` or `None` if the process did not finish in the given time.
pub fn wait_for(child: &mut Child, secs: usize) -> Option<ExitStatus> {
for _ in 0..secs {
match child.try_wait().unwrap() {
Some(status) => return Some(status),
None => thread::sleep(Duration::from_secs(1)),
pub fn wait_for(child: &mut Child, secs: u64) -> Result<ExitStatus, ()> {
let result = wait_timeout::ChildExt::wait_timeout(child, Duration::from_secs(5.min(secs)))
.map_err(|_| ())?;
if let Some(exit_status) = result {
Ok(exit_status)
} else {
if secs > 5 {
eprintln!("Child process taking over 5 seconds to exit gracefully");
let result = wait_timeout::ChildExt::wait_timeout(child, Duration::from_secs(secs - 5))
.map_err(|_| ())?;
if let Some(exit_status) = result {
return Ok(exit_status)
}
}
eprintln!("Took to long to exit. Killing...");
eprintln!("Took too long to exit (> {} seconds). Killing...", secs);
let _ = child.kill();
child.wait().unwrap();
None
Err(())
}
}
/// Run the node for a while (till the RPC is up + 30 secs)
/// TODO: needs to be revisited to hit the RPC
pub async fn run_node_for_a_while(base_path: &Path, args: &[&str], signal: Signal) {
let mut cmd = Command::new(cargo_bin("polkadot-parachain"))
.stdout(process::Stdio::piped())
.stderr(process::Stdio::piped())
.arg("-d")
.arg(base_path)
.args(args)
.spawn()
.unwrap();
let stderr = cmd.stderr.take().unwrap();
let mut child = KillChildOnDrop(cmd);
// TODO: use this instead of the timeout going forward?
let (_, _) = find_ws_url_from_output(stderr);
// TODO: Revisit this to find a better approach for collators
sleep(Duration::from_secs(120)).await;
assert!(child.try_wait().unwrap().is_none(), "the process should still be running");
// Stop the process
kill(Pid::from_raw(child.id().try_into().unwrap()), signal).unwrap();
assert!(wait_for(&mut child, 40).map(|x| x.success()).unwrap());
}
pub struct KillChildOnDrop(pub Child);
impl Drop for KillChildOnDrop {
fn drop(&mut self) {
let _ = self.0.kill();
}
}
impl Deref for KillChildOnDrop {
type Target = Child;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for KillChildOnDrop {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
/// Read the WS address from the output.
///
/// This is hack to get the actual bound sockaddr because
/// substrate assigns a random port if the specified port was already bound.
pub fn find_ws_url_from_output(read: impl Read + Send) -> (String, String) {
let mut data = String::new();
let ws_url = BufReader::new(read)
.lines()
.find_map(|line| {
let line =
line.expect("failed to obtain next line from stdout for WS address discovery");
data.push_str(&line);
// does the line contain our port (we expect this specific output from substrate).
let sock_addr = match line.split_once("Running JSON-RPC WS server: addr=") {
None => return None,
Some((_, after)) => after.split_once(",").unwrap().0,
};
Some(format!("ws://{}", sock_addr))
})
.expect("We should get a WebSocket address");
(ws_url, data)
}
@@ -14,50 +14,26 @@
// You should have received a copy of the GNU General Public License
// along with Substrate. If not, see <http://www.gnu.org/licenses/>.
use assert_cmd::cargo::cargo_bin;
use std::{convert::TryInto, fs, process::Command, thread, time::Duration};
use tempfile::tempdir;
mod common;
#[test]
#[tokio::test]
#[cfg(unix)]
#[ignore]
fn polkadot_argument_parsing() {
use nix::{
sys::signal::{
kill,
Signal::{self, SIGINT, SIGTERM},
},
unistd::Pid,
};
async fn polkadot_argument_parsing() {
use nix::sys::signal::Signal::{SIGINT, SIGTERM};
let base_dir = tempdir().expect("could not create a temp dir");
fn run_command_and_kill(signal: Signal) {
let _ = fs::remove_dir_all("polkadot_argument_parsing");
let mut cmd = Command::new(cargo_bin("polkadot-parachain"))
.args(&[
"-d",
"polkadot_argument_parsing",
let args = &[
"--",
"--dev",
"--bootnodes",
"/ip4/127.0.0.1/tcp/30333/p2p/Qmbx43psh7LVkrYTRXisUpzCubbgYojkejzAgj5mteDnxy",
"--bootnodes",
"/ip4/127.0.0.1/tcp/50500/p2p/Qma6SpS7tzfCrhtgEVKR9Uhjmuv55ovC3kY6y6rPBxpWde",
])
.spawn()
.unwrap();
];
thread::sleep(Duration::from_secs(20));
assert!(cmd.try_wait().unwrap().is_none(), "the process should still be running");
kill(Pid::from_raw(cmd.id().try_into().unwrap()), signal).unwrap();
assert_eq!(
common::wait_for(&mut cmd, 30).map(|x| x.success()),
Some(true),
"the process must exit gracefully after signal {}",
signal,
);
}
run_command_and_kill(SIGINT);
run_command_and_kill(SIGTERM);
common::run_node_for_a_while(base_dir.path(), args, SIGINT).await;
common::run_node_for_a_while(base_dir.path(), args, SIGTERM).await;
}
@@ -14,41 +14,20 @@
// You should have received a copy of the GNU General Public License
// along with Substrate. If not, see <http://www.gnu.org/licenses/>.
use assert_cmd::cargo::cargo_bin;
use std::{convert::TryInto, fs, process::Command, thread, time::Duration};
use tempfile::tempdir;
mod common;
#[test]
#[tokio::test]
#[cfg(unix)]
#[ignore]
fn interrupt_polkadot_mdns_issue_test() {
use nix::{
sys::signal::{
kill,
Signal::{self, SIGINT, SIGTERM},
},
unistd::Pid,
};
async fn interrupt_polkadot_mdns_issue_test() {
use nix::sys::signal::Signal::{SIGINT, SIGTERM};
fn run_command_and_kill(signal: Signal) {
let _ = fs::remove_dir_all("interrupt_polkadot_mdns_issue_test");
let mut cmd = Command::new(cargo_bin("polkadot-parachain"))
.args(&["-d", "interrupt_polkadot_mdns_issue_test", "--", "--dev"])
.spawn()
.unwrap();
let base_dir = tempdir().expect("could not create a temp dir");
thread::sleep(Duration::from_secs(20));
assert!(cmd.try_wait().unwrap().is_none(), "the process should still be running");
kill(Pid::from_raw(cmd.id().try_into().unwrap()), signal).unwrap();
assert_eq!(
common::wait_for(&mut cmd, 30).map(|x| x.success()),
Some(true),
"the process must exit gracefully after signal {}",
signal,
);
}
run_command_and_kill(SIGINT);
run_command_and_kill(SIGTERM);
let args = &["--", "--dev"];
common::run_node_for_a_while(base_dir.path(), args, SIGINT).await;
common::run_node_for_a_while(base_dir.path(), args, SIGTERM).await;
}
+16 -37
View File
@@ -15,59 +15,38 @@
// along with Substrate. If not, see <http://www.gnu.org/licenses/>.
use assert_cmd::cargo::cargo_bin;
use std::{convert::TryInto, process::Command, thread, time::Duration};
use nix::sys::signal::SIGINT;
use std::process::Command;
use tempfile::tempdir;
mod common;
#[test]
#[tokio::test]
#[cfg(unix)]
#[ignore]
fn purge_chain_works() {
fn run_node_and_stop() -> tempfile::TempDir {
use nix::{
sys::signal::{kill, Signal::SIGINT},
unistd::Pid,
};
let base_path = tempfile::tempdir().unwrap();
let mut cmd = Command::new(cargo_bin("polkadot-parachain"))
.args(&["-d"])
.arg(base_path.path())
.args(&["--", "--dev"])
.spawn()
.unwrap();
// Let it produce some blocks.
thread::sleep(Duration::from_secs(30));
assert!(cmd.try_wait().unwrap().is_none(), "the process should still be running");
// Stop the process
kill(Pid::from_raw(cmd.id().try_into().unwrap()), SIGINT).unwrap();
assert!(common::wait_for(&mut cmd, 30).map(|x| x.success()).unwrap_or_default());
base_path
}
async fn purge_chain_works() {
// Check that both databases are deleted
{
let base_path = run_node_and_stop();
assert!(base_path.path().join("chains/local_testnet/db/full").exists());
assert!(base_path.path().join("polkadot/chains/dev/db/full").exists());
let base_dir = tempdir().expect("could not create a temp dir");
let args = &["--", "--dev"];
common::run_node_for_a_while(base_dir.path(), args, SIGINT).await;
assert!(base_dir.path().join("chains/local_testnet/db/full").exists());
assert!(base_dir.path().join("polkadot/chains/dev/db/full").exists());
let status = Command::new(cargo_bin("polkadot-parachain"))
.args(&["purge-chain", "-d"])
.arg(base_path.path())
.arg(base_dir.path())
.arg("-y")
.status()
.unwrap();
assert!(status.success());
// Make sure that the `parachain_local_testnet` chain folder exists, but the `db` is deleted.
assert!(base_path.path().join("chains/local_testnet").exists());
assert!(!base_path.path().join("chains/local_testnet/db/full").exists());
assert!(base_dir.path().join("chains/local_testnet").exists());
assert!(!base_dir.path().join("chains/local_testnet/db/full").exists());
// assert!(base_path.path().join("polkadot/chains/dev").exists());
// assert!(!base_path.path().join("polkadot/chains/dev/db").exists());
}
}
@@ -14,41 +14,20 @@
// You should have received a copy of the GNU General Public License
// along with Substrate. If not, see <http://www.gnu.org/licenses/>.
use assert_cmd::cargo::cargo_bin;
use std::{convert::TryInto, fs, process::Command, thread, time::Duration};
use tempfile::tempdir;
mod common;
#[test]
#[tokio::test]
#[cfg(unix)]
#[ignore]
fn running_the_node_works_and_can_be_interrupted() {
use nix::{
sys::signal::{
kill,
Signal::{self, SIGINT, SIGTERM},
},
unistd::Pid,
};
async fn running_the_node_works_and_can_be_interrupted() {
use nix::sys::signal::Signal::{SIGINT, SIGTERM};
fn run_command_and_kill(signal: Signal) {
let _ = fs::remove_dir_all("interrupt_test");
let mut cmd = Command::new(cargo_bin("polkadot-parachain"))
.args(&["-d", "interrupt_test", "--", "--dev"])
.spawn()
.unwrap();
let base_dir = tempdir().expect("could not create a temp dir");
thread::sleep(Duration::from_secs(30));
assert!(cmd.try_wait().unwrap().is_none(), "the process should still be running");
kill(Pid::from_raw(cmd.id().try_into().unwrap()), signal).unwrap();
assert_eq!(
common::wait_for(&mut cmd, 30).map(|x| x.success()),
Some(true),
"the process must exit gracefully after signal {}",
signal,
);
}
run_command_and_kill(SIGINT);
run_command_and_kill(SIGTERM);
let args = &["--", "--dev"];
common::run_node_for_a_while(base_dir.path(), args, SIGINT).await;
common::run_node_for_a_while(base_dir.path(), args, SIGTERM).await;
}