[CI] Add bootnode checking CI jobs (#6889)

* Add check_bootnode script and github action

* fix mktemp for linux machines

* Update check_bootnodes.sh

show logs to see what's going wrong

* fix ephemeral ports and fetch polkadot

* fix check-bootnodes.yml

* increase node spawn holdoff

* disable fail-fast

* refactor, separate out check_bootnodes and make it posix-compliant

* add new job for detecting new bootnodes

* fix check-bootnodes.yml

* only check all bootnodes on release

* Add test bad bootnode

REVERT ME before merging PR. Should cause the test to fail, then
when we remove it, we should succeed. Sadly doesn't account for a
new successful bootnode, should ask if we have one we can use for
testing.

* fix paths

* fix paths and git... hopefully

* this better work...

* fix

* test

* last test

* Revert "Add test bad bootnode"

This reverts commit 540dd9754a1f8e2d3fef33f7f5a033b8c2aa4dcb.

* Update check_bootnodes.sh

* optimisations

Begin polling the RPC node right after spawning, allowing us to break
early on detecting peers

* increase holdoff to 5 seconds

* don't delete chainspec until we kill the node

* Update check-bootnodes.yml

* Remove checking bootnodes on pushing of this branch

---------

Co-authored-by: parity-processbot <>
This commit is contained in:
Martin Pugh
2023-03-21 12:36:47 +00:00
committed by GitHub
parent 3264cb7b64
commit 270540cf46
5 changed files with 226 additions and 0 deletions
+71
View File
@@ -0,0 +1,71 @@
#!/usr/bin/env bash
# check_bootnodes.sh — verify that every bootnode in a chainspec is contactable.
#
# Usage: check_bootnodes.sh <chainspec.json>
#
# In this script, we check each bootnode for a given chainspec file and ensure they are contactable.
# We do this by removing every bootnode from the chainspec with the exception of the one
# we want to check. Then we spin up a node using this new chainspec, wait a little while
# and then check our local node's RPC endpoint for the number of peers. If the node hasn't
# been able to contact any other nodes, we can reason that the bootnode we used is not well-connected
# or is otherwise uncontactable.
#
# NOTE(review): the per-bootnode worker `check_bootnode` is not defined in this
# file — presumably it comes from the lib.sh sourced below; confirm against
# scripts/ci/common/lib.sh.
# shellcheck source=scripts/ci/common/lib.sh
source "$(dirname "${0}")/../common/lib.sh"
# First CLI argument: path to the chainspec file under test.
CHAINSPEC_FILE="$1"
# Runtime name derived from the chainspec filename, e.g. "polkadot" from "polkadot.json".
RUNTIME=$(basename "$CHAINSPEC_FILE" | cut -d '.' -f 1)
# Kill any polkadot nodes we spawned and terminate the script.
# Arguments:
#   $1 - exit code for the script. When the handler is fired by a trap it
#        receives no argument, so we fall back to the status of the last
#        command ($?) instead of silently expanding to a bare `exit`
#        (the original `exit $1` could make an interrupt exit 0).
cleanup(){
  rc="${1:-$?}"
  # Clear the traps so the `exit` below cannot re-enter this handler.
  trap - EXIT INT TERM
  echo "[+] Script interrupted or ended. Cleaning up..."
  # Kill all the polkadot processes; ignore "no process found" noise.
  killall polkadot > /dev/null 2>&1
  exit "$rc"
}
# Register after the function is defined so the handler always resolves.
trap cleanup EXIT INT TERM
# Count the number of bootnodes listed in the chainspec.
BOOTNODES=$( jq -r '.bootNodes | length' "$CHAINSPEC_FILE" )

# Classification results and the PIDs of the per-bootnode checker jobs.
BAD_BOOTNODES=()
GOOD_BOOTNODES=()
PIDS=()

echo "[+] Checking $BOOTNODES bootnodes for $RUNTIME"
for (( i = 0; i < BOOTNODES; i++ )); do
  # Pass the index as a jq variable instead of splicing it into the filter.
  BOOTNODE=$( jq -r --argjson i "$i" '.bootNodes[$i]' < "$CHAINSPEC_FILE" )
  # Check each bootnode in parallel (check_bootnode comes from the sourced lib).
  check_bootnode "$BOOTNODE" "$CHAINSPEC_FILE" &
  PIDS+=($!)
  # Hold off 5 seconds between spawns to stop the machine from getting overloaded.
  sleep 5
done

# Reap every checker and record its exit status (0 = bootnode was contactable).
RESPS=()
for pid in "${PIDS[@]}"; do
  wait "$pid"
  RESPS+=($?)
done
echo

# Classify each bootnode by its checker's exit status. RESPS[i] corresponds to
# .bootNodes[i], so one pass replaces the original duplicated good/bad loops.
for i in "${!RESPS[@]}"; do
  if [ "${RESPS[$i]}" -eq 0 ]; then
    GOOD_BOOTNODES+=("$( jq -r --argjson i "$i" '.bootNodes[$i]' < "$CHAINSPEC_FILE" )")
  else
    BAD_BOOTNODES+=("$( jq -r --argjson i "$i" '.bootNodes[$i]' < "$CHAINSPEC_FILE" )")
  fi
done

# If we've got any uncontactable bootnodes for this runtime, print them and
# exit non-zero via cleanup; otherwise report success.
if [ ${#BAD_BOOTNODES[@]} -gt 0 ]; then
  echo "[!] Bad bootnodes found for $RUNTIME:"
  for bad in "${BAD_BOOTNODES[@]}"; do
    echo " $bad"
  done
  cleanup 1
else
  echo "[+] All bootnodes for $RUNTIME are contactable"
  cleanup 0
fi