[CI] Add bootnode checking CI jobs (#6889)

* Add check_bootnode script and github action

* fix mktemp for linux machines

* Update check_bootnodes.sh

show logs to see what's going wrong

* fix ephemeral ports and fetch polkadot

* fix check-bootnodes.yml

* increase node spawn holdoff

* disable fail-fast

* refactor, separate out check_bootnodes and make it posix-compliant

* add new job for detecting new bootnodes

* fix check-bootnodes.yml

* only check all bootnodes on release

* Add test bad bootnode

REVERT ME before merging PR. Should cause the test to fail, then
when we remove it, we should succeed. Sadly doesn't account for a
new successful bootnode, should ask if we have one we can use for
testing.

* fix paths

* fix paths and git... hopefully

* this better work...

* fix

* test

* last test

* Revert "Add test bad bootnode"

This reverts commit 540dd9754a1f8e2d3fef33f7f5a033b8c2aa4dcb.

* Update check_bootnodes.sh

* optimisations

Begin polling the RPC node right after spawning, allowing us to break
early on detecting peers

* increase holdoff to 5 seconds

* don't delete chainspec until we kill the node

* Update check-bootnodes.yml

* Remove checking bootnodes on pushing of this branch

---------

Co-authored-by: parity-processbot <>
This commit is contained in:
Martin Pugh
2023-03-21 12:36:47 +00:00
committed by GitHub
parent 3264cb7b64
commit 270540cf46
5 changed files with 226 additions and 0 deletions
+71
View File
@@ -0,0 +1,71 @@
#!/usr/bin/env bash
# check_bootnodes.sh — verify that every bootnode in a chainspec is contactable.
#
# Usage: check_bootnodes.sh <chainspec.json>
#
# In this script, we check each bootnode for a given chainspec file and ensure they are contactable.
# We do this by removing every bootnode from the chainspec with the exception of the one
# we want to check. Then we spin up a node using this new chainspec, wait a little while
# and then check our local node's RPC endpoint for the number of peers. If the node hasn't
# been able to contact any other nodes, we can reason that the bootnode we used is not well-connected
# or is otherwise uncontactable.
#
# NOTE(review): the per-bootnode worker `check_bootnode` is not defined in this
# file — presumably it comes from the lib.sh sourced below; confirm against
# scripts/ci/common/lib.sh.
# shellcheck source=scripts/ci/common/lib.sh
source "$(dirname "${0}")/../common/lib.sh"
# First CLI argument: path to the chainspec file under test.
CHAINSPEC_FILE="$1"
# Runtime name derived from the chainspec filename, e.g. "polkadot" from "polkadot.json".
RUNTIME=$(basename "$CHAINSPEC_FILE" | cut -d '.' -f 1)
# Kill any polkadot nodes we spawned and terminate the script.
# Arguments:
#   $1 - exit code for the script. When the handler is fired by a trap it
#        receives no argument, so we fall back to the status of the last
#        command ($?) instead of silently expanding to a bare `exit`
#        (the original `exit $1` could make an interrupt exit 0).
cleanup(){
  rc="${1:-$?}"
  # Clear the traps so the `exit` below cannot re-enter this handler.
  trap - EXIT INT TERM
  echo "[+] Script interrupted or ended. Cleaning up..."
  # Kill all the polkadot processes; ignore "no process found" noise.
  killall polkadot > /dev/null 2>&1
  exit "$rc"
}
# Register after the function is defined so the handler always resolves.
trap cleanup EXIT INT TERM
# Count the number of bootnodes listed in the chainspec.
BOOTNODES=$( jq -r '.bootNodes | length' "$CHAINSPEC_FILE" )

# Classification results and the PIDs of the per-bootnode checker jobs.
BAD_BOOTNODES=()
GOOD_BOOTNODES=()
PIDS=()

echo "[+] Checking $BOOTNODES bootnodes for $RUNTIME"
for (( i = 0; i < BOOTNODES; i++ )); do
  # Pass the index as a jq variable instead of splicing it into the filter.
  BOOTNODE=$( jq -r --argjson i "$i" '.bootNodes[$i]' < "$CHAINSPEC_FILE" )
  # Check each bootnode in parallel (check_bootnode comes from the sourced lib).
  check_bootnode "$BOOTNODE" "$CHAINSPEC_FILE" &
  PIDS+=($!)
  # Hold off 5 seconds between spawns to stop the machine from getting overloaded.
  sleep 5
done

# Reap every checker and record its exit status (0 = bootnode was contactable).
RESPS=()
for pid in "${PIDS[@]}"; do
  wait "$pid"
  RESPS+=($?)
done
echo

# Classify each bootnode by its checker's exit status. RESPS[i] corresponds to
# .bootNodes[i], so one pass replaces the original duplicated good/bad loops.
for i in "${!RESPS[@]}"; do
  if [ "${RESPS[$i]}" -eq 0 ]; then
    GOOD_BOOTNODES+=("$( jq -r --argjson i "$i" '.bootNodes[$i]' < "$CHAINSPEC_FILE" )")
  else
    BAD_BOOTNODES+=("$( jq -r --argjson i "$i" '.bootNodes[$i]' < "$CHAINSPEC_FILE" )")
  fi
done

# If we've got any uncontactable bootnodes for this runtime, print them and
# exit non-zero via cleanup; otherwise report success.
if [ ${#BAD_BOOTNODES[@]} -gt 0 ]; then
  echo "[!] Bad bootnodes found for $RUNTIME:"
  for bad in "${BAD_BOOTNODES[@]}"; do
    echo " $bad"
  done
  cleanup 1
else
  echo "[+] All bootnodes for $RUNTIME are contactable"
  cleanup 0
fi