ci: remove all zombienet CI infrastructure

Zombienet tests are upstream Polkadot SDK tests with no custom pallet
coverage. Mainnet has 500K+ blocks, 9 successful upgrades, and zero
breakage — these tests provide no value for our project.

Removed 22 files (2293 lines):
- 6 workflow files (zombienet_*.yml, preflight, flaky-tests check)
- 3 custom actions (zombienet, zombienet-sdk, download-binaries)
- 5 scripts (dispatch, run, parse, process-logs, check-flaky)
- 5 config files (zombienet-env, flaky-tests, test definitions)
- 1 doc file (ZOMBIENET_CI.md)
- Remaining comment references in build-publish-images.yml
Commit metadata:
- Date: 2026-03-16 17:27:37 +03:00
- Parent: 2ed2a15a17
- Commit: 35d49d04ad
- 22 changed files with 0 additions and 2293 deletions
@@ -1,93 +0,0 @@
#!/usr/bin/env bash
# Validates the .github/zombienet-flaky-tests file to ensure:
# 1. Each entry has the correct format: <test-name>:<issue-number>
# 2. The referenced number is a GitHub Issue
# 3. The GitHub issue exists
# 4. The issue is OPEN (warns if closed)
#
# Usage: check-flaky.sh [flaky-tests-file]   (default: .github/zombienet-flaky-tests)
# Requires: gh (authenticated) and jq.
set -uo pipefail

FLAKY_TESTS_FILE="${1:-.github/zombienet-flaky-tests}"

if [[ ! -f "$FLAKY_TESTS_FILE" ]]; then
  echo "Error: File not found: $FLAKY_TESTS_FILE" >&2
  exit 1
fi

if ! command -v gh &> /dev/null; then
  echo "Error: gh CLI is not installed" >&2
  exit 1
fi

# jq is used below to pick fields out of gh's JSON; check it up front like gh.
if ! command -v jq &> /dev/null; then
  echo "Error: jq is not installed" >&2
  exit 1
fi

echo "Validating $FLAKY_TESTS_FILE..."
echo

has_errors=false
line_num=0

# `|| [[ -n "$line" ]]` also processes a final line without a trailing newline.
while IFS= read -r line || [[ -n "$line" ]]; do
  line_num=$((line_num + 1))

  # Blank lines still advance the counter so reported numbers match the file.
  if [[ -z "$line" ]]; then
    continue
  fi

  # Parse format: test-name:issue-number
  if [[ ! "$line" =~ ^([^:]+):([0-9]+)$ ]]; then
    echo "❌ Line $line_num: Missing required issue number" >&2
    echo " Entry: '$line'" >&2
    echo " Expected format: <test-name>:<issue-number>" >&2
    echo " Example: zombienet-pezkuwi-test-name:1234" >&2
    has_errors=true
    continue
  fi

  test_name="${BASH_REMATCH[1]}"
  issue_number="${BASH_REMATCH[2]}"

  # Capture the failure with `if !` rather than toggling `set +e`/`set -e`:
  # the old trailing `set -e` enabled errexit that was never on at script
  # start, silently changing error handling for the rest of the run.
  # `< /dev/null` stops gh from consuming the loop's stdin (the input file).
  if ! issue_data=$(gh issue view "$issue_number" --json state,title,url 2>&1 < /dev/null); then
    echo "❌ Line $line_num: Issue #$issue_number does not exist" >&2
    echo " Test: $test_name" >&2
    has_errors=true
    continue
  fi

  url=$(jq -r '.url' <<< "$issue_data")
  state=$(jq -r '.state' <<< "$issue_data")
  title=$(jq -r '.title' <<< "$issue_data")

  # Check if it's an issue (not a PR) by verifying the URL contains '/issues/'
  if [[ ! "$url" =~ /issues/ ]]; then
    echo "❌ Line $line_num: #$issue_number is a Pull Request, not an Issue" >&2
    echo " Test: $test_name" >&2
    echo " URL: $url" >&2
    echo " Please reference a GitHub Issue, not a PR" >&2
    has_errors=true
    continue
  fi

  if [[ "$state" == "OPEN" ]]; then
    echo "✅ Line $line_num: $test_name -> Issue #$issue_number (open)"
  else
    echo "⚠️ Line $line_num: Issue #$issue_number is closed: '$title'" >&2
    echo " Test: $test_name" >&2
    echo " Consider removing this entry if the issue is resolved." >&2
  fi
done < "$FLAKY_TESTS_FILE"

echo
if [[ "$has_errors" == "true" ]]; then
  echo "❌ Validation failed with errors" >&2
  exit 1
else
  echo "✅ All entries are valid"
  exit 0
fi
@@ -1,123 +0,0 @@
#!/bin/bash
# Zombienet Workflow Dispatcher
#
# This script triggers GitHub Actions workflows for zombienet tests and monitors their execution.
# It can run workflows multiple times for reliability testing and optionally filter tests by pattern.
# Results are automatically saved to a timestamped CSV file for analysis.
#
# Features:
# - Trigger workflows on specific branches
# - Filter tests by pattern (useful for debugging specific tests)
# - Run workflows multiple times for flaky test detection
# - Monitor workflow completion and collect results
# - Export results to CSV with job details (ID, name, conclusion, timing, URLs)
#
# Requirements:
# - GitHub CLI (gh) must be installed and authenticated
# - Must be run from pezkuwi-sdk repository root
# - Target branch must have corresponding PR with CI enabled
# Exit on error
# set -e
function dbg {
  # Print a "<timestamp> - <message>" line to stdout.
  # All arguments are joined into one message string.
  # "$*" is the correct scalar join (SC2124: `msg="$@"` is a misuse of "$@");
  # declaration and command substitution are split so a failing `date` would
  # not be masked by `local`'s own exit status (SC2155).
  local msg tstamp
  msg="$*"
  tstamp=$(date "+%Y-%m-%d %T")
  printf "%s - %s\n" "$tstamp" "$msg"
}
function write_job_results_to_csv {
  # Append per-job results of a completed workflow run to a CSV file.
  # Args: $1 run id, $2 branch name, $3 CSV file path.
  # Only jobs whose name starts with 'zombienet-' and whose conclusion is
  # "success" or "failure" are recorded.
  local run_id="$1"
  local branch="$2"
  local csv_file="$3"
  dbg "Writing job results for run $run_id to $csv_file"
  # Pipe gh's JSON through standalone jq so the branch is passed via --arg
  # instead of being spliced into the jq program text (the original produced
  # a broken program if the branch name contained a quote). Requires jq.
  gh run view "$run_id" --json jobs |
    jq -r --arg branch "$branch" \
      '.jobs[] | select(.name | startswith("zombienet-")) |
       select(.conclusion == "success" or .conclusion == "failure") |
       [.databaseId, .name, .conclusion, .startedAt, $branch, .url] | @csv' >> "$csv_file"
}
# Parse command line arguments
WORKFLOW_FILE=""
BRANCH=""
MAX_RESULT_CNT=-1
TEST_PATTERN=""

while getopts "w:b:m:p:h" opt; do
  case $opt in
    w) WORKFLOW_FILE="$OPTARG" ;;
    b) BRANCH="$OPTARG" ;;
    m) MAX_RESULT_CNT="$OPTARG" ;;
    p) TEST_PATTERN="$OPTARG" ;;
    h) echo "Usage: $0 -w <workflow-file> -b <branch> [-m max-triggers] [-p test-pattern]"
       echo " -w: Workflow file (required)"
       echo " -b: Branch name (required)"
       echo " -m: Maximum number of triggers (optional, default: infinite)"
       echo " -p: Test pattern for workflow input (optional)"
       exit 0 ;;
    \?) echo "Invalid option -$OPTARG" >&2
        echo "Use -h for help"
        exit 1 ;;
  esac
done

if [[ -z "$WORKFLOW_FILE" || -z "$BRANCH" ]]; then
  echo "Error: Both workflow file (-w) and branch (-b) are required"
  echo "Usage: $0 -w <workflow-file> -b <branch> [-m max-triggers] [-p test-pattern]"
  echo "Use -h for help"
  exit 1
fi

# Create CSV file with headers
CSV_FILE="workflow_results_$(date +%Y%m%d_%H%M%S).csv"
echo "job_id,job_name,conclusion,started_at,branch,job_url" > "$CSV_FILE"
dbg "Created CSV file: $CSV_FILE"

dbg "Starting loop for workflow: $WORKFLOW_FILE on branch: $BRANCH"
TRIGGER_CNT=0
RESULT_CNT=0

# Trigger/monitor loop: wait for the previously dispatched run to complete,
# record its job results (skipped on the very first pass, when the latest run
# is not ours), then dispatch the workflow again.
while [[ $MAX_RESULT_CNT -eq -1 || $RESULT_CNT -lt $MAX_RESULT_CNT ]]; do
  dbg "Waiting until workflow $WORKFLOW_FILE (branch: $BRANCH) jobs are completed"
  while true ; do
    echo ""
    # Expansions are quoted (SC2086): a workflow or branch name containing
    # whitespace or glob characters would otherwise be word-split/globbed
    # before reaching gh.
    gh run list --workflow="$WORKFLOW_FILE" -e workflow_dispatch -b "$BRANCH" -L 5
    sleep 2
    # if job is completed it should have non-empty conclusion field
    ALL_JOBS_COMPLETED=$(gh run list --workflow="$WORKFLOW_FILE" -e workflow_dispatch -b "$BRANCH" --json conclusion --jq 'all(.[]; .conclusion != "")')
    if [[ "$ALL_JOBS_COMPLETED" == "true" ]]; then
      break
    fi
    sleep 60
  done
  dbg "Workflow $WORKFLOW_FILE (branch: $BRANCH) jobs completed"

  # Skip the first iteration - latest run id is not the one we triggered here
  if [ $TRIGGER_CNT -gt 0 ]; then
    # Get the most recent completed run ID and write job results to CSV
    LATEST_RUN_ID=$(gh run list --workflow="$WORKFLOW_FILE" -e workflow_dispatch -b "$BRANCH" -L 1 --json databaseId --jq '.[0].databaseId')
    write_job_results_to_csv "$LATEST_RUN_ID" "$BRANCH" "$CSV_FILE"
    RESULT_CNT=$(( RESULT_CNT + 1 ))
  fi

  TRIGGER_CNT=$(( TRIGGER_CNT + 1 ))
  dbg "Triggering #$TRIGGER_CNT workflow $WORKFLOW_FILE (branch: $BRANCH)"
  if [[ -n "$TEST_PATTERN" ]]; then
    gh workflow run "$WORKFLOW_FILE" --ref "$BRANCH" -f test_pattern="$TEST_PATTERN"
  else
    gh workflow run "$WORKFLOW_FILE" --ref "$BRANCH"
  fi
  dbg "Sleeping 60s"
  sleep 60
done
-63
View File
@@ -1,63 +0,0 @@
#!/usr/bin/env python3
"""
Zombienet Test Matrix Parser
This script parses YAML test definition files and converts them to JSON format
for use as GitHub Actions matrix jobs. It provides filtering capabilities to:
1. Exclude flaky tests (unless a specific test pattern is provided)
2. Filter tests by name pattern for targeted execution
3. Convert YAML test definitions to JSON matrix format
The script is used by GitHub Actions workflows to dynamically generate
test matrices based on YAML configuration files, enabling flexible
test execution and maintenance.
Usage:
python parse-zombienet-tests.py --matrix tests.yml [--flaky-tests flaky.txt] [--test-pattern pattern]
Output:
JSON array of test job objects suitable for GitHub Actions matrix strategy
"""
import argparse
import yaml
import json
import re
def parse_args():
    """Parse command-line options for the matrix filter.

    Returns an argparse.Namespace with ``matrix`` (required path),
    ``flaky_tests`` (newline-separated names, default "") and
    ``test_pattern`` (regex, default "").
    """
    cli = argparse.ArgumentParser(
        description="Parse test matrix YAML file with optional filtering"
    )
    cli.add_argument("--matrix", required=True, help="Path to the YAML matrix file")
    cli.add_argument(
        "--flaky-tests", default="", help="Newline-separated list of flaky job names"
    )
    cli.add_argument(
        "--test-pattern", default="", help="Regex pattern to match job_name"
    )
    return cli.parse_args()
def load_jobs(matrix_path):
    """Read the YAML matrix file at ``matrix_path`` and return its parsed contents."""
    with open(matrix_path, "r") as handle:
        raw_text = handle.read()
    return yaml.safe_load(raw_text)
def filter_jobs(jobs, flaky_tests, test_pattern):
    """Select which matrix jobs to run.

    When ``test_pattern`` is non-empty, flaky-test exclusion is ignored and
    only jobs whose ``job-name`` matches the pattern are kept. Otherwise all
    jobs except those listed in ``flaky_tests`` (newline-separated) are kept.
    """
    known_flaky = {entry.strip() for entry in flaky_tests.splitlines() if entry.strip()}
    if test_pattern:
        # Targeted run: the pattern overrides the flaky-test exclusion list.
        return [
            job for job in jobs if re.search(test_pattern, job.get("job-name", ""))
        ]
    return [job for job in jobs if job.get("job-name", "") not in known_flaky]
def main():
    """Entry point: parse CLI options, load the matrix, filter it, emit JSON."""
    options = parse_args()
    matrix_jobs = load_jobs(options.matrix)
    selected = filter_jobs(matrix_jobs, options.flaky_tests, options.test_pattern)
    print(json.dumps(selected))


if __name__ == "__main__":
    main()
-214
View File
@@ -1,214 +0,0 @@
#!/bin/bash
set -euo pipefail
# This script processes logs produced by nodes spawned using the zombienet-sdk framework.
# The logs are prepared for upload as GitHub artifacts.
# If Loki logging is available, the corresponding log URLs are also printed.
# NOTE: Loki URL disabled - Pezkuwi does not use external Grafana.
# Zombienet logs are available as GitHub Actions artifacts.
# Empty template: make_url substitutes into this, so printed URLs are empty
# while Loki is disabled.
LOKI_URL_FOR_NODE=''
# When this directory exists, node logs are mirrored into it for a log
# forwarder to pick up.
LOKI_DIR_FOR_NATIVE_LOGS="/tmp/zombienet"
# JQ queries
# Node-name queries for the two zombie.json layouts: legacy v1 vs zombienet-sdk.
JQ_QUERY_RELAY_V1='.relay[].name'
JQ_QUERY_RELAY_SDK='.relay.nodes[].name'
JQ_QUERY_PARA_NODES_V1='.paras[$pid].nodes[].name'
JQ_QUERY_PARA_NODES_SDK='.teyrchains[$pid][] .collators[].name'
# current time in milliseconds + 60 secs to allow loki to ingest logs
# NOTE(review): `date +%s%3N` (millisecond precision) is GNU coreutils only;
# %N is not expanded on BSD/macOS date — confirm runners are Linux.
TO=$(($(date +%s%3N) + 60000))
make_url() {
  # Build a per-node log URL by filling the placeholders in the
  # LOKI_URL_FOR_NODE template.
  # Args: $1 node/pod name, $2 end-of-range timestamp (ms).
  # Reads globals: LOKI_URL_FOR_NODE (template), NS (namespace), FROM (start ts).
  # Prints the filled-in URL on stdout.
  local node_name="$1"
  local range_end="$2"
  local filled="$LOKI_URL_FOR_NODE"
  filled="${filled//\{\{namespace\}\}/$NS}"
  filled="${filled//\{\{podName\}\}/$node_name}"
  filled="${filled//\{\{from\}\}/$FROM}"
  filled="${filled//\{\{to\}\}/$range_end}"
  printf '%s\n' "$filled"
}
# Since we don't have the zombie.json file, we will make the best-effort to send the logs
process_logs_from_fallback() {
  # Best-effort log collection when no zombie.json metadata exists.
  # Args: $1 base dir (/tmp/zombie-*), $2 target dir for collected logs.
  # Side effects: sets the globals NS and FROM (consumed by make_url), copies
  # each discovered node log into the target dir, and mirrors log lines into
  # LOKI_DIR_FOR_NATIVE_LOGS when that directory exists.
  # Returns 1 when no log files are found. NOTE(review): under the script's
  # top-level `set -e`, that return aborts the whole run — confirm intended.
  local BASE_DIR="$1"
  local TARGET_DIR="$2"
  # Extract namespace from BASE_DIR (e.g., /tmp/zombie-abc123 -> zombie-abc123)
  NS=$(basename "$BASE_DIR")
  echo "Using fallback mode for namespace: $NS"
  # Use current time as FROM since we don't have zombie.json
  FROM=$(($(date +%s%3N) - 600000)) # 10 minutes ago
  # Find all logs with glob patterns
  local log_files=()
  # Search for SDK pattern: BASE_DIR/<name>/<name>.log
  if [[ -d "$BASE_DIR" ]]; then
    for node_dir in "$BASE_DIR"/*; do
      # Skip the target dir itself so already-collected logs are not re-added.
      if [[ -d "$node_dir" && "$node_dir" != "$TARGET_DIR" ]]; then
        local node_name=$(basename "$node_dir")
        if [[ -f "$node_dir/$node_name.log" ]]; then
          log_files+=("$node_dir/$node_name.log")
        fi
      fi
    done
  fi
  # Search for v1 pattern: BASE_DIR/logs/<name>.log
  if [[ -d "$TARGET_DIR" ]]; then
    for log_file in "$TARGET_DIR"/*.log; do
      # -f guards the unmatched-glob case (the literal pattern when no *.log exist).
      if [[ -f "$log_file" ]]; then
        log_files+=("$log_file")
      fi
    done
  fi
  if [[ ${#log_files[@]} -eq 0 ]]; then
    echo "::warning ::No log files found in $BASE_DIR using glob patterns"
    return 1
  fi
  echo "Found ${#log_files[@]} log file(s) using glob patterns"
  echo "Nodes:"
  for log_file in "${log_files[@]}"; do
    # Extract node name from log file path
    local name=$(basename "$log_file" .log)
    local_to=$TO
    # Copy log to target directory if not already there
    if [[ "$log_file" != "$TARGET_DIR/$name.log" ]]; then
      if ! cp "$log_file" "$TARGET_DIR/$name.log" 2>/dev/null; then
        echo "::warning ::Failed to copy log for $name"
        continue
      fi
    fi
    # Send logs to loki
    if [[ -d "$LOKI_DIR_FOR_NATIVE_LOGS" ]]; then
      if [[ -f "$TARGET_DIR/$name.log" ]]; then
        # Prefix each line with "namespace node-name" so the forwarder can label it.
        awk -v NS="$NS" -v NAME="$name" '{print NS" "NAME" " $0}' "$TARGET_DIR/$name.log" >> "$LOKI_DIR_FOR_NATIVE_LOGS/to-loki.log"
        # Push the URL's end-of-range forward to cover ingestion delay.
        local_to=$(($(date +%s%3N) + 60000))
      fi
    fi
    echo -e "\t$name: $(make_url "$name" "$local_to")"
  done
  echo ""
}
process_logs_from_zombie_file() {
  # Collect node logs using the zombie.json network description.
  # Args: $1 base dir, $2 target dir for logs, $3 path to zombie.json.
  # Side effects: sets the globals NS and FROM (consumed by make_url);
  # fetches logs via kubectl for the k8s provider, otherwise copies them
  # from the native-provider directory layout; mirrors logs into
  # LOKI_DIR_FOR_NATIVE_LOGS when that directory exists.
  local BASE_DIR="$1"
  local TARGET_DIR="$2"
  local ZOMBIE_JSON="$3"
  # Extract namespace (ns in sdk / namespace in v1)
  NS=$(jq -r '.ns // .namespace' "$ZOMBIE_JSON")
  # test start time in milliseconds
  FROM=$(jq -r '.start_time_ts' "$ZOMBIE_JSON")
  echo "Relay nodes:"
  # Default to the v1 queries; switch to the sdk layout when the namespace
  # matches the sdk naming scheme (zombie-<hex>-...).
  JQ_QUERY_RELAY=$JQ_QUERY_RELAY_V1
  JQ_QUERY_PARA_NODES=$JQ_QUERY_PARA_NODES_V1
  if [[ $(echo "$NS" | grep -E "zombie-[A-Fa-f0-9]+-") ]]; then
    JQ_QUERY_RELAY=$JQ_QUERY_RELAY_SDK
    JQ_QUERY_PARA_NODES=$JQ_QUERY_PARA_NODES_SDK
  fi;
  # NOTE(review): $JQ_QUERY_RELAY is unquoted; this relies on the relay query
  # strings containing no whitespace or glob characters (currently true).
  # NOTE: the `| while read` loops run in pipeline subshells, so variables
  # assigned inside (e.g. local_to) do not persist — each iteration sets them.
  jq -r $JQ_QUERY_RELAY "$ZOMBIE_JSON" | while read -r name; do
    [[ -z "$name" ]] && continue
    local_to=$TO
    if [[ "${ZOMBIE_PROVIDER:-}" == "k8s" ]]; then
      # Fetching logs from k8s
      if ! kubectl logs "$name" -c "$name" -n "$NS" > "$TARGET_DIR/$name.log" 2>&1; then
        echo "::warning ::Failed to fetch logs for $name"
      fi
    else
      # zombienet v1 dump the logs to the `/logs` directory
      if [[ ! -f "$TARGET_DIR/$name.log" ]]; then
        # `sdk` use this pattern to store the logs in native provider
        if [[ -f "$BASE_DIR/$name/$name.log" ]]; then
          cp "$BASE_DIR/$name/$name.log" "$TARGET_DIR/$name.log"
        else
          echo "::warning ::Log file not found: $BASE_DIR/$name/$name.log"
          continue
        fi
      fi
      # send logs to loki
      if [[ -d "$LOKI_DIR_FOR_NATIVE_LOGS" && -f "$TARGET_DIR/$name.log" ]]; then
        # Prefix each line with "namespace node-name" so the forwarder can label it.
        awk -v NS="$NS" -v NAME="$name" '{print NS" "NAME" " $0}' "$TARGET_DIR/$name.log" >> "$LOKI_DIR_FOR_NATIVE_LOGS/to-loki.log"
        local_to=$(($(date +%s%3N) + 60000))
      fi
    fi
    echo -e "\t$name: $(make_url "$name" "$local_to")"
  done
  echo ""
  # Handle teyrchains grouped by paraId
  jq -r '.paras // .teyrchains | to_entries[] | "\(.key)"' "$ZOMBIE_JSON" | while read -r para_id; do
    echo "ParaId: $para_id"
    # Same per-node handling as the relay loop above, scoped to one para id.
    jq -r --arg pid "$para_id" "$JQ_QUERY_PARA_NODES" "$ZOMBIE_JSON" | while read -r name; do
      [[ -z "$name" ]] && continue
      local_to=$TO
      if [[ "${ZOMBIE_PROVIDER:-}" == "k8s" ]]; then
        # Fetching logs from k8s
        if ! kubectl logs "$name" -c "$name" -n "$NS" > "$TARGET_DIR/$name.log" 2>&1; then
          echo "::warning ::Failed to fetch logs for $name"
        fi
      else
        # zombienet v1 dump the logs to the `/logs` directory
        if [[ ! -f "$TARGET_DIR/$name.log" ]]; then
          # `sdk` use this pattern to store the logs in native provider
          if [[ -f "$BASE_DIR/$name/$name.log" ]]; then
            cp "$BASE_DIR/$name/$name.log" "$TARGET_DIR/$name.log"
          else
            echo "::warning ::Log file not found: $BASE_DIR/$name/$name.log"
            continue
          fi
        fi
        # send logs to loki
        if [[ -d "$LOKI_DIR_FOR_NATIVE_LOGS" && -f "$TARGET_DIR/$name.log" ]]; then
          awk -v NS="$NS" -v NAME="$name" '{print NS" "NAME" " $0}' "$TARGET_DIR/$name.log" >> "$LOKI_DIR_FOR_NATIVE_LOGS/to-loki.log"
          local_to=$(($(date +%s%3N) + 60000))
        fi
      fi
      echo -e "\t$name: $(make_url "$name" "$local_to")"
    done
    echo ""
  done
}
# Main execution - Process all zombie-* directories (supports rstest with multiple tests per job)
# mapfile splits on newlines only, so directory paths survive word splitting
# (the old unquoted `for BASE_DIR in $BASE_DIRS` split on any whitespace).
# `ls -dt` keeps the original newest-first ordering; `|| true` keeps the
# process substitution quiet when nothing matches.
mapfile -t BASE_DIRS < <(ls -dt /tmp/zombie-* 2>/dev/null || true)
if [[ ${#BASE_DIRS[@]} -eq 0 ]]; then
  echo "No zombie directories found in /tmp/zombie-*"
  exit 0
fi
for BASE_DIR in "${BASE_DIRS[@]}"; do
  echo "Processing directory: $BASE_DIR"
  # Make sure target directory exists
  TARGET_DIR="$BASE_DIR/logs"
  mkdir -p "$TARGET_DIR"
  ZOMBIE_JSON="$BASE_DIR/zombie.json"
  if [[ ! -f "$ZOMBIE_JSON" ]]; then
    echo "Zombie file $ZOMBIE_JSON not present, calling fallback"
    process_logs_from_fallback "$BASE_DIR" "$TARGET_DIR"
  else
    # we have a zombie.json file, let process it
    echo "Processing logs from zombie.json"
    process_logs_from_zombie_file "$BASE_DIR" "$TARGET_DIR" "$ZOMBIE_JSON"
  fi
  echo ""
done
# sleep for a minute to give alloy time to forward logs
sleep 60
-85
View File
@@ -1,85 +0,0 @@
#!/usr/bin/env bash
# This script executes a given zombienet test for the `native` provider.
# It is equivalent to running run-test-local-env-manager.sh for the `k8s` provider.
function run_test {
  # Locate the requested test file under OUTPUT_DIR and run it with the
  # zombienet `native` provider. Sets the global EXIT_STATUS: the zombienet
  # exit code, or 1 when the test file is not found.
  # Reads globals: OUTPUT_DIR, TEST_TO_RUN, ZOMBIE_COMMAND, CONCURRENCY,
  # ZOMBIE_BASE_DIR.
  cd "${OUTPUT_DIR}"
  # Explicit init: the original left TEST_FOUND unset, so a value inherited
  # from the environment could mask a missing test file.
  TEST_FOUND=0
  local test_file
  test_file=$(find "${OUTPUT_DIR}" -name "${TEST_TO_RUN}" | head -1)
  if [[ -n "$test_file" ]]; then
    TEST_FOUND=1
    # in order to let native provider work properly we need
    # to unset ZOMBIENET_IMAGE, which controls 'inCI' internal flag.
    # ZOMBIENET_IMAGE not set && RUN_IN_CONTAINER=0 => inCI=false
    # Apparently inCI=true works properly only with k8s provider
    unset ZOMBIENET_IMAGE
    # ZOMBIE_COMMAND is deliberately unquoted: it may expand to a command
    # plus arguments.
    if [ -z "$ZOMBIE_BASE_DIR" ]; then
      ${ZOMBIE_COMMAND} -p native -c "$CONCURRENCY" test "$test_file"
    else
      ${ZOMBIE_COMMAND} -p native -c "$CONCURRENCY" -d "$ZOMBIE_BASE_DIR" -f test "$test_file"
    fi
    EXIT_STATUS=$?
  fi
  if [[ $TEST_FOUND -lt 1 ]]; then
    EXIT_STATUS=1
  fi
}
function create_isolated_dir {
  # Create a unique scratch directory under OUTPUT_DIR and repoint the
  # OUTPUT_DIR global at it. mktemp avoids the collision that the original
  # epoch-seconds name hit when two runs started within the same second,
  # and fails cleanly instead of leaving OUTPUT_DIR pointing at a
  # non-existent path.
  ISOLATED=$(mktemp -d "${OUTPUT_DIR}/XXXXXXXX") || return 1
  OUTPUT_DIR="${ISOLATED}"
}
function copy_to_isolated {
  # Mirror the test definitions from LOCAL_DIR into the isolated work dir.
  # Reads globals: SCRIPT_PATH (cd target), LOCAL_DIR (source), OUTPUT_DIR
  # (destination). Prints the working directory for the trace log.
  cd "${SCRIPT_PATH}"
  pwd
  cp -r "${LOCAL_DIR}"/* "${OUTPUT_DIR}"
}
function rm_isolated_dir {
  # Delete the isolated work directory.
  # ${OUTPUT_DIR:?} aborts instead of running `rm -rf` against an empty path
  # if the variable is somehow unset/empty; `--` guards against a path that
  # begins with a dash.
  echo "Removing ${OUTPUT_DIR}"
  rm -rf -- "${OUTPUT_DIR:?}"
}
function log {
  # Print a timestamped "LEVEL - message" line to stdout.
  # Args: $1 level, $2 message. Level DIE is logged as ERROR and terminates
  # the script with exit status 1.
  # Fix: lg_date is now declared local too — the original leaked it into the
  # global scope.
  local lvl msg fmt lg_date
  lvl=$1 msg=$2
  fmt='+%Y-%m-%d %H:%M:%S'
  lg_date=$(date "${fmt}")
  if [[ "${lvl}" = "DIE" ]] ; then
    lvl="ERROR"
    echo -e "\n${lg_date} - ${lvl} - ${msg}"
    exit 1
  else
    echo -e "\n${lg_date} - ${lvl} - ${msg}"
  fi
}
# Trace every command: this is CI tooling and the xtrace output is the log.
set -x

SCRIPT_NAME="$0"
SCRIPT_PATH=$(dirname "$0") # relative
SCRIPT_PATH=$(cd "${SCRIPT_PATH}" && pwd) # absolutized and normalized
ZOMBIE_COMMAND=zombie
EXIT_STATUS=0

# args
LOCAL_DIR="$1"
CONCURRENCY="$2"
TEST_TO_RUN="$3"
ZOMBIE_BASE_DIR="$4"

# Fail fast on missing required args: an empty LOCAL_DIR would make
# copy_to_isolated expand "${LOCAL_DIR}"/* to /* and copy the filesystem
# root into the work dir.
if [[ -z "${LOCAL_DIR}" || -z "${CONCURRENCY}" || -z "${TEST_TO_RUN}" ]]; then
  echo "Usage: ${SCRIPT_NAME} <local-dir> <concurrency> <test-to-run> [zombie-base-dir]" >&2
  exit 1
fi

cd "${SCRIPT_PATH}"
OUTPUT_DIR="${SCRIPT_PATH}"

create_isolated_dir
copy_to_isolated
run_test
rm_isolated_dir

log INFO "Exit status is ${EXIT_STATUS}"
exit "${EXIT_STATUS}"