ci: remove all zombienet CI infrastructure

Zombienet tests are upstream Polkadot SDK tests with no custom pallet
coverage. Mainnet has 500K+ blocks, 9 successful upgrades, and zero
breakage — these tests provide no value for our project.

Removed 22 files (2293 lines):
- 6 workflow files (zombienet_*.yml, preflight, flaky-tests check)
- 3 custom actions (zombienet, zombienet-sdk, download-binaries)
- 5 scripts (dispatch, run, parse, process-logs, check-flaky)
- 5 config files (zombienet-env, flaky-tests, test definitions)
- 1 doc file (ZOMBIENET_CI.md)
- Remaining comment references in build-publish-images.yml
Commit metadata:
- Date: 2026-03-16 17:27:37 +03:00
- Parent: 2ed2a15a17
- Commit: 35d49d04ad
- 22 changed files with 0 additions and 2293 deletions
@@ -1,93 +0,0 @@
#!/usr/bin/env bash
# Validates the .github/zombienet-flaky-tests file to ensure:
# 1. Each entry has the correct format: <test-name>:<issue-number>
# 2. The referenced number is a GitHub Issue
# 3. The GitHub issue exists
# 4. The issue is OPEN (warns if closed)
#
# Usage: check-flaky.sh [flaky-tests-file]   (default: .github/zombienet-flaky-tests)
# Requires: gh (authenticated) and jq.
set -uo pipefail

FLAKY_TESTS_FILE="${1:-.github/zombienet-flaky-tests}"

if [[ ! -f "$FLAKY_TESTS_FILE" ]]; then
  echo "Error: File not found: $FLAKY_TESTS_FILE" >&2
  exit 1
fi

if ! command -v gh &> /dev/null; then
  echo "Error: gh CLI is not installed" >&2
  exit 1
fi

# jq is used below to pick fields out of gh's JSON; check it up front like gh.
if ! command -v jq &> /dev/null; then
  echo "Error: jq is not installed" >&2
  exit 1
fi

echo "Validating $FLAKY_TESTS_FILE..."
echo

has_errors=false
line_num=0

# `|| [[ -n "$line" ]]` also processes a final line without a trailing newline.
while IFS= read -r line || [[ -n "$line" ]]; do
  line_num=$((line_num + 1))

  # Blank lines still advance the counter so reported numbers match the file.
  if [[ -z "$line" ]]; then
    continue
  fi

  # Parse format: test-name:issue-number
  if [[ ! "$line" =~ ^([^:]+):([0-9]+)$ ]]; then
    echo "❌ Line $line_num: Missing required issue number" >&2
    echo " Entry: '$line'" >&2
    echo " Expected format: <test-name>:<issue-number>" >&2
    echo " Example: zombienet-pezkuwi-test-name:1234" >&2
    has_errors=true
    continue
  fi

  test_name="${BASH_REMATCH[1]}"
  issue_number="${BASH_REMATCH[2]}"

  # Capture the failure with `if !` rather than toggling `set +e`/`set -e`:
  # the old trailing `set -e` enabled errexit that was never on at script
  # start, silently changing error handling for the rest of the run.
  # `< /dev/null` stops gh from consuming the loop's stdin (the input file).
  if ! issue_data=$(gh issue view "$issue_number" --json state,title,url 2>&1 < /dev/null); then
    echo "❌ Line $line_num: Issue #$issue_number does not exist" >&2
    echo " Test: $test_name" >&2
    has_errors=true
    continue
  fi

  url=$(jq -r '.url' <<< "$issue_data")
  state=$(jq -r '.state' <<< "$issue_data")
  title=$(jq -r '.title' <<< "$issue_data")

  # Check if it's an issue (not a PR) by verifying the URL contains '/issues/'
  if [[ ! "$url" =~ /issues/ ]]; then
    echo "❌ Line $line_num: #$issue_number is a Pull Request, not an Issue" >&2
    echo " Test: $test_name" >&2
    echo " URL: $url" >&2
    echo " Please reference a GitHub Issue, not a PR" >&2
    has_errors=true
    continue
  fi

  if [[ "$state" == "OPEN" ]]; then
    echo "✅ Line $line_num: $test_name -> Issue #$issue_number (open)"
  else
    echo "⚠️ Line $line_num: Issue #$issue_number is closed: '$title'" >&2
    echo " Test: $test_name" >&2
    echo " Consider removing this entry if the issue is resolved." >&2
  fi
done < "$FLAKY_TESTS_FILE"

echo
if [[ "$has_errors" == "true" ]]; then
  echo "❌ Validation failed with errors" >&2
  exit 1
else
  echo "✅ All entries are valid"
  exit 0
fi
@@ -1,123 +0,0 @@
#!/bin/bash
# Zombienet Workflow Dispatcher
#
# This script triggers GitHub Actions workflows for zombienet tests and monitors their execution.
# It can run workflows multiple times for reliability testing and optionally filter tests by pattern.
# Results are automatically saved to a timestamped CSV file for analysis.
#
# Features:
# - Trigger workflows on specific branches
# - Filter tests by pattern (useful for debugging specific tests)
# - Run workflows multiple times for flaky test detection
# - Monitor workflow completion and collect results
# - Export results to CSV with job details (ID, name, conclusion, timing, URLs)
#
# Requirements:
# - GitHub CLI (gh) must be installed and authenticated
# - Must be run from pezkuwi-sdk repository root
# - Target branch must have corresponding PR with CI enabled
# Exit on error
# set -e
function dbg {
  # Print a "<timestamp> - <message>" line to stdout.
  # All arguments are joined into one message string.
  # "$*" is the correct scalar join (SC2124: `msg="$@"` is a misuse of "$@");
  # declaration and command substitution are split so a failing `date` would
  # not be masked by `local`'s own exit status (SC2155).
  local msg tstamp
  msg="$*"
  tstamp=$(date "+%Y-%m-%d %T")
  printf "%s - %s\n" "$tstamp" "$msg"
}
function write_job_results_to_csv {
  # Append per-job results of a completed workflow run to a CSV file.
  # Args: $1 run id, $2 branch name, $3 CSV file path.
  # Only jobs whose name starts with 'zombienet-' and whose conclusion is
  # "success" or "failure" are recorded.
  local run_id="$1"
  local branch="$2"
  local csv_file="$3"
  dbg "Writing job results for run $run_id to $csv_file"
  # Pipe gh's JSON through standalone jq so the branch is passed via --arg
  # instead of being spliced into the jq program text (the original produced
  # a broken program if the branch name contained a quote). Requires jq.
  gh run view "$run_id" --json jobs |
    jq -r --arg branch "$branch" \
      '.jobs[] | select(.name | startswith("zombienet-")) |
       select(.conclusion == "success" or .conclusion == "failure") |
       [.databaseId, .name, .conclusion, .startedAt, $branch, .url] | @csv' >> "$csv_file"
}
# Parse command line arguments
WORKFLOW_FILE=""
BRANCH=""
MAX_RESULT_CNT=-1
TEST_PATTERN=""

while getopts "w:b:m:p:h" opt; do
  case $opt in
    w) WORKFLOW_FILE="$OPTARG" ;;
    b) BRANCH="$OPTARG" ;;
    m) MAX_RESULT_CNT="$OPTARG" ;;
    p) TEST_PATTERN="$OPTARG" ;;
    h) echo "Usage: $0 -w <workflow-file> -b <branch> [-m max-triggers] [-p test-pattern]"
       echo " -w: Workflow file (required)"
       echo " -b: Branch name (required)"
       echo " -m: Maximum number of triggers (optional, default: infinite)"
       echo " -p: Test pattern for workflow input (optional)"
       exit 0 ;;
    \?) echo "Invalid option -$OPTARG" >&2
        echo "Use -h for help"
        exit 1 ;;
  esac
done

if [[ -z "$WORKFLOW_FILE" || -z "$BRANCH" ]]; then
  echo "Error: Both workflow file (-w) and branch (-b) are required"
  echo "Usage: $0 -w <workflow-file> -b <branch> [-m max-triggers] [-p test-pattern]"
  echo "Use -h for help"
  exit 1
fi

# Create CSV file with headers
CSV_FILE="workflow_results_$(date +%Y%m%d_%H%M%S).csv"
echo "job_id,job_name,conclusion,started_at,branch,job_url" > "$CSV_FILE"
dbg "Created CSV file: $CSV_FILE"

dbg "Starting loop for workflow: $WORKFLOW_FILE on branch: $BRANCH"
TRIGGER_CNT=0
RESULT_CNT=0

# Trigger/monitor loop: wait for the previously dispatched run to complete,
# record its job results (skipped on the very first pass, when the latest run
# is not ours), then dispatch the workflow again.
while [[ $MAX_RESULT_CNT -eq -1 || $RESULT_CNT -lt $MAX_RESULT_CNT ]]; do
  dbg "Waiting until workflow $WORKFLOW_FILE (branch: $BRANCH) jobs are completed"
  while true ; do
    echo ""
    # Expansions are quoted (SC2086): a workflow or branch name containing
    # whitespace or glob characters would otherwise be word-split/globbed
    # before reaching gh.
    gh run list --workflow="$WORKFLOW_FILE" -e workflow_dispatch -b "$BRANCH" -L 5
    sleep 2
    # if job is completed it should have non-empty conclusion field
    ALL_JOBS_COMPLETED=$(gh run list --workflow="$WORKFLOW_FILE" -e workflow_dispatch -b "$BRANCH" --json conclusion --jq 'all(.[]; .conclusion != "")')
    if [[ "$ALL_JOBS_COMPLETED" == "true" ]]; then
      break
    fi
    sleep 60
  done
  dbg "Workflow $WORKFLOW_FILE (branch: $BRANCH) jobs completed"

  # Skip the first iteration - latest run id is not the one we triggered here
  if [ $TRIGGER_CNT -gt 0 ]; then
    # Get the most recent completed run ID and write job results to CSV
    LATEST_RUN_ID=$(gh run list --workflow="$WORKFLOW_FILE" -e workflow_dispatch -b "$BRANCH" -L 1 --json databaseId --jq '.[0].databaseId')
    write_job_results_to_csv "$LATEST_RUN_ID" "$BRANCH" "$CSV_FILE"
    RESULT_CNT=$(( RESULT_CNT + 1 ))
  fi

  TRIGGER_CNT=$(( TRIGGER_CNT + 1 ))
  dbg "Triggering #$TRIGGER_CNT workflow $WORKFLOW_FILE (branch: $BRANCH)"
  if [[ -n "$TEST_PATTERN" ]]; then
    gh workflow run "$WORKFLOW_FILE" --ref "$BRANCH" -f test_pattern="$TEST_PATTERN"
  else
    gh workflow run "$WORKFLOW_FILE" --ref "$BRANCH"
  fi
  dbg "Sleeping 60s"
  sleep 60
done
-63
View File
@@ -1,63 +0,0 @@
#!/usr/bin/env python3
"""
Zombienet Test Matrix Parser
This script parses YAML test definition files and converts them to JSON format
for use as GitHub Actions matrix jobs. It provides filtering capabilities to:
1. Exclude flaky tests (unless a specific test pattern is provided)
2. Filter tests by name pattern for targeted execution
3. Convert YAML test definitions to JSON matrix format
The script is used by GitHub Actions workflows to dynamically generate
test matrices based on YAML configuration files, enabling flexible
test execution and maintenance.
Usage:
python parse-zombienet-tests.py --matrix tests.yml [--flaky-tests flaky.txt] [--test-pattern pattern]
Output:
JSON array of test job objects suitable for GitHub Actions matrix strategy
"""
import argparse
import yaml
import json
import re
def parse_args():
    """Parse command-line options for the matrix filter.

    Returns an argparse.Namespace with ``matrix`` (required path),
    ``flaky_tests`` (newline-separated names, default "") and
    ``test_pattern`` (regex, default "").
    """
    cli = argparse.ArgumentParser(
        description="Parse test matrix YAML file with optional filtering"
    )
    cli.add_argument("--matrix", required=True, help="Path to the YAML matrix file")
    cli.add_argument(
        "--flaky-tests", default="", help="Newline-separated list of flaky job names"
    )
    cli.add_argument(
        "--test-pattern", default="", help="Regex pattern to match job_name"
    )
    return cli.parse_args()
def load_jobs(matrix_path):
    """Read the YAML matrix file at ``matrix_path`` and return its parsed contents."""
    with open(matrix_path, "r") as handle:
        raw_text = handle.read()
    return yaml.safe_load(raw_text)
def filter_jobs(jobs, flaky_tests, test_pattern):
    """Select which matrix jobs to run.

    When ``test_pattern`` is non-empty, flaky-test exclusion is ignored and
    only jobs whose ``job-name`` matches the pattern are kept. Otherwise all
    jobs except those listed in ``flaky_tests`` (newline-separated) are kept.
    """
    known_flaky = {entry.strip() for entry in flaky_tests.splitlines() if entry.strip()}
    if test_pattern:
        # Targeted run: the pattern overrides the flaky-test exclusion list.
        return [
            job for job in jobs if re.search(test_pattern, job.get("job-name", ""))
        ]
    return [job for job in jobs if job.get("job-name", "") not in known_flaky]
def main():
    """Entry point: parse CLI options, load the matrix, filter it, emit JSON."""
    options = parse_args()
    matrix_jobs = load_jobs(options.matrix)
    selected = filter_jobs(matrix_jobs, options.flaky_tests, options.test_pattern)
    print(json.dumps(selected))


if __name__ == "__main__":
    main()
-214
View File
@@ -1,214 +0,0 @@
#!/bin/bash
set -euo pipefail
# This script processes logs produced by nodes spawned using the zombienet-sdk framework.
# The logs are prepared for upload as GitHub artifacts.
# If Loki logging is available, the corresponding log URLs are also printed.
# NOTE: Loki URL disabled - Pezkuwi does not use external Grafana.
# Zombienet logs are available as GitHub Actions artifacts.
# Empty template: make_url substitutes into this, so printed URLs are empty
# while Loki is disabled.
LOKI_URL_FOR_NODE=''
# When this directory exists, node logs are mirrored into it for a log
# forwarder to pick up.
LOKI_DIR_FOR_NATIVE_LOGS="/tmp/zombienet"
# JQ queries
# Node-name queries for the two zombie.json layouts: legacy v1 vs zombienet-sdk.
JQ_QUERY_RELAY_V1='.relay[].name'
JQ_QUERY_RELAY_SDK='.relay.nodes[].name'
JQ_QUERY_PARA_NODES_V1='.paras[$pid].nodes[].name'
JQ_QUERY_PARA_NODES_SDK='.teyrchains[$pid][] .collators[].name'
# current time in milliseconds + 60 secs to allow loki to ingest logs
# NOTE(review): `date +%s%3N` (millisecond precision) is GNU coreutils only;
# %N is not expanded on BSD/macOS date — confirm runners are Linux.
TO=$(($(date +%s%3N) + 60000))
make_url() {
  # Build a per-node log URL by filling the placeholders in the
  # LOKI_URL_FOR_NODE template.
  # Args: $1 node/pod name, $2 end-of-range timestamp (ms).
  # Reads globals: LOKI_URL_FOR_NODE (template), NS (namespace), FROM (start ts).
  # Prints the filled-in URL on stdout.
  local node_name="$1"
  local range_end="$2"
  local filled="$LOKI_URL_FOR_NODE"
  filled="${filled//\{\{namespace\}\}/$NS}"
  filled="${filled//\{\{podName\}\}/$node_name}"
  filled="${filled//\{\{from\}\}/$FROM}"
  filled="${filled//\{\{to\}\}/$range_end}"
  printf '%s\n' "$filled"
}
# Since we don't have the zombie.json file, we will make the best-effort to send the logs
process_logs_from_fallback() {
  # Best-effort log collection when no zombie.json metadata exists.
  # Args: $1 base dir (/tmp/zombie-*), $2 target dir for collected logs.
  # Side effects: sets the globals NS and FROM (consumed by make_url), copies
  # each discovered node log into the target dir, and mirrors log lines into
  # LOKI_DIR_FOR_NATIVE_LOGS when that directory exists.
  # Returns 1 when no log files are found. NOTE(review): under the script's
  # top-level `set -e`, that return aborts the whole run — confirm intended.
  local BASE_DIR="$1"
  local TARGET_DIR="$2"
  # Extract namespace from BASE_DIR (e.g., /tmp/zombie-abc123 -> zombie-abc123)
  NS=$(basename "$BASE_DIR")
  echo "Using fallback mode for namespace: $NS"
  # Use current time as FROM since we don't have zombie.json
  FROM=$(($(date +%s%3N) - 600000)) # 10 minutes ago
  # Find all logs with glob patterns
  local log_files=()
  # Search for SDK pattern: BASE_DIR/<name>/<name>.log
  if [[ -d "$BASE_DIR" ]]; then
    for node_dir in "$BASE_DIR"/*; do
      # Skip the target dir itself so already-collected logs are not re-added.
      if [[ -d "$node_dir" && "$node_dir" != "$TARGET_DIR" ]]; then
        local node_name=$(basename "$node_dir")
        if [[ -f "$node_dir/$node_name.log" ]]; then
          log_files+=("$node_dir/$node_name.log")
        fi
      fi
    done
  fi
  # Search for v1 pattern: BASE_DIR/logs/<name>.log
  if [[ -d "$TARGET_DIR" ]]; then
    for log_file in "$TARGET_DIR"/*.log; do
      # -f guards the unmatched-glob case (the literal pattern when no *.log exist).
      if [[ -f "$log_file" ]]; then
        log_files+=("$log_file")
      fi
    done
  fi
  if [[ ${#log_files[@]} -eq 0 ]]; then
    echo "::warning ::No log files found in $BASE_DIR using glob patterns"
    return 1
  fi
  echo "Found ${#log_files[@]} log file(s) using glob patterns"
  echo "Nodes:"
  for log_file in "${log_files[@]}"; do
    # Extract node name from log file path
    local name=$(basename "$log_file" .log)
    local_to=$TO
    # Copy log to target directory if not already there
    if [[ "$log_file" != "$TARGET_DIR/$name.log" ]]; then
      if ! cp "$log_file" "$TARGET_DIR/$name.log" 2>/dev/null; then
        echo "::warning ::Failed to copy log for $name"
        continue
      fi
    fi
    # Send logs to loki
    if [[ -d "$LOKI_DIR_FOR_NATIVE_LOGS" ]]; then
      if [[ -f "$TARGET_DIR/$name.log" ]]; then
        # Prefix each line with "namespace node-name" so the forwarder can label it.
        awk -v NS="$NS" -v NAME="$name" '{print NS" "NAME" " $0}' "$TARGET_DIR/$name.log" >> "$LOKI_DIR_FOR_NATIVE_LOGS/to-loki.log"
        # Push the URL's end-of-range forward to cover ingestion delay.
        local_to=$(($(date +%s%3N) + 60000))
      fi
    fi
    echo -e "\t$name: $(make_url "$name" "$local_to")"
  done
  echo ""
}
process_logs_from_zombie_file() {
  # Collect node logs using the zombie.json network description.
  # Args: $1 base dir, $2 target dir for logs, $3 path to zombie.json.
  # Side effects: sets the globals NS and FROM (consumed by make_url);
  # fetches logs via kubectl for the k8s provider, otherwise copies them
  # from the native-provider directory layout; mirrors logs into
  # LOKI_DIR_FOR_NATIVE_LOGS when that directory exists.
  local BASE_DIR="$1"
  local TARGET_DIR="$2"
  local ZOMBIE_JSON="$3"
  # Extract namespace (ns in sdk / namespace in v1)
  NS=$(jq -r '.ns // .namespace' "$ZOMBIE_JSON")
  # test start time in milliseconds
  FROM=$(jq -r '.start_time_ts' "$ZOMBIE_JSON")
  echo "Relay nodes:"
  # Default to the v1 queries; switch to the sdk layout when the namespace
  # matches the sdk naming scheme (zombie-<hex>-...).
  JQ_QUERY_RELAY=$JQ_QUERY_RELAY_V1
  JQ_QUERY_PARA_NODES=$JQ_QUERY_PARA_NODES_V1
  if [[ $(echo "$NS" | grep -E "zombie-[A-Fa-f0-9]+-") ]]; then
    JQ_QUERY_RELAY=$JQ_QUERY_RELAY_SDK
    JQ_QUERY_PARA_NODES=$JQ_QUERY_PARA_NODES_SDK
  fi;
  # NOTE(review): $JQ_QUERY_RELAY is unquoted; this relies on the relay query
  # strings containing no whitespace or glob characters (currently true).
  # NOTE: the `| while read` loops run in pipeline subshells, so variables
  # assigned inside (e.g. local_to) do not persist — each iteration sets them.
  jq -r $JQ_QUERY_RELAY "$ZOMBIE_JSON" | while read -r name; do
    [[ -z "$name" ]] && continue
    local_to=$TO
    if [[ "${ZOMBIE_PROVIDER:-}" == "k8s" ]]; then
      # Fetching logs from k8s
      if ! kubectl logs "$name" -c "$name" -n "$NS" > "$TARGET_DIR/$name.log" 2>&1; then
        echo "::warning ::Failed to fetch logs for $name"
      fi
    else
      # zombienet v1 dump the logs to the `/logs` directory
      if [[ ! -f "$TARGET_DIR/$name.log" ]]; then
        # `sdk` use this pattern to store the logs in native provider
        if [[ -f "$BASE_DIR/$name/$name.log" ]]; then
          cp "$BASE_DIR/$name/$name.log" "$TARGET_DIR/$name.log"
        else
          echo "::warning ::Log file not found: $BASE_DIR/$name/$name.log"
          continue
        fi
      fi
      # send logs to loki
      if [[ -d "$LOKI_DIR_FOR_NATIVE_LOGS" && -f "$TARGET_DIR/$name.log" ]]; then
        # Prefix each line with "namespace node-name" so the forwarder can label it.
        awk -v NS="$NS" -v NAME="$name" '{print NS" "NAME" " $0}' "$TARGET_DIR/$name.log" >> "$LOKI_DIR_FOR_NATIVE_LOGS/to-loki.log"
        local_to=$(($(date +%s%3N) + 60000))
      fi
    fi
    echo -e "\t$name: $(make_url "$name" "$local_to")"
  done
  echo ""
  # Handle teyrchains grouped by paraId
  jq -r '.paras // .teyrchains | to_entries[] | "\(.key)"' "$ZOMBIE_JSON" | while read -r para_id; do
    echo "ParaId: $para_id"
    # Same per-node handling as the relay loop above, scoped to one para id.
    jq -r --arg pid "$para_id" "$JQ_QUERY_PARA_NODES" "$ZOMBIE_JSON" | while read -r name; do
      [[ -z "$name" ]] && continue
      local_to=$TO
      if [[ "${ZOMBIE_PROVIDER:-}" == "k8s" ]]; then
        # Fetching logs from k8s
        if ! kubectl logs "$name" -c "$name" -n "$NS" > "$TARGET_DIR/$name.log" 2>&1; then
          echo "::warning ::Failed to fetch logs for $name"
        fi
      else
        # zombienet v1 dump the logs to the `/logs` directory
        if [[ ! -f "$TARGET_DIR/$name.log" ]]; then
          # `sdk` use this pattern to store the logs in native provider
          if [[ -f "$BASE_DIR/$name/$name.log" ]]; then
            cp "$BASE_DIR/$name/$name.log" "$TARGET_DIR/$name.log"
          else
            echo "::warning ::Log file not found: $BASE_DIR/$name/$name.log"
            continue
          fi
        fi
        # send logs to loki
        if [[ -d "$LOKI_DIR_FOR_NATIVE_LOGS" && -f "$TARGET_DIR/$name.log" ]]; then
          awk -v NS="$NS" -v NAME="$name" '{print NS" "NAME" " $0}' "$TARGET_DIR/$name.log" >> "$LOKI_DIR_FOR_NATIVE_LOGS/to-loki.log"
          local_to=$(($(date +%s%3N) + 60000))
        fi
      fi
      echo -e "\t$name: $(make_url "$name" "$local_to")"
    done
    echo ""
  done
}
# Main execution - Process all zombie-* directories (supports rstest with multiple tests per job)
# mapfile splits on newlines only, so directory paths survive word splitting
# (the old unquoted `for BASE_DIR in $BASE_DIRS` split on any whitespace).
# `ls -dt` keeps the original newest-first ordering; `|| true` keeps the
# process substitution quiet when nothing matches.
mapfile -t BASE_DIRS < <(ls -dt /tmp/zombie-* 2>/dev/null || true)
if [[ ${#BASE_DIRS[@]} -eq 0 ]]; then
  echo "No zombie directories found in /tmp/zombie-*"
  exit 0
fi
for BASE_DIR in "${BASE_DIRS[@]}"; do
  echo "Processing directory: $BASE_DIR"
  # Make sure target directory exists
  TARGET_DIR="$BASE_DIR/logs"
  mkdir -p "$TARGET_DIR"
  ZOMBIE_JSON="$BASE_DIR/zombie.json"
  if [[ ! -f "$ZOMBIE_JSON" ]]; then
    echo "Zombie file $ZOMBIE_JSON not present, calling fallback"
    process_logs_from_fallback "$BASE_DIR" "$TARGET_DIR"
  else
    # we have a zombie.json file, let process it
    echo "Processing logs from zombie.json"
    process_logs_from_zombie_file "$BASE_DIR" "$TARGET_DIR" "$ZOMBIE_JSON"
  fi
  echo ""
done
# sleep for a minute to give alloy time to forward logs
sleep 60
-85
View File
@@ -1,85 +0,0 @@
#!/usr/bin/env bash
# This script executes a given zombienet test for the `native` provider.
# It is equivalent to running run-test-local-env-manager.sh for the `k8s` provider.
function run_test {
  # Locate the requested test file under OUTPUT_DIR and run it with the
  # zombienet `native` provider. Sets the global EXIT_STATUS: the zombienet
  # exit code, or 1 when the test file is not found.
  # Reads globals: OUTPUT_DIR, TEST_TO_RUN, ZOMBIE_COMMAND, CONCURRENCY,
  # ZOMBIE_BASE_DIR.
  cd "${OUTPUT_DIR}"
  # Explicit init: the original left TEST_FOUND unset, so a value inherited
  # from the environment could mask a missing test file.
  TEST_FOUND=0
  local test_file
  test_file=$(find "${OUTPUT_DIR}" -name "${TEST_TO_RUN}" | head -1)
  if [[ -n "$test_file" ]]; then
    TEST_FOUND=1
    # in order to let native provider work properly we need
    # to unset ZOMBIENET_IMAGE, which controls 'inCI' internal flag.
    # ZOMBIENET_IMAGE not set && RUN_IN_CONTAINER=0 => inCI=false
    # Apparently inCI=true works properly only with k8s provider
    unset ZOMBIENET_IMAGE
    # ZOMBIE_COMMAND is deliberately unquoted: it may expand to a command
    # plus arguments.
    if [ -z "$ZOMBIE_BASE_DIR" ]; then
      ${ZOMBIE_COMMAND} -p native -c "$CONCURRENCY" test "$test_file"
    else
      ${ZOMBIE_COMMAND} -p native -c "$CONCURRENCY" -d "$ZOMBIE_BASE_DIR" -f test "$test_file"
    fi
    EXIT_STATUS=$?
  fi
  if [[ $TEST_FOUND -lt 1 ]]; then
    EXIT_STATUS=1
  fi
}
function create_isolated_dir {
  # Create a unique scratch directory under OUTPUT_DIR and repoint the
  # OUTPUT_DIR global at it. mktemp avoids the collision that the original
  # epoch-seconds name hit when two runs started within the same second,
  # and fails cleanly instead of leaving OUTPUT_DIR pointing at a
  # non-existent path.
  ISOLATED=$(mktemp -d "${OUTPUT_DIR}/XXXXXXXX") || return 1
  OUTPUT_DIR="${ISOLATED}"
}
function copy_to_isolated {
  # Mirror the test definitions from LOCAL_DIR into the isolated work dir.
  # Reads globals: SCRIPT_PATH (cd target), LOCAL_DIR (source), OUTPUT_DIR
  # (destination). Prints the working directory for the trace log.
  cd "${SCRIPT_PATH}"
  pwd
  cp -r "${LOCAL_DIR}"/* "${OUTPUT_DIR}"
}
function rm_isolated_dir {
  # Delete the isolated work directory.
  # ${OUTPUT_DIR:?} aborts instead of running `rm -rf` against an empty path
  # if the variable is somehow unset/empty; `--` guards against a path that
  # begins with a dash.
  echo "Removing ${OUTPUT_DIR}"
  rm -rf -- "${OUTPUT_DIR:?}"
}
function log {
  # Print a timestamped "LEVEL - message" line to stdout.
  # Args: $1 level, $2 message. Level DIE is logged as ERROR and terminates
  # the script with exit status 1.
  # Fix: lg_date is now declared local too — the original leaked it into the
  # global scope.
  local lvl msg fmt lg_date
  lvl=$1 msg=$2
  fmt='+%Y-%m-%d %H:%M:%S'
  lg_date=$(date "${fmt}")
  if [[ "${lvl}" = "DIE" ]] ; then
    lvl="ERROR"
    echo -e "\n${lg_date} - ${lvl} - ${msg}"
    exit 1
  else
    echo -e "\n${lg_date} - ${lvl} - ${msg}"
  fi
}
# Trace every command: this is CI tooling and the xtrace output is the log.
set -x

SCRIPT_NAME="$0"
SCRIPT_PATH=$(dirname "$0") # relative
SCRIPT_PATH=$(cd "${SCRIPT_PATH}" && pwd) # absolutized and normalized
ZOMBIE_COMMAND=zombie
EXIT_STATUS=0

# args
LOCAL_DIR="$1"
CONCURRENCY="$2"
TEST_TO_RUN="$3"
ZOMBIE_BASE_DIR="$4"

# Fail fast on missing required args: an empty LOCAL_DIR would make
# copy_to_isolated expand "${LOCAL_DIR}"/* to /* and copy the filesystem
# root into the work dir.
if [[ -z "${LOCAL_DIR}" || -z "${CONCURRENCY}" || -z "${TEST_TO_RUN}" ]]; then
  echo "Usage: ${SCRIPT_NAME} <local-dir> <concurrency> <test-to-run> [zombie-base-dir]" >&2
  exit 1
fi

cd "${SCRIPT_PATH}"
OUTPUT_DIR="${SCRIPT_PATH}"

create_isolated_dir
copy_to_isolated
run_test
rm_isolated_dir

log INFO "Exit status is ${EXIT_STATUS}"
exit "${EXIT_STATUS}"