Skip to content

Commit 5c7fd6d

Browse files
committed
Add retry logic with exponential backoff to zombienet SDK tests
1 parent 23cac32 commit 5c7fd6d

File tree

3 files changed

+42
-7
lines changed

3 files changed

+42
-7
lines changed

.github/actions/zombienet-sdk/action.yml

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,14 @@ inputs:
1919
gh-token:
2020
description: "GITHUB_TOKEN to use for downloading artifacts"
2121
required: true
22+
max-retries:
23+
description: "Maximum number of test attempts, including the first run (1 = no retries)"
24+
required: false
25+
default: "3"
26+
initial-delay-seconds:
27+
description: "Initial delay in seconds before first retry (doubles with each attempt)"
28+
required: false
29+
default: "10"
2230

2331

2432

@@ -81,10 +89,11 @@ runs:
8189
- name: zombie_test
8290
shell: bash
8391
env:
84-
# don't retry sdk tests
8592
NEXTEST_RETRIES: 0
8693
TEST_FILTER: ${{ inputs.test-filter }}
8794
PREFIX: ${{ inputs.prefix }}
95+
MAX_RETRIES: ${{ inputs.max-retries }}
96+
INITIAL_DELAY: ${{ inputs.initial-delay-seconds }}
8897
run: |
8998
# RUN_IN_CI=1 shall be set only for k8s provider
9099
if [[ "$ZOMBIE_PROVIDER" == "native" ]]; then
@@ -101,7 +110,33 @@ runs:
101110
fi
102111
103112
ls -ltr ./artifacts
104-
# We want to run tests sequentially, '--no-capture' ensures that.
105-
# If we want to get rid of '--no-capture' some day, please use '--test-threads 1' or NEXTEST_TEST_THREADS=1
106-
# Both options cannot coexist for cargo-nextest below v0.9.94
107-
cargo nextest run --archive-file ./artifacts/${PREFIX}-zombienet-tests.tar.zst --no-capture -- ${TEST_FILTER}
113+
114+
# Retry logic with exponential backoff.
#
# Environment:
#   MAX_RETRIES   - total number of attempts, including the first one
#                   (1 = no retries)
#   INITIAL_DELAY - seconds to wait before the first retry; the wait is
#                   doubled after every failed attempt
#   PREFIX        - name prefix of the nextest archive under ./artifacts
#   TEST_FILTER   - filter forwarded to the test binary after '--'
#
# Exits 0 as soon as one attempt passes. Exits 1 once every attempt has
# failed, or immediately when the retry settings are invalid.

# Reject bad settings up front. With MAX_RETRIES set to 0, an empty string or
# a non-number the attempt loop would never execute and the step would finish
# without running a single test.
if ! [[ "${MAX_RETRIES:-}" =~ ^[0-9]+$ ]] || ((10#$MAX_RETRIES < 1)); then
  echo "::error::max-retries must be a positive integer, got '${MAX_RETRIES:-}'"
  exit 1
fi
if ! [[ "${INITIAL_DELAY:-}" =~ ^[0-9]+$ ]]; then
  echo "::error::initial-delay-seconds must be a non-negative integer, got '${INITIAL_DELAY:-}'"
  exit 1
fi

# Force base 10 so values with leading zeros (e.g. "08") are not parsed as octal.
max_attempts=$((10#$MAX_RETRIES))
delay=$((10#$INITIAL_DELAY))

for ((attempt = 1; attempt <= max_attempts; attempt++)); do
  echo "::group::Test attempt $attempt of $max_attempts"
  echo "Test filter: ${TEST_FILTER}"

  # We want to run tests sequentially, '--no-capture' ensures that.
  # If we want to get rid of '--no-capture' some day, please use '--test-threads 1' or NEXTEST_TEST_THREADS=1
  # Both options cannot coexist for cargo-nextest below v0.9.94
  #
  # TEST_FILTER is left unquoted exactly as before; presumably it may hold
  # several space-separated filters (verify against callers before quoting).
  exit_code=0
  # shellcheck disable=SC2086
  cargo nextest run --archive-file "./artifacts/${PREFIX}-zombienet-tests.tar.zst" --no-capture -- ${TEST_FILTER} || exit_code=$?

  if ((exit_code == 0)); then
    echo "✅ Test passed on attempt $attempt"
    echo "::endgroup::"
    exit 0
  fi

  echo "❌ Attempt $attempt failed with exit code $exit_code"
  echo "::endgroup::"

  if ((attempt < max_attempts)); then
    echo "⏳ Retrying in ${delay}s (exponential backoff)..."
    sleep "$delay"
    # Double the delay for next attempt (exponential backoff)
    delay=$((delay * 2))
  else
    echo "::error::Test '${TEST_FILTER}' failed after $max_attempts attempts"
    exit 1
  fi
done

.github/scripts/process-logs-zombienet.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,4 +210,4 @@ for BASE_DIR in $BASE_DIRS; do
210210
done
211211

212212
# sleep for a few minutes to give alloy time to forward logs
213-
sleep 60
213+
sleep 240 # 4 minutes

substrate/zombienet/zombienet-sdk/tests/zombie_ci/block_building_warp_sync.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ const ROLE_TIMEOUT_SECS: u64 = 60;
1515
const PEER_TIMEOUT_SECS: u64 = 60;
1616
const BOOTSTRAP_TIMEOUT_SECS: u64 = 180;
1717
const METRIC_TIMEOUT_SECS: u64 = 60;
18-
const NEW_BLOCK_TIMEOUT_SECS: u64 = 120;
18+
const NEW_BLOCK_TIMEOUT_SECS: u64 = 10;
1919
const LOG_ERROR_TIMEOUT_SECS: u64 = 10;
2020
const BEEFY_SYNC_TIMEOUT_SECS: u64 = 180;
2121
const BEEFY_PROGRESS_TIMEOUT_SECS: u64 = 60;

0 commit comments

Comments
 (0)