Skip to content

Commit 50263a5

Browse files
authored
Merge pull request #3039 from IntersectMBO/better_bash_err_handling
fix(ci): improve error handling and reporting in scripts
2 parents 640d313 + c6155f0 commit 50263a5

File tree

11 files changed

+78 -70 lines changed

.github/grep_errors.sh

100644 → 100755
Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,7 @@
11
#!/usr/bin/env bash
22

33
ARTIFACTS_DIR="${ARTIFACTS_DIR:-".artifacts"}"
4-
ERR_LOGFILE="$PWD/errors_all.log"
4+
ERR_LOGFILE="${PWD}/errors_all.log"
55

6-
# shellcheck disable=SC2012
7-
pushd "$ARTIFACTS_DIR" > /dev/null || { echo "Cannot switch to $ARTIFACTS_DIR"; ls -1a "$ARTIFACTS_DIR"; exit 1; } > "$ERR_LOGFILE"
8-
grep -r --include "*.stdout" --include "*.stderr" -Ei ":error:|failed|failure" . > "$ERR_LOGFILE"
9-
popd > /dev/null || exit 1
6+
cd "$ARTIFACTS_DIR" || { echo "Cannot switch to $ARTIFACTS_DIR" >&2; exit 1; }
7+
grep -r --include "*.stdout" --include "*.stderr" -Ei ":error:|failed|failure" . > "$ERR_LOGFILE" || :

.github/node_upgrade.sh

Lines changed: 22 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,8 @@
66
# BASE_REVISION - revision of cardano-node to upgrade from (alternative to BASE_TAR_URL)
77
# UPGRADE_REVISION - revision of cardano-node to upgrade to
88

9-
set -euo pipefail
9+
set -Eeuo pipefail
10+
trap 'echo "Error at line $LINENO"' ERR
1011

1112
if [[ -z "${BASE_TAR_URL:-""}" && -z "${BASE_REVISION:-""}" ]]; then
1213
echo "BASE_TAR_URL or BASE_REVISION must be set"
@@ -16,6 +17,8 @@ fi
1617
nix --version
1718
df -h .
1819

20+
retval=0
21+
1922
REPODIR="$(readlink -m "${0%/*}/..")"
2023
export REPODIR
2124
cd "$REPODIR"
@@ -27,13 +30,15 @@ export WORKDIR="$REPODIR/run_workdir"
2730

2831
_cleanup() {
2932
# stop all running cluster instances
30-
stop_instances "$WORKDIR"
33+
stop_instances "$WORKDIR" || :
3134
}
3235

3336
_cleanup
3437

3538
# cleanup on Ctrl+C
36-
trap 'set +e; _cleanup; exit 130' SIGINT
39+
trap '_cleanup; exit 130' SIGINT
40+
# cleanup on error
41+
trap 'echo "Error at line $LINENO"; _cleanup' ERR
3742

3843
# create clean workdir
3944
rm -rf "${WORKDIR:?}"
@@ -78,11 +83,11 @@ else
7883
NODE_OVERRIDE=$(node_override)
7984
fi
8085

81-
set +e
8286
# shellcheck disable=SC2086
8387
nix flake update --accept-flake-config $NODE_OVERRIDE
8488
# shellcheck disable=SC2016
8589
nix develop --accept-flake-config .#venv --command bash -c '
90+
set -euo pipefail
8691
: > "$WORKDIR/.nix_step1"
8792
echo "::endgroup::" # end group for "Nix env setup step1"
8893

@@ -95,9 +100,10 @@ nix develop --accept-flake-config .#venv --command bash -c '
95100
printf "start: %(%H:%M:%S)T\n" -1
96101
df -h .
97102
# prepare scripts for starting cluster instance, start cluster instance, run smoke tests
98-
./.github/node_upgrade_pytest.sh step1
99-
'
100-
retval="$?"
103+
retval=0
104+
./.github/node_upgrade_pytest.sh step1 || retval="$?"
105+
exit "$retval"
106+
' || retval="$?"
101107
102108
if [ ! -e "$WORKDIR/.nix_step1" ]; then
103109
echo "Nix env setup failed, exiting"
@@ -123,6 +129,7 @@ fi
123129
nix flake update --accept-flake-config $NODE_OVERRIDE
124130
# shellcheck disable=SC2016
125131
nix develop --accept-flake-config .#venv --command bash -c '
132+
set -euo pipefail
126133
: > "$WORKDIR/.nix_step2"
127134
echo "::endgroup::" # end group for "Nix env setup steps 2 & 3"
128135

@@ -135,8 +142,8 @@ nix develop --accept-flake-config .#venv --command bash -c '
135142
printf "start: %(%H:%M:%S)T\n" -1
136143
df -h .
137144
# update cluster nodes, run smoke tests
138-
./.github/node_upgrade_pytest.sh step2
139-
retval="$?"
145+
retval=0
146+
./.github/node_upgrade_pytest.sh step2 || retval="$?"
140147
# retval 0 == all tests passed; 1 == some tests failed; > 1 == some runtime error and we dont want to continue
141148
[ "$retval" -le 1 ] || exit "$retval"
142149
echo "::endgroup::" # end group for "Testrun Step2"
@@ -145,35 +152,32 @@ nix develop --accept-flake-config .#venv --command bash -c '
145152
printf "start: %(%H:%M:%S)T\n" -1
146153
df -h .
147154
# update to Conway, run smoke tests
148-
./.github/node_upgrade_pytest.sh step3
149-
retval="$?"
155+
retval=0
156+
./.github/node_upgrade_pytest.sh step3 || retval="$?"
150157
df -h .
151158
echo "::endgroup::" # end group for "Testrun Step3"
152159

153160
echo "::group::Teardown cluster & collect artifacts"
154161
printf "start: %(%H:%M:%S)T\n" -1
155162
# teardown cluster
156-
./.github/node_upgrade_pytest.sh finish
163+
./.github/node_upgrade_pytest.sh finish || :
157164
exit $retval
158-
'
159-
retval="$?"
165+
' || retval="$?"
160166
161167
if [ ! -e "$WORKDIR/.nix_step2" ]; then
162168
echo "Nix env setup failed, exiting"
163169
exit 1
164170
fi
165171
166172
# grep testing artifacts for errors
167-
# shellcheck disable=SC1090,SC1091
168-
. .github/grep_errors.sh
173+
./.github/grep_errors.sh
169174
170175
_cleanup
171176
172177
# prepare artifacts for upload in Github Actions
173178
if [ -n "${GITHUB_ACTIONS:-""}" ]; then
174179
# save testing artifacts
175-
# shellcheck disable=SC1090,SC1091
176-
. .github/save_artifacts.sh
180+
./.github/save_artifacts.sh
177181
178182
# compress scheduling log
179183
xz "$SCHEDULING_LOG"

.github/node_upgrade_pytest.sh

Lines changed: 16 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
11
#!/usr/bin/env bash
22

3-
set -uo pipefail
3+
set -Eeuo pipefail
4+
trap 'echo "Error at line $LINENO"' ERR
45

5-
retval=1
6+
retval=0
67

78
export CARDANO_NODE_SOCKET_PATH="$CARDANO_NODE_SOCKET_PATH_CI"
89

@@ -77,6 +78,7 @@ if [ "$1" = "step1" ]; then
7778

7879
# run smoke tests
7980
printf "STEP1 tests: %(%H:%M:%S)T\n" -1
81+
retval=0
8082
pytest \
8183
cardano_node_tests \
8284
-n "$TEST_THREADS" \
@@ -85,8 +87,8 @@ if [ "$1" = "step1" ]; then
8587
--cli-coverage-dir="$COVERAGE_DIR" \
8688
--alluredir="$REPORTS_DIR" \
8789
--html=testrun-report-step1.html \
88-
--self-contained-html
89-
retval="$?"
90+
--self-contained-html \
91+
|| retval="$?"
9092

9193
# stop local cluster if tests failed unexpectedly
9294
[ "$retval" -le 1 ] || "$CLUSTER_SCRIPTS_DIR/stop-cluster"
@@ -232,14 +234,15 @@ elif [ "$1" = "step2" ]; then
232234
[ "$sync_progress" = "100.00" ] || { echo "Failed to sync node" >&2; exit 6; } # assert
233235

234236
# Test for ignoring expected errors in log files. Run separately to make sure it runs first.
235-
pytest cardano_node_tests/tests/test_node_upgrade.py -k test_ignore_log_errors
236-
err_retval="$?"
237+
err_retval=0
238+
pytest cardano_node_tests/tests/test_node_upgrade.py -k test_ignore_log_errors || err_retval="$?"
237239

238240
# Update Plutus cost models.
239241
pytest cardano_node_tests/tests/test_node_upgrade.py -k test_update_cost_models || exit 6
240242

241243
# run smoke tests
242244
printf "STEP2 tests: %(%H:%M:%S)T\n" -1
245+
retval=0
243246
pytest \
244247
cardano_node_tests \
245248
-n "$TEST_THREADS" \
@@ -248,8 +251,8 @@ elif [ "$1" = "step2" ]; then
248251
--cli-coverage-dir="$COVERAGE_DIR" \
249252
--alluredir="$REPORTS_DIR" \
250253
--html=testrun-report-step2.html \
251-
--self-contained-html
252-
retval="$?"
254+
--self-contained-html \
255+
||retval="$?"
253256

254257
# stop local cluster if tests failed unexpectedly
255258
[ "$retval" -le 1 ] || "$CLUSTER_SCRIPTS_DIR/stop-cluster"
@@ -350,14 +353,15 @@ elif [ "$1" = "step3" ]; then
350353
[ "$sync_progress" = "100.00" ] || { echo "Failed to sync node" >&2; exit 6; } # assert
351354

352355
# Test for ignoring expected errors in log files. Run separately to make sure it runs first.
353-
pytest cardano_node_tests/tests/test_node_upgrade.py -k test_ignore_log_errors
354-
err_retval="$?"
356+
err_retval=0
357+
pytest cardano_node_tests/tests/test_node_upgrade.py -k test_ignore_log_errors || err_retval="$?"
355358

356359
# Hard fork to PV10.
357360
pytest cardano_node_tests/tests/test_node_upgrade.py -k test_hardfork || exit 6
358361

359362
# Run smoke tests
360363
printf "STEP3 tests: %(%H:%M:%S)T\n" -1
364+
retval=0
361365
pytest \
362366
cardano_node_tests \
363367
-n "$TEST_THREADS" \
@@ -366,8 +370,8 @@ elif [ "$1" = "step3" ]; then
366370
--cli-coverage-dir="$COVERAGE_DIR" \
367371
--alluredir="$REPORTS_DIR" \
368372
--html=testrun-report-step3.html \
369-
--self-contained-html
370-
retval="$?"
373+
--self-contained-html \
374+
||retval="$?"
371375

372376
# create results archive for step3
373377
./.github/results.sh .

.github/regression.sh

Lines changed: 16 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,14 @@
11
#! /usr/bin/env -S nix develop --accept-flake-config .#base -c bash
22
# shellcheck shell=bash disable=SC2317
33

4-
set -euo pipefail
4+
set -Eeuo pipefail
5+
trap 'echo "Error at line $LINENO"' ERR
56

67
nix --version
78
df -h .
89

10+
retval=0
11+
912
DEFAULT_CLUSTER_ERA="conway"
1013

1114
REPODIR="$(readlink -m "${0%/*}/..")"
@@ -177,14 +180,15 @@ _cleanup_testnet_on_interrupt() {
177180
echo "::endgroup::"
178181
}
179182
180-
# cleanup on Ctrl+C
183+
# cleanup on Ctrl+C or error
181184
_interrupted() {
182185
# Do testnet cleanup only on interrupted testrun. When not interrupted,
183186
# cleanup is done as part of a testrun.
184-
_cleanup_testnet_on_interrupt
187+
_cleanup_testnet_on_interrupt || :
185188
_cleanup
186189
}
187-
trap 'set +e; _interrupted; exit 130' SIGINT
190+
trap '_interrupted; exit 130' SIGINT
191+
trap 'echo "Error at line $LINENO"; _interrupted' ERR
188192
189193
echo "::endgroup::" # end group for "Script setup"
190194
@@ -202,12 +206,13 @@ if [ "$(echo "$PWD"/.bin/*)" != "${PWD}/.bin/*" ]; then
202206
echo
203207
fi
204208
205-
# run tests and generate report
206-
set +e
209+
# Run tests and generate report
210+
207211
# shellcheck disable=SC2046,SC2119
208212
nix flake update --accept-flake-config $(node_override)
209213
# shellcheck disable=SC2016
210214
nix develop --accept-flake-config .#venv --command bash -c '
215+
set -euo pipefail
211216
echo "::endgroup::" # end group for "Nix env setup"
212217

213218
echo "::group::Python venv setup"
@@ -220,8 +225,8 @@ nix develop --accept-flake-config .#venv --command bash -c '
220225
df -h .
221226
export PATH="$PATH_APPEND":"$PATH"
222227
export CARDANO_NODE_SOCKET_PATH="$CARDANO_NODE_SOCKET_PATH_CI"
223-
make "${MAKE_TARGET:-"tests"}"
224-
retval="$?"
228+
retval=0
229+
make "${MAKE_TARGET:-"tests"}" || retval="$?"
225230
df -h .
226231
echo "::endgroup::" # end group for "Testrun"
227232

@@ -230,12 +235,10 @@ nix develop --accept-flake-config .#venv --command bash -c '
230235
./.github/cli_coverage.sh
231236
./.github/reqs_coverage.sh
232237
exit "$retval"
233-
'
234-
retval="$?"
238+
' || retval="$?"
235239
236240
# grep testing artifacts for errors
237-
# shellcheck disable=SC1090,SC1091
238-
. .github/grep_errors.sh
241+
./.github/grep_errors.sh
239242
240243
# Don't stop cluster instances just yet if KEEP_CLUSTERS_RUNNING is set to 1.
241244
# After any key is pressed, resume this script and stop all running cluster instances.
@@ -260,8 +263,7 @@ if [ -n "${GITHUB_ACTIONS:-""}" ]; then
260263
./.github/results.sh
261264

262265
# save testing artifacts
263-
# shellcheck disable=SC1090,SC1091
264-
. .github/save_artifacts.sh
266+
./.github/save_artifacts.sh
265267

266268
# compress scheduling log
267269
xz "$SCHEDULING_LOG"

.github/save_artifacts.sh

100644 → 100755
Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,9 @@
11
#!/usr/bin/env bash
22

3+
set -euo pipefail
4+
35
ARTIFACTS_DIR="${ARTIFACTS_DIR:-".artifacts"}"
4-
ARTIFACTS_TAR="$PWD/testing_artifacts.tar.xz"
6+
ARTIFACTS_TAR="${PWD}/testing_artifacts.tar.xz"
57

68
NEW_DIR="artifacts_$(date +%Y%m%d%H%M%S)"
79
mv "$ARTIFACTS_DIR" "$NEW_DIR" || { echo "Cannot move $ARTIFACTS_DIR to $NEW_DIR"; ls -1a; exit 1; }

.github/setup_venv.sh

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
#!/bin/bash
22

3+
set -euo pipefail
4+
35
_VENV_DIR="${_VENV_DIR:-"$WORKDIR/.venv"}"
46

57
if [ "${1:-""}" = "clean" ]; then

cardano_node_tests/cluster_scripts/conway/start-cluster

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,8 @@
1010
# PV9 - if set, will use protocol version 9
1111
# DRY_RUN - if set, will not start the cluster
1212

13-
set -euo pipefail
14-
sets="$-"
13+
set -Eeuo pipefail
14+
trap 'echo "Error at line $LINENO"' ERR
1515

1616
SCRIPT_DIR="$(readlink -m "${0%/*}")"
1717
SOCKET_PATH="$(readlink -m "$CARDANO_NODE_SOCKET_PATH")"
@@ -64,10 +64,8 @@ cardano_cli_log() {
6464
echo cardano-cli "$@" >> "$START_CLUSTER_LOG"
6565

6666
for _ in {1..3}; do
67-
set +e
68-
out="$(cardano-cli "$@" 2>&1)"
69-
retval="$?"
70-
set -"$sets"
67+
retval=0
68+
{ out="$(cardano-cli "$@" 2>&1)"; } || retval="$?"
7169

7270
case "$out" in
7371
*"resource vanished"*)

cardano_node_tests/cluster_scripts/conway_fast/start-cluster

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,8 @@
1010
# PV9 - if set, will use protocol version 9
1111
# DRY_RUN - if set, will not start the cluster
1212

13-
set -euo pipefail
14-
sets="$-"
13+
set -Eeuo pipefail
14+
trap 'echo "Error at line $LINENO"' ERR
1515

1616
SCRIPT_DIR="$(readlink -m "${0%/*}")"
1717
SOCKET_PATH="$(readlink -m "$CARDANO_NODE_SOCKET_PATH")"
@@ -69,10 +69,8 @@ cardano_cli_log() {
6969
echo cardano-cli "$@" >> "$START_CLUSTER_LOG"
7070

7171
for _ in {1..3}; do
72-
set +e
73-
out="$(cardano-cli "$@" 2>&1)"
74-
retval="$?"
75-
set -"$sets"
72+
retval=0
73+
{ out="$(cardano-cli "$@" 2>&1)"; } || retval="$?"
7674

7775
case "$out" in
7876
*"resource vanished"*)

0 commit comments

Comments
 (0)