Skip to content

Commit e78c64a

Browse files
committed
Retry timeout-only Stage A parity blocks once in CI
1 parent b794585 commit e78c64a

File tree

2 files changed

+87
-19
lines changed

2 files changed

+87
-19
lines changed

.github/workflows/matlab-parity-gate.yml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,8 @@ jobs:
2424
TMPDIR: /Users/iahncajigas/actions-runner/_work/_temp
2525
NSTAT_MATLAB_EXTRA_ARGS: -maca64 -nodisplay -noFigureWindows -softwareopengl
2626
NSTAT_FORCE_M_HELP_SCRIPTS: "1"
27+
NSTAT_PARITY_RETRY_TIMEOUT_BLOCKS: "1"
28+
NSTAT_PARITY_TIMEOUT_RETRY_BLOCKS: timeout_front
2729
steps:
2830
- name: Prepare runner directories
2931
run: |

python/tools/run_parity_ladder.sh

Lines changed: 85 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,11 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
55
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
66
PYTHON_BIN="${PYTHON_BIN:-python3}"
77
MATLAB_EXTRA_ARGS="${NSTAT_MATLAB_EXTRA_ARGS:--maca64 -nodisplay -noFigureWindows -softwareopengl}"
8+
MATLAB_BIN="${NSTAT_MATLAB_BIN:-/Applications/MATLAB_R2025b.app/bin/matlab}"
89
SET_ACTIONS_RUNNER_SVC="${NSTAT_SET_ACTIONS_RUNNER_SVC:-1}"
910
RUNTIME_MULTIPLIER="${NSTAT_PARITY_RUNTIME_MULTIPLIER:-2.5}"
11+
RETRY_TIMEOUT_BLOCKS="${NSTAT_PARITY_RETRY_TIMEOUT_BLOCKS:-0}"
12+
TIMEOUT_RETRY_BLOCKS="${NSTAT_PARITY_TIMEOUT_RETRY_BLOCKS:-timeout_front}"
1013

1114
DEFAULT_BLOCKS=(core_smoke timeout_front graphics_mid heavy_tail full_suite)
1215
if [[ $# -gt 0 ]]; then
@@ -26,13 +29,58 @@ baseline_runtime_sum_s() {
2629
esac
2730
}
2831

32+
#######################################
# Report whether a timeout-only retry is allowed for the given block.
# Globals:
#   RETRY_TIMEOUT_BLOCKS  (read) retries are enabled only when this is "1"
#   TIMEOUT_RETRY_BLOCKS  (read) comma- and/or whitespace-separated block names
# Arguments:
#   $1 - block name to look up
# Returns:
#   0 if retries are enabled and the block is listed, 1 otherwise
#######################################
block_retry_enabled() {
  local block="$1"
  [[ "${RETRY_TIMEOUT_BLOCKS}" == "1" ]] || return 1

  # Split the list into an array with an explicit IFS instead of relying on
  # unquoted word splitting: this keeps tokens such as "core_*" from being
  # glob-expanded against the working directory and makes the split
  # independent of whatever IFS the caller has set.
  # `read -d ''` consumes the whole value (including newlines) and returns
  # non-zero at end of input, hence the `|| true`.
  local -a tokens=()
  IFS=$' \t\n' read -r -d '' -a tokens <<<"${TIMEOUT_RETRY_BLOCKS//,/ }" || true

  local token
  # The ${arr[@]+...} form keeps an empty list safe under `set -u` on old Bash.
  for token in ${tokens[@]+"${tokens[@]}"}; do
    [[ "${token}" == "${block}" ]] && return 0
  done
  return 1
}
41+
42+
#######################################
# Check whether a parity report shows a timeout-only regression, i.e. every
# row under helpfile_similarity.rows has a falsy "matlab_ok" and a
# "matlab_error" equal to "matlab_timeout".
# Globals:
#   PYTHON_BIN (read) interpreter used to run the embedded check
# Arguments:
#   $1 - path to the JSON report
# Outputs:
#   On a match, one "[ladder] timeout-only regression detected ..." line on stdout
# Returns:
#   0 on a match; non-zero when the file is missing, has no rows, has any
#   successful row, or has any failure that is not a MATLAB timeout
#######################################
is_timeout_only_regression() {
  local report_json="$1"
  "${PYTHON_BIN}" - "${report_json}" <<'PY'
import json
import sys
from pathlib import Path

report = Path(sys.argv[1])
if not report.exists():
    sys.exit(1)

data = json.loads(report.read_text(encoding="utf-8"))
rows = data.get("helpfile_similarity", {}).get("rows", [])
if not rows:
    sys.exit(1)

# Rows whose MATLAB side did not succeed.
failed = [row for row in rows if not bool(row.get("matlab_ok"))]
if len(failed) != len(rows):
    # rows is non-empty here, so this also covers "nothing failed".
    sys.exit(1)

for row in failed:
    if str(row.get("matlab_error", "")).strip() != "matlab_timeout":
        sys.exit(1)

topics = [str(row.get("topic", "")) for row in failed]
print(f"[ladder] timeout-only regression detected across {len(topics)} topic(s): {topics}")
sys.exit(0)
PY
}
66+
67+
#######################################
# Best-effort MATLAB warmup run before a block is retried.
# Launches MATLAB once with `-batch "disp(version); exit"`, discarding its
# output and ignoring its exit status, so a warmup failure never aborts
# the ladder.
# Globals:
#   MATLAB_BIN         (read) path to the MATLAB executable
#   MATLAB_EXTRA_ARGS  (read) whitespace-separated extra launcher flags
# Outputs:
#   A "[ladder] ..." status line on stdout
# Returns:
#   0 always (the warmup is skipped if MATLAB_BIN is not executable)
#######################################
warmup_matlab() {
  if [[ ! -x "${MATLAB_BIN}" ]]; then
    echo "[ladder] matlab warmup skipped; binary not executable: ${MATLAB_BIN}"
    return 0
  fi
  echo "[ladder] running matlab warmup before retry"

  # Split the flag string into an array with an explicit IFS rather than via
  # an unquoted expansion: no accidental globbing, and the split does not
  # depend on the caller's IFS. `read -d ''` returns non-zero at end of
  # input, hence the `|| true`.
  local -a extra_args=()
  IFS=$' \t\n' read -r -d '' -a extra_args <<<"${MATLAB_EXTRA_ARGS}" || true

  # Deliberately best-effort: output is discarded and failures are ignored.
  # The ${arr[@]+...} form keeps an empty flag list safe under `set -u`.
  "${MATLAB_BIN}" ${extra_args[@]+"${extra_args[@]}"} \
    -batch "disp(version); exit" >/dev/null 2>&1 || true
}
75+
2976
cd "${REPO_ROOT}"
3077

3178
echo "[ladder] repo: ${REPO_ROOT}"
3279
echo "[ladder] python: ${PYTHON_BIN}"
3380
echo "[ladder] matlab args: ${MATLAB_EXTRA_ARGS}"
3481
echo "[ladder] blocks: ${BLOCKS[*]}"
3582
echo "[ladder] runtime multiplier: ${RUNTIME_MULTIPLIER} (<=0 disables runtime regression checks)"
83+
echo "[ladder] retry timeout-only blocks: ${RETRY_TIMEOUT_BLOCKS} (blocks: ${TIMEOUT_RETRY_BLOCKS})"
3684

3785
for block in "${BLOCKS[@]}"; do
3886
if ! baseline_s="$(baseline_runtime_sum_s "${block}")"; then
@@ -41,27 +89,32 @@ for block in "${BLOCKS[@]}"; do
4189
fi
4290

4391
echo "[ladder] running block: ${block}"
44-
45-
cmd=(
46-
"${PYTHON_BIN}"
47-
"${REPO_ROOT}/python/tools/debug_parity_blocks.py"
48-
--blocks "${block}"
49-
--matlab-extra-args "${MATLAB_EXTRA_ARGS}"
50-
--output "python/reports/parity_block_benchmark_report_ladder_${block}.json"
51-
)
52-
if [[ "${SET_ACTIONS_RUNNER_SVC}" == "1" ]]; then
53-
cmd+=(--set-actions-runner-svc)
54-
fi
55-
56-
"${cmd[@]}"
57-
5892
report_path="${REPO_ROOT}/python/reports/parity_block_${block}.json"
59-
if [[ ! -f "${report_path}" ]]; then
60-
echo "[ladder] missing report: ${report_path}" >&2
61-
exit 3
93+
max_attempts=1
94+
if block_retry_enabled "${block}"; then
95+
max_attempts=2
6296
fi
63-
64-
"${PYTHON_BIN}" - "${report_path}" "${block}" "${baseline_s}" "${RUNTIME_MULTIPLIER}" <<'PY'
97+
attempt=1
98+
while true; do
99+
cmd=(
100+
"${PYTHON_BIN}"
101+
"${REPO_ROOT}/python/tools/debug_parity_blocks.py"
102+
--blocks "${block}"
103+
--matlab-extra-args "${MATLAB_EXTRA_ARGS}"
104+
--output "python/reports/parity_block_benchmark_report_ladder_${block}.json"
105+
)
106+
if [[ "${SET_ACTIONS_RUNNER_SVC}" == "1" ]]; then
107+
cmd+=(--set-actions-runner-svc)
108+
fi
109+
110+
"${cmd[@]}"
111+
112+
if [[ ! -f "${report_path}" ]]; then
113+
echo "[ladder] missing report: ${report_path}" >&2
114+
exit 3
115+
fi
116+
117+
if "${PYTHON_BIN}" - "${report_path}" "${block}" "${baseline_s}" "${RUNTIME_MULTIPLIER}" <<'PY'
65118
import json
66119
import sys
67120
from pathlib import Path
@@ -133,6 +186,19 @@ if mult > 0:
133186
134187
print(f"[ladder] block passed: {block}")
135188
PY
189+
then
  break
else
  # Capture the checker's exit status inside the else branch. Reading $?
  # after `fi` is wrong: an `if` without `else` whose condition failed
  # returns 0, so rc would always be 0, the `rc -eq 10` retry test could
  # never fire, and `exit "${rc}"` would report success for a failed block.
  rc=$?
fi
194+
if [[ "${rc}" -eq 10 ]] && [[ "${attempt}" -lt "${max_attempts}" ]] && is_timeout_only_regression "${report_path}"; then
195+
echo "[ladder] retrying block ${block} after timeout-only regression (attempt ${attempt}/${max_attempts})"
196+
warmup_matlab
197+
attempt=$((attempt + 1))
198+
continue
199+
fi
200+
exit "${rc}"
201+
done
136202

137203
done
138204

0 commit comments

Comments
 (0)