Skip to content

Commit 2ee1c6a

Browse files
committed
Add parity preflight wrapper and retry telemetry
1 parent e78c64a commit 2ee1c6a

File tree

4 files changed

+171
-2
lines changed

4 files changed

+171
-2
lines changed

.github/workflows/matlab-parity-gate.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ jobs:
2222
AGENT_TOOLSDIRECTORY: /Users/iahncajigas/actions-runner/_work/_tool
2323
RUNNER_TEMP: /Users/iahncajigas/actions-runner/_work/_temp
2424
TMPDIR: /Users/iahncajigas/actions-runner/_work/_temp
25+
ACTIONS_RUNNER_SVC: "1"
2526
NSTAT_MATLAB_EXTRA_ARGS: -maca64 -nodisplay -noFigureWindows -softwareopengl
2627
NSTAT_FORCE_M_HELP_SCRIPTS: "1"
2728
NSTAT_PARITY_RETRY_TIMEOUT_BLOCKS: "1"
@@ -133,4 +134,5 @@ jobs:
133134
python/reports/python_vs_matlab_similarity_baseline.json
134135
python/reports/python_vs_matlab_similarity_summary.json
135136
python/reports/parity_block*.json
137+
python/reports/parity_retry_summary.json
136138
python/reports/matlab_crash_diagnostics/**

python/README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,12 @@ Single wrapper command (fail-fast ladder):
7070
python/tools/run_parity_ladder.sh
7171
```
7272

73+
Single preflight command (Stage A ladder + selected Stage B topics):
74+
75+
```bash
76+
python/tools/run_parity_preflight.sh
77+
```
78+
7379
Notes:
7480

7581
- Runs blocks in order: `core_smoke -> timeout_front -> graphics_mid -> heavy_tail -> full_suite`.
@@ -78,6 +84,9 @@ Notes:
7884
- Set `NSTAT_PARITY_RUNTIME_MULTIPLIER=0` to disable runtime regression checks.
7985
- Pass specific block names as args to run subset ladders, e.g.:
8086
`python/tools/run_parity_ladder.sh core_smoke timeout_front`.
87+
- Ladder writes retry telemetry to `python/reports/parity_retry_summary.json` (block, attempt count, retry reason, timeout-topic list).
88+
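- Retry behavior is controlled by `NSTAT_PARITY_RETRY_TIMEOUT_BLOCKS` (set to `1` to enable timeout-only retries; defaults to `0`) and `NSTAT_PARITY_TIMEOUT_RETRY_BLOCKS` (the blocks eligible for retry; defaults to `timeout_front`).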
89+
- Preflight topic selection can be overridden with `NSTAT_PARITY_PREFLIGHT_STAGEB_TOPICS` (comma- or space-separated topic names).
8190

8291
Use targeted blocks to debug delays locally before running remote CI:
8392

python/tools/run_parity_ladder.sh

Lines changed: 111 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ SET_ACTIONS_RUNNER_SVC="${NSTAT_SET_ACTIONS_RUNNER_SVC:-1}"
1010
RUNTIME_MULTIPLIER="${NSTAT_PARITY_RUNTIME_MULTIPLIER:-2.5}"
1111
RETRY_TIMEOUT_BLOCKS="${NSTAT_PARITY_RETRY_TIMEOUT_BLOCKS:-0}"
1212
TIMEOUT_RETRY_BLOCKS="${NSTAT_PARITY_TIMEOUT_RETRY_BLOCKS:-timeout_front}"
13+
RETRY_SUMMARY_PATH="${NSTAT_PARITY_RETRY_SUMMARY_PATH:-python/reports/parity_retry_summary.json}"
1314

1415
DEFAULT_BLOCKS=(core_smoke timeout_front graphics_mid heavy_tail full_suite)
1516
if [[ $# -gt 0 ]]; then
@@ -73,14 +74,108 @@ warmup_matlab() {
7374
"${MATLAB_BIN}" ${MATLAB_EXTRA_ARGS} -batch "disp(version); exit" >/dev/null 2>&1 || true
7475
}
7576

77+
# Print an absolute form of the path given as $1 (no trailing newline).
# Globals:   REPO_ROOT (read) - prefix applied to relative paths.
# Arguments: $1 - path to resolve.
# Outputs:   $1 unchanged when it starts with "/", otherwise
#            "${REPO_ROOT}/$1".
resolve_path() {
  local candidate="$1"
  case "${candidate}" in
    /*) printf "%s" "${candidate}" ;;
    *) printf "%s/%s" "${REPO_ROOT}" "${candidate}" ;;
  esac
}
85+
86+
# Print the topics of a parity report as a comma-separated list when the
# block failed purely because of MATLAB timeouts.
#
# Globals:   PYTHON_BIN (read) - interpreter used to parse the report.
# Arguments: $1 - path to the parity report JSON.
# Outputs:   topic CSV on stdout (only when returning 0).
# Returns:   0 when helpfile_similarity.rows is non-empty and every row has
#            a falsy matlab_ok and matlab_error == "matlab_timeout";
#            1 otherwise, including a missing, unreadable or malformed
#            report.
timeout_only_topics_csv() {
  local report_path="$1"
  "${PYTHON_BIN}" - "${report_path}" <<'PY'
import json
import sys
from pathlib import Path

path = Path(sys.argv[1])
if not path.exists():
    raise SystemExit(1)

# A truncated or otherwise malformed report simply means "not a
# timeout-only regression": answer with exit 1 and a one-line note
# instead of a traceback in the ladder log.
try:
    payload = json.loads(path.read_text(encoding="utf-8"))
except (OSError, ValueError) as exc:
    print(f"[ladder] unreadable parity report {path}: {exc}", file=sys.stderr)
    raise SystemExit(1)

section = payload.get("helpfile_similarity") if isinstance(payload, dict) else None
rows = section.get("rows") if isinstance(section, dict) else None
if not isinstance(rows, list) or not rows:
    raise SystemExit(1)
if not all(isinstance(r, dict) for r in rows):
    raise SystemExit(1)

# Every row must have failed on the MATLAB side ...
failed = [r for r in rows if not bool(r.get("matlab_ok"))]
if not failed or len(failed) != len(rows):
    raise SystemExit(1)
# ... and every failure must be a timeout.
if not all(str(r.get("matlab_error", "")).strip() == "matlab_timeout" for r in failed):
    raise SystemExit(1)

topics = [str(r.get("topic", "")).strip() for r in failed if str(r.get("topic", "")).strip()]
print(",".join(topics))
raise SystemExit(0)
PY
}
110+
111+
# Create (or overwrite) the retry summary JSON with an empty event list.
# Globals (read):
#   PYTHON_BIN           - interpreter that writes the file.
#   RETRY_SUMMARY_ABS    - destination path; parent directories are created.
#   RETRY_TIMEOUT_BLOCKS - recorded as enabled only when exactly "1".
#   TIMEOUT_RETRY_BLOCKS - comma- and/or whitespace-separated block names.
init_retry_summary() {
  "${PYTHON_BIN}" - "${RETRY_SUMMARY_ABS}" "${RETRY_TIMEOUT_BLOCKS}" "${TIMEOUT_RETRY_BLOCKS}" <<'PY'
import json
import sys
from datetime import datetime, timezone
from pathlib import Path

summary_file = Path(sys.argv[1])
retry_enabled = sys.argv[2] == "1"
# Commas and whitespace both act as separators; split() drops empty tokens.
retry_blocks = sys.argv[3].replace(",", " ").split()

summary_file.parent.mkdir(parents=True, exist_ok=True)

summary = {}
summary["generated_at_utc"] = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
summary["retry_timeout_blocks_enabled"] = retry_enabled
summary["timeout_retry_blocks"] = retry_blocks
summary["events"] = []

summary_file.write_text(json.dumps(summary, indent=2), encoding="utf-8")
PY
}
129+
130+
# Append one event record to the retry summary JSON.
#
# Globals (read): PYTHON_BIN, RETRY_SUMMARY_ABS (summary file path).
# Arguments:
#   $1 kind               - event kind, stored verbatim.
#   $2 block              - block name.
#   $3 attempt            - attempt number (integer).
#   $4 max_attempts       - attempt limit (integer).
#   $5 status             - status label, stored verbatim.
#   $6 return_code        - exit code being recorded (integer).
#   $7 reason             - reason label, stored verbatim.
#   $8 timeout_topics_csv - comma-separated topics; may be empty.
# Returns: the interpreter's exit status (non-zero if the write fails).
#
# A missing, empty, corrupt or non-object summary file is replaced by a fresh
# payload rather than aborting, and the file is rewritten through a temporary
# sibling plus os.replace so an interrupted run cannot leave truncated JSON.
append_retry_summary_event() {
  local kind="$1"
  local block="$2"
  local attempt="$3"
  local max_attempts="$4"
  local status="$5"
  local return_code="$6"
  local reason="$7"
  local timeout_topics_csv="$8"
  "${PYTHON_BIN}" - "${RETRY_SUMMARY_ABS}" "${kind}" "${block}" "${attempt}" "${max_attempts}" "${status}" "${return_code}" "${reason}" "${timeout_topics_csv}" <<'PY'
import json
import os
import sys
from datetime import datetime, timezone
from pathlib import Path


def utc_now():
    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")


path = Path(sys.argv[1])

payload = None
if path.exists():
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # Telemetry must survive a damaged file: note it and start over.
        print(f"[ladder] resetting unreadable retry summary {path}: {exc}", file=sys.stderr)
if not isinstance(payload, dict):
    payload = {"generated_at_utc": utc_now(), "events": []}

events = payload.get("events")
if not isinstance(events, list):
    events = []
    payload["events"] = events

topics_raw = sys.argv[9].strip()
event = {
    "ts_utc": utc_now(),
    "kind": sys.argv[2],
    "block": sys.argv[3],
    "attempt": int(sys.argv[4]),
    "max_attempts": int(sys.argv[5]),
    "status": sys.argv[6],
    "return_code": int(sys.argv[7]),
    "reason": sys.argv[8],
    "timeout_topics": [t for t in topics_raw.split(",") if t] if topics_raw else [],
}
events.append(event)

# Write to a sibling temp file, then swap it in.
path.parent.mkdir(parents=True, exist_ok=True)
tmp_path = path.with_name(path.name + ".tmp")
tmp_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
os.replace(tmp_path, path)
PY
}
167+
76168
cd "${REPO_ROOT}"
169+
RETRY_SUMMARY_ABS="$(resolve_path "${RETRY_SUMMARY_PATH}")"
170+
init_retry_summary
77171

78172
echo "[ladder] repo: ${REPO_ROOT}"
79173
echo "[ladder] python: ${PYTHON_BIN}"
80174
echo "[ladder] matlab args: ${MATLAB_EXTRA_ARGS}"
81175
echo "[ladder] blocks: ${BLOCKS[*]}"
82176
echo "[ladder] runtime multiplier: ${RUNTIME_MULTIPLIER} (<=0 disables runtime regression checks)"
83177
echo "[ladder] retry timeout-only blocks: ${RETRY_TIMEOUT_BLOCKS} (blocks: ${TIMEOUT_RETRY_BLOCKS})"
178+
echo "[ladder] retry summary path: ${RETRY_SUMMARY_PATH}"
84179

85180
for block in "${BLOCKS[@]}"; do
86181
if ! baseline_s="$(baseline_runtime_sum_s "${block}")"; then
@@ -187,16 +282,30 @@ if mult > 0:
187282
print(f"[ladder] block passed: {block}")
188283
PY
189284
then
285+
append_retry_summary_event "block_result" "${block}" "${attempt}" "${max_attempts}" "pass" "0" "ok" ""
190286
break
191287
fi
192288

193289
rc=$?
194-
if [[ "${rc}" -eq 10 ]] && [[ "${attempt}" -lt "${max_attempts}" ]] && is_timeout_only_regression "${report_path}"; then
195-
echo "[ladder] retrying block ${block} after timeout-only regression (attempt ${attempt}/${max_attempts})"
290+
if [[ "${rc}" -eq 10 ]] && [[ "${attempt}" -lt "${max_attempts}" ]] && timeout_topics_csv="$(timeout_only_topics_csv "${report_path}")"; then
291+
is_timeout_only_regression "${report_path}" >/dev/null
292+
echo "[ladder] retrying block ${block} after timeout-only regression (attempt ${attempt}/${max_attempts}); topics=${timeout_topics_csv}"
293+
append_retry_summary_event "retry_scheduled" "${block}" "${attempt}" "${max_attempts}" "retry" "${rc}" "timeout_only_regression" "${timeout_topics_csv}"
196294
warmup_matlab
197295
attempt=$((attempt + 1))
198296
continue
199297
fi
298+
reason="block_failure"
299+
if [[ "${rc}" -eq 10 ]]; then
300+
reason="regression_gate_failure"
301+
elif [[ "${rc}" -eq 11 ]]; then
302+
reason="runtime_regression"
303+
fi
304+
timeout_topics_csv=""
305+
if timeout_topics_tmp="$(timeout_only_topics_csv "${report_path}")"; then
306+
timeout_topics_csv="${timeout_topics_tmp}"
307+
fi
308+
append_retry_summary_event "block_result" "${block}" "${attempt}" "${max_attempts}" "fail" "${rc}" "${reason}" "${timeout_topics_csv}"
200309
exit "${rc}"
201310
done
202311

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#!/usr/bin/env bash
# Parity preflight: run the Stage A parity ladder, then verify a selected set
# of Stage B topics with the gate enforced, then print a report summary.
#
# Environment overrides (defaults are in the assignments below):
#   PYTHON_BIN                            interpreter for the Stage B tools
#   NSTAT_MATLAB_EXTRA_ARGS               re-exported for child processes
#   NSTAT_PARITY_PREFLIGHT_STAGEA_BLOCKS  comma/space separated ladder blocks
#   NSTAT_PARITY_PREFLIGHT_STAGEB_TOPICS  comma/space separated topic names
#   NSTAT_PARITY_PREFLIGHT_STAGEB_REPORT  Stage B report path
#   NSTAT_FORCE_M_HELP_SCRIPTS            exported, defaulting to 1
#   NSTAT_SET_ACTIONS_RUNNER_SVC          "1" (default) exports ACTIONS_RUNNER_SVC=1
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
PYTHON_BIN="${PYTHON_BIN:-python3}"
MATLAB_EXTRA_ARGS="${NSTAT_MATLAB_EXTRA_ARGS:--maca64 -nodisplay -noFigureWindows -softwareopengl}"
STAGE_A_BLOCKS_RAW="${NSTAT_PARITY_PREFLIGHT_STAGEA_BLOCKS:-core_smoke timeout_front}"
STAGE_B_TOPICS_RAW="${NSTAT_PARITY_PREFLIGHT_STAGEB_TOPICS:-PPThinning,ValidationDataSet,DecodingExample,StimulusDecode2D}"
STAGE_B_REPORT_PATH="${NSTAT_PARITY_PREFLIGHT_STAGEB_REPORT:-python/reports/parity_preflight_stageb_selected.json}"

# Emit a progress line on stdout with the script's tag.
say() {
  echo "[preflight] $*"
}

# Emit a tagged message on stderr and stop with the usage-error status.
bail() {
  echo "[preflight] $*" >&2
  exit 2
}

# Commas count as separators too: turn them into spaces, then word-split.
read -r -a STAGE_A_BLOCKS <<< "${STAGE_A_BLOCKS_RAW//,/ }"
if (( ${#STAGE_A_BLOCKS[@]} == 0 )); then
  bail "no Stage A blocks resolved from NSTAT_PARITY_PREFLIGHT_STAGEA_BLOCKS='${STAGE_A_BLOCKS_RAW}'"
fi

read -r -a STAGE_B_TOPICS <<< "${STAGE_B_TOPICS_RAW//,/ }"
if (( ${#STAGE_B_TOPICS[@]} == 0 )); then
  bail "no Stage B topics resolved from NSTAT_PARITY_PREFLIGHT_STAGEB_TOPICS='${STAGE_B_TOPICS_RAW}'"
fi

# The tool paths below are relative to the repository root.
cd "${REPO_ROOT}"
export NSTAT_MATLAB_EXTRA_ARGS="${MATLAB_EXTRA_ARGS}"
export NSTAT_FORCE_M_HELP_SCRIPTS="${NSTAT_FORCE_M_HELP_SCRIPTS:-1}"
[[ "${NSTAT_SET_ACTIONS_RUNNER_SVC:-1}" != "1" ]] || export ACTIONS_RUNNER_SVC=1

say "repo: ${REPO_ROOT}"
say "python: ${PYTHON_BIN}"
say "matlab args: ${NSTAT_MATLAB_EXTRA_ARGS}"
say "stage A blocks: ${STAGE_A_BLOCKS[*]}"
say "stage B selected topics: ${STAGE_B_TOPICS[*]}"
say "stage B report: ${STAGE_B_REPORT_PATH}"

# Stage A: errexit stops the preflight here if the ladder fails.
python/tools/run_parity_ladder.sh "${STAGE_A_BLOCKS[@]}"

# Stage B: gate-enforced verification of the selected topics only.
"${PYTHON_BIN}" python/tools/verify_python_vs_matlab_similarity.py \
  --enforce-gate \
  --report-path "${STAGE_B_REPORT_PATH}" \
  --topics "${STAGE_B_TOPICS[@]}"

# The summary is informational; its failure must not fail the preflight.
"${PYTHON_BIN}" python/tools/summarize_parity_report.py "${STAGE_B_REPORT_PATH}" || true

say "complete"

0 commit comments

Comments
 (0)