@@ -10,6 +10,7 @@ SET_ACTIONS_RUNNER_SVC="${NSTAT_SET_ACTIONS_RUNNER_SVC:-1}"
# Tunables, each overridable through an NSTAT_PARITY_* environment variable.
# RUNTIME_MULTIPLIER: runtime regression threshold factor; a value <= 0
#   disables runtime regression checks (see the startup banner).
RUNTIME_MULTIPLIER="${NSTAT_PARITY_RUNTIME_MULTIPLIER:-2.5}"
# RETRY_TIMEOUT_BLOCKS: "1" enables retrying blocks that failed only because
#   of MATLAB timeouts; any other value is recorded as disabled.
RETRY_TIMEOUT_BLOCKS="${NSTAT_PARITY_RETRY_TIMEOUT_BLOCKS:-0}"
# TIMEOUT_RETRY_BLOCKS: comma- and/or whitespace-separated list of block names
#   eligible for a timeout retry.
TIMEOUT_RETRY_BLOCKS="${NSTAT_PARITY_TIMEOUT_RETRY_BLOCKS:-timeout_front}"
# RETRY_SUMMARY_PATH: where the JSON retry summary is written; a relative
#   value is resolved against REPO_ROOT by resolve_path.
RETRY_SUMMARY_PATH="${NSTAT_PARITY_RETRY_SUMMARY_PATH:-python/reports/parity_retry_summary.json}"
1314
1415DEFAULT_BLOCKS=(core_smoke timeout_front graphics_mid heavy_tail full_suite)
1516if [[ $# -gt 0 ]]; then
@@ -73,14 +74,108 @@ warmup_matlab() {
7374 " ${MATLAB_BIN} " ${MATLAB_EXTRA_ARGS} -batch " disp(version); exit" > /dev/null 2>&1 || true
7475}
7576
#######################################
# Turn a possibly repo-relative path into one anchored at REPO_ROOT.
# Globals:   REPO_ROOT (read)
# Arguments: $1 - path; used unchanged when it starts with "/"
# Outputs:   the resolved path on stdout, without a trailing newline
#######################################
resolve_path() {
  local p="$1"
  # Unquoted right-hand side so /* is treated as a glob pattern.
  if [[ "${p}" == /* ]]; then
    printf '%s' "${p}"
  else
    printf '%s/%s' "${REPO_ROOT}" "${p}"
  fi
}
85+
#######################################
# Report the topics of a parity report whose rows all failed with a MATLAB
# timeout.
# Globals:   PYTHON_BIN (read)
# Arguments: $1 - path to the JSON parity report
# Outputs:   comma-separated topic names on stdout when the report qualifies
# Returns:   0 when every row under helpfile_similarity.rows has a falsy
#            matlab_ok and matlab_error equal to "matlab_timeout";
#            non-zero when the file is missing, has no rows, or any row
#            does not match.
#######################################
timeout_only_topics_csv() {
  local report_path="$1"
  "${PYTHON_BIN}" - "${report_path}" <<'PY'
import json
import sys
from pathlib import Path

path = Path(sys.argv[1])
if not path.exists():
    raise SystemExit(1)
payload = json.loads(path.read_text(encoding="utf-8"))
rows = payload.get("helpfile_similarity", {}).get("rows", [])
if not rows:
    raise SystemExit(1)
failed = [r for r in rows if not bool(r.get("matlab_ok"))]
# Qualifies only when every row failed, not merely some of them.
if not failed or len(failed) != len(rows):
    raise SystemExit(1)
if not all(str(r.get("matlab_error", "")).strip() == "matlab_timeout" for r in failed):
    raise SystemExit(1)
# Rows without a usable topic name are left out of the list.
topics = [str(r.get("topic", "")).strip() for r in failed if str(r.get("topic", "")).strip()]
print(",".join(topics))
raise SystemExit(0)
PY
}
110+
#######################################
# Create (or overwrite) the retry summary JSON file with an empty event list.
# Globals:   PYTHON_BIN (read), RETRY_SUMMARY_ABS (read),
#            RETRY_TIMEOUT_BLOCKS (read), TIMEOUT_RETRY_BLOCKS (read)
# Arguments: none
# Outputs:   writes the summary file; parent directories are created as needed
# Returns:   exit status of the Python interpreter
#######################################
init_retry_summary() {
  "${PYTHON_BIN}" - "${RETRY_SUMMARY_ABS}" "${RETRY_TIMEOUT_BLOCKS}" "${TIMEOUT_RETRY_BLOCKS}" <<'PY'
import json
import sys
from datetime import datetime, timezone
from pathlib import Path

path = Path(sys.argv[1])
path.parent.mkdir(parents=True, exist_ok=True)
payload = {
    "generated_at_utc": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
    # Only the exact string "1" counts as enabled.
    "retry_timeout_blocks_enabled": sys.argv[2] == "1",
    # Accept comma- and/or whitespace-separated block names.
    "timeout_retry_blocks": [b for b in sys.argv[3].replace(",", " ").split() if b],
    "events": [],
}
path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
PY
}
129+
#######################################
# Append one event record to the retry summary JSON file.
# Globals:   PYTHON_BIN (read), RETRY_SUMMARY_ABS (read)
# Arguments: $1 - event kind
#            $2 - block name
#            $3 - attempt number (integer)
#            $4 - maximum attempts (integer)
#            $5 - status label
#            $6 - return code (integer)
#            $7 - reason label
#            $8 - comma-separated timeout topics; may be empty
# Outputs:   rewrites the summary file with the new event appended; a
#            missing file is started fresh with only this event
# Returns:   exit status of the Python interpreter (non-zero when an
#            integer argument does not parse or the file is not valid JSON)
#######################################
append_retry_summary_event() {
  local kind="$1"
  local block="$2"
  local attempt="$3"
  local max_attempts="$4"
  local status="$5"
  local return_code="$6"
  local reason="$7"
  local timeout_topics_csv="$8"
  "${PYTHON_BIN}" - "${RETRY_SUMMARY_ABS}" "${kind}" "${block}" "${attempt}" "${max_attempts}" "${status}" "${return_code}" "${reason}" "${timeout_topics_csv}" <<'PY'
import json
import sys
from datetime import datetime, timezone
from pathlib import Path

path = Path(sys.argv[1])
if path.exists():
    payload = json.loads(path.read_text(encoding="utf-8"))
else:
    payload = {"generated_at_utc": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), "events": []}
events = payload.setdefault("events", [])
topics_raw = sys.argv[9].strip()
event = {
    "ts_utc": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
    "kind": sys.argv[2],
    "block": sys.argv[3],
    "attempt": int(sys.argv[4]),
    "max_attempts": int(sys.argv[5]),
    "status": sys.argv[6],
    "return_code": int(sys.argv[7]),
    "reason": sys.argv[8],
    "timeout_topics": [t for t in topics_raw.split(",") if t] if topics_raw else [],
}
events.append(event)
# The summary may not have been initialised yet, so make sure its directory
# exists before writing.
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
PY
}
167+
# Work from the repository root; stop if it cannot be entered, because the
# summary path below is resolved against it.
cd "${REPO_ROOT}" || exit 1
# Resolve the summary location once and start the run with a fresh file.
RETRY_SUMMARY_ABS="$(resolve_path "${RETRY_SUMMARY_PATH}")"
init_retry_summary

# Startup banner describing the effective configuration.
echo "[ladder] repo: ${REPO_ROOT}"
echo "[ladder] python: ${PYTHON_BIN}"
echo "[ladder] matlab args: ${MATLAB_EXTRA_ARGS}"
echo "[ladder] blocks: ${BLOCKS[*]}"
echo "[ladder] runtime multiplier: ${RUNTIME_MULTIPLIER} (<=0 disables runtime regression checks)"
echo "[ladder] retry timeout-only blocks: ${RETRY_TIMEOUT_BLOCKS} (blocks: ${TIMEOUT_RETRY_BLOCKS})"
echo "[ladder] retry summary path: ${RETRY_SUMMARY_PATH}"
84179
85180for block in " ${BLOCKS[@]} " ; do
86181 if ! baseline_s=" $( baseline_runtime_sum_s " ${block} " ) " ; then
@@ -187,16 +282,30 @@ if mult > 0:
187282print(f"[ladder] block passed: {block}")
188283PY
189284 then
285+ append_retry_summary_event " block_result" " ${block} " " ${attempt} " " ${max_attempts} " " pass" " 0" " ok" " "
190286 break
191287 fi
192288
193289 rc=$?
194- if [[ " ${rc} " -eq 10 ]] && [[ " ${attempt} " -lt " ${max_attempts} " ]] && is_timeout_only_regression " ${report_path} " ; then
195- echo " [ladder] retrying block ${block} after timeout-only regression (attempt ${attempt} /${max_attempts} )"
290+ if [[ " ${rc} " -eq 10 ]] && [[ " ${attempt} " -lt " ${max_attempts} " ]] && timeout_topics_csv=" $( timeout_only_topics_csv " ${report_path} " ) " ; then
291+ is_timeout_only_regression " ${report_path} " > /dev/null
292+ echo " [ladder] retrying block ${block} after timeout-only regression (attempt ${attempt} /${max_attempts} ); topics=${timeout_topics_csv} "
293+ append_retry_summary_event " retry_scheduled" " ${block} " " ${attempt} " " ${max_attempts} " " retry" " ${rc} " " timeout_only_regression" " ${timeout_topics_csv} "
196294 warmup_matlab
197295 attempt=$(( attempt + 1 ))
198296 continue
199297 fi
298+ reason=" block_failure"
299+ if [[ " ${rc} " -eq 10 ]]; then
300+ reason=" regression_gate_failure"
301+ elif [[ " ${rc} " -eq 11 ]]; then
302+ reason=" runtime_regression"
303+ fi
304+ timeout_topics_csv=" "
305+ if timeout_topics_tmp=" $( timeout_only_topics_csv " ${report_path} " ) " ; then
306+ timeout_topics_csv=" ${timeout_topics_tmp} "
307+ fi
308+ append_retry_summary_event " block_result" " ${block} " " ${attempt} " " ${max_attempts} " " fail" " ${rc} " " ${reason} " " ${timeout_topics_csv} "
200309 exit " ${rc} "
201310 done
202311
0 commit comments