6666 "nSTATPaperExamples" ,
6767}
6868DEFAULT_HELP_TOPIC_TIMEOUT_S = 120
# Default number of MATLAB attempts per help topic. Read once at import time
# from NSTAT_MATLAB_TOPIC_MAX_ATTEMPTS; a value that does not parse as an
# integer falls back to 1, and anything below 1 is raised to 1.
try:
    DEFAULT_MATLAB_MAX_ATTEMPTS = int(
        os.environ.get("NSTAT_MATLAB_TOPIC_MAX_ATTEMPTS", "1")
    )
except ValueError:
    DEFAULT_MATLAB_MAX_ATTEMPTS = 1
if DEFAULT_MATLAB_MAX_ATTEMPTS < 1:
    DEFAULT_MATLAB_MAX_ATTEMPTS = 1
# Lower-case substrings searched for in the lower-cased MATLAB error text;
# a hit on any of them marks the failure as a crash that may be retried.
CRASH_ERROR_MARKERS: tuple[str, ...] = (
    "matlab is exiting because of fatal error",
    "fatal error",
    "mathworkscrashreporter",
    "crash report has been saved",
    "libmwhandle_graphics",
)
6980DEFAULT_TOPIC_TIMEOUT_OVERRIDES : dict [str , int ] = {
7081 "SignalObjExamples" : 180 ,
7182 "CovariateExamples" : 180 ,
@@ -128,6 +139,32 @@ def _cleanup_runner_matlab_processes() -> None:
128139 time .sleep (0.5 )
129140
130141
def _matlab_warmup(timeout_s: int = 90) -> None:
    """Run a trivial MATLAB batch command as a best-effort warm-up.

    Does nothing when ``MATLAB_BIN`` does not exist. Otherwise runs
    ``disp(version); exit`` through ``_run_matlab_batch_logged``.

    Args:
        timeout_s: Timeout in seconds forwarded to the batch runner.

    Returns:
        None. A failing warm-up never raises to the caller; the failure is
        reported on stderr so it stays visible in the run log.
    """
    if not MATLAB_BIN.exists():
        return
    try:
        _run_matlab_batch_logged("disp(version); exit", timeout_s=timeout_s)
    except Exception as exc:
        # Deliberately best-effort: the caller proceeds either way, but the
        # failure should not disappear without a trace.
        print(
            f"[matlab warmup] ignored failure: {type(exc).__name__}: {exc}",
            file=sys.stderr,
            flush=True,
        )
149+
150+
151+ def _is_retryable_matlab_failure (payload : dict [str , Any ]) -> bool :
152+ if bool (payload .get ("ok" )):
153+ return False
154+ error = str (payload .get ("error" , "" )).strip ()
155+ if error == "matlab_timeout" :
156+ return True
157+ combined = " " .join (
158+ [
159+ error ,
160+ str (payload .get ("error_report" , "" )),
161+ str (payload .get ("fallback_error" , "" )),
162+ str (payload .get ("fallback_error_report" , "" )),
163+ ]
164+ ).lower ()
165+ return any (marker in combined for marker in CRASH_ERROR_MARKERS )
166+
167+
131168def _kill_process_group (pid : int ) -> None :
132169 try :
133170 os .killpg (pid , signal .SIGKILL )
@@ -494,6 +531,10 @@ def run_script_path(path: Path, timeout: int, source_label: str | None = None) -
494531 "end; exit(0);"
495532 )
496533
534+ # In runner-service mode, enforce a clean MATLAB process slate before each topic.
535+ if _runner_service_mode ():
536+ _cleanup_runner_matlab_processes ()
537+
497538 t0 = time .time ()
498539 try :
499540 run = _run_matlab_batch_logged (cmd , timeout )
@@ -615,6 +656,7 @@ def _help_similarity(
615656 topics : list [tuple [str , str ]],
616657 default_timeout_s : int = DEFAULT_HELP_TOPIC_TIMEOUT_S ,
617658 topic_timeout_overrides : dict [str , int ] | None = None ,
659+ matlab_max_attempts : int = DEFAULT_MATLAB_MAX_ATTEMPTS ,
618660) -> dict [str , Any ]:
619661 rows : list [dict [str , Any ]] = []
620662
@@ -649,7 +691,41 @@ def _help_similarity(
649691
650692 py = _run_python_topic (stem )
651693 timeout_s = topic_timeouts .get (stem , default_timeout_s )
694+ ml_attempt_history : list [dict [str , Any ]] = []
652695 ml = _run_matlab_help_script (script_rel , timeout_s = timeout_s )
696+ ml_attempt_history .append (
697+ {
698+ "attempt" : 1 ,
699+ "ok" : bool (ml .get ("ok" )),
700+ "error" : str (ml .get ("error" , "" )),
701+ "runtime_s" : float (ml .get ("runtime_s" ) or 0.0 ),
702+ "script_used" : str (ml .get ("script_used" , script_rel )),
703+ }
704+ )
705+ attempt = 1
706+ while (
707+ attempt < matlab_max_attempts
708+ and not bool (ml .get ("ok" ))
709+ and _is_retryable_matlab_failure (ml )
710+ ):
711+ next_attempt = attempt + 1
712+ print (
713+ f"[help retry { next_attempt } /{ matlab_max_attempts } ] { stem } "
714+ f"after retryable MATLAB failure: { ml .get ('error' , '' )} " ,
715+ flush = True ,
716+ )
717+ _matlab_warmup ()
718+ ml = _run_matlab_help_script (script_rel , timeout_s = timeout_s )
719+ ml_attempt_history .append (
720+ {
721+ "attempt" : next_attempt ,
722+ "ok" : bool (ml .get ("ok" )),
723+ "error" : str (ml .get ("error" , "" )),
724+ "runtime_s" : float (ml .get ("runtime_s" ) or 0.0 ),
725+ "script_used" : str (ml .get ("script_used" , script_rel )),
726+ }
727+ )
728+ attempt = next_attempt
653729
654730 if py .get ("ok" ):
655731 summary ["python_ok" ] += 1
@@ -697,6 +773,9 @@ def _help_similarity(
697773 "matlab_fallback_script_used" : ml .get ("fallback_script_used" , "" ),
698774 "matlab_runtime_s" : ml .get ("runtime_s" ),
699775 "matlab_timeout_s" : timeout_s ,
776+ "matlab_attempts" : len (ml_attempt_history ),
777+ "matlab_retry_applied" : len (ml_attempt_history ) > 1 ,
778+ "matlab_attempt_history" : ml_attempt_history ,
700779 "matlab_timeout_snapshot_before_cleanup" : ml .get ("timeout_process_snapshot_before_cleanup" , "" ),
701780 "matlab_timeout_snapshot_after_cleanup" : ml .get ("timeout_process_snapshot_after_cleanup" , "" ),
702781 "matlab_runner_service_cleanup" : bool (ml .get ("runner_service_cleanup" , False )),
@@ -878,6 +957,15 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace:
878957 default = [],
879958 help = "Override per-topic MATLAB timeout using TOPIC=SECONDS (repeatable)." ,
880959 )
960+ parser .add_argument (
961+ "--matlab-max-attempts" ,
962+ type = int ,
963+ default = DEFAULT_MATLAB_MAX_ATTEMPTS ,
964+ help = (
965+ "Maximum MATLAB attempts per help topic for retryable failures "
966+ f"(default: { DEFAULT_MATLAB_MAX_ATTEMPTS } )."
967+ ),
968+ )
881969 parser .add_argument (
882970 "--report-path" ,
883971 default = "python/reports/python_vs_matlab_similarity_report.json" ,
@@ -892,6 +980,9 @@ def main(argv: list[str] | None = None) -> int:
892980 if args .default_topic_timeout <= 0 :
893981 print ("--default-topic-timeout must be positive" , file = sys .stderr )
894982 return 2
983+ if args .matlab_max_attempts <= 0 :
984+ print ("--matlab-max-attempts must be positive" , file = sys .stderr )
985+ return 2
895986 try :
896987 requested_topics = _parse_topics_arg (args .topics )
897988 topics = _resolve_topics (requested_topics )
@@ -910,6 +1001,7 @@ def main(argv: list[str] | None = None) -> int:
9101001 "default_timeout_s" : args .default_topic_timeout ,
9111002 "topic_timeout_overrides" : topic_timeout_overrides ,
9121003 "force_m_help_scripts" : FORCE_M_HELP_SCRIPTS ,
1004+ "matlab_max_attempts" : args .matlab_max_attempts ,
9131005 }
9141006
9151007 print ("[class] running Python/MATLAB class checks" , flush = True )
@@ -934,6 +1026,7 @@ def main(argv: list[str] | None = None) -> int:
9341026 topics = topics ,
9351027 default_timeout_s = args .default_topic_timeout ,
9361028 topic_timeout_overrides = topic_timeout_overrides ,
1029+ matlab_max_attempts = args .matlab_max_attempts ,
9371030 )
9381031 contract_topics = None if full_suite else set (selected_topic_stems )
9391032 report ["parity_contract" ] = _evaluate_parity_contract (report ["helpfile_similarity" ]["rows" ], topics_filter = contract_topics )
0 commit comments