Test Runner: --maxfail: Allow interrupt of test run after N failures

davidfstr · davidfstr · commit 011cddaedf39 · 2026-03-22T21:02:18.000-04:00
Also:
* Test Runner: If >50 tests interrupted,
  print number of tests rather than all names of tests
diff --git a/src/crystal/main.py b/src/crystal/main.py
@@ -293,6 +293,13 @@ def sys_unraisablehook(args) -> None:
                 help='Print additional diagnostic information. Only applies with --parallel.',
                 action='store_true',
             )
+            test_parser.add_argument(
+                '--maxfail',
+                help='Stop running tests after N failures.',
+                type=int,
+                default=None,
+                metavar='N',
+            )
         
         # Define main command
         parser.add_argument(
@@ -444,6 +451,12 @@ def sys_unraisablehook(args) -> None:
                 (not hasattr(parsed_args, 'parallel') or not parsed_args.parallel)):
                 print('error: -j/--jobs can only be used with -p/--parallel', file=sys.stderr)
                 sys.exit(2)
+            
+            # Validate --maxfail must be positive
+            if (hasattr(parsed_args, 'maxfail') and parsed_args.maxfail is not None and
+                    parsed_args.maxfail <= 0):
+                print('error: --maxfail must be a positive integer', file=sys.stderr)
+                sys.exit(2)
         else:
             parsed_args.test = None
         
@@ -471,12 +484,14 @@ def sys_unraisablehook(args) -> None:
 
         jobs = parsed_args.jobs if hasattr(parsed_args, 'jobs') else None
         verbose = parsed_args.verbose if hasattr(parsed_args, 'verbose') else False
+        maxfail = parsed_args.maxfail if hasattr(parsed_args, 'maxfail') else None
         
         # NOTE: Run on main thread so that it can handle KeyboardInterrupt
         is_ok = run_tests_parallel(
             parsed_args.test,
             jobs=jobs,
-            verbose=verbose
+            verbose=verbose,
+            maxfail=maxfail,
         )
         exit_code = 0 if is_ok else 1
         sys.exit(exit_code)
@@ -786,7 +801,8 @@ def bg_task() -> None:
                 
                 is_ok = False
                 try:
-                    is_ok = run_tests_serial(parsed_args.test, interactive=is_interactive)
+                    maxfail = parsed_args.maxfail if hasattr(parsed_args, 'maxfail') else None
+                    is_ok = run_tests_serial(parsed_args.test, interactive=is_interactive, maxfail=maxfail)
                 finally:
                     exit_code = 0 if is_ok else 1
                     if is_coverage():
diff --git a/src/crystal/tests/runner/parallel.py b/src/crystal/tests/runner/parallel.py
@@ -6,7 +6,7 @@
 import argparse
 from collections.abc import Sequence
 from contextlib import closing
-from crystal.tests.runner.shared import normalize_test_names
+from crystal.tests.runner.shared import MAX_INTERRUPTED_TEST_COUNT_TO_REPORT, normalize_test_names
 from crystal.tests.util.cli import get_crystal_command
 from crystal.util.bulkheads import capture_crashes_to_stderr
 from crystal.util.pipes import create_selectable_pipe, Pipe, ReadablePipeEnd
@@ -82,6 +82,7 @@ def run_tests(
         raw_test_names: list[str],
         *, jobs: int | None,
         verbose: bool,
+        maxfail: int | None = None,
         ) -> bool:
     from crystal.tests.index import TEST_FUNCS
     
@@ -210,6 +211,10 @@ def run_tests(
     # Create shared state for interrupt handling
     interrupted_event = threading.Event()
     
+    # Create shared state for --maxfail
+    fail_count = [0]  # mutable int, shared across worker threads
+    fail_count_lock = threading.Lock()
+    
     # Create coordination state for simulated parent interrupt
     # (used when '!' appears in CRYSTAL_PARALLEL_WORKER_TASKS)
     workers_at_interrupt_point: list[threading.Event] = [
@@ -235,6 +240,9 @@ def run_worker_thread(worker_id: int) -> None:
                     workers_at_interrupt_point[worker_id]
                     if simulate_parent_interrupt else None
                 ),
+                maxfail=maxfail,
+                fail_count=fail_count,
+                fail_count_lock=fail_count_lock,
             )
             with worker_results_lock:
                 worker_results[worker_id] = result
@@ -569,7 +577,10 @@ def _format_summary(all_tests: 'list[TestResult]', total_duration: float) -> tup
         if interrupted_tests:
             output_lines.append('')
             output_lines.append('Rerun interrupted tests with:')
-            output_lines.append(f'$ crystal test {" ".join(interrupted_tests)}')
+            if len(interrupted_tests) < MAX_INTERRUPTED_TEST_COUNT_TO_REPORT:
+                output_lines.append(f'$ crystal test {" ".join(interrupted_tests)}')
+            else:
+                output_lines.append(f'$ crystal test <{len(interrupted_tests)} tests>')
     
     return ('\n'.join(output_lines), is_ok)
 
@@ -621,6 +632,9 @@ def _run_worker(
         interrupt_read_pipe: 'ReadablePipeEnd',
         display_result_immediately: bool = True,
         at_interrupt_point_event: threading.Event | None = None,
+        maxfail: int | None = None,
+        fail_count: 'list[int] | None' = None,
+        fail_count_lock: 'threading.Lock | None' = None,
         ) -> WorkerResult:
     """
     Run a worker subprocess in interactive mode, pulling tests from work_queue on-demand.
@@ -649,6 +663,9 @@ def _run_worker(
         If False, results are only returned in the WorkerResult.
     * at_interrupt_point_event -- Event to set when worker reaches _INTERRUPT_MARKER.
         The worker will then wait for interrupted_event to be set before continuing.
+    * maxfail -- Stop running after this many failures, or None for no limit.
+    * fail_count -- Shared mutable counter of failures across all workers.
+    * fail_count_lock -- Lock protecting fail_count.
     
     Returns:
     * WorkerResult containing test results and metadata.
@@ -775,6 +792,21 @@ def _run_worker(
                     if display_result_immediately:
                         _display_test_result(test_result)
                     
+                    # Check if --maxfail threshold has been reached
+                    if (maxfail is not None and
+                            fail_count is not None and
+                            fail_count_lock is not None and
+                            test_result.status in ('FAILURE', 'ERROR')):
+                        with fail_count_lock:
+                            fail_count[0] += 1
+                            reached_maxfail = (fail_count[0] >= maxfail)
+                        if reached_maxfail and not interrupted_event.is_set():
+                            if verbose:
+                                print(
+                                    f'[Runner] Reached maxfail={maxfail}, interrupting workers...',
+                                    file=sys.stderr)
+                            interrupted_event.set()
+                    
                     if process_is_interrupted:
                         break
                 
diff --git a/src/crystal/tests/runner/serial.py b/src/crystal/tests/runner/serial.py
@@ -2,7 +2,7 @@
 from concurrent.futures import Future
 from contextlib import contextmanager
 from crystal.app_preferences import app_prefs
-from crystal.tests.runner.shared import available_modules_str, normalize_test_names
+from crystal.tests.runner.shared import MAX_INTERRUPTED_TEST_COUNT_TO_REPORT, available_modules_str, normalize_test_names
 from crystal.tests.util.downloads import delay_between_downloads_minimized
 from crystal.tests.util.runner import run_test
 from crystal.tests.util.subtests import SubtestFailed
@@ -30,7 +30,7 @@
 
 
 @bg_affinity
-def run_tests(raw_test_names: list[str], *, interactive: bool = False) -> bool:
+def run_tests(raw_test_names: list[str], *, interactive: bool = False, maxfail: int | None = None) -> bool:
     """
     Runs automated UI tests, printing a summary report,
     and returning whether the run was OK.
@@ -54,10 +54,10 @@ def run_tests(raw_test_names: list[str], *, interactive: bool = False) -> bool:
         else:
             test_names = []  # ignored
         
-        return _run_tests(test_names, interactive=interactive)
+        return _run_tests(test_names, interactive=interactive, maxfail=maxfail)
 
 
-def _run_tests(test_names: list[str], *, interactive: bool = False) -> bool:
+def _run_tests(test_names: list[str], *, interactive: bool = False, maxfail: int | None = None) -> bool:
     from crystal.tests.index import TEST_FUNCS
     
     # Ensure ancestor caller did already call set_tests_are_running()
@@ -83,6 +83,7 @@ def _run_tests(test_names: list[str], *, interactive: bool = False) -> bool:
                 test_func_by_name[test_name] = test_func
             
             # Interactive mode: read test names from stdin one at a time
+            fail_count = 0  # for --maxfail tracking
             try:
                 while True:
                     # Print prompt
@@ -152,6 +153,15 @@ def _run_tests(test_names: list[str], *, interactive: bool = False) -> bool:
                     except NoForegroundThreadError:
                         # Fatal error; abort
                         break
+                    
+                    # Check if --maxfail threshold has been reached
+                    if maxfail is not None:
+                        last_result = result_for_test_func_id.get(test_func_id)
+                        if (last_result is not None and
+                                not isinstance(last_result, (SkipTest, _TestInterrupted))):
+                            fail_count += 1
+                            if fail_count >= maxfail:
+                                break
             except KeyboardInterrupt:
                 # Proceed to print a summary section, and exit the process
                 pass
@@ -166,9 +176,16 @@ def _run_tests(test_names: list[str], *, interactive: bool = False) -> bool:
                     if test_name not in test_names and test_func.__module__ not in test_names:
                         continue
                 test_funcs_to_run.append(test_func)
-                
+            
+            def mark_remaining_tests_as_interrupted() -> None:
+                for remaining_test_func in test_funcs_to_run[test_func_index:]:
+                    remaining_test_func_id = (remaining_test_func.__module__, remaining_test_func.__name__)
+                    if remaining_test_func_id not in result_for_test_func_id:
+                        result_for_test_func_id[remaining_test_func_id] = _TestInterrupted()
+            
             num_test_funcs_to_run = len(test_funcs_to_run)  # cache
             
+            fail_count = 0  # for --maxfail tracking
             try:
                 for (test_func_index, test_func) in enumerate(test_funcs_to_run):
                     test_func_id = (test_func.__module__, test_func.__name__)
@@ -196,12 +213,18 @@ def _run_tests(test_names: list[str], *, interactive: bool = False) -> bool:
                     except NoForegroundThreadError:
                         # Fatal error; abort
                         break
+                    
+                    # Check if --maxfail threshold has been reached
+                    if maxfail is not None:
+                        last_result = result_for_test_func_id.get(test_func_id)
+                        if (last_result is not None and
+                                not isinstance(last_result, (SkipTest, _TestInterrupted))):
+                            fail_count += 1
+                            if fail_count >= maxfail:
+                                mark_remaining_tests_as_interrupted()
+                                break
             except KeyboardInterrupt:
-                # Mark all remaining tests as interrupted
-                for remaining_test_func in test_funcs_to_run[test_func_index:]:
-                    remaining_test_func_id = (remaining_test_func.__module__, remaining_test_func.__name__)
-                    if remaining_test_func_id not in result_for_test_func_id:
-                        result_for_test_func_id[remaining_test_func_id] = _TestInterrupted()
+                mark_remaining_tests_as_interrupted()
                 
                 # Proceed to print a summary section, and exit the process
                 pass
@@ -314,7 +337,10 @@ def _run_tests(test_names: list[str], *, interactive: bool = False) -> bool:
     if len(interrupted_test_names) != 0:
         print()
         print('Rerun interrupted tests with:')
-        print(f'$ crystal test {" ".join(interrupted_test_names)}')
+        if len(interrupted_test_names) < MAX_INTERRUPTED_TEST_COUNT_TO_REPORT:
+            print(f'$ crystal test {" ".join(interrupted_test_names)}')
+        else:
+            print(f'$ crystal test <{len(interrupted_test_names)} tests>')
     
     # Play bell sound in terminal
     print('\a', end='', flush=True)
diff --git a/src/crystal/tests/runner/shared.py b/src/crystal/tests/runner/shared.py
@@ -1,3 +1,6 @@
+MAX_INTERRUPTED_TEST_COUNT_TO_REPORT = 50
+
+
 def normalize_test_names(raw_test_names: list[str]) -> list[str]:
     """
     Normalize test names from various formats into the canonical format.