5
5
This script:
6
6
1. Runs the full test suite initially (with normal cleanup)
7
7
2. Identifies failed tests and retries them with configurable strategy
8
- 3. Manages test namespaces intelligently (cleanup on success, keep failed for debugging)
8
+ 3. Manages test namespaces intelligently (clean up on success; keep failed namespaces for debugging by default)
9
9
4. Provides detailed logging and comprehensive reporting
10
10
11
11
Usage: ./scripts/auto-retry-tests.py --parallel 4 --attempts-serial 3 --attempts-parallel 2 --venv ./venv
16
16
import json
17
17
import os
18
18
import re
19
+ import shutil
19
20
import statistics
20
21
import subprocess
21
22
import sys
@@ -32,8 +33,7 @@ class TestConstants:
32
33
"""Constants used throughout the test runner."""
33
34
34
35
MIN_RUNTIME_THRESHOLD = 30.0 # Filter out quick failures (seconds)
35
- MAX_RUNTIME_HISTORY = 10 # Keep only recent runs
36
- MAX_ERROR_LINES_TO_CHECK = 50 # Lines to scan for errors in logs
36
+ MAX_RUNTIME_HISTORY = 50 # Keep only recent runs
37
37
MAX_TEST_NAME_LENGTH = 100 # Maximum test name length for filenames
38
38
HASH_SUFFIX_LENGTH = 8 # Length of MD5 hash suffix
39
39
DEFAULT_PARALLEL_WORKERS = 2 # Default number of parallel workers
@@ -231,7 +231,7 @@ def build_configuration_dict(self, args) -> dict:
231
231
"parallel" : args .parallel ,
232
232
"attempts_parallel" : args .attempts_parallel ,
233
233
"attempts_serial" : args .attempts_serial ,
234
- "keep_failed_namespaces " : args .keep_failed_namespaces ,
234
+ "delete_failed_namespaces " : args .delete_failed_namespaces ,
235
235
"venv" : args .venv ,
236
236
"extra_args" : args .extra_args ,
237
237
"output_dir" : str (self .output_dir ),
@@ -378,7 +378,7 @@ def create_safe_log_filename(
378
378
return f"{ safe_test_name } _attempt_{ attempt } _{ attempt_type } .txt"
379
379
380
380
def build_test_command (
381
- self , test_name : str = None , skip_delete : bool = False
381
+ self , test_name : str = None , skip_delete : bool = False , work_dir : str = None
382
382
) -> List [str ]:
383
383
"""Build the command arguments for running tests."""
384
384
command_args = ["scripts/run-tests" ]
@@ -393,6 +393,13 @@ def build_test_command(
393
393
if test_name :
394
394
command_args .extend (["--test" , test_name ])
395
395
396
+ # Add a unique work directory to prevent interference between parallel tests.
397
+ # beku deletes the work dir at the start of a test, so if one test is already running and
398
+ # another starts, the new one deletes (and recreates) the work directory.
399
+ # This does lead to test failures.
400
+ if work_dir :
401
+ command_args .extend (["--work-dir" , work_dir ])
402
+
396
403
# Add any extra arguments passed through
397
404
if self .args .extra_args :
398
405
command_args .extend (self .args .extra_args )
@@ -484,7 +491,20 @@ def run_single_test_suite(
484
491
attempt_type : str = "initial" ,
485
492
) -> TestResult :
486
493
"""Run a single test or the full test suite."""
487
- command_args = self .build_test_command (test_name , skip_delete )
494
+ # Create unique work directory to prevent parallel test interference
495
+ timestamp = datetime .now ().strftime ("%Y%m%d_%H%M%S_%f" )[
496
+ :- 3
497
+ ] # microseconds to milliseconds
498
+ if test_name :
499
+ # Create a unique work directory based on test name and timestamp
500
+ safe_test_name = test_name .replace ("/" , "_" ).replace ("," , "_" )
501
+ work_dir = (
502
+ f"tests/_work_{ safe_test_name } _{ attempt } _{ attempt_type } _{ timestamp } "
503
+ )
504
+ else :
505
+ work_dir = f"tests/_work_full_suite_{ attempt } _{ attempt_type } _{ timestamp } "
506
+
507
+ command_args = self .build_test_command (test_name , skip_delete , work_dir )
488
508
489
509
# Set up log file
490
510
if test_name :
@@ -519,6 +539,12 @@ def run_single_test_suite(
519
539
start_time = time .time ()
520
540
521
541
with open (log_file , "w" ) as file_handle :
542
+ # Log the exact command being executed
543
+ file_handle .write (f"Command: { ' ' .join (command_args )} \n " )
544
+ file_handle .write (f"Working directory: { os .getcwd ()} \n " )
545
+ file_handle .write ("=" * 80 + "\n \n " )
546
+ file_handle .flush ()
547
+
522
548
result = subprocess .run (
523
549
command_args ,
524
550
stdout = file_handle ,
@@ -569,10 +595,18 @@ def run_single_test_suite(
569
595
print (f" 📊 Average: { self .format_duration (avg_runtime )} " )
570
596
571
597
# If this was a skip_delete attempt and test passed, clean up the namespace
598
+ # We only want to keep failed namespaces
572
599
if test_name and skip_delete and success and namespace :
573
600
self .delete_test_namespace (namespace )
574
601
print (f" 🧹 Test passed, cleaning up namespace: { namespace } " )
575
602
603
+ # Clean up the unique work directory after test completion
604
+ if work_dir and Path (work_dir ).exists ():
605
+ try :
606
+ shutil .rmtree (work_dir )
607
+ except Exception as exception :
608
+ print (f" ⚠️ Failed to clean up work directory { work_dir } : { exception } " )
609
+
576
610
return test_result
577
611
578
612
@@ -727,7 +761,9 @@ def generate_comprehensive_report(self, runner, start_time: datetime) -> str:
727
761
report .append (f"Parallel: { runner .args .parallel } " )
728
762
report .append (f"Parallel retry attempts: { runner .args .attempts_parallel } " )
729
763
report .append (f"Serial retry attempts: { runner .args .attempts_serial } " )
730
- report .append (f"Keep failed namespaces: { runner .args .keep_failed_namespaces } " )
764
+ report .append (
765
+ f"Delete failed namespaces: { runner .args .delete_failed_namespaces } "
766
+ )
731
767
report .append (f"Virtualenv: { runner .args .venv or 'None' } " )
732
768
report .append ("" )
733
769
@@ -882,7 +918,7 @@ def retry_tests_in_parallel(
882
918
# Determine if this is the last attempt and no serial tests follow
883
919
is_last_attempt = attempt == max_attempts
884
920
use_skip_delete = (
885
- self .args .keep_failed_namespaces
921
+ not self .args .delete_failed_namespaces
886
922
and not serial_tests_follow
887
923
and is_last_attempt
888
924
)
@@ -892,10 +928,6 @@ def retry_tests_in_parallel(
892
928
print (
893
929
f"Retrying { len (tests_to_retry )} tests in parallel (max { max_parallel } at once)..."
894
930
)
895
- if use_skip_delete :
896
- print (
897
- " Using skip-delete for this final parallel attempt (no serial tests follow)"
898
- )
899
931
900
932
# Execute tests in parallel
901
933
with ThreadPoolExecutor (max_workers = max_parallel ) as executor :
@@ -952,10 +984,12 @@ def retry_test_serially(
952
984
for attempt in range (1 , max_attempts + 1 ):
953
985
# Only use skip-delete on the last attempt
954
986
is_last_attempt = attempt == max_attempts
987
+ use_skip_delete = not self .args .delete_failed_namespaces and is_last_attempt
988
+
955
989
result = self .test_executor .run_single_test_suite (
956
990
self .output_dir ,
957
991
test_name = test_name ,
958
- skip_delete = self . args . keep_failed_namespaces and is_last_attempt ,
992
+ skip_delete = use_skip_delete ,
959
993
attempt = attempt ,
960
994
attempt_type = "serial" ,
961
995
)
@@ -992,23 +1026,15 @@ def create_test_summary(
992
1026
else :
993
1027
final_status = "failed"
994
1028
995
- # Find the last namespace for failed tests
1029
+ # Find the last namespace for failed tests (only if keeping failed namespaces)
996
1030
final_namespace = None
997
- if final_status == "failed" :
1031
+ if final_status == "failed" and not self . args . delete_failed_namespaces :
998
1032
# Keep the last failed attempt's namespace
999
1033
for result in reversed (retry_results ):
1000
1034
if result .namespace :
1001
1035
final_namespace = result .namespace
1002
1036
break
1003
1037
1004
- # Clean up namespaces for successful tests
1005
- if final_status in ["passed" , "flaky" ]:
1006
- # Delete all namespaces for this test
1007
- all_results = [initial_result ] + retry_results
1008
- for result in all_results :
1009
- if result .namespace and result .namespace != final_namespace :
1010
- self .test_executor .delete_test_namespace (result .namespace )
1011
-
1012
1038
summary = TestSummary (
1013
1039
test_name = test_name ,
1014
1040
initial_result = initial_result ,
@@ -1078,8 +1104,14 @@ def _run_initial_test_suite(self) -> bool:
1078
1104
)
1079
1105
1080
1106
if not failed_tests :
1081
- print (" No failed tests found in output (this might be a parsing issue)" )
1082
- self .report_generator .generate_and_save_final_report (self , self .start_time )
1107
+ print (" No failed tests found in output but run-tests exited with code 1" )
1108
+ print (
1109
+ " This indicates an infrastructure or setup issue that prevents tests from running"
1110
+ )
1111
+ print (
1112
+ " Check the log file for connection errors, missing dependencies, or cluster issues"
1113
+ )
1114
+ print (f" Log file: { initial_result .log_file } " )
1083
1115
return False
1084
1116
1085
1117
print (f" Found { len (failed_tests )} failed tests:" )
@@ -1281,9 +1313,9 @@ def main():
1281
1313
1282
1314
# Namespace management arguments
1283
1315
parser .add_argument (
1284
- "--keep -failed-namespaces" ,
1316
+ "--delete -failed-namespaces" ,
1285
1317
action = "store_true" ,
1286
- help = "Keep namespaces of failed tests for debugging (only the last one is kept )" ,
1318
+ help = "Delete namespaces of failed tests (default: keep them for debugging )" ,
1287
1319
)
1288
1320
1289
1321
# Output arguments
0 commit comments