Skip to content

Commit e624890

Browse files
committed
Handle errors in setUpClass/Module better
1 parent 16c1487 commit e624890

File tree

1 file changed

+119
-60
lines changed
  • graalpython/com.oracle.graal.python.test/src

1 file changed

+119
-60
lines changed

graalpython/com.oracle.graal.python.test/src/runner.py

Lines changed: 119 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,15 @@ def from_str(cls, s: str):
127127
@classmethod
128128
def from_test_case(cls, test_file: Path, test: unittest.TestCase):
129129
test_id = test.id()
130-
if type(test).id is not unittest.TestCase.id:
130+
if type(test).__name__ == '_ErrorHolder':
131+
if match := re.match(r'(\S+) \(([^)]+)\)', test_id):
132+
action = match.group(1)
133+
class_name = match.group(2)
134+
if 'Module' in action:
135+
test_id = f'<{action}>'
136+
else:
137+
test_id = f'{class_name}.<{action}>'
138+
elif type(test).id is not unittest.TestCase.id:
131139
# Qualify doctests so that we know what they are
132140
test_id = f'{type(test).__qualname__}.{test_id}'
133141
return cls(test_file, test_id)
@@ -215,7 +223,10 @@ def report_result(self, result: TestResult):
215223
pass
216224

217225
def make_result(self, test, status: TestStatus, **kwargs):
218-
return TestResult(status=status, test_id=self.test_id(test), duration=(time.time() - self.start_time), **kwargs)
226+
duration = 0.0
227+
if self.start_time:
228+
duration = time.time() - self.start_time
229+
return TestResult(status=status, test_id=self.test_id(test), duration=duration, **kwargs)
219230

220231
def addSuccess(self, test):
221232
super().addSuccess(test)
@@ -549,56 +560,88 @@ class SubprocessWorker:
549560
def __init__(self, runner: ParallelTestRunner, tests: list['Test']):
550561
self.runner = runner
551562
self.stop_event = runner.stop_event
563+
self.lock = threading.RLock()
552564
self.remaining_test_ids = [test.test_id for test in tests]
553565
self.tests_by_id = {test.test_id: test for test in tests}
554566
self.out_file: typing.TextIO | None = None
555-
self.last_started_test: Test | None = None
567+
self.last_started_test_id: TestId | None = None
556568
self.last_started_time: float | None = None
557569
self.last_out_pos = 0
570+
self.last_test_id_for_blame: TestId | None = None
558571
self.process: subprocess.Popen | None = None
559572
self.thread = None
560573

561574
def process_event(self, event):
575+
test_id = event['test']
562576
match event['event']:
563577
case 'testStarted':
564-
self.remaining_test_ids.remove(event['test'])
565-
self.runner.report_start(event['test'])
566-
self.last_started_test = self.tests_by_id[event['test']]
567-
self.last_started_time = time.time()
568-
self.last_out_pos = event['out_pos']
578+
self.remaining_test_ids.remove(test_id)
579+
self.runner.report_start(test_id)
580+
with self.lock:
581+
self.last_started_test_id = test_id
582+
self.last_started_time = time.time()
583+
self.last_out_pos = event['out_pos']
569584
case 'testResult':
585+
status = event['status']
570586
out_end = event['out_pos']
571587
test_output = ''
572588
if self.last_out_pos != out_end:
573589
self.out_file.seek(self.last_out_pos)
574590
test_output = self.out_file.read(out_end - self.last_out_pos)
575591
result = TestResult(
576-
test_id=event['test'],
577-
status=event['status'],
592+
test_id=test_id,
593+
status=status,
578594
param=event.get('param'),
579595
output=test_output,
580596
duration=event.get('duration'),
581597
)
582598
self.runner.report_result(result)
583-
self.last_started_test = None
584-
self.last_started_time = None
585-
self.last_out_pos = event['out_pos']
599+
with self.lock:
600+
self.last_started_test_id = None
601+
self.last_started_time = time.time() # Starts timeout for the following teardown/setup
602+
self.last_test_id_for_blame = test_id
603+
self.last_out_pos = event['out_pos']
604+
if test_id.test_name.endswith('>'):
605+
class_name = test_id.test_name[:test_id.test_name.find('<')]
606+
specifier = TestSpecifier(test_id.test_file, class_name or None)
607+
self.remaining_test_ids = [
608+
test for test in self.remaining_test_ids if not specifier.match(test_id)
609+
]
586610

587611
def get_status(self):
588-
if not self.process:
589-
process_status = "not started"
590-
elif self.process.poll() is not None:
591-
process_status = f"exitted with code {self.process.returncode}"
592-
else:
593-
process_status = "running"
594-
595-
if self.last_started_test is not None:
596-
duration = time.time() - self.last_started_time
597-
test_status = f"executing {self.last_started_test} for {duration:.2f}s"
598-
else:
599-
test_status = "no current test"
600-
remaining = len(self.remaining_test_ids)
601-
return f"test: {test_status}; remaining: {remaining}; process status: {process_status}"
612+
with self.lock:
613+
if not self.process:
614+
process_status = "not started"
615+
elif self.process.poll() is not None:
616+
process_status = f"exitted with code {self.process.returncode}"
617+
else:
618+
process_status = "running"
619+
620+
last_test_id = self.get_test_to_blame()
621+
if last_test_id is not None:
622+
if last_test_id is not self.last_started_test_id:
623+
last_test_id = f'{last_test_id} (approximate)'
624+
duration = time.time() - self.last_started_time
625+
test_status = f"executing {last_test_id} for {duration:.2f}s"
626+
else:
627+
test_status = "no current test"
628+
remaining = len(self.remaining_test_ids)
629+
return f"test: {test_status}; remaining: {remaining}; process status: {process_status}"
630+
631+
def get_test_to_blame(self):
632+
if self.last_started_test_id:
633+
return self.last_started_test_id
634+
# XXX unittest doesn't report module/class setups/teardowns, so if a test hard crashes or times out during
635+
# those, we can't tell which one is to blame. So we make a combined result for both as a last resort
636+
next_test_id = self.remaining_test_ids[0] if self.remaining_test_ids else None
637+
if self.last_test_id_for_blame is None:
638+
return TestId(next_test_id.test_file, f'<before> {next_test_id}')
639+
if next_test_id is None:
640+
return TestId(self.last_test_id_for_blame.test_file, f'<after> {self.last_test_id_for_blame}')
641+
return TestId(
642+
Path(''),
643+
f'<between> {self.last_test_id_for_blame} <and> {next_test_id}',
644+
)
602645

603646
def run_in_subprocess_and_watch(self):
604647
self.thread = threading.current_thread()
@@ -615,11 +658,13 @@ def run_in_subprocess_and_watch(self):
615658
result_file = tmp_dir / 'result'
616659

617660
while self.remaining_test_ids and not self.stop_event.is_set():
661+
last_remaining_count = len(self.remaining_test_ids)
618662
with (
619663
open(tmp_dir / 'out', 'w+') as self.out_file,
620664
open(tmp_dir / 'tests', 'w+') as tests_file,
621665
):
622666
self.last_out_pos = 0
667+
self.last_started_time = time.time()
623668
cmd = [
624669
sys.executable,
625670
'-u',
@@ -656,22 +701,26 @@ def run_in_subprocess_and_watch(self):
656701
if self.stop_event.is_set():
657702
interrupt_process(self.process)
658703
break
659-
if self.last_started_test is not None:
660-
timeout = self.last_started_test.test_file.test_config.per_test_timeout
661-
if time.time() - self.last_started_time >= timeout:
662-
interrupt_process(self.process)
663-
timed_out = True
664-
# Drain the pipe
665-
while pipe.poll(0.1):
666-
pipe.recv()
667-
break
704+
if self.last_started_test_id:
705+
last_started_test = self.tests_by_id.get(self.last_started_test_id)
706+
timeout = last_started_test.test_file.test_config.per_test_timeout
707+
else:
708+
timeout = self.runner.default_test_timeout
709+
710+
if time.time() - self.last_started_time >= timeout:
711+
interrupt_process(self.process)
712+
timed_out = True
713+
# Drain the pipe
714+
while pipe.poll(0.1):
715+
pipe.recv()
716+
break
668717
try:
669-
returncode = self.process.wait(60)
718+
self.process.wait(self.runner.default_test_timeout)
670719
except subprocess.TimeoutExpired:
671720
log("Warning: Worker didn't shutdown in a timely manner, interrupting it")
672721
interrupt_process(self.process)
673722

674-
self.process.wait()
723+
returncode = self.process.wait()
675724

676725
if self.stop_event.is_set():
677726
return
@@ -686,28 +735,38 @@ def run_in_subprocess_and_watch(self):
686735
if returncode != 0 or timed_out:
687736
self.out_file.seek(self.last_out_pos)
688737
output = self.out_file.read()
689-
if self.last_started_test:
690-
if timed_out:
691-
message = "Timed out"
692-
elif returncode >= 0:
693-
message = f"Test process exitted with code {returncode}"
694-
else:
695-
try:
696-
signal_name = signal.Signals(-returncode).name
697-
except ValueError:
698-
signal_name = str(-returncode)
699-
message = f"Test process killed by signal {signal_name}"
700-
self.runner.report_result(TestResult(
701-
test_id=self.last_started_test.test_id,
702-
status=TestStatus.ERROR,
703-
param=message,
704-
output=output,
705-
))
706-
continue
738+
if timed_out:
739+
message = "Timed out"
740+
elif returncode >= 0:
741+
message = f"Test process exitted with code {returncode}"
707742
else:
708-
# Crashed outside of tests, don't retry
709-
self.runner.crashes.append(output or 'Runner subprocess crashed')
710-
return
743+
try:
744+
signal_name = signal.Signals(-returncode).name
745+
except ValueError:
746+
signal_name = str(-returncode)
747+
message = f"Test process killed by signal {signal_name}"
748+
blame_id = self.get_test_to_blame()
749+
self.runner.report_result(TestResult(
750+
test_id=blame_id,
751+
status=TestStatus.ERROR,
752+
param=message,
753+
output=output,
754+
))
755+
if blame_id is not self.last_started_test_id:
756+
# If we're here, it means we didn't know exactly which test we were executing, we were
757+
# somewhere in between
758+
if self.last_test_id_for_blame:
759+
# Retry the same test again, if it crashes again, we would get into the else branch
760+
self.last_started_test_id = None
761+
self.last_test_id_for_blame = None
762+
continue
763+
else:
764+
# The current test caused the crash for sure, continue with the next
765+
if self.remaining_test_ids:
766+
del self.remaining_test_ids[0]
767+
self.last_started_test_id = None
768+
if last_remaining_count == len(self.remaining_test_ids):
769+
raise RuntimeError("Worker is not making progress")
711770

712771

713772
@dataclass
@@ -966,7 +1025,7 @@ def collect(all_specifiers: list[TestSpecifier], *, use_tags=False, ignore=None,
9661025
ignore = [path_for_comparison(i) for i in ignore]
9671026
test_files = [
9681027
test_file for test_file in test_files
969-
if any(path_for_comparison(test_file.path).is_relative_to(i) for i in ignore)
1028+
if not any(path_for_comparison(test_file.path).is_relative_to(i) for i in ignore)
9701029
]
9711030
if not no_excludes:
9721031
excluded, test_files = partition_list(test_files, lambda f: f.test_config.exclude)

0 commit comments

Comments
 (0)