Skip to content

Commit 4fbd836

Browse files
authored
[autorevert] Treat test retry success as SUCCESS in autorevert signal extraction (#7790)
When a test fails on first try but succeeds on retry within the same job, the HUD and job conclusion treat it as "flaky" (not "failure"). Align autorevert behavior: emit only SUCCESS when success_runs > 0, and filter out tests that retry successfully everywhere from signal creation entirely. Removes noise like this: <img width="502" height="496" alt="image" src="https://github.com/user-attachments/assets/70456275-4d80-4f09-870d-59a0ad88a282" /> The successful test retries: <img width="60" height="30" alt="image" src="https://github.com/user-attachments/assets/4a7e5857-f318-404b-8198-349160293cad" /> from the single job would be collapsed into a single success.
1 parent 5ffff92 commit 4fbd836

File tree

2 files changed

+121
-7
lines changed

2 files changed

+121
-7
lines changed

aws/lambda/pytorch-auto-revert/pytorch_auto_revert/signal_extraction.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -339,8 +339,8 @@ def _build_test_signals(
339339

340340
tests_by_group_attempt[key] = outcome
341341

342-
# Track keys that have at least one failure across any shard
343-
if outcome.failure_runs > 0:
342+
# Track keys that have at least one persistent failure (no retry success)
343+
if outcome.failure_runs > 0 and outcome.success_runs == 0:
344344
failing_tests_by_job_base_name.add(
345345
(job.workflow_name, job_base_name, tr.test_id)
346346
)
@@ -393,20 +393,23 @@ def _build_test_signals(
393393
}
394394

395395
if outcome:
396-
# Emit at most one FAILURE and one SUCCESS per attempt
397-
if outcome.failure_runs > 0:
396+
# A successful retry means the test is not persistently
397+
# broken — treat it as SUCCESS, consistent with HUD and
398+
# job-level conclusion which consider retried-then-passed
399+
# tests as "flaky" (not "failure").
400+
if outcome.success_runs > 0:
398401
events.append(
399402
SignalEvent(
400-
status=SignalStatus.FAILURE,
403+
status=SignalStatus.SUCCESS,
401404
started_at=outcome.started_at,
402405
job_id=outcome.job_id,
403406
**event_common,
404407
)
405408
)
406-
if outcome.success_runs > 0:
409+
elif outcome.failure_runs > 0:
407410
events.append(
408411
SignalEvent(
409-
status=SignalStatus.SUCCESS,
412+
status=SignalStatus.FAILURE,
410413
started_at=outcome.started_at,
411414
job_id=outcome.job_id,
412415
**event_common,

aws/lambda/pytorch-auto-revert/pytorch_auto_revert/tests/test_signal_extraction.py

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -687,6 +687,117 @@ def test_test_track_combines_shards_success_single_event(self):
687687
self.assertEqual(len(events), 1)
688688
self.assertEqual(events[0].status, SignalStatus.SUCCESS)
689689

690+
def test_test_retry_success_emits_only_success(self):
691+
# When a test fails on first try but succeeds on retry within the same
692+
# job, the aggregated outcome has both failure_runs > 0 and success_runs > 0.
693+
# Consistent with HUD (which classifies this as "flaky", not "failure")
694+
# and job conclusion (which is "success" when retry passes), we should
695+
# emit only a SUCCESS event.
696+
jobs = [
697+
# Newer commit: job fails overall (other test fails), but test_retry
698+
# passes on retry (failure_runs=1, success_runs=1)
699+
J(
700+
sha="R2",
701+
run=2000,
702+
job=500,
703+
attempt=1,
704+
started_at=ts(self.t0, 20),
705+
conclusion="failure",
706+
rule="pytest failure",
707+
),
708+
# Older commit: same test fails persistently (no retry success)
709+
J(
710+
sha="R1",
711+
run=2100,
712+
job=501,
713+
attempt=1,
714+
started_at=ts(self.t0, 10),
715+
conclusion="failure",
716+
rule="pytest failure",
717+
),
718+
]
719+
tests = [
720+
# R2: test failed then retried successfully
721+
T(
722+
job=500,
723+
run=2000,
724+
attempt=1,
725+
file="t.py",
726+
name="test_retry",
727+
failure_runs=1,
728+
success_runs=1,
729+
),
730+
# R1: test failed, no successful retry
731+
T(
732+
job=501,
733+
run=2100,
734+
attempt=1,
735+
file="t.py",
736+
name="test_retry",
737+
failure_runs=1,
738+
success_runs=0,
739+
),
740+
]
741+
signals = self._extract(jobs, tests)
742+
sig = self._find_test_signal(signals, "trunk", "t.py::test_retry")
743+
self.assertIsNotNone(sig)
744+
self.assertEqual([c.head_sha for c in sig.commits], ["R2", "R1"])
745+
# R2: retry passed → only SUCCESS (not FAILURE+SUCCESS)
746+
self.assertEqual(len(sig.commits[0].events), 1)
747+
self.assertEqual(sig.commits[0].events[0].status, SignalStatus.SUCCESS)
748+
# R1: no retry success → FAILURE
749+
self.assertEqual(len(sig.commits[1].events), 1)
750+
self.assertEqual(sig.commits[1].events[0].status, SignalStatus.FAILURE)
751+
752+
def test_test_retry_success_everywhere_produces_no_signal(self):
753+
# When a test retries successfully on every commit, it has no persistent
754+
# failures and should not produce a test signal at all.
755+
jobs = [
756+
J(
757+
sha="S2",
758+
run=3000,
759+
job=600,
760+
attempt=1,
761+
started_at=ts(self.t0, 20),
762+
conclusion="failure",
763+
rule="pytest failure",
764+
),
765+
J(
766+
sha="S1",
767+
run=3100,
768+
job=601,
769+
attempt=1,
770+
started_at=ts(self.t0, 10),
771+
conclusion="failure",
772+
rule="pytest failure",
773+
),
774+
]
775+
tests = [
776+
# Both commits: test fails then retries successfully
777+
T(
778+
job=600,
779+
run=3000,
780+
attempt=1,
781+
file="flaky.py",
782+
name="test_always_retries",
783+
failure_runs=1,
784+
success_runs=1,
785+
),
786+
T(
787+
job=601,
788+
run=3100,
789+
attempt=1,
790+
file="flaky.py",
791+
name="test_always_retries",
792+
failure_runs=1,
793+
success_runs=1,
794+
),
795+
]
796+
signals = self._extract(jobs, tests)
797+
self.assertIsNone(
798+
self._find_test_signal(signals, "trunk", "flaky.py::test_always_retries")
799+
)
800+
690801

691802
if __name__ == "__main__":
692803
unittest.main()

0 commit comments

Comments
 (0)