Skip to content

Commit c7836ef

Browse files
authored
refactor(ci-insights): Use rerun instead of retry in flaky detection (#269)
The goal is to use a consistent naming with `reruns` instead of `retries` because we are not really retrying a test (e.g. if it succeeded on its first run), but rerunning it multiple times. Fixes: MRGFY-6076 Depends-On: #268
1 parent 9a339af commit c7836ef

File tree

5 files changed

+59
-60
lines changed

5 files changed

+59
-60
lines changed

pytest_mergify/__init__.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ def pytest_runtest_protocol(
206206
context = opentelemetry.trace.set_span_in_context(self.session_span)
207207

208208
# Execute the initial protocol to register its duration, which lets us
209-
# calculate the number of retries.
209+
# calculate the number of reruns.
210210
with self.tracer.start_as_current_span(
211211
item.nodeid, attributes=attributes, context=context
212212
):
@@ -216,7 +216,7 @@ def pytest_runtest_protocol(
216216
return True
217217

218218
for _ in range(
219-
self.mergify_ci.flaky_detector.get_retry_count_for_test(item.nodeid)
219+
self.mergify_ci.flaky_detector.get_rerun_count_for_test(item.nodeid)
220220
):
221221
with self.tracer.start_as_current_span(
222222
item.nodeid, attributes=attributes, context=context
@@ -262,10 +262,10 @@ def pytest_runtest_teardown(
262262
return
263263

264264
# The goal here is to keep only function-scoped finalizers during
265-
# retries and restore higher-scoped finalizers only on the last retry.
265+
# reruns and restore higher-scoped finalizers only on the last one.
266266
if (
267267
self.mergify_ci.flaky_detector.is_deadline_exceeded()
268-
or self.mergify_ci.flaky_detector.is_last_retry_for_test(item.nodeid)
268+
or self.mergify_ci.flaky_detector.is_last_rerun_for_test(item.nodeid)
269269
):
270270
self.mergify_ci.flaky_detector.restore_item_finalizers(item)
271271
else:

pytest_mergify/flaky_detection.py

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -43,25 +43,25 @@ class _TestMetrics:
4343
"Represents the duration of the initial execution of the test."
4444

4545
# NOTE(remyduthu): We need this flag because we may have processed a test
46-
# without scheduling retries for it (e.g., because it was too slow).
46+
# without scheduling reruns for it (e.g., because it was too slow).
4747
is_processed: bool = dataclasses.field(default=False)
4848

49-
retry_count: int = dataclasses.field(default=0)
50-
"Represents the number of times the test has been retried so far."
49+
rerun_count: int = dataclasses.field(default=0)
50+
"Represents the number of times the test has been rerun so far."
5151

52-
scheduled_retry_count: int = dataclasses.field(default=0)
53-
"Represents the number of retries that have been scheduled for this test depending on the budget."
52+
scheduled_rerun_count: int = dataclasses.field(default=0)
53+
"Represents the number of reruns that have been scheduled for this test depending on the budget."
5454

5555
total_duration: datetime.timedelta = dataclasses.field(
5656
default_factory=datetime.timedelta
5757
)
58-
"Represents the total duration spent executing this test, including retries."
58+
"Represents the total duration spent executing this test, including reruns."
5959

6060
def add_duration(self, duration: datetime.timedelta) -> None:
6161
if not self.initial_duration:
6262
self.initial_duration = duration
6363

64-
self.retry_count += 1
64+
self.rerun_count += 1
6565
self.total_duration += duration
6666

6767

@@ -102,10 +102,10 @@ class FlakyDetector:
102102
<session>: [(finalizer_fn, ...), exception_info] # Session scope.
103103
}
104104
105-
When retrying a test, we want to:
105+
When rerunning a test, we want to:
106106
107-
- Tear down and re-setup function-scoped fixtures for each retry.
108-
- Keep higher-scoped fixtures alive across all retries.
107+
- Tear down and re-setup function-scoped fixtures for each rerun.
108+
- Keep higher-scoped fixtures alive across all reruns.
109109
110110
This approach is inspired by pytest-rerunfailures:
111111
https://github.com/pytest-dev/pytest-rerunfailures/blob/master/src/pytest_rerunfailures.py#L503-L542
@@ -169,7 +169,7 @@ def filter_context_tests_with_session(self, session: _pytest.main.Session) -> No
169169
test for test in self._context.unhealthy_test_names if test in session_tests
170170
]
171171

172-
def get_retry_count_for_test(self, test: str) -> int:
172+
def get_rerun_count_for_test(self, test: str) -> int:
173173
metrics = self._test_metrics.get(test)
174174
if not metrics:
175175
return 0
@@ -186,7 +186,7 @@ def get_retry_count_for_test(self, test: str) -> int:
186186
if result < self._context.min_test_execution_count:
187187
return 0
188188

189-
metrics.scheduled_retry_count = result
189+
metrics.scheduled_rerun_count = result
190190

191191
return result
192192

@@ -216,42 +216,42 @@ def make_report(self) -> str:
216216

217217
return result
218218

219-
total_retry_duration_seconds = sum(
219+
total_rerun_duration_seconds = sum(
220220
metrics.total_duration.total_seconds()
221221
for metrics in self._test_metrics.values()
222222
)
223223
budget_duration_seconds = self._get_budget_duration().total_seconds()
224224
result += (
225-
f"{os.linesep}- Used {total_retry_duration_seconds / budget_duration_seconds * 100:.2f} % of the budget "
226-
f"({total_retry_duration_seconds:.2f} s/{budget_duration_seconds:.2f} s)"
225+
f"{os.linesep}- Used {total_rerun_duration_seconds / budget_duration_seconds * 100:.2f} % of the budget "
226+
f"({total_rerun_duration_seconds:.2f} s/{budget_duration_seconds:.2f} s)"
227227
)
228228

229229
result += (
230230
f"{os.linesep}- Active for {len(self._test_metrics)} {self.mode} "
231231
f"test{'s' if len(self._test_metrics) > 1 else ''}:"
232232
)
233233
for test, metrics in self._test_metrics.items():
234-
if metrics.scheduled_retry_count == 0:
234+
if metrics.scheduled_rerun_count == 0:
235235
result += (
236236
f"{os.linesep} • '{test}' is too slow to be tested at least "
237237
f"{self._context.min_test_execution_count} times within the budget"
238238
)
239239
continue
240240

241-
if metrics.retry_count < metrics.scheduled_retry_count:
241+
if metrics.rerun_count < metrics.scheduled_rerun_count:
242242
result += (
243-
f"{os.linesep} • '{test}' has been tested only {metrics.retry_count} "
244-
f"time{'s' if metrics.retry_count > 1 else ''} instead of {metrics.scheduled_retry_count} "
245-
f"time{'s' if metrics.scheduled_retry_count > 1 else ''} to avoid exceeding the budget"
243+
f"{os.linesep} • '{test}' has been tested only {metrics.rerun_count} "
244+
f"time{'s' if metrics.rerun_count > 1 else ''} instead of {metrics.scheduled_rerun_count} "
245+
f"time{'s' if metrics.scheduled_rerun_count > 1 else ''} to avoid exceeding the budget"
246246
)
247247
continue
248248

249-
retry_duration_seconds = metrics.total_duration.total_seconds()
249+
rerun_duration_seconds = metrics.total_duration.total_seconds()
250250
result += (
251-
f"{os.linesep} • '{test}' has been tested {metrics.retry_count} "
252-
f"time{'s' if metrics.retry_count > 1 else ''} using approx. "
253-
f"{retry_duration_seconds / budget_duration_seconds * 100:.2f} % of the budget "
254-
f"({retry_duration_seconds:.2f} s/{budget_duration_seconds:.2f} s)"
251+
f"{os.linesep} • '{test}' has been tested {metrics.rerun_count} "
252+
f"time{'s' if metrics.rerun_count > 1 else ''} using approx. "
253+
f"{rerun_duration_seconds / budget_duration_seconds * 100:.2f} % of the budget "
254+
f"({rerun_duration_seconds:.2f} s/{budget_duration_seconds:.2f} s)"
255255
)
256256

257257
return result
@@ -263,17 +263,17 @@ def set_deadline(self) -> None:
263263
+ self._get_budget_duration()
264264
)
265265

266-
def is_last_retry_for_test(self, test: str) -> bool:
267-
"Returns true if the given test exists and this is its last retry."
266+
def is_last_rerun_for_test(self, test: str) -> bool:
267+
"Returns true if the given test exists and this is its last rerun."
268268

269269
metrics = self._test_metrics.get(test)
270270
if not metrics:
271271
return False
272272

273273
return (
274-
metrics.scheduled_retry_count != 0
275-
and metrics.scheduled_retry_count + 1 # Add the initial execution.
276-
== metrics.retry_count
274+
metrics.scheduled_rerun_count != 0
275+
and metrics.scheduled_rerun_count + 1 # Add the initial execution.
276+
== metrics.rerun_count
277277
)
278278

279279
def suspend_item_finalizers(self, item: _pytest.nodes.Item) -> None:

tests/test_ci_insights.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ def test_quux():
284284

285285
assert spans is not None
286286

287-
# 1 span for the session and one per test, including 1000 retries for each unhealthy test.
287+
# 1 span for the session and one per test, including 1000 reruns for each unhealthy test.
288288
assert len(spans) == 1 + 3005
289289

290290
unhealthy_tests = [
@@ -356,11 +356,11 @@ def test_second():
356356
357357
def test_last():
358358
# This test validates that fixtures are properly set up and torn down
359-
# during test retries. With 3 tests total (test_first, test_second, test_last)
360-
# where test_second is new and gets retried 1000 times:
361-
# - SETUP_COUNT should be 1003 (1 initial run per test + 1000 retries of test_second)
359+
# during test reruns. With 3 tests total (test_first, test_second, test_last)
360+
# where test_second is new and gets rerun 1000 times:
361+
# - SETUP_COUNT should be 1003 (1 initial run per test + 1000 reruns of test_second)
362362
# - TEARDOWN_COUNT should be 1002 (all tests complete except test_last which is currently running)
363-
# This ensures that function-scoped fixtures execute fresh for each retry,
363+
# This ensures that function-scoped fixtures execute fresh for each rerun,
364364
# while session-scoped fixtures run only once (validated by SESSION_ALREADY_SET).
365365
global SETUP_COUNT, TEARDOWN_COUNT
366366
assert SETUP_COUNT == 1003
@@ -449,16 +449,16 @@ async def test_bar():
449449

450450

451451
@responses.activate
452-
def test_flaky_detection_slow_test_not_retried(
452+
def test_flaky_detection_slow_test_not_reran(
453453
monkeypatch: pytest.MonkeyPatch,
454454
pytester: _pytest.pytester.Pytester,
455455
) -> None:
456-
"Test that a slow test is not retried when it can't reach 5 within the budget."
456+
"Test that a slow test is not reran when it can't reach 5 within the budget."
457457
_set_test_environment(monkeypatch)
458458
_make_quarantine_mock()
459459
_make_flaky_detection_context_mock(
460460
existing_test_names=[
461-
"test_flaky_detection_slow_test_not_retried.py::test_existing",
461+
"test_flaky_detection_slow_test_not_reran.py::test_existing",
462462
],
463463
min_test_execution_count=5,
464464
)
@@ -497,29 +497,29 @@ def test_slow():
497497

498498
# `test_fast` should have been tested successfully.
499499
assert re.search(
500-
r"'test_flaky_detection_slow_test_not_retried\.py::test_fast' has been tested \d+ times",
500+
r"'test_flaky_detection_slow_test_not_reran\.py::test_fast' has been tested \d+ times",
501501
result.stdout.str(),
502502
)
503503

504504
assert (
505-
"'test_flaky_detection_slow_test_not_retried.py::test_slow' is too slow to be tested at least 5 times within the budget"
505+
"'test_flaky_detection_slow_test_not_reran.py::test_slow' is too slow to be tested at least 5 times within the budget"
506506
in result.stdout.str()
507507
)
508508

509509

510510
@responses.activate
511-
def test_flaky_detection_budget_deadline_stops_retries(
511+
def test_flaky_detection_budget_deadline_stops_reruns(
512512
monkeypatch: pytest.MonkeyPatch,
513513
pytester: _pytest.pytester.Pytester,
514514
) -> None:
515515
"""
516-
Test that retries are stopped when they would exceed the budget deadline.
516+
Test that reruns are stopped when they would exceed the budget deadline.
517517
"""
518518
_set_test_environment(monkeypatch)
519519
_make_quarantine_mock()
520520
_make_flaky_detection_context_mock(
521521
existing_test_names=[
522-
"test_flaky_detection_budget_deadline_stops_retries.py::test_existing",
522+
"test_flaky_detection_budget_deadline_stops_reruns.py::test_existing",
523523
]
524524
)
525525

@@ -564,11 +564,11 @@ def test_new():
564564
# We should have:
565565
# - 1 execution of `test_existing`,
566566
# - 1 initial execution of `test_new`,
567-
# - Only 8 retries of `test_new` before the deadline is reached.
567+
# - Only 8 reruns of `test_new` before the deadline is reached.
568568
result.assert_outcomes(passed=10)
569569

570570
assert re.search(
571-
r"'test_flaky_detection_budget_deadline_stops_retries\.py::test_new' has been tested only \d+ times instead of \d+ times to avoid exceeding the budget",
571+
r"'test_flaky_detection_budget_deadline_stops_reruns\.py::test_new' has been tested only \d+ times instead of \d+ times to avoid exceeding the budget",
572572
result.stdout.str(),
573573
)
574574

tests/test_flaky_detection.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def test_flaky_detector_count_remaining_new_tests() -> None:
7070

7171

7272
@freezegun.freeze_time(_NOW)
73-
def test_flaky_detector_get_retry_count_for_new_tests() -> None:
73+
def test_flaky_detector_get_rerun_count_for_new_tests() -> None:
7474
detector = InitializedFlakyDetector()
7575
detector._context = _make_flaky_detection_context(
7676
min_test_execution_count=5,
@@ -89,11 +89,11 @@ def test_flaky_detector_get_retry_count_for_new_tests() -> None:
8989
}
9090
detector.set_deadline()
9191

92-
assert detector.get_retry_count_for_test("bar") == 20
92+
assert detector.get_rerun_count_for_test("bar") == 20
9393

9494

9595
@freezegun.freeze_time(_NOW)
96-
def test_flaky_detector_get_retry_count_for_new_tests_with_slow_test() -> None:
96+
def test_flaky_detector_get_rerun_count_for_new_tests_with_slow_test() -> None:
9797
detector = InitializedFlakyDetector()
9898
detector._context = _make_flaky_detection_context(
9999
min_test_execution_count=5,
@@ -102,7 +102,7 @@ def test_flaky_detector_get_retry_count_for_new_tests_with_slow_test() -> None:
102102
)
103103
detector._test_metrics = {
104104
"foo": flaky_detection._TestMetrics(
105-
# Can't be retried 5 times within the budget.
105+
# Can't be rerun 5 times within the budget.
106106
initial_duration=datetime.timedelta(seconds=1),
107107
),
108108
"bar": flaky_detection._TestMetrics(
@@ -112,13 +112,13 @@ def test_flaky_detector_get_retry_count_for_new_tests_with_slow_test() -> None:
112112
}
113113
detector.set_deadline()
114114

115-
assert detector.get_retry_count_for_test("foo") == 0
115+
assert detector.get_rerun_count_for_test("foo") == 0
116116

117-
assert detector.get_retry_count_for_test("bar") == 500
117+
assert detector.get_rerun_count_for_test("bar") == 500
118118

119119

120120
@freezegun.freeze_time(_NOW)
121-
def test_flaky_detector_get_retry_count_for_new_tests_with_fast_test() -> None:
121+
def test_flaky_detector_get_rerun_count_for_new_tests_with_fast_test() -> None:
122122
detector = InitializedFlakyDetector()
123123
detector._context = _make_flaky_detection_context(
124124
min_test_execution_count=5,
@@ -127,10 +127,10 @@ def test_flaky_detector_get_retry_count_for_new_tests_with_fast_test() -> None:
127127
)
128128
detector._test_metrics = {
129129
"foo": flaky_detection._TestMetrics(
130-
# Should only be retried 1000 times, freeing the rest of the budget for other tests.
130+
# Should only be rerun 1000 times, freeing the rest of the budget for other tests.
131131
initial_duration=datetime.timedelta(milliseconds=1),
132132
),
133133
}
134134
detector.set_deadline()
135135

136-
assert detector.get_retry_count_for_test("foo") == 1000
136+
assert detector.get_rerun_count_for_test("foo") == 1000

tests/test_plugin.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
from _pytest.pytester import Pytester
44

55
import pytest_mergify
6-
7-
from . import conftest
6+
from tests import conftest
87

98

109
def test_plugin_is_loaded(pytestconfig: _pytest.config.Config) -> None:

0 commit comments

Comments
 (0)