Skip to content

Commit c7836ef

Browse files
authored
refactor(ci-insights): Use rerun instead of retry in flaky detection (#269)
The goal is to use a consistent naming with `reruns` instead of `retries` because we are not really retrying a test (e.g. if it succeeded on its first run), but rerunning it multiple times. Fixes: MRGFY-6076 Depends-On: #268
1 parent 9a339af commit c7836ef

File tree

5 files changed

+59
-60
lines changed

5 files changed

+59
-60
lines changed

pytest_mergify/__init__.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ def pytest_runtest_protocol(
206206
context = opentelemetry.trace.set_span_in_context(self.session_span)
207207

208208
# Execute the initial protocol to register its duration, which lets us
209-
# calculate the number of retries.
209+
# calculate the number of reruns.
210210
with self.tracer.start_as_current_span(
211211
item.nodeid, attributes=attributes, context=context
212212
):
@@ -216,7 +216,7 @@ def pytest_runtest_protocol(
216216
return True
217217

218218
for _ in range(
219-
self.mergify_ci.flaky_detector.get_retry_count_for_test(item.nodeid)
219+
self.mergify_ci.flaky_detector.get_rerun_count_for_test(item.nodeid)
220220
):
221221
with self.tracer.start_as_current_span(
222222
item.nodeid, attributes=attributes, context=context
@@ -262,10 +262,10 @@ def pytest_runtest_teardown(
262262
return
263263

264264
# The goal here is to keep only function-scoped finalizers during
265-
# retries and restore higher-scoped finalizers only on the last retry.
265+
# reruns and restore higher-scoped finalizers only on the last one.
266266
if (
267267
self.mergify_ci.flaky_detector.is_deadline_exceeded()
268-
or self.mergify_ci.flaky_detector.is_last_retry_for_test(item.nodeid)
268+
or self.mergify_ci.flaky_detector.is_last_rerun_for_test(item.nodeid)
269269
):
270270
self.mergify_ci.flaky_detector.restore_item_finalizers(item)
271271
else:

pytest_mergify/flaky_detection.py

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -43,25 +43,25 @@ class _TestMetrics:
4343
"Represents the duration of the initial execution of the test."
4444

4545
# NOTE(remyduthu): We need this flag because we may have processed a test
46-
# without scheduling retries for it (e.g., because it was too slow).
46+
# without scheduling reruns for it (e.g., because it was too slow).
4747
is_processed: bool = dataclasses.field(default=False)
4848

49-
retry_count: int = dataclasses.field(default=0)
50-
"Represents the number of times the test has been retried so far."
49+
rerun_count: int = dataclasses.field(default=0)
50+
"Represents the number of times the test has been rerun so far."
5151

52-
scheduled_retry_count: int = dataclasses.field(default=0)
53-
"Represents the number of retries that have been scheduled for this test depending on the budget."
52+
scheduled_rerun_count: int = dataclasses.field(default=0)
53+
"Represents the number of reruns that have been scheduled for this test depending on the budget."
5454

5555
total_duration: datetime.timedelta = dataclasses.field(
5656
default_factory=datetime.timedelta
5757
)
58-
"Represents the total duration spent executing this test, including retries."
58+
"Represents the total duration spent executing this test, including reruns."
5959

6060
def add_duration(self, duration: datetime.timedelta) -> None:
6161
if not self.initial_duration:
6262
self.initial_duration = duration
6363

64-
self.retry_count += 1
64+
self.rerun_count += 1
6565
self.total_duration += duration
6666

6767

@@ -102,10 +102,10 @@ class FlakyDetector:
102102
<session>: [(finalizer_fn, ...), exception_info] # Session scope.
103103
}
104104
105-
When retrying a test, we want to:
105+
When rerunning a test, we want to:
106106
107-
- Tear down and re-setup function-scoped fixtures for each retry.
108-
- Keep higher-scoped fixtures alive across all retries.
107+
- Tear down and re-setup function-scoped fixtures for each rerun.
108+
- Keep higher-scoped fixtures alive across all reruns.
109109
110110
This approach is inspired by pytest-rerunfailures:
111111
https://github.com/pytest-dev/pytest-rerunfailures/blob/master/src/pytest_rerunfailures.py#L503-L542
@@ -169,7 +169,7 @@ def filter_context_tests_with_session(self, session: _pytest.main.Session) -> No
169169
test for test in self._context.unhealthy_test_names if test in session_tests
170170
]
171171

172-
def get_retry_count_for_test(self, test: str) -> int:
172+
def get_rerun_count_for_test(self, test: str) -> int:
173173
metrics = self._test_metrics.get(test)
174174
if not metrics:
175175
return 0
@@ -186,7 +186,7 @@ def get_retry_count_for_test(self, test: str) -> int:
186186
if result < self._context.min_test_execution_count:
187187
return 0
188188

189-
metrics.scheduled_retry_count = result
189+
metrics.scheduled_rerun_count = result
190190

191191
return result
192192

@@ -216,42 +216,42 @@ def make_report(self) -> str:
216216

217217
return result
218218

219-
total_retry_duration_seconds = sum(
219+
total_rerun_duration_seconds = sum(
220220
metrics.total_duration.total_seconds()
221221
for metrics in self._test_metrics.values()
222222
)
223223
budget_duration_seconds = self._get_budget_duration().total_seconds()
224224
result += (
225-
f"{os.linesep}- Used {total_retry_duration_seconds / budget_duration_seconds * 100:.2f} % of the budget "
226-
f"({total_retry_duration_seconds:.2f} s/{budget_duration_seconds:.2f} s)"
225+
f"{os.linesep}- Used {total_rerun_duration_seconds / budget_duration_seconds * 100:.2f} % of the budget "
226+
f"({total_rerun_duration_seconds:.2f} s/{budget_duration_seconds:.2f} s)"
227227
)
228228

229229
result += (
230230
f"{os.linesep}- Active for {len(self._test_metrics)} {self.mode} "
231231
f"test{'s' if len(self._test_metrics) > 1 else ''}:"
232232
)
233233
for test, metrics in self._test_metrics.items():
234-
if metrics.scheduled_retry_count == 0:
234+
if metrics.scheduled_rerun_count == 0:
235235
result += (
236236
f"{os.linesep} • '{test}' is too slow to be tested at least "
237237
f"{self._context.min_test_execution_count} times within the budget"
238238
)
239239
continue
240240

241-
if metrics.retry_count < metrics.scheduled_retry_count:
241+
if metrics.rerun_count < metrics.scheduled_rerun_count:
242242
result += (
243-
f"{os.linesep} • '{test}' has been tested only {metrics.retry_count} "
244-
f"time{'s' if metrics.retry_count > 1 else ''} instead of {metrics.scheduled_retry_count} "
245-
f"time{'s' if metrics.scheduled_retry_count > 1 else ''} to avoid exceeding the budget"
243+
f"{os.linesep} • '{test}' has been tested only {metrics.rerun_count} "
244+
f"time{'s' if metrics.rerun_count > 1 else ''} instead of {metrics.scheduled_rerun_count} "
245+
f"time{'s' if metrics.scheduled_rerun_count > 1 else ''} to avoid exceeding the budget"
246246
)
247247
continue
248248

249-
retry_duration_seconds = metrics.total_duration.total_seconds()
249+
rerun_duration_seconds = metrics.total_duration.total_seconds()
250250
result += (
251-
f"{os.linesep} • '{test}' has been tested {metrics.retry_count} "
252-
f"time{'s' if metrics.retry_count > 1 else ''} using approx. "
253-
f"{retry_duration_seconds / budget_duration_seconds * 100:.2f} % of the budget "
254-
f"({retry_duration_seconds:.2f} s/{budget_duration_seconds:.2f} s)"
251+
f"{os.linesep} • '{test}' has been tested {metrics.rerun_count} "
252+
f"time{'s' if metrics.rerun_count > 1 else ''} using approx. "
253+
f"{rerun_duration_seconds / budget_duration_seconds * 100:.2f} % of the budget "
254+
f"({rerun_duration_seconds:.2f} s/{budget_duration_seconds:.2f} s)"
255255
)
256256

257257
return result
@@ -263,17 +263,17 @@ def set_deadline(self) -> None:
263263
+ self._get_budget_duration()
264264
)
265265

266-
def is_last_retry_for_test(self, test: str) -> bool:
267-
"Returns true if the given test exists and this is its last retry."
266+
def is_last_rerun_for_test(self, test: str) -> bool:
267+
"Returns true if the given test exists and this is its last rerun."
268268

269269
metrics = self._test_metrics.get(test)
270270
if not metrics:
271271
return False
272272

273273
return (
274-
metrics.scheduled_retry_count != 0
275-
and metrics.scheduled_retry_count + 1 # Add the initial execution.
276-
== metrics.retry_count
274+
metrics.scheduled_rerun_count != 0
275+
and metrics.scheduled_rerun_count + 1 # Add the initial execution.
276+
== metrics.rerun_count
277277
)
278278

279279
def suspend_item_finalizers(self, item: _pytest.nodes.Item) -> None:

tests/test_ci_insights.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ def test_quux():
284284

285285
assert spans is not None
286286

287-
# 1 span for the session and one per test, including 1000 retries for each unhealthy test.
287+
# 1 span for the session and one per test, including 1000 reruns for each unhealthy test.
288288
assert len(spans) == 1 + 3005
289289

290290
unhealthy_tests = [
@@ -356,11 +356,11 @@ def test_second():
356356
357357
def test_last():
358358
# This test validates that fixtures are properly set up and torn down
359-
# during test retries. With 3 tests total (test_first, test_second, test_last)
360-
# where test_second is new and gets retried 1000 times:
361-
# - SETUP_COUNT should be 1003 (1 initial run per test + 1000 retries of test_second)
359+
# during test reruns. With 3 tests total (test_first, test_second, test_last)
360+
# where test_second is new and gets rerun 1000 times:
361+
# - SETUP_COUNT should be 1003 (1 initial run per test + 1000 reruns of test_second)
362362
# - TEARDOWN_COUNT should be 1002 (all tests complete except test_last which is currently running)
363-
# This ensures that function-scoped fixtures execute fresh for each retry,
363+
# This ensures that function-scoped fixtures execute fresh for each rerun,
364364
# while session-scoped fixtures run only once (validated by SESSION_ALREADY_SET).
365365
global SETUP_COUNT, TEARDOWN_COUNT
366366
assert SETUP_COUNT == 1003
@@ -449,16 +449,16 @@ async def test_bar():
449449

450450

451451
@responses.activate
452-
def test_flaky_detection_slow_test_not_retried(
452+
def test_flaky_detection_slow_test_not_reran(
453453
monkeypatch: pytest.MonkeyPatch,
454454
pytester: _pytest.pytester.Pytester,
455455
) -> None:
456-
"Test that a slow test is not retried when it can't reach 5 within the budget."
456+
"Test that a slow test is not reran when it can't reach 5 within the budget."
457457
_set_test_environment(monkeypatch)
458458
_make_quarantine_mock()
459459
_make_flaky_detection_context_mock(
460460
existing_test_names=[
461-
"test_flaky_detection_slow_test_not_retried.py::test_existing",
461+
"test_flaky_detection_slow_test_not_reran.py::test_existing",
462462
],
463463
min_test_execution_count=5,
464464
)
@@ -497,29 +497,29 @@ def test_slow():
497497

498498
# `test_fast` should have been tested successfully.
499499
assert re.search(
500-
r"'test_flaky_detection_slow_test_not_retried\.py::test_fast' has been tested \d+ times",
500+
r"'test_flaky_detection_slow_test_not_reran\.py::test_fast' has been tested \d+ times",
501501
result.stdout.str(),
502502
)
503503

504504
assert (
505-
"'test_flaky_detection_slow_test_not_retried.py::test_slow' is too slow to be tested at least 5 times within the budget"
505+
"'test_flaky_detection_slow_test_not_reran.py::test_slow' is too slow to be tested at least 5 times within the budget"
506506
in result.stdout.str()
507507
)
508508

509509

510510
@responses.activate
511-
def test_flaky_detection_budget_deadline_stops_retries(
511+
def test_flaky_detection_budget_deadline_stops_reruns(
512512
monkeypatch: pytest.MonkeyPatch,
513513
pytester: _pytest.pytester.Pytester,
514514
) -> None:
515515
"""
516-
Test that retries are stopped when they would exceed the budget deadline.
516+
Test that reruns are stopped when they would exceed the budget deadline.
517517
"""
518518
_set_test_environment(monkeypatch)
519519
_make_quarantine_mock()
520520
_make_flaky_detection_context_mock(
521521
existing_test_names=[
522-
"test_flaky_detection_budget_deadline_stops_retries.py::test_existing",
522+
"test_flaky_detection_budget_deadline_stops_reruns.py::test_existing",
523523
]
524524
)
525525

@@ -564,11 +564,11 @@ def test_new():
564564
# We should have:
565565
# - 1 execution of `test_existing`,
566566
# - 1 initial execution of `test_new`,
567-
# - Only 8 retries of `test_new` before the deadline is reached.
567+
# - Only 8 reruns of `test_new` before the deadline is reached.
568568
result.assert_outcomes(passed=10)
569569

570570
assert re.search(
571-
r"'test_flaky_detection_budget_deadline_stops_retries\.py::test_new' has been tested only \d+ times instead of \d+ times to avoid exceeding the budget",
571+
r"'test_flaky_detection_budget_deadline_stops_reruns\.py::test_new' has been tested only \d+ times instead of \d+ times to avoid exceeding the budget",
572572
result.stdout.str(),
573573
)
574574

tests/test_flaky_detection.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def test_flaky_detector_count_remaining_new_tests() -> None:
7070

7171

7272
@freezegun.freeze_time(_NOW)
73-
def test_flaky_detector_get_retry_count_for_new_tests() -> None:
73+
def test_flaky_detector_get_rerun_count_for_new_tests() -> None:
7474
detector = InitializedFlakyDetector()
7575
detector._context = _make_flaky_detection_context(
7676
min_test_execution_count=5,
@@ -89,11 +89,11 @@ def test_flaky_detector_get_retry_count_for_new_tests() -> None:
8989
}
9090
detector.set_deadline()
9191

92-
assert detector.get_retry_count_for_test("bar") == 20
92+
assert detector.get_rerun_count_for_test("bar") == 20
9393

9494

9595
@freezegun.freeze_time(_NOW)
96-
def test_flaky_detector_get_retry_count_for_new_tests_with_slow_test() -> None:
96+
def test_flaky_detector_get_rerun_count_for_new_tests_with_slow_test() -> None:
9797
detector = InitializedFlakyDetector()
9898
detector._context = _make_flaky_detection_context(
9999
min_test_execution_count=5,
@@ -102,7 +102,7 @@ def test_flaky_detector_get_retry_count_for_new_tests_with_slow_test() -> None:
102102
)
103103
detector._test_metrics = {
104104
"foo": flaky_detection._TestMetrics(
105-
# Can't be retried 5 times within the budget.
105+
# Can't be rerun 5 times within the budget.
106106
initial_duration=datetime.timedelta(seconds=1),
107107
),
108108
"bar": flaky_detection._TestMetrics(
@@ -112,13 +112,13 @@ def test_flaky_detector_get_retry_count_for_new_tests_with_slow_test() -> None:
112112
}
113113
detector.set_deadline()
114114

115-
assert detector.get_retry_count_for_test("foo") == 0
115+
assert detector.get_rerun_count_for_test("foo") == 0
116116

117-
assert detector.get_retry_count_for_test("bar") == 500
117+
assert detector.get_rerun_count_for_test("bar") == 500
118118

119119

120120
@freezegun.freeze_time(_NOW)
121-
def test_flaky_detector_get_retry_count_for_new_tests_with_fast_test() -> None:
121+
def test_flaky_detector_get_rerun_count_for_new_tests_with_fast_test() -> None:
122122
detector = InitializedFlakyDetector()
123123
detector._context = _make_flaky_detection_context(
124124
min_test_execution_count=5,
@@ -127,10 +127,10 @@ def test_flaky_detector_get_retry_count_for_new_tests_with_fast_test() -> None:
127127
)
128128
detector._test_metrics = {
129129
"foo": flaky_detection._TestMetrics(
130-
# Should only be retried 1000 times, freeing the rest of the budget for other tests.
130+
# Should only be rerun 1000 times, freeing the rest of the budget for other tests.
131131
initial_duration=datetime.timedelta(milliseconds=1),
132132
),
133133
}
134134
detector.set_deadline()
135135

136-
assert detector.get_retry_count_for_test("foo") == 1000
136+
assert detector.get_rerun_count_for_test("foo") == 1000

tests/test_plugin.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
from _pytest.pytester import Pytester
44

55
import pytest_mergify
6-
7-
from . import conftest
6+
from tests import conftest
87

98

109
def test_plugin_is_loaded(pytestconfig: _pytest.config.Config) -> None:

0 commit comments

Comments
 (0)