Skip to content

Commit 753ea41

Browse files
authored
fix(ci-insights): Use all phases to measure duration of a test run (#276)
The goal of this change is to measure the duration of a test run precisely by including all phases (setup, call, teardown) instead of only the call phase. This improvement lets us compute the budget more accurately. Fixes: MRGFY-6111
1 parent d06e2b3 commit 753ea41

File tree

2 files changed

+73
-16
lines changed

2 files changed

+73
-16
lines changed

pytest_mergify/flaky_detection.py

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,27 @@ def min_budget_duration(self) -> datetime.timedelta:
3737
class _TestMetrics:
3838
"Represents metrics collected for a test."
3939

40-
initial_duration: datetime.timedelta = dataclasses.field(
40+
initial_setup_duration: datetime.timedelta = dataclasses.field(
4141
default_factory=datetime.timedelta
4242
)
43-
"Represents the duration of the initial execution of the test."
43+
initial_call_duration: datetime.timedelta = dataclasses.field(
44+
default_factory=datetime.timedelta
45+
)
46+
initial_teardown_duration: datetime.timedelta = dataclasses.field(
47+
default_factory=datetime.timedelta
48+
)
49+
50+
@property
51+
def initial_duration(self) -> datetime.timedelta:
52+
"""
53+
Represents the duration of the initial run of the test including the 3
54+
phases of the protocol (setup, call, teardown).
55+
"""
56+
return (
57+
self.initial_setup_duration
58+
+ self.initial_call_duration
59+
+ self.initial_teardown_duration
60+
)
4461

4562
# NOTE(remyduthu): We need this flag because we may have processed a test
4663
# without scheduling reruns for it (e.g., because it was too slow).
@@ -59,11 +76,19 @@ class _TestMetrics:
5976
)
6077
"Represents the total duration spent executing this test, including reruns."
6178

62-
def add_duration(self, duration: datetime.timedelta) -> None:
63-
if not self.initial_duration:
64-
self.initial_duration = duration
79+
def fill_from_report(self, report: _pytest.reports.TestReport) -> None:
80+
duration = datetime.timedelta(seconds=report.duration)
81+
82+
if report.when == "setup" and not self.initial_setup_duration:
83+
self.initial_setup_duration = duration
84+
elif report.when == "call" and not self.initial_call_duration:
85+
self.initial_call_duration = duration
86+
elif report.when == "teardown" and not self.initial_teardown_duration:
87+
self.initial_teardown_duration = duration
88+
89+
if report.when == "call":
90+
self.rerun_count += 1
6591

66-
self.rerun_count += 1
6792
self.total_duration += duration
6893

6994
def expected_duration(self) -> datetime.timedelta:
@@ -141,9 +166,6 @@ def _fetch_context(self) -> _FlakyDetectionContext:
141166
return result
142167

143168
def detect_from_report(self, report: _pytest.reports.TestReport) -> bool:
144-
if report.when != "call":
145-
return False
146-
147169
if report.outcome not in ["failed", "passed"]:
148170
return False
149171

@@ -161,7 +183,7 @@ def detect_from_report(self, report: _pytest.reports.TestReport) -> bool:
161183
return False
162184

163185
metrics = self._test_metrics.setdefault(test, _TestMetrics())
164-
metrics.add_duration(datetime.timedelta(seconds=report.duration))
186+
metrics.fill_from_report(report)
165187

166188
return True
167189

tests/test_flaky_detection.py

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import datetime
22
import typing
33

4+
import _pytest
5+
import _pytest.reports
46
import freezegun
57

68
from pytest_mergify import flaky_detection
@@ -22,6 +24,7 @@ def __init__(self) -> None:
2224
self.url = ""
2325
self.full_repository_name = ""
2426
self.mode = "new"
27+
self._test_metrics = {}
2528

2629
def __post_init__(self) -> None:
2730
pass
@@ -59,6 +62,38 @@ def test_flaky_detector_get_duration_before_deadline() -> None:
5962
assert detector._get_duration_before_deadline() == datetime.timedelta(seconds=10)
6063

6164

65+
def test_flaky_detector_detect_from_report() -> None:
66+
def make_report(
67+
nodeid: str, when: typing.Literal["setup", "call", "teardown"], duration: float
68+
) -> _pytest.reports.TestReport:
69+
return _pytest.reports.TestReport(
70+
duration=duration,
71+
keywords={},
72+
location=("", None, ""),
73+
longrepr=None,
74+
nodeid=nodeid,
75+
outcome="passed",
76+
when=when,
77+
)
78+
79+
detector = InitializedFlakyDetector()
80+
detector._context = _make_flaky_detection_context(max_test_name_length=100)
81+
82+
detector.detect_from_report(make_report(nodeid="foo", when="setup", duration=1))
83+
detector.detect_from_report(make_report(nodeid="foo", when="call", duration=2))
84+
detector.detect_from_report(make_report(nodeid="foo", when="teardown", duration=3))
85+
86+
detector.detect_from_report(make_report(nodeid="foo", when="setup", duration=4))
87+
detector.detect_from_report(make_report(nodeid="foo", when="call", duration=5))
88+
detector.detect_from_report(make_report(nodeid="foo", when="teardown", duration=6))
89+
90+
metrics = detector._test_metrics.get("foo")
91+
assert metrics is not None
92+
assert metrics.initial_duration == datetime.timedelta(seconds=6)
93+
assert metrics.rerun_count == 2
94+
assert metrics.total_duration == datetime.timedelta(seconds=21)
95+
96+
6297
def test_flaky_detector_count_remaining_tests() -> None:
6398
detector = InitializedFlakyDetector()
6499
detector._test_metrics = {
@@ -79,11 +114,11 @@ def test_flaky_detector_get_rerun_count_for_test() -> None:
79114
)
80115
detector._test_metrics = {
81116
"foo": flaky_detection._TestMetrics(
82-
initial_duration=datetime.timedelta(milliseconds=10),
117+
initial_call_duration=datetime.timedelta(milliseconds=10),
83118
is_processed=True,
84119
),
85120
"bar": flaky_detection._TestMetrics(
86-
initial_duration=datetime.timedelta(milliseconds=100),
121+
initial_call_duration=datetime.timedelta(milliseconds=100),
87122
),
88123
"baz": flaky_detection._TestMetrics(),
89124
}
@@ -103,11 +138,11 @@ def test_flaky_detector_get_rerun_count_for_test_with_slow_test() -> None:
103138
detector._test_metrics = {
104139
"foo": flaky_detection._TestMetrics(
105140
# Can't be reran 5 times within the budget.
106-
initial_duration=datetime.timedelta(seconds=1),
141+
initial_call_duration=datetime.timedelta(seconds=1),
107142
),
108143
"bar": flaky_detection._TestMetrics(
109144
# This test should not be impacted by the previous one.
110-
initial_duration=datetime.timedelta(milliseconds=1),
145+
initial_call_duration=datetime.timedelta(milliseconds=1),
111146
),
112147
}
113148
detector.set_deadline()
@@ -128,7 +163,7 @@ def test_flaky_detector_get_rerun_count_for_test_with_fast_test() -> None:
128163
detector._test_metrics = {
129164
"foo": flaky_detection._TestMetrics(
130165
# Should only be reran 1000 times, freeing the rest of the budget for other tests.
131-
initial_duration=datetime.timedelta(milliseconds=1),
166+
initial_call_duration=datetime.timedelta(milliseconds=1),
132167
),
133168
}
134169
detector.set_deadline()
@@ -146,7 +181,7 @@ def test_flaky_detector_get_rerun_count_for_test_with_timeout() -> None:
146181
)
147182
detector._test_metrics = {
148183
"foo": flaky_detection._TestMetrics(
149-
initial_duration=datetime.timedelta(milliseconds=4),
184+
initial_call_duration=datetime.timedelta(milliseconds=4),
150185
),
151186
}
152187
detector.set_deadline()

0 commit comments

Comments
 (0)