Skip to content

Commit b7fdaaf

Browse files
authored
feat(ci-insights): Send only 1 span for reruns (#280)
The goal of this change is to send a single span that wraps a test's initial run and all of its reruns, instead of one span per rerun. To keep the required context, we now also set the `cicd.test.rerun_count` and `cicd.test.flaky` attributes on that span. References: MRGFY-6112
1 parent 301bc5d commit b7fdaaf

File tree

5 files changed

+87
-85
lines changed

5 files changed

+87
-85
lines changed

pytest_mergify/__init__.py

Lines changed: 39 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -211,56 +211,70 @@ def pytest_runtest_protocol(
211211
# calculate the number of reruns.
212212
with self.tracer.start_as_current_span(
213213
item.nodeid, attributes=attributes, context=context
214-
):
215-
_pytest.runner.runtestprotocol(item=item, nextitem=nextitem, log=True)
214+
) as current_span:
215+
distinct_outcomes = set()
216216

217-
if not self.mergify_ci.flaky_detector:
218-
return True
217+
for report in _pytest.runner.runtestprotocol(
218+
item=item, nextitem=nextitem, log=True
219+
):
220+
distinct_outcomes.add(report.outcome)
219221

220-
timeout_seconds = pytest_timeout._get_item_settings(item).timeout
222+
if not self.mergify_ci.flaky_detector:
223+
return True
221224

222-
for _ in range(
223-
self.mergify_ci.flaky_detector.get_rerun_count_for_test(
224-
test=item.nodeid,
225-
timeout=datetime.timedelta(seconds=timeout_seconds)
226-
if timeout_seconds
227-
else None,
228-
)
229-
):
230-
with self.tracer.start_as_current_span(
231-
item.nodeid, attributes=attributes, context=context
225+
timeout_seconds = pytest_timeout._get_item_settings(item).timeout
226+
227+
rerun_count = 0
228+
for _ in range(
229+
self.mergify_ci.flaky_detector.get_rerun_count_for_test(
230+
test=item.nodeid,
231+
timeout=datetime.timedelta(seconds=timeout_seconds)
232+
if timeout_seconds
233+
else None,
234+
)
232235
):
233-
self._reruntestprotocol(item, nextitem)
236+
for report in self._reruntestprotocol(item, nextitem):
237+
distinct_outcomes.add(report.outcome)
234238

235-
if self.mergify_ci.flaky_detector.is_deadline_exceeded():
236-
return True
239+
rerun_count += 1
240+
241+
if self.mergify_ci.flaky_detector.is_deadline_exceeded():
242+
break
243+
244+
if "failed" in distinct_outcomes and "passed" in distinct_outcomes:
245+
current_span.set_attribute("cicd.test.flaky", True)
246+
247+
current_span.set_attribute("cicd.test.rerun_count", rerun_count)
237248

238249
return True
239250

240251
def _reruntestprotocol(
241252
self, item: _pytest.nodes.Item, nextitem: typing.Optional[_pytest.nodes.Item]
242-
) -> None:
253+
) -> typing.List[_pytest.reports.TestReport]:
243254
"""
244255
Run the protocol for a rerun of a given test.
245256
246257
In `new` mode, we log rerun failures to pytest's report to enforce a
247258
quality gate and prevent merging PRs with new flaky tests. In other
248259
modes (`unhealthy`), we skip logging to avoid blocking CI, but still
249-
capture reruns in spans.
260+
capture reruns in metrics.
250261
"""
251262

252263
if not self.mergify_ci.flaky_detector:
253-
return
264+
return []
254265

255266
if self.mergify_ci.flaky_detector.mode == "new":
256-
_pytest.runner.runtestprotocol(item=item, nextitem=nextitem, log=True)
257-
return
267+
return _pytest.runner.runtestprotocol(
268+
item=item, nextitem=nextitem, log=True
269+
)
258270

259271
reports = _pytest.runner.runtestprotocol(
260272
item=item, nextitem=nextitem, log=False
261273
)
262274
for report in reports:
263-
self.pytest_runtest_logreport(report)
275+
self.mergify_ci.flaky_detector.try_fill_metrics_from_report(report)
276+
277+
return reports
264278

265279
@pytest.hookimpl(tryfirst=True)
266280
def pytest_runtest_teardown(
@@ -340,8 +354,7 @@ def pytest_runtest_logreport(self, report: _pytest.reports.TestReport) -> None:
340354
if not self.mergify_ci.flaky_detector:
341355
return
342356

343-
detected = self.mergify_ci.flaky_detector.detect_from_report(report)
344-
if not detected:
357+
if not self.mergify_ci.flaky_detector.try_fill_metrics_from_report(report):
345358
return
346359

347360
test_span.set_attributes({"cicd.test.flaky_detection": True})

pytest_mergify/flaky_detection.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ def _fetch_context(self) -> _FlakyDetectionContext:
165165

166166
return result
167167

168-
def detect_from_report(self, report: _pytest.reports.TestReport) -> bool:
168+
def try_fill_metrics_from_report(self, report: _pytest.reports.TestReport) -> bool:
169169
if report.outcome not in ["failed", "passed"]:
170170
return False
171171

tests/conftest.py

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
)
1818

1919
import pytest_mergify
20-
import pytest_mergify.quarantine
2120
from pytest_mergify import utils
2221

2322
pytest_plugins = ["pytester"]
@@ -118,7 +117,7 @@ def _run(
118117
result.assert_outcomes(passed=1)
119118
if isinstance(plugin.mergify_ci.exporter, InMemorySpanExporter):
120119
spans = plugin.mergify_ci.exporter.get_finished_spans()
121-
spans_as_dict = _map_spans_to_dict(spans)
120+
spans_as_dict = {span.name: span for span in spans}
122121
# Make sure we don't lose spans in the process
123122
assert len(spans_as_dict) == len(spans)
124123
else:
@@ -129,21 +128,6 @@ def _run(
129128
return _run
130129

131130

132-
def _map_spans_to_dict(
133-
spans: typing.Tuple[ReadableSpan, ...],
134-
) -> typing.Dict[str, ReadableSpan]:
135-
result: typing.Dict[str, ReadableSpan] = {}
136-
137-
for span in spans:
138-
if span.name not in result:
139-
result[span.name] = span
140-
continue
141-
142-
result[f"{span.name}.{span.start_time}.{span.end_time}"] = span
143-
144-
return result
145-
146-
147131
class TestHTTPRequestHandler(http.server.BaseHTTPRequestHandler):
148132
# Class attribute for the response code, set by the fixture.
149133
response_code: int = 200

tests/test_ci_insights.py

Lines changed: 27 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import _pytest.reports
88
import pytest
99
import responses
10-
from opentelemetry.sdk import trace
1110

1211
import pytest_mergify
1312
from pytest_mergify import ci_insights
@@ -199,10 +198,11 @@ def test_corge():
199198
)
200199

201200
assert spans is not None
201+
assert len(spans) == 1 + 6 # 1 for the session and one per test.
202202

203-
# 1 span for the session and one per test.
204-
assert len(spans) == 1 + sum(result.parseoutcomes().values())
205-
203+
flaky_tests = [
204+
"test_flaky_detection_for_new_tests.py::test_bar",
205+
]
206206
new_tests = [
207207
"test_flaky_detection_for_new_tests.py::test_bar",
208208
"test_flaky_detection_for_new_tests.py::test_baz",
@@ -212,12 +212,13 @@ def test_corge():
212212
assert span is not None
213213
assert span.attributes is not None
214214

215-
is_new_test = span.name in new_tests
216-
if not is_new_test:
217-
continue
215+
if span.name in flaky_tests:
216+
assert span.attributes.get("cicd.test.flaky", False) is True
218217

219-
assert span.attributes.get("cicd.test.flaky_detection", False)
220-
assert span.attributes.get("cicd.test.new", False)
218+
if span.name in new_tests:
219+
assert span.attributes.get("cicd.test.flaky_detection", False) is True
220+
assert span.attributes.get("cicd.test.new", False) is True
221+
assert span.attributes.get("cicd.test.rerun_count", 0) == 1000
221222

222223

223224
@responses.activate
@@ -283,10 +284,9 @@ def test_quux():
283284
)
284285

285286
assert spans is not None
287+
assert len(spans) == 5 + 1 # 1 for the session and one per test.
286288

287-
# 1 span for the session and one per test, including 1000 reruns for each unhealthy test.
288-
assert len(spans) == 1 + 3005
289-
289+
flaky_tests = ["test_flaky_detection_for_unhealthy_tests.py::test_bar"]
290290
unhealthy_tests = [
291291
"test_flaky_detection_for_unhealthy_tests.py::test_bar",
292292
"test_flaky_detection_for_unhealthy_tests.py::test_baz",
@@ -296,12 +296,13 @@ def test_quux():
296296
assert span is not None
297297
assert span.attributes is not None
298298

299-
is_unhealthy_test = span.name in unhealthy_tests
300-
if not is_unhealthy_test:
301-
continue
299+
if span.name in flaky_tests:
300+
assert span.attributes.get("cicd.test.flaky", False) is True
302301

303-
assert span.attributes.get("cicd.test.flaky_detection", False)
304-
assert not span.attributes.get("cicd.test.new", False)
302+
if span.name in unhealthy_tests:
303+
assert not span.attributes.get("cicd.test.new")
304+
assert span.attributes.get("cicd.test.flaky_detection", False) is True
305+
assert span.attributes.get("cicd.test.rerun_count", 0) == 1000
305306

306307

307308
@responses.activate
@@ -448,14 +449,16 @@ def test_bar():
448449
result.assert_outcomes(passed=1002)
449450

450451
assert spans is not None
452+
assert len(spans) == 1 + 2 # 1 for the session and one per test.
451453

452-
# Only 1 execution for the existing test (`test_foo`), 1001 executions for
453-
# the new test (`test_bar`) and 1 additional span for the session.
454-
assert _get_span_counts(spans) == {
455-
"pytest session start": 1,
456-
"test_flaky_detection_with_only_one_new_test_at_the_end.py::test_foo": 1,
457-
"test_flaky_detection_with_only_one_new_test_at_the_end.py::test_bar": 1001,
458-
}
454+
span = spans.get(
455+
"test_flaky_detection_with_only_one_new_test_at_the_end.py::test_bar"
456+
)
457+
assert span is not None
458+
assert span.attributes is not None
459+
assert span.attributes.get("cicd.test.flaky_detection", False) is True
460+
assert span.attributes.get("cicd.test.new", False) is True
461+
assert span.attributes.get("cicd.test.rerun_count", 0) == 1000
459462

460463

461464
@responses.activate
@@ -622,13 +625,3 @@ def test_bar():
622625
# Unknown test should have been filtered out after collection.
623626
assert len(plugin.mergify_ci.flaky_detector._context.existing_test_names) == 2
624627
assert len(plugin.mergify_ci.flaky_detector._context.unhealthy_test_names) == 1
625-
626-
627-
def _get_span_counts(
628-
spans: typing.Dict[str, trace.ReadableSpan],
629-
) -> typing.Dict[str, int]:
630-
result: typing.Dict[str, int] = {}
631-
for span in spans.values():
632-
result[span.name] = result.get(span.name, 0) + 1
633-
634-
return result

tests/test_flaky_detection.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ def test_flaky_detector_get_duration_before_deadline() -> None:
6262
assert detector._get_duration_before_deadline() == datetime.timedelta(seconds=10)
6363

6464

65-
def test_flaky_detector_detect_from_report() -> None:
65+
def test_flaky_detector_try_fill_metrics_from_report() -> None:
6666
def make_report(
6767
nodeid: str, when: typing.Literal["setup", "call", "teardown"], duration: float
6868
) -> _pytest.reports.TestReport:
@@ -79,13 +79,25 @@ def make_report(
7979
detector = InitializedFlakyDetector()
8080
detector._context = _make_flaky_detection_context(max_test_name_length=100)
8181

82-
detector.detect_from_report(make_report(nodeid="foo", when="setup", duration=1))
83-
detector.detect_from_report(make_report(nodeid="foo", when="call", duration=2))
84-
detector.detect_from_report(make_report(nodeid="foo", when="teardown", duration=3))
82+
detector.try_fill_metrics_from_report(
83+
make_report(nodeid="foo", when="setup", duration=1)
84+
)
85+
detector.try_fill_metrics_from_report(
86+
make_report(nodeid="foo", when="call", duration=2)
87+
)
88+
detector.try_fill_metrics_from_report(
89+
make_report(nodeid="foo", when="teardown", duration=3)
90+
)
8591

86-
detector.detect_from_report(make_report(nodeid="foo", when="setup", duration=4))
87-
detector.detect_from_report(make_report(nodeid="foo", when="call", duration=5))
88-
detector.detect_from_report(make_report(nodeid="foo", when="teardown", duration=6))
92+
detector.try_fill_metrics_from_report(
93+
make_report(nodeid="foo", when="setup", duration=4)
94+
)
95+
detector.try_fill_metrics_from_report(
96+
make_report(nodeid="foo", when="call", duration=5)
97+
)
98+
detector.try_fill_metrics_from_report(
99+
make_report(nodeid="foo", when="teardown", duration=6)
100+
)
89101

90102
metrics = detector._test_metrics.get("foo")
91103
assert metrics is not None

0 commit comments

Comments
 (0)