Skip to content

Commit 77865e6

Browse files
fix(ci_visibility): count failed/skipped tests in JUnit XML when retries are enabled [backport 3.2] (#12899)
Backport 2a506fb from #12862 to 3.2. The pytest JUnit XML plugin uses the test report's [`failed`](https://github.com/pytest-dev/pytest/blob/8.3.x/src/_pytest/junitxml.py#L562) and [`longrepr`](https://github.com/pytest-dev/pytest/blob/8.3.x/src/_pytest/junitxml.py#L201) properties to count failed tests and include them in the output. Because retried tests have their own special statuses (`dd_efd_final_failed`, etc), they don't count as failures, and are excluded from the JUnit XML count. This PR creates a subclass of TestReport that is aware of those special statuses and reports them as passed/failed/skipped accordingly. This is honestly a bit of a hack. It would probably be best to rewrite the retry logic entirely so it would use normal pytest states, and pass the information that they are retries in some other way. But that will take more time, and I would like to fix the bug sooner rather than later. The exception information for the initial attempt is included in the JUnit XML. Known issue: quarantined failing tests are not counted. The way forward with this is to rewrite the retry logic, which I plan to do in a future PR. - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --------- Co-authored-by: Federico Mon <[email protected]> (cherry picked from commit 2a506fb)
1 parent 6ff0537 commit 77865e6

File tree

7 files changed

+82
-4
lines changed

7 files changed

+82
-4
lines changed

ddtrace/contrib/internal/pytest/_atr_utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import pytest
55

66
from ddtrace.contrib.internal.pytest._retry_utils import RetryOutcomes
7+
from ddtrace.contrib.internal.pytest._retry_utils import RetryTestReport
78
from ddtrace.contrib.internal.pytest._retry_utils import _get_outcome_from_retry
89
from ddtrace.contrib.internal.pytest._retry_utils import _get_retry_attempt_string
910
from ddtrace.contrib.internal.pytest._retry_utils import set_retry_num
@@ -79,13 +80,14 @@ def atr_handle_retries(
7980
return
8081

8182
atr_outcome = _atr_do_retries(item, outcomes)
83+
longrepr = InternalTest.stash_get(test_id, "failure_longrepr")
8284

83-
final_report = pytest_TestReport(
85+
final_report = RetryTestReport(
8486
nodeid=item.nodeid,
8587
location=item.location,
8688
keywords=item.keywords,
8789
when="call",
88-
longrepr=None,
90+
longrepr=longrepr,
8991
outcome=final_outcomes[atr_outcome],
9092
)
9193
item.ihook.pytest_runtest_logreport(report=final_report)

ddtrace/contrib/internal/pytest/_efd_utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import pytest
55

66
from ddtrace.contrib.internal.pytest._retry_utils import RetryOutcomes
7+
from ddtrace.contrib.internal.pytest._retry_utils import RetryTestReport
78
from ddtrace.contrib.internal.pytest._retry_utils import _get_outcome_from_retry
89
from ddtrace.contrib.internal.pytest._retry_utils import _get_retry_attempt_string
910
from ddtrace.contrib.internal.pytest._retry_utils import set_retry_num
@@ -85,13 +86,14 @@ def efd_handle_retries(
8586
InternalTest.mark_skip(test_id)
8687

8788
efd_outcome = _efd_do_retries(item)
89+
longrepr = InternalTest.stash_get(test_id, "failure_longrepr")
8890

89-
final_report = pytest_TestReport(
91+
final_report = RetryTestReport(
9092
nodeid=item.nodeid,
9193
location=item.location,
9294
keywords=item.keywords,
9395
when="call",
94-
longrepr=None,
96+
longrepr=longrepr,
9597
outcome=_FINAL_OUTCOMES[efd_outcome],
9698
)
9799
item.ihook.pytest_runtest_logreport(report=final_report)

ddtrace/contrib/internal/pytest/_plugin_v2.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -507,6 +507,9 @@ def _pytest_runtest_makereport(item: pytest.Item, call: pytest_CallInfo, outcome
507507
# (see <https://github.com/pytest-dev/pytest/blob/8.3.x/src/_pytest/main.py#L654>).
508508
original_result.outcome = OUTCOME_QUARANTINED
509509

510+
if original_result.failed or original_result.skipped:
511+
InternalTest.stash_set(test_id, "failure_longrepr", original_result.longrepr)
512+
510513
# ATR and EFD retry tests only if their teardown succeeded to ensure the best chance the retry will succeed
511514
# NOTE: this mutates the original result's outcome
512515
if InternalTest.stash_get(test_id, "setup_failed") or InternalTest.stash_get(test_id, "teardown_failed"):

ddtrace/contrib/internal/pytest/_retry_utils.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from _pytest.runner import CallInfo
99
import pytest
1010

11+
from ddtrace.contrib.internal.pytest._types import pytest_TestReport
1112
from ddtrace.contrib.internal.pytest._types import tmppath_result_key
1213
from ddtrace.contrib.internal.pytest._utils import _TestOutcome
1314
from ddtrace.ext.test_visibility.api import TestExcInfo
@@ -128,3 +129,33 @@ def _retry_run_when(item, when, outcomes: RetryOutcomes) -> t.Tuple[CallInfo, _p
128129
if when == "call" or "passed" not in report.outcome:
129130
item.ihook.pytest_runtest_logreport(report=report)
130131
return call, report
132+
133+
134+
class RetryTestReport(pytest_TestReport):
135+
"""
136+
A RetryTestReport behaves just like a normal pytest TestReport, except that the the failed/passed/skipped
137+
properties are aware of retry final states (dd_efd_final_*, etc). This affects the test counts in JUnit XML output,
138+
for instance.
139+
140+
The object should be initialized with the `longrepr` of the _initial_ test attempt. A `longrepr` set to `None` means
141+
the initial attempt either succeeded (which means it was already counted by pytest) or was quarantined (which means
142+
we should not count it at all), so we don't need to count it here.
143+
"""
144+
145+
@property
146+
def failed(self):
147+
if self.longrepr is None:
148+
return False
149+
return "final_failed" in self.outcome
150+
151+
@property
152+
def passed(self):
153+
if self.longrepr is None:
154+
return False
155+
return "final_passed" in self.outcome or "final_flaky" in self.outcome
156+
157+
@property
158+
def skipped(self):
159+
if self.longrepr is None:
160+
return False
161+
return "final_skipped" in self.outcome
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
fixes:
3+
- |
4+
CI Visibility: This fix resolves an issue where JUnit XML output would not count tests retried by Early Flake
5+
Detection, Auto Test Retries, and Attempt-to-Fix.

tests/contrib/pytest/test_pytest_atr.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
- The session object is patched to never be a faulty session, by default.
88
"""
99
from unittest import mock
10+
from xml.etree import ElementTree
1011

1112
import pytest
1213

@@ -315,3 +316,22 @@ def test_pytest_atr_does_not_retry_failed_setup_or_teardown(self):
315316

316317
assert rec.ret == 1
317318
assert len(spans) == 5
319+
320+
def test_pytest_atr_junit_xml(self):
321+
self.testdir.makepyfile(test_pass=_TEST_PASS_CONTENT)
322+
self.testdir.makepyfile(test_fail=_TEST_FAIL_CONTENT)
323+
self.testdir.makepyfile(test_errors=_TEST_ERRORS_CONTENT)
324+
self.testdir.makepyfile(test_pass_on_retries=_TEST_PASS_ON_RETRIES_CONTENT)
325+
self.testdir.makepyfile(test_skip=_TEST_SKIP_CONTENT)
326+
327+
rec = self.inline_run("--ddtrace", "--junit-xml=out.xml")
328+
assert rec.ret == 1
329+
330+
test_suite = ElementTree.parse(f"{self.testdir}/out.xml").find("testsuite")
331+
332+
# There are 15 tests, but we get 16 in the JUnit XML output, because a test that passes during call but fails
333+
# during teardown is counted twice. This is a bug in pytest, not ddtrace.
334+
assert test_suite.attrib["tests"] == "16"
335+
assert test_suite.attrib["failures"] == "4"
336+
assert test_suite.attrib["skipped"] == "4"
337+
assert test_suite.attrib["errors"] == "2"

tests/contrib/pytest/test_pytest_efd.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
- The session object is patched to never be a faulty session, by default.
88
"""
99
from unittest import mock
10+
from xml.etree import ElementTree
1011

1112
import pytest
1213

@@ -285,3 +286,17 @@ def test_pytest_efd_does_not_retry_failed_teardown(self):
285286
assert fails_teardown_spans[0].get_tag("test.is_retry") != "true"
286287
assert rec.ret == 1
287288
assert len(spans) == 7
289+
290+
def test_pytest_efd_junit_xml(self):
291+
self.testdir.makepyfile(test_known_pass=_TEST_KNOWN_PASS_CONTENT)
292+
self.testdir.makepyfile(test_known_fail=_TEST_KNOWN_FAIL_CONTENT)
293+
self.testdir.makepyfile(test_new_pass=_TEST_NEW_PASS_CONTENT)
294+
self.testdir.makepyfile(test_new_fail=_TEST_NEW_FAIL_CONTENT)
295+
self.testdir.makepyfile(test_new_flaky=_TEST_NEW_FLAKY_CONTENT)
296+
297+
rec = self.inline_run("--ddtrace", "--junit-xml=out.xml")
298+
assert rec.ret == 1
299+
300+
test_suite = ElementTree.parse(f"{self.testdir}/out.xml").find("testsuite")
301+
assert test_suite.attrib["tests"] == "7"
302+
assert test_suite.attrib["failures"] == "3"

0 commit comments

Comments
 (0)