Skip to content

Commit a917791

Browse files
committed
Add functionality to accurately show when tests that were previously skipped are now failing
1 parent b2d1117 commit a917791

File tree

2 files changed

+80
-16
lines changed

2 files changed

+80
-16
lines changed

util/compare_test_results.py

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,14 @@ def identify_test_changes(current_flat, reference_flat):
5050
reference_flat (dict): Flattened dictionary of reference test results
5151
5252
Returns:
53-
tuple: Four lists containing regressions, fixes, newly_skipped, and newly_passing tests
53+
tuple: Five lists containing regressions, fixes, newly_skipped, newly_passing, and newly_failing tests
5454
"""
5555
# Find regressions (tests that were passing but now failing)
5656
regressions = []
5757
for test_path, status in current_flat.items():
5858
if status in ("FAIL", "ERROR"):
5959
if test_path in reference_flat:
60-
if reference_flat[test_path] in ("PASS", "SKIP"):
60+
if reference_flat[test_path] == "PASS":
6161
regressions.append(test_path)
6262

6363
# Find fixes (tests that were failing but now passing)
@@ -88,7 +88,17 @@ def identify_test_changes(current_flat, reference_flat):
8888
):
8989
newly_passing.append(test_path)
9090

91-
return regressions, fixes, newly_skipped, newly_passing
91+
# Find newly failing tests (were skipped, now failing)
92+
newly_failing = []
93+
for test_path, status in current_flat.items():
94+
if (
95+
status in ("FAIL", "ERROR")
96+
and test_path in reference_flat
97+
and reference_flat[test_path] == "SKIP"
98+
):
99+
newly_failing.append(test_path)
100+
101+
return regressions, fixes, newly_skipped, newly_passing, newly_failing
92102

93103

94104
def main():
@@ -135,8 +145,8 @@ def main():
135145
reference_flat = flatten_test_results(reference_results)
136146

137147
# Identify different categories of test changes
138-
regressions, fixes, newly_skipped, newly_passing = identify_test_changes(
139-
current_flat, reference_flat
148+
regressions, fixes, newly_skipped, newly_passing, newly_failing = (
149+
identify_test_changes(current_flat, reference_flat)
140150
)
141151

142152
# Filter out intermittent issues from regressions
@@ -147,6 +157,10 @@ def main():
147157
real_fixes = [f for f in fixes if f not in ignore_list]
148158
intermittent_fixes = [f for f in fixes if f in ignore_list]
149159

160+
# Filter out intermittent issues from newly failing
161+
real_newly_failing = [n for n in newly_failing if n not in ignore_list]
162+
intermittent_newly_failing = [n for n in newly_failing if n in ignore_list]
163+
150164
# Print summary stats
151165
print(f"Total tests in current run: {len(current_flat)}")
152166
print(f"Total tests in reference: {len(reference_flat)}")
@@ -156,6 +170,8 @@ def main():
156170
print(f"Intermittent fixes: {len(intermittent_fixes)}")
157171
print(f"Newly skipped tests: {len(newly_skipped)}")
158172
print(f"Newly passing tests (previously skipped): {len(newly_passing)}")
173+
print(f"Newly failing tests (previously skipped): {len(real_newly_failing)}")
174+
print(f"Intermittent newly failing: {len(intermittent_newly_failing)}")
159175

160176
output_lines = []
161177

@@ -206,6 +222,21 @@ def main():
206222
print(f"::notice ::{msg}", file=sys.stderr)
207223
output_lines.append(msg)
208224

225+
# Report newly failing tests (were skipped, now failing)
226+
if real_newly_failing:
227+
print("\nNEWLY FAILING TESTS (previously skipped):", file=sys.stderr)
228+
for test in sorted(real_newly_failing):
229+
msg = f"Note: The gnu test {test} was skipped on 'main' but is now failing."
230+
print(f"::warning ::{msg}", file=sys.stderr)
231+
output_lines.append(msg)
232+
233+
if intermittent_newly_failing:
234+
print("\nINTERMITTENT NEWLY FAILING (ignored):", file=sys.stderr)
235+
for test in sorted(intermittent_newly_failing):
236+
msg = f"Skip an intermittent issue {test} (was skipped on 'main', now failing)"
237+
print(f"::notice ::{msg}", file=sys.stderr)
238+
output_lines.append(msg)
239+
209240
if args.output and output_lines:
210241
with open(args.output, "w") as f:
211242
for line in output_lines:

util/test_compare_test_results.py

Lines changed: 44 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -129,11 +129,11 @@ def test_regressions(self):
129129
}
130130
reference = {
131131
"tests/ls/test1": "PASS",
132-
"tests/ls/test2": "SKIP",
132+
"tests/ls/test2": "PASS",
133133
"tests/cp/test3": "PASS",
134134
"tests/cp/test4": "FAIL",
135135
}
136-
regressions, _, _, _ = identify_test_changes(current, reference)
136+
regressions, _, _, _, _ = identify_test_changes(current, reference)
137137
self.assertEqual(sorted(regressions), ["tests/ls/test1", "tests/ls/test2"])
138138

139139
def test_fixes(self):
@@ -150,7 +150,7 @@ def test_fixes(self):
150150
"tests/cp/test3": "PASS",
151151
"tests/cp/test4": "FAIL",
152152
}
153-
_, fixes, _, _ = identify_test_changes(current, reference)
153+
_, fixes, _, _, _ = identify_test_changes(current, reference)
154154
self.assertEqual(sorted(fixes), ["tests/ls/test1", "tests/ls/test2"])
155155

156156
def test_newly_skipped(self):
@@ -165,7 +165,7 @@ def test_newly_skipped(self):
165165
"tests/ls/test2": "FAIL",
166166
"tests/cp/test3": "PASS",
167167
}
168-
_, _, newly_skipped, _ = identify_test_changes(current, reference)
168+
_, _, newly_skipped, _, _ = identify_test_changes(current, reference)
169169
self.assertEqual(newly_skipped, ["tests/ls/test1"])
170170

171171
def test_newly_passing(self):
@@ -180,7 +180,7 @@ def test_newly_passing(self):
180180
"tests/ls/test2": "FAIL",
181181
"tests/cp/test3": "SKIP",
182182
}
183-
_, _, _, newly_passing = identify_test_changes(current, reference)
183+
_, _, _, newly_passing, _ = identify_test_changes(current, reference)
184184
self.assertEqual(newly_passing, ["tests/ls/test1"])
185185

186186
def test_all_categories(self):
@@ -191,21 +191,24 @@ def test_all_categories(self):
191191
"tests/cp/test3": "SKIP", # Newly skipped
192192
"tests/cp/test4": "PASS", # Newly passing
193193
"tests/rm/test5": "PASS", # No change
194+
"tests/rm/test6": "FAIL", # Newly failing
194195
}
195196
reference = {
196197
"tests/ls/test1": "PASS", # Regression
197198
"tests/ls/test2": "FAIL", # Fix
198199
"tests/cp/test3": "PASS", # Newly skipped
199200
"tests/cp/test4": "SKIP", # Newly passing
200201
"tests/rm/test5": "PASS", # No change
202+
"tests/rm/test6": "SKIP", # Newly failing
201203
}
202-
regressions, fixes, newly_skipped, newly_passing = identify_test_changes(
203-
current, reference
204+
regressions, fixes, newly_skipped, newly_passing, newly_failing = (
205+
identify_test_changes(current, reference)
204206
)
205207
self.assertEqual(regressions, ["tests/ls/test1"])
206208
self.assertEqual(fixes, ["tests/ls/test2"])
207209
self.assertEqual(newly_skipped, ["tests/cp/test3"])
208210
self.assertEqual(newly_passing, ["tests/cp/test4"])
211+
self.assertEqual(newly_failing, ["tests/rm/test6"])
209212

210213
def test_new_and_removed_tests(self):
211214
"""Test handling of tests that are only in one of the datasets."""
@@ -219,13 +222,43 @@ def test_new_and_removed_tests(self):
219222
"tests/ls/test2": "PASS",
220223
"tests/rm/old_test": "FAIL",
221224
}
222-
regressions, fixes, newly_skipped, newly_passing = identify_test_changes(
223-
current, reference
225+
regressions, fixes, newly_skipped, newly_passing, newly_failing = (
226+
identify_test_changes(current, reference)
224227
)
225228
self.assertEqual(regressions, ["tests/ls/test2"])
226229
self.assertEqual(fixes, [])
227230
self.assertEqual(newly_skipped, [])
228231
self.assertEqual(newly_passing, [])
232+
self.assertEqual(newly_failing, [])
233+
234+
def test_newly_failing(self):
235+
"""Test identifying newly failing tests (SKIP -> FAIL)."""
236+
current = {
237+
"tests/ls/test1": "FAIL",
238+
"tests/ls/test2": "ERROR",
239+
"tests/cp/test3": "PASS",
240+
}
241+
reference = {
242+
"tests/ls/test1": "SKIP",
243+
"tests/ls/test2": "SKIP",
244+
"tests/cp/test3": "SKIP",
245+
}
246+
_, _, _, _, newly_failing = identify_test_changes(current, reference)
247+
self.assertEqual(sorted(newly_failing), ["tests/ls/test1", "tests/ls/test2"])
248+
249+
def test_skip_to_fail_not_regression(self):
250+
"""Test that SKIP -> FAIL is not counted as a regression."""
251+
current = {
252+
"tests/ls/test1": "FAIL",
253+
"tests/ls/test2": "FAIL",
254+
}
255+
reference = {
256+
"tests/ls/test1": "SKIP",
257+
"tests/ls/test2": "PASS",
258+
}
259+
regressions, _, _, _, newly_failing = identify_test_changes(current, reference)
260+
self.assertEqual(regressions, ["tests/ls/test2"])
261+
self.assertEqual(newly_failing, ["tests/ls/test1"])
229262

230263

231264
class TestMainFunction(unittest.TestCase):
@@ -285,7 +318,7 @@ def test_main_exit_code_with_real_regressions(self):
285318
current_flat = flatten_test_results(self.current_data)
286319
reference_flat = flatten_test_results(self.reference_data)
287320

288-
regressions, _, _, _ = identify_test_changes(current_flat, reference_flat)
321+
regressions, _, _, _, _ = identify_test_changes(current_flat, reference_flat)
289322

290323
self.assertIn("tests/ls/test2", regressions)
291324

@@ -320,7 +353,7 @@ def test_filter_intermittent_fixes(self):
320353
current_flat = flatten_test_results(self.current_data)
321354
reference_flat = flatten_test_results(self.reference_data)
322355

323-
_, fixes, _, _ = identify_test_changes(current_flat, reference_flat)
356+
_, fixes, _, _, _ = identify_test_changes(current_flat, reference_flat)
324357

325358
# tests/cp/test1 and tests/cp/test2 should be fixed but tests/cp/test1 is in ignore list
326359
self.assertIn("tests/cp/test1", fixes)

0 commit comments

Comments
 (0)