Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 36 additions & 5 deletions util/compare_test_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,14 @@ def identify_test_changes(current_flat, reference_flat):
reference_flat (dict): Flattened dictionary of reference test results

Returns:
tuple: Four lists containing regressions, fixes, newly_skipped, and newly_passing tests
tuple: Five lists containing regressions, fixes, newly_skipped, newly_passing, and newly_failing tests
"""
# Find regressions (tests that were passing but now failing)
regressions = []
for test_path, status in current_flat.items():
if status in ("FAIL", "ERROR"):
if test_path in reference_flat:
if reference_flat[test_path] in ("PASS", "SKIP"):
if reference_flat[test_path] == "PASS":
regressions.append(test_path)

# Find fixes (tests that were failing but now passing)
Expand Down Expand Up @@ -88,7 +88,17 @@ def identify_test_changes(current_flat, reference_flat):
):
newly_passing.append(test_path)

return regressions, fixes, newly_skipped, newly_passing
# Find newly failing tests (were skipped, now failing)
newly_failing = []
for test_path, status in current_flat.items():
if (
status in ("FAIL", "ERROR")
and test_path in reference_flat
and reference_flat[test_path] == "SKIP"
):
newly_failing.append(test_path)

return regressions, fixes, newly_skipped, newly_passing, newly_failing


def main():
Expand Down Expand Up @@ -135,8 +145,8 @@ def main():
reference_flat = flatten_test_results(reference_results)

# Identify different categories of test changes
regressions, fixes, newly_skipped, newly_passing = identify_test_changes(
current_flat, reference_flat
regressions, fixes, newly_skipped, newly_passing, newly_failing = (
identify_test_changes(current_flat, reference_flat)
)

# Filter out intermittent issues from regressions
Expand All @@ -147,6 +157,10 @@ def main():
real_fixes = [f for f in fixes if f not in ignore_list]
intermittent_fixes = [f for f in fixes if f in ignore_list]

# Filter out intermittent issues from newly failing
real_newly_failing = [n for n in newly_failing if n not in ignore_list]
intermittent_newly_failing = [n for n in newly_failing if n in ignore_list]

# Print summary stats
print(f"Total tests in current run: {len(current_flat)}")
print(f"Total tests in reference: {len(reference_flat)}")
Expand All @@ -156,6 +170,8 @@ def main():
print(f"Intermittent fixes: {len(intermittent_fixes)}")
print(f"Newly skipped tests: {len(newly_skipped)}")
print(f"Newly passing tests (previously skipped): {len(newly_passing)}")
print(f"Newly failing tests (previously skipped): {len(real_newly_failing)}")
print(f"Intermittent newly failing: {len(intermittent_newly_failing)}")

output_lines = []

Expand Down Expand Up @@ -206,6 +222,21 @@ def main():
print(f"::notice ::{msg}", file=sys.stderr)
output_lines.append(msg)

# Report newly failing tests (were skipped, now failing)
if real_newly_failing:
print("\nNEWLY FAILING TESTS (previously skipped):", file=sys.stderr)
for test in sorted(real_newly_failing):
msg = f"Note: The gnu test {test} was skipped on 'main' but is now failing."
print(f"::warning ::{msg}", file=sys.stderr)
output_lines.append(msg)

if intermittent_newly_failing:
print("\nINTERMITTENT NEWLY FAILING (ignored):", file=sys.stderr)
for test in sorted(intermittent_newly_failing):
msg = f"Skip an intermittent issue {test} (was skipped on 'main', now failing)"
print(f"::notice ::{msg}", file=sys.stderr)
output_lines.append(msg)

if args.output and output_lines:
with open(args.output, "w") as f:
for line in output_lines:
Expand Down
55 changes: 44 additions & 11 deletions util/test_compare_test_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,11 @@ def test_regressions(self):
}
reference = {
"tests/ls/test1": "PASS",
"tests/ls/test2": "SKIP",
"tests/ls/test2": "PASS",
"tests/cp/test3": "PASS",
"tests/cp/test4": "FAIL",
}
regressions, _, _, _ = identify_test_changes(current, reference)
regressions, _, _, _, _ = identify_test_changes(current, reference)
self.assertEqual(sorted(regressions), ["tests/ls/test1", "tests/ls/test2"])

def test_fixes(self):
Expand All @@ -150,7 +150,7 @@ def test_fixes(self):
"tests/cp/test3": "PASS",
"tests/cp/test4": "FAIL",
}
_, fixes, _, _ = identify_test_changes(current, reference)
_, fixes, _, _, _ = identify_test_changes(current, reference)
self.assertEqual(sorted(fixes), ["tests/ls/test1", "tests/ls/test2"])

def test_newly_skipped(self):
Expand All @@ -165,7 +165,7 @@ def test_newly_skipped(self):
"tests/ls/test2": "FAIL",
"tests/cp/test3": "PASS",
}
_, _, newly_skipped, _ = identify_test_changes(current, reference)
_, _, newly_skipped, _, _ = identify_test_changes(current, reference)
self.assertEqual(newly_skipped, ["tests/ls/test1"])

def test_newly_passing(self):
Expand All @@ -180,7 +180,7 @@ def test_newly_passing(self):
"tests/ls/test2": "FAIL",
"tests/cp/test3": "SKIP",
}
_, _, _, newly_passing = identify_test_changes(current, reference)
_, _, _, newly_passing, _ = identify_test_changes(current, reference)
self.assertEqual(newly_passing, ["tests/ls/test1"])

def test_all_categories(self):
Expand All @@ -191,21 +191,24 @@ def test_all_categories(self):
"tests/cp/test3": "SKIP", # Newly skipped
"tests/cp/test4": "PASS", # Newly passing
"tests/rm/test5": "PASS", # No change
"tests/rm/test6": "FAIL", # Newly failing
}
reference = {
"tests/ls/test1": "PASS", # Regression
"tests/ls/test2": "FAIL", # Fix
"tests/cp/test3": "PASS", # Newly skipped
"tests/cp/test4": "SKIP", # Newly passing
"tests/rm/test5": "PASS", # No change
"tests/rm/test6": "SKIP", # Newly failing
}
regressions, fixes, newly_skipped, newly_passing = identify_test_changes(
current, reference
regressions, fixes, newly_skipped, newly_passing, newly_failing = (
identify_test_changes(current, reference)
)
self.assertEqual(regressions, ["tests/ls/test1"])
self.assertEqual(fixes, ["tests/ls/test2"])
self.assertEqual(newly_skipped, ["tests/cp/test3"])
self.assertEqual(newly_passing, ["tests/cp/test4"])
self.assertEqual(newly_failing, ["tests/rm/test6"])

def test_new_and_removed_tests(self):
"""Test handling of tests that are only in one of the datasets."""
Expand All @@ -219,13 +222,43 @@ def test_new_and_removed_tests(self):
"tests/ls/test2": "PASS",
"tests/rm/old_test": "FAIL",
}
regressions, fixes, newly_skipped, newly_passing = identify_test_changes(
current, reference
regressions, fixes, newly_skipped, newly_passing, newly_failing = (
identify_test_changes(current, reference)
)
self.assertEqual(regressions, ["tests/ls/test2"])
self.assertEqual(fixes, [])
self.assertEqual(newly_skipped, [])
self.assertEqual(newly_passing, [])
self.assertEqual(newly_failing, [])

def test_newly_failing(self):
    """Check that SKIP -> FAIL and SKIP -> ERROR are reported as newly failing.

    Every test is skipped in the reference run; only the ones that now
    fail or error should end up in the fifth returned list.
    """
    reference_statuses = {
        "tests/ls/test1": "SKIP",
        "tests/ls/test2": "SKIP",
        "tests/cp/test3": "SKIP",
    }
    current_statuses = {
        "tests/ls/test1": "FAIL",
        "tests/ls/test2": "ERROR",
        "tests/cp/test3": "PASS",
    }

    outcome = identify_test_changes(current_statuses, reference_statuses)
    _, _, _, _, newly_failing = outcome

    # tests/cp/test3 went SKIP -> PASS, so it must not be listed here.
    expected = ["tests/ls/test1", "tests/ls/test2"]
    self.assertEqual(sorted(newly_failing), expected)

def test_skip_to_fail_not_regression(self):
    """Check that a SKIP -> FAIL transition is kept out of the regressions.

    Both tests fail in the current run; the one that passed in the
    reference is a regression, the one that was skipped is newly failing.
    """
    reference_statuses = {
        "tests/ls/test1": "SKIP",
        "tests/ls/test2": "PASS",
    }
    # Both tests share the same current status.
    current_statuses = dict.fromkeys(("tests/ls/test1", "tests/ls/test2"), "FAIL")

    outcome = identify_test_changes(current_statuses, reference_statuses)
    regressions, _, _, _, newly_failing = outcome

    self.assertEqual(regressions, ["tests/ls/test2"])
    self.assertEqual(newly_failing, ["tests/ls/test1"])


class TestMainFunction(unittest.TestCase):
Expand Down Expand Up @@ -285,7 +318,7 @@ def test_main_exit_code_with_real_regressions(self):
current_flat = flatten_test_results(self.current_data)
reference_flat = flatten_test_results(self.reference_data)

regressions, _, _, _ = identify_test_changes(current_flat, reference_flat)
regressions, _, _, _, _ = identify_test_changes(current_flat, reference_flat)

self.assertIn("tests/ls/test2", regressions)

Expand Down Expand Up @@ -320,7 +353,7 @@ def test_filter_intermittent_fixes(self):
current_flat = flatten_test_results(self.current_data)
reference_flat = flatten_test_results(self.reference_data)

_, fixes, _, _ = identify_test_changes(current_flat, reference_flat)
_, fixes, _, _, _ = identify_test_changes(current_flat, reference_flat)

# tests/cp/test1 and tests/cp/test2 should be fixed but tests/cp/test1 is in ignore list
self.assertIn("tests/cp/test1", fixes)
Expand Down
Loading