GNU/CI: use the aggregated-result.json files and move to python

sylvestre · sylvestre · commit eac9e37515cb · 2025-03-17T15:19:08.000+01:00
diff --git a/.github/workflows/GnuTests.yml b/.github/workflows/GnuTests.yml
@@ -349,137 +349,35 @@ jobs:
     - name: Compare test failures VS reference
       shell: bash
       run: |
-        ## Compare test failures VS reference
-        have_new_failures=""
-        REF_LOG_FILE='${{ steps.vars.outputs.path_reference }}/test-logs/test-suite.log'
-        ROOT_REF_LOG_FILE='${{ steps.vars.outputs.path_reference }}/test-logs/test-suite-root.log'
-        SELINUX_REF_LOG_FILE='${{ steps.vars.outputs.path_reference }}/test-logs/selinux-test-suite.log'
-        SELINUX_ROOT_REF_LOG_FILE='${{ steps.vars.outputs.path_reference }}/test-logs/selinux-test-suite-root.log'
-        REF_SUMMARY_FILE='${{ steps.vars.outputs.path_reference }}/test-summary/gnu-result.json'
-
-
+        ## Compare test failures VS reference using JSON files
+        REF_SUMMARY_FILE='${{ steps.vars.outputs.path_reference }}/aggregated-result.json'
+        CURRENT_SUMMARY_FILE='${{ steps.vars.outputs.AGGREGATED_SUMMARY_FILE }}'
         REPO_DEFAULT_BRANCH='${{ steps.vars.outputs.repo_default_branch }}'
         path_UUTILS='${{ steps.vars.outputs.path_UUTILS }}'
-        # https://github.com/uutils/coreutils/issues/4294
-        # https://github.com/uutils/coreutils/issues/4295
+
+        # Path to ignore file for intermittent issues
         IGNORE_INTERMITTENT="${path_UUTILS}/.github/workflows/ignore-intermittent.txt"
 
-        mkdir -p ${{ steps.vars.outputs.path_reference }}
-
-        COMMENT_DIR="${{ steps.vars.outputs.path_reference }}/comment"
-        mkdir -p ${COMMENT_DIR}
-        echo ${{ github.event.number }} > ${COMMENT_DIR}/NR
-        COMMENT_LOG="${COMMENT_DIR}/result.txt"
-
-        # The comment log might be downloaded from a previous run
-        # We only want the new changes, so remove it if it exists.
-        rm -f ${COMMENT_LOG}
-        touch ${COMMENT_LOG}
-
-        compare_tests() {
-          local new_log_file=$1
-          local ref_log_file=$2
-          local test_type=$3  # "standard" or "root"
-
-          if test -f "${ref_log_file}"; then
-            echo "Reference ${test_type} test log SHA1/ID: $(sha1sum -- "${ref_log_file}") - ${test_type}"
-            REF_ERROR=$(sed -n "s/^ERROR: \([[:print:]]\+\).*/\1/p" "${ref_log_file}"| sort)
-            CURRENT_RUN_ERROR=$(sed -n "s/^ERROR: \([[:print:]]\+\).*/\1/p" "${new_log_file}" | sort)
-            REF_FAILING=$(sed -n "s/^FAIL: \([[:print:]]\+\).*/\1/p" "${ref_log_file}"| sort)
-            CURRENT_RUN_FAILING=$(sed -n "s/^FAIL: \([[:print:]]\+\).*/\1/p" "${new_log_file}" | sort)
-            REF_SKIP=$(sed -n "s/^SKIP: \([[:print:]]\+\).*/\1/p" "${ref_log_file}"| sort)
-            CURRENT_RUN_SKIP=$(sed -n "s/^SKIP: \([[:print:]]\+\).*/\1/p" "${new_log_file}" | sort)
-
-            echo "Detailed information:"
-            echo "REF_ERROR = ${REF_ERROR}"
-            echo "CURRENT_RUN_ERROR = ${CURRENT_RUN_ERROR}"
-            echo "REF_FAILING = ${REF_FAILING}"
-            echo "CURRENT_RUN_FAILING = ${CURRENT_RUN_FAILING}"
-            echo "REF_SKIP_PASS = ${REF_SKIP}"
-            echo "CURRENT_RUN_SKIP = ${CURRENT_RUN_SKIP}"
-
-            # Compare failing and error tests
-            for LINE in ${CURRENT_RUN_FAILING}
-            do
-              if ! grep -Fxq ${LINE}<<<"${REF_FAILING}"
-              then
-                if ! grep ${LINE} ${IGNORE_INTERMITTENT}
-                then
-                  MSG="GNU test failed: ${LINE}. ${LINE} is passing on '${REPO_DEFAULT_BRANCH}'. Maybe you have to rebase?"
-                  echo "::error ::$MSG"
-                  echo $MSG >> ${COMMENT_LOG}
-                  have_new_failures="true"
-                else
-                  MSG="Skip an intermittent issue ${LINE} (fails in this run but passes in the 'main' branch)"
-                  echo "::notice ::$MSG"
-                  echo $MSG >> ${COMMENT_LOG}
-                  echo ""
-                fi
-              fi
-            done
-
-            for LINE in ${REF_FAILING}
-            do
-              if ! grep -Fxq ${LINE}<<<"${CURRENT_RUN_FAILING}"
-              then
-                if ! grep ${LINE} ${IGNORE_INTERMITTENT}
-                then
-                  MSG="Congrats! The gnu test ${LINE} is no longer failing!"
-                  echo "::notice ::$MSG"
-                  echo $MSG >> ${COMMENT_LOG}
-                else
-                  MSG="Skipping an intermittent issue ${LINE} (passes in this run but fails in the 'main' branch)"
-                  echo "::notice ::$MSG"
-                  echo $MSG >> ${COMMENT_LOG}
-                  echo ""
-                fi
-              fi
-            done
-
-            for LINE in ${CURRENT_RUN_ERROR}
-            do
-              if ! grep -Fxq ${LINE}<<<"${REF_ERROR}"
-              then
-                MSG="GNU test error: ${LINE}. ${LINE} is passing on '${REPO_DEFAULT_BRANCH}'. Maybe you have to rebase?"
-                echo "::error ::$MSG"
-                echo $MSG >> ${COMMENT_LOG}
-                have_new_failures="true"
-              fi
-            done
-
-            for LINE in ${REF_ERROR}
-            do
-              if ! grep -Fxq ${LINE}<<<"${CURRENT_RUN_ERROR}"
-              then
-                MSG="Congrats! The gnu test ${LINE} is no longer ERROR! (might be PASS or FAIL)"
-                echo "::warning ::$MSG"
-                echo $MSG >> ${COMMENT_LOG}
-              fi
-            done
-
-            for LINE in ${REF_SKIP}
-            do
-              if ! grep -Fxq ${LINE}<<<"${CURRENT_RUN_SKIP}"
-              then
-                MSG="Congrats! The gnu test ${LINE} is no longer SKIP! (might be PASS, ERROR or FAIL)"
-                echo "::warning ::$MSG"
-                echo $MSG >> ${COMMENT_LOG}
-              fi
-            done
+        COMPARISON_RESULT=0
+        if test -f "${CURRENT_SUMMARY_FILE}"; then
+          if test -f "${REF_SUMMARY_FILE}"; then
+            echo "Reference summary SHA1/ID: $(sha1sum -- "${REF_SUMMARY_FILE}")"
+            echo "Current summary SHA1/ID: $(sha1sum -- "${CURRENT_SUMMARY_FILE}")"
+
+            # `shell: bash` steps run under `bash -e`: a bare `$?` check placed after the
+            # command is never reached when it fails, so capture the status with `||`.
+            # The script is resolved through path_UUTILS, like IGNORE_INTERMITTENT above.
+            python3 "${path_UUTILS}/util/compare_test_results.py" \
+              --ignore-file "${IGNORE_INTERMITTENT}" \
+              "${CURRENT_SUMMARY_FILE}" "${REF_SUMMARY_FILE}" \
+              || COMPARISON_RESULT=$?
 
+            # 0 = no new failures, 1 = new failures; anything else means the
+            # comparison itself broke (e.g. bad arguments) and must not pass silently.
+            if [ "${COMPARISON_RESULT}" -gt 1 ]; then
+              echo "::error ::compare_test_results.py exited with unexpected status ${COMPARISON_RESULT}"
+              exit 1
+            fi
           else
-            echo "::warning ::Skipping ${test_type} test failure comparison; no prior reference test logs are available."
+            echo "::warning ::Skipping test comparison; no prior reference summary is available at '${REF_SUMMARY_FILE}'."
           fi
-        }
-
-        # Compare standard tests
-        compare_tests '${{ steps.vars.outputs.path_GNU_tests }}/test-suite.log' "${REF_LOG_FILE}" "standard"
-
-        # Compare root tests
-        compare_tests '${{ steps.vars.outputs.path_GNU_tests }}/test-suite-root.log' "${ROOT_REF_LOG_FILE}" "root"
+        else
+          echo "::error ::Failed to find summary of test results (missing '${CURRENT_SUMMARY_FILE}'); failing early"
+          exit 1
+        fi
 
-        # Set environment variable to indicate whether all failures are intermittent
-        if [ -n "${have_new_failures}" ]; then
+        if [ ${COMPARISON_RESULT} -eq 1 ]; then
           echo "ONLY_INTERMITTENT=false" >> $GITHUB_ENV
           echo "::error ::Found new non-intermittent test failures"
           exit 1
diff --git a/util/compare_test_results.py b/util/compare_test_results.py
@@ -0,0 +1,195 @@
+#!/usr/bin/env python3
+"""
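+Compare GNU test results between current run and reference to identify
+regressions and fixes.
+
+Usage:
+    compare_test_results.py --ignore-file IGNORE_FILE CURRENT_JSON REFERENCE_JSON
+
+Arguments:
+    CURRENT_JSON       Path to the current run's aggregated results JSON file
+    REFERENCE_JSON     Path to the reference (main branch) aggregated
+                       results JSON file
+    --ignore-file      Required. Path to file containing list of tests to
+                       ignore (for intermittent issues)
+
+Exit status:
+    0   No regression outside the ignore list, or the reference file is
+        missing or not valid JSON (comparison skipped)
+    1   At least one regression outside the ignore list, the current results
+        file is missing or not valid JSON, or the ignore file does not exist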
+"""
+
+import argparse
+import json
+import os
+import sys
+
+
+def flatten_test_results(results):
+    """
+    Flatten nested results into a single "<util>/<test_name>" -> status mapping.
+
+    Args:
+        results (dict): Maps each utility name to a dict of test name -> status
+
+    Returns:
+        dict: One entry per test, keyed by utility and test name joined with "/"
+    """
+    return {
+        f"{util}/{test_name}": status
+        for util, tests in results.items()
+        for test_name, status in tests.items()
+    }
+
+
+def load_ignore_list(ignore_file):
+    """
+    Load the set of tests to ignore from a file.
+
+    Blank lines and comment lines (first non-blank character is "#") are skipped;
+    every other line is stripped of surrounding whitespace and kept as one entry.
+
+    Args:
+        ignore_file (str): Path to the ignore file
+
+    Returns:
+        set: Entries found in the file, or an empty set if the file does not exist
+    """
+    if not os.path.exists(ignore_file):
+        return set()
+
+    entries = set()
+    with open(ignore_file, "r") as f:
+        for line in f:
+            entry = line.strip()
+            # Test the stripped text so that indented comments are skipped too
+            if entry and not entry.startswith("#"):
+                entries.add(entry)
+    return entries
+
+
+def identify_test_changes(current_flat, reference_flat):
+    """
+    Classify how test statuses changed between the reference and the current run.
+
+    Only tests present in both dictionaries are classified.
+
+    Args:
+        current_flat (dict): Flattened dictionary of current test results
+        reference_flat (dict): Flattened dictionary of reference test results
+
+    Returns:
+        tuple: Four lists of test paths, in this order:
+            regressions: FAIL or ERROR now, PASS or SKIP in the reference
+            fixes: PASS now, FAIL or ERROR in the reference
+            newly_skipped: SKIP now, PASS in the reference
+            newly_passing: PASS now, SKIP in the reference
+    """
+    failing = ("FAIL", "ERROR")
+
+    # dict.get() returns None for a test missing on the other side; None equals
+    # no status string, so such tests fall out of every category.
+    regressions = [
+        test_path
+        for test_path, status in current_flat.items()
+        if status in failing and reference_flat.get(test_path) in ("PASS", "SKIP")
+    ]
+    fixes = [
+        test_path
+        for test_path, status in reference_flat.items()
+        if status in failing and current_flat.get(test_path) == "PASS"
+    ]
+    newly_skipped = [
+        test_path
+        for test_path, status in current_flat.items()
+        if status == "SKIP" and reference_flat.get(test_path) == "PASS"
+    ]
+    newly_passing = [
+        test_path
+        for test_path, status in current_flat.items()
+        if status == "PASS" and reference_flat.get(test_path) == "SKIP"
+    ]
+
+    return regressions, fixes, newly_skipped, newly_passing
+
+
+def main():
+    """
+    Command-line entry point: compare two result files and report status changes.
+
+    Summary counts are printed to stdout; the per-test `::error`, `::notice` and
+    `::warning` lines are printed to stderr.
+
+    Returns:
+        int: 1 if the current results cannot be loaded, the ignore file does not
+        exist, or a regression outside the ignore list was found; 0 otherwise,
+        including when the reference results cannot be loaded.
+    """
+    parser = argparse.ArgumentParser(
+        description="Compare GNU test results and identify regressions and fixes"
+    )
+    parser.add_argument("current_json", help="Path to current run JSON results")
+    parser.add_argument("reference_json", help="Path to reference JSON results")
+    parser.add_argument(
+        "--ignore-file",
+        required=True,
+        help="Path to file with tests to ignore (for intermittent issues)",
+    )
+    args = parser.parse_args()
+
+    # Only a missing file or invalid JSON is handled; other errors propagate
+    load_errors = (FileNotFoundError, json.JSONDecodeError)
+
+    # The current results are mandatory: failing to load them is an error
+    try:
+        with open(args.current_json, "r") as f:
+            current_results = json.load(f)
+    except load_errors as e:
+        sys.stderr.write(f"Error loading current results: {e}\n")
+        return 1
+
+    # Without a usable reference the comparison is skipped, not failed
+    try:
+        with open(args.reference_json, "r") as f:
+            reference_results = json.load(f)
+    except load_errors as e:
+        sys.stderr.write(f"Error loading reference results: {e}\n")
+        sys.stderr.write("Skipping comparison as reference is not available.\n")
+        return 0
+
+    # The ignore file must exist; report it both on stderr and as an annotation
+    if not os.path.exists(args.ignore_file):
+        sys.stderr.write(f"Error: Ignore file {args.ignore_file} does not exist\n")
+        print(f"::error ::Ignore file {args.ignore_file} does not exist")
+        return 1
+
+    ignore_list = load_ignore_list(args.ignore_file)
+    print(f"Loaded {len(ignore_list)} tests to ignore from {args.ignore_file}")
+
+    current_flat = flatten_test_results(current_results)
+    reference_flat = flatten_test_results(reference_results)
+
+    regressions, fixes, newly_skipped, newly_passing = identify_test_changes(
+        current_flat, reference_flat
+    )
+
+    # Split regressions into ignored (intermittent) ones and real ones
+    real_regressions = []
+    intermittent_regressions = []
+    for candidate in regressions:
+        if candidate in ignore_list:
+            intermittent_regressions.append(candidate)
+        else:
+            real_regressions.append(candidate)
+
+    # Summary statistics, one per line, on stdout
+    print(
+        f"Total tests in current run: {len(current_flat)}",
+        f"Total tests in reference: {len(reference_flat)}",
+        f"New regressions: {len(real_regressions)}",
+        f"Intermittent regressions: {len(intermittent_regressions)}",
+        f"Fixed tests: {len(fixes)}",
+        f"Newly skipped tests: {len(newly_skipped)}",
+        f"Newly passing tests (previously skipped): {len(newly_passing)}",
+        sep="\n",
+    )
+
+    # Each non-empty category gets a header followed by one line per test,
+    # sorted by test path, all on stderr.
+    if real_regressions:
+        print("\nREGRESSIONS (non-intermittent failures):", file=sys.stderr)
+        for test in sorted(real_regressions):
+            msg = f"GNU test failed: {test}. {test} is passing on 'main'. Maybe you have to rebase?"
+            print(f"::error ::{msg}", file=sys.stderr)
+
+    if intermittent_regressions:
+        print("\nINTERMITTENT ISSUES (ignored):", file=sys.stderr)
+        for test in sorted(intermittent_regressions):
+            msg = f"Skip an intermittent issue {test} (fails in this run but passes in the 'main' branch)"
+            print(f"::notice ::{msg}", file=sys.stderr)
+
+    if fixes:
+        print("\nFIXED TESTS:", file=sys.stderr)
+        for test in sorted(fixes):
+            msg = f"Congrats! The gnu test {test} is no longer failing!"
+            print(f"::notice ::{msg}", file=sys.stderr)
+
+    if newly_skipped:
+        print("\nNEWLY SKIPPED TESTS:", file=sys.stderr)
+        for test in sorted(newly_skipped):
+            msg = f"Note: The gnu test {test} is now being skipped but was previously passing."
+            print(f"::warning ::{msg}", file=sys.stderr)
+
+    if newly_passing:
+        print("\nNEWLY PASSING TESTS (previously skipped):", file=sys.stderr)
+        for test in sorted(newly_passing):
+            msg = f"Congrats! The gnu test {test} is now passing!"
+            print(f"::notice ::{msg}", file=sys.stderr)
+
+    # Only regressions outside the ignore list make the run fail
+    return 1 if real_regressions else 0
+
+
+# Run the comparison only when executed as a script; the value returned by
+# main() becomes the process exit status.
+if __name__ == "__main__":
+    sys.exit(main())