[CI][E2E-accuracy] Fix bug with aggregation of 1 result (#5169)

Egor-Krivov · web-flow · commit b2ec053563ea · 2025-09-24T15:36:00.000+02:00
Closes #5158. The issue appeared because [download artifact](https://github.com/actions/download-artifact?tab=readme-ov-file#v5---whats-new) changes its behavior if only one artifact is available. In this case we lose the artifact name, and its content is extracted directly into the selected folder. So it's `separate-reports/contentA` instead of `separate-reports/artifactA/contentA;separate-reports/artifactB/contentB`. The change is that we now always merge artifacts (`merge-multiple: true`) and lose names like `artifactA`, so we have to change the result parsing as well.
diff --git a/.github/workflows/e2e-accuracy.yml b/.github/workflows/e2e-accuracy.yml
@@ -151,6 +151,7 @@ jobs:
         uses: actions/download-artifact@v5
         with:
           path: separate-reports
+          merge-multiple: true
 
       - name: Run aggregation script
         run: |
@@ -164,13 +165,14 @@ jobs:
 
       - name: Upload aggregated results
         uses: actions/upload-artifact@v4
+        if: ${{ !cancelled() }}
         with:
           name: aggregated-results-${{ github.run_id }}
           path: aggregated-results
           include-hidden-files: true
 
       - name: Check results against reference
-        if: ${{ inputs.models == 'all' && inputs.only_one_model == '' }}
+        if: ${{ inputs.models == 'all' && inputs.only_one_model == ''  && !cancelled()}}
         run: |
           PYTORCH_XPU_OPS_REF="$(<.github/pins/e2e_reference_torch-xpu-ops.txt)"
           git clone https://github.com/intel/torch-xpu-ops.git
diff --git a/scripts/e2e_checks/aggregate_e2e_results.py b/scripts/e2e_checks/aggregate_e2e_results.py
@@ -10,29 +10,6 @@ def parse_args():
     return parser.parse_args()
 
 
-def parse_folder_name(folder_name):
-    """
-    Parse folder name to extract suite and dtype.
-
-    Expected format: logs-{suite}-{dtype}-{mode}-accuracy, where mode can contain `-` characters
-    Examples:
-    - logs-torchbench-float32-inference-accuracy -> suite=torchbench, dtype=float32
-    - logs-huggingface-amp_bf16-training-accuracy -> suite=huggingface, dtype=amp_bf16
-    """
-    parts = folder_name.split('-')
-
-    # Check if it follows the expected pattern
-    if len(parts) < 4 or parts[0] != 'logs' or parts[-1] != 'accuracy':
-        return None, None, None
-
-    suite = parts[1]
-    dtype = parts[2]
-    # Extract mode, can include dashes
-    mode = '-'.join(parts[3:-1])
-
-    return suite, dtype, mode
-
-
 def build_suite_report(combined_df, output_path):
     print('=======================================')
     print('=           SUMMARY REPORT            =')
@@ -110,6 +87,50 @@ def build_pytorch_report(combined_df, output_path):
         pivoted_df.to_csv(torch_report_dir / f'inductor_{suite}_{mode}.csv', index=False)
 
 
+def parse_mode(report_path):
+    """
+    Parse report file path to extract `mode`.
+
+    Expected filename: 'inductor_{suite}_{dtype}_{mode}_xpu_accuracy.csv', where mode can contain `-` characters
+    and dtype can contain `_` characters (e.g., `amp_bf16`).
+
+    Returns:
+        mode (str): Extracted mode from the filename
+        error (str or None): Error message if parsing fails, otherwise None
+    """
+    parts = report_path.name.split('_')
+
+    # Check if it follows the expected pattern
+    if len(parts) < 6 or parts[0] != 'inductor' or parts[-1] != 'accuracy.csv':
+        txt = f'Unexpected filename format: {report_path.name}, parsed parts: {parts}'
+        print(txt)
+        return None, txt
+    return parts[-3], None
+
+
+def load_reports(input_path):
+    dfs = []
+    problems = []
+    for suite_path in filter(Path.is_dir, input_path.iterdir()):
+        suite = suite_path.name
+
+        for dtype_path in filter(Path.is_dir, suite_path.iterdir()):
+            dtype = dtype_path.name
+
+            for report_path in dtype_path.glob('inductor_*_xpu_accuracy.csv'):
+                print(f'Reading {report_path}')
+                mode, problem = parse_mode(report_path)
+                if mode is None:
+                    problems.append(problem)
+                    continue
+                df = pd.read_csv(report_path)
+                df['suite'] = suite
+                df['mode'] = mode
+                df['dtype'] = dtype
+                dfs.append(df)
+    return dfs, problems
+
+
 def main(input_dir, output_dir):
     """
     Main function to aggregate end-to-end test results.
@@ -129,23 +150,7 @@ def main(input_dir, output_dir):
     print(f'Processing results from: {input_path}')
     print(f'Output will be saved to: {output_path}')
 
-    dfs = []
-    for item_path in input_path.iterdir():
-        name = item_path.name
-        if not item_path.is_dir():
-            continue
-
-        suite, dtype, mode = parse_folder_name(name)
-        if suite is None:
-            print(f'Folder name \'{name}\' does not match expected pattern, skipping')
-            continue
-        filepath = item_path / suite / dtype / f'inductor_{suite}_{dtype}_{mode}_xpu_accuracy.csv'
-        df = pd.read_csv(filepath)
-        df['suite'] = suite
-        df['mode'] = mode
-        df['dtype'] = dtype
-        dfs.append(df)
-
+    dfs, problems = load_reports(input_path)
     combined_df = pd.concat(dfs, ignore_index=True)
     combined_df = combined_df.sort_values(['suite', 'mode', 'dtype'])
 
@@ -157,6 +162,12 @@ def main(input_dir, output_dir):
     # 3. Agg report with 45 rows (suite, mode, dtype, passed, failed_REASON, failed_REASON model list)
     build_suite_report(combined_df, output_path=output_path)
 
+    if problems:
+        print('Problems found during parsing:')
+        for problem in problems:
+            print(problem)
+        raise RuntimeError('Errors found during parsing, see above')
+
 
 if __name__ == '__main__':
     args = parse_args()
diff --git a/scripts/e2e_checks/compare_reference.sh b/scripts/e2e_checks/compare_reference.sh
@@ -80,7 +80,7 @@ for suite in "${suites[@]}"; do
         fi
 
         for dtype in "${dtypes[@]}"; do
-            CSV_FILE="$RESULT_DIR/logs-$suite-$dtype-$mode-accuracy/$suite/$dtype/inductor_${suite}_${dtype}_${mode}_xpu_accuracy.csv"
+            CSV_FILE="$RESULT_DIR/$suite/$dtype/inductor_${suite}_${dtype}_${mode}_xpu_accuracy.csv"
 
             # Check if CSV file exists
             if [ ! -f "$CSV_FILE" ]; then