Add test_all_subset_models mode (#2428)

kwasd · pbchekin · web-flow · commit b6cdccd33d90 · 2024-10-21T23:20:51.000+02:00
For #2246 --------- Co-authored-by: Pavel Chekin <pavel.chekin@intel.com>
diff --git a/.github/workflows/e2e-accuracy.yml b/.github/workflows/e2e-accuracy.yml
@@ -44,6 +44,10 @@ on:
           - all
           - subset
         default: all
+      check_all_subset_models:
+        description: In "subset" mode, check all subset models
+        type: boolean
+        default: false
       only_one_model:
         description: Run only this one model
         type: string
@@ -125,6 +129,7 @@ jobs:
       test_mode: accuracy
       dtype: ${{ matrix.dtype }}
       models: ${{ inputs.models }}
+      check_all_subset_models: ${{ inputs.check_all_subset_models || false }}
       only_one_model: ${{ inputs.only_one_model }}
       runner_label: ${{ inputs.runner_label }}
       TORCH_COMPILE_DEBUG: ${{ inputs.TORCH_COMPILE_DEBUG }}
diff --git a/.github/workflows/e2e-performance.yml b/.github/workflows/e2e-performance.yml
@@ -44,6 +44,10 @@ on:
           - all
           - subset
         default: subset
+      check_all_subset_models:
+        description: In "subset" mode, do not fail workflow if one of models failed
+        type: boolean
+        default: false
       only_one_model:
         description: Run only this one model
         type: string
@@ -136,6 +140,7 @@ jobs:
       test_mode: performance
       dtype: ${{ matrix.dtype }}
       models: ${{ inputs.models }}
+      check_all_subset_models: ${{ inputs.check_all_subset_models || false }}
       only_one_model: ${{ inputs.only_one_model }}
       runner_label: ${{ inputs.runner_label }}
       TORCH_COMPILE_DEBUG: ${{ inputs.TORCH_COMPILE_DEBUG }}
diff --git a/.github/workflows/e2e-reusable.yml b/.github/workflows/e2e-reusable.yml
@@ -27,6 +27,10 @@ on:
         description: Run all models or a subset
         type: string
         default: all
+      check_all_subset_models:
+        description: In "subset" mode, check all subset models
+        type: boolean
+        default: false
       only_one_model:
         description: Run only this one model
         type: string
@@ -224,9 +228,19 @@ jobs:
           if [[ "${{ inputs.only_one_model }}" ]]; then
             bash -e $GITHUB_WORKSPACE/scripts/inductor_xpu_test.sh ${{ inputs.suite }} ${{ inputs.dtype }} ${{ inputs.mode }} ${{ inputs.test_mode }} xpu 0 static 1 0 ${{ inputs.only_one_model }}
           elif [[ "${{ inputs.models }}" == "subset" ]]; then
+            models_subset_file="$GITHUB_WORKSPACE/.github/models/${{ inputs.test_mode }}/${{ inputs.suite }}.txt"
             while read model; do
               bash -e $GITHUB_WORKSPACE/scripts/inductor_xpu_test.sh ${{ inputs.suite }} ${{ inputs.dtype }} ${{ inputs.mode }} ${{ inputs.test_mode }} xpu 0 static 1 0 $model
-            done < $GITHUB_WORKSPACE/.github/models/${{ inputs.test_mode }}/${{ inputs.suite }}.txt
+            done < $models_subset_file
+            if [[ "${{ inputs.check_all_subset_models }}" == true ]]; then
+              python $GITHUB_WORKSPACE/scripts/check_inductor_report.py --models-file="$models_subset_file" \
+                --suite=${{ inputs.suite }} \
+                --dtype=${{ inputs.dtype }} \
+                --mode=${{ inputs.mode }} \
+                --test_mode=${{ inputs.test_mode }} \
+                --device=xpu \
+                --inductor-log-dir="${GITHUB_WORKSPACE}/inductor_log"
+            fi
           else
             bash -e $GITHUB_WORKSPACE/scripts/inductor_xpu_test.sh ${{ inputs.suite }} ${{ inputs.dtype }} ${{ inputs.mode }} ${{ inputs.test_mode }} xpu 0 static 1 0
           fi
diff --git a/scripts/check_inductor_report.py b/scripts/check_inductor_report.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+import argparse
+from pathlib import Path
+import csv
+import sys
+
+
+def check_report(suite, dtype, mode, test_mode, device, models_file, inductor_log_dir):
+    inductor_log_dir_leaf = Path(inductor_log_dir) / suite / dtype
+    inductor_report_filename = f"inductor_{suite}_{dtype}_{mode}_{device}_{test_mode}.csv"
+    inductor_report_path = Path(inductor_log_dir_leaf / inductor_report_filename)
+
+    subset = []
+    report = []
+    exitcode = 0
+
+    with open(models_file, encoding="utf-8") as f:
+        subset = f.read().splitlines()
+
+    with open(inductor_report_path, encoding="utf-8") as f:
+        reader = csv.reader(f)
+        report_with_header = []
+        for l in reader:
+            report_with_header.append(l)
+        for r in report_with_header[1:]:
+            if r[0] == device:
+                report.append(r)
+
+    test_list = [r[1] for r in report]
+
+    if test_mode == "performance":
+        for m in subset:
+            if m not in test_list:
+                exitcode = 1
+                print(f"Test is not found in report: {m}")
+
+    if test_mode == "accuracy":
+        test_statuses = [r[3] for r in report]
+        for m in subset:
+            try:
+                idx = test_list.index(m)
+            except ValueError:
+                exitcode = 1
+                print(f"Test is NOT FOUND: {m}")
+                continue
+            if test_statuses[idx] != "pass":
+                exitcode = 1
+                print(f"Test is NOT PASSED: {m}")
+    return exitcode
+
+
+def main():
+    argparser = argparse.ArgumentParser()
+    argparser.add_argument("--suite", required=True)
+    argparser.add_argument("--dtype", required=True)
+    argparser.add_argument("--mode", required=True, choices=("inference", "training", "inference-no-freezing"))
+    argparser.add_argument("--test_mode", required=True, choices=("performance", "accuracy"))
+    argparser.add_argument("--device", help="i.e. xpu", required=True)
+    argparser.add_argument("--models-file", help="Subset of models list", required=True)
+    argparser.add_argument("--inductor-log-dir", help="Inductor test log directory", default="inductor_log")
+    args = argparser.parse_args()
+    exitcode = check_report(args.suite, args.dtype, args.mode, args.test_mode, args.device, args.models_file,
+                            args.inductor_log_dir)
+    print(f"Report check result: {'SUCCESS' if exitcode == 0 else 'FAIL'}")
+    sys.exit(exitcode)
+
+
+if __name__ == "__main__":
+    main()