diff --git a/.github/workflows/e2e-accuracy.yml b/.github/workflows/e2e-accuracy.yml
index 3b2aa1901d..9d8f2deca3 100644
--- a/.github/workflows/e2e-accuracy.yml
+++ b/.github/workflows/e2e-accuracy.yml
@@ -44,6 +44,10 @@ on:
           - all
           - subset
         default: all
+      check_all_subset_models:
+        description: In "subset" mode, check all subset models
+        type: boolean
+        default: false
       only_one_model:
         description: Run only this one model
         type: string
@@ -125,6 +129,7 @@ jobs:
       test_mode: accuracy
       dtype: ${{ matrix.dtype }}
       models: ${{ inputs.models }}
+      check_all_subset_models: ${{ inputs.check_all_subset_models || false }}
       only_one_model: ${{ inputs.only_one_model }}
       runner_label: ${{ inputs.runner_label }}
       TORCH_COMPILE_DEBUG: ${{ inputs.TORCH_COMPILE_DEBUG }}
diff --git a/.github/workflows/e2e-performance.yml b/.github/workflows/e2e-performance.yml
index 0f524f9ba6..de7a598cba 100644
--- a/.github/workflows/e2e-performance.yml
+++ b/.github/workflows/e2e-performance.yml
@@ -44,6 +44,10 @@ on:
           - all
           - subset
         default: subset
+      check_all_subset_models:
+        description: In "subset" mode, check all subset models
+        type: boolean
+        default: false
       only_one_model:
         description: Run only this one model
         type: string
@@ -136,6 +140,7 @@ jobs:
       test_mode: performance
       dtype: ${{ matrix.dtype }}
       models: ${{ inputs.models }}
+      check_all_subset_models: ${{ inputs.check_all_subset_models || false }}
       only_one_model: ${{ inputs.only_one_model }}
       runner_label: ${{ inputs.runner_label }}
       TORCH_COMPILE_DEBUG: ${{ inputs.TORCH_COMPILE_DEBUG }}
diff --git a/.github/workflows/e2e-reusable.yml b/.github/workflows/e2e-reusable.yml
index c83e196ba8..968ff81915 100644
--- a/.github/workflows/e2e-reusable.yml
+++ b/.github/workflows/e2e-reusable.yml
@@ -27,6 +27,10 @@ on:
         description: Run all models or a subset
         type: string
         default: all
+      check_all_subset_models:
+        description: In "subset" mode, check all subset models
+        type: boolean
+        default: false
       only_one_model:
         description: Run only this one model
         type: string
@@ -224,9 +228,19 @@ jobs:
           if [[ "${{ inputs.only_one_model }}" ]]; then
             bash -e $GITHUB_WORKSPACE/scripts/inductor_xpu_test.sh ${{ inputs.suite }} ${{ inputs.dtype }} ${{ inputs.mode }} ${{ inputs.test_mode }} xpu 0 static 1 0 ${{ inputs.only_one_model }}
           elif [[ "${{ inputs.models }}" == "subset" ]]; then
+            models_subset_file="$GITHUB_WORKSPACE/.github/models/${{ inputs.test_mode }}/${{ inputs.suite }}.txt"
             while read model; do
               bash -e $GITHUB_WORKSPACE/scripts/inductor_xpu_test.sh ${{ inputs.suite }} ${{ inputs.dtype }} ${{ inputs.mode }} ${{ inputs.test_mode }} xpu 0 static 1 0 $model
-            done < $GITHUB_WORKSPACE/.github/models/${{ inputs.test_mode }}/${{ inputs.suite }}.txt
+            done < "$models_subset_file"
+            if [[ "${{ inputs.check_all_subset_models }}" == true ]]; then
+              python $GITHUB_WORKSPACE/scripts/check_inductor_report.py --models-file="$models_subset_file" \
+                --suite=${{ inputs.suite }} \
+                --dtype=${{ inputs.dtype }} \
+                --mode=${{ inputs.mode }} \
+                --test_mode=${{ inputs.test_mode }} \
+                --device=xpu \
+                --inductor-log-dir="${GITHUB_WORKSPACE}/inductor_log"
+            fi
           else
             bash -e $GITHUB_WORKSPACE/scripts/inductor_xpu_test.sh ${{ inputs.suite }} ${{ inputs.dtype }} ${{ inputs.mode }} ${{ inputs.test_mode }} xpu 0 static 1 0
           fi
diff --git a/scripts/check_inductor_report.py b/scripts/check_inductor_report.py
new file mode 100755
index 0000000000..cc639721cf
--- /dev/null
+++ b/scripts/check_inductor_report.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+"""Check that every model from a subset list is present in the inductor
+report CSV and, for accuracy runs, that it passed."""
+import argparse
+from pathlib import Path
+import csv
+import sys
+
+
+def check_report(suite, dtype, mode, test_mode, device, models_file, inductor_log_dir):
+    inductor_log_dir_leaf = Path(inductor_log_dir) / suite / dtype
+    inductor_report_filename = f"inductor_{suite}_{dtype}_{mode}_{device}_{test_mode}.csv"
+    inductor_report_path = inductor_log_dir_leaf / inductor_report_filename
+
+    exitcode = 0
+
+    with open(models_file, encoding="utf-8") as f:
+        subset = f.read().splitlines()
+
+    # Drop the CSV header and keep only the rows for the requested device.
+    with open(inductor_report_path, encoding="utf-8") as f:
+        rows = list(csv.reader(f))
+    report = [r for r in rows[1:] if r[0] == device]
+
+    test_list = [r[1] for r in report]
+
+    if test_mode == "performance":
+        # A performance report has no pass/fail column, so it is enough
+        # that every model from the subset shows up in the report.
+        for m in subset:
+            if m not in test_list:
+                exitcode = 1
+                print(f"Test is NOT FOUND in report: {m}")
+
+    if test_mode == "accuracy":
+        # An accuracy report carries the test status in its fourth column.
+        test_statuses = [r[3] for r in report]
+        for m in subset:
+            try:
+                idx = test_list.index(m)
+            except ValueError:
+                exitcode = 1
+                print(f"Test is NOT FOUND: {m}")
+                continue
+            if test_statuses[idx] != "pass":
+                exitcode = 1
+                print(f"Test is NOT PASSED: {m}")
+    return exitcode
+
+
+def main():
+    argparser = argparse.ArgumentParser()
+    argparser.add_argument("--suite", required=True)
+    argparser.add_argument("--dtype", required=True)
+    argparser.add_argument("--mode", required=True, choices=("inference", "training", "inference-no-freezing"))
+    argparser.add_argument("--test_mode", required=True, choices=("performance", "accuracy"))
+    argparser.add_argument("--device", help="e.g. xpu", required=True)
+    argparser.add_argument("--models-file", help="File with the subset of models to check", required=True)
+    argparser.add_argument("--inductor-log-dir", help="Inductor test log directory", default="inductor_log")
+    args = argparser.parse_args()
+    exitcode = check_report(args.suite, args.dtype, args.mode, args.test_mode, args.device, args.models_file,
+                            args.inductor_log_dir)
+    print(f"Report check result: {'SUCCESS' if exitcode == 0 else 'FAIL'}")
+    sys.exit(exitcode)
+
+
+if __name__ == "__main__":
+    main()
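
Note: with check_all_subset_models enabled, the step in e2e-reusable.yml defers failure detection to check_inductor_report.py, which runs once after the whole subset loop instead of the workflow stopping at the first failing model. The same check can be reproduced by hand outside CI; the suite, dtype and mode values below are illustrative, and any combination accepted by the workflow inputs works the same way:

    python scripts/check_inductor_report.py \
        --suite=huggingface \
        --dtype=bfloat16 \
        --mode=inference \
        --test_mode=accuracy \
        --device=xpu \
        --models-file=.github/models/accuracy/huggingface.txt \
        --inductor-log-dir=inductor_log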
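
The column indices hard-coded in check_report (r[0] for device, r[1] for model name, r[3] for accuracy status) assume the CSV layout written by the dynamo benchmark harness. For an accuracy run that layout would look roughly like the sample below; the model names and statuses are made up for illustration:

    dev,name,batch_size,accuracy
    xpu,BertForMaskedLM,16,pass
    xpu,GoogleFnet,16,fail_to_run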
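
A minimal smoke test of check_report against that layout could look like the sketch below; the temporary paths and the model name are hypothetical, and the snippet is not part of the change:

    # Hypothetical smoke test; run from the scripts/ directory so the import resolves.
    import tempfile
    from pathlib import Path
    from check_inductor_report import check_report

    with tempfile.TemporaryDirectory() as tmp:
        leaf = Path(tmp) / "inductor_log" / "huggingface" / "bfloat16"
        leaf.mkdir(parents=True)
        report = leaf / "inductor_huggingface_bfloat16_inference_xpu_accuracy.csv"
        report.write_text("dev,name,batch_size,accuracy\nxpu,BertForMaskedLM,16,pass\n", encoding="utf-8")
        models = Path(tmp) / "models.txt"
        models.write_text("BertForMaskedLM\n", encoding="utf-8")
        # Expect exit code 0: the only listed model is present and passing.
        assert check_report("huggingface", "bfloat16", "inference", "accuracy",
                            "xpu", models, Path(tmp) / "inductor_log") == 0
    print("check_report smoke test: OK")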