# E2E accuracy; torchbench; bfloat16; inference #876
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# End-to-end accuracy workflow. It can be started manually with the inputs
# below, or by the cron schedule at the end of the `on` section.
name: E2E accuracy
# Scheduled runs carry no inputs, so fall back to the workflow name explicitly.
run-name: ${{ inputs.run_name || 'E2E accuracy' }}

on:
  workflow_dispatch:
    inputs:
      pytorch_ref:
        description: PyTorch ref, keep empty for default
        type: string
        default: ""
      suite:
        description: Test suite
        type: choice
        options:
          - all
          - huggingface
          - timm_models
          - torchbench
        default: all
      mode:
        description: Inference, inference-with-freezing, or training
        type: choice
        options:
          - all
          - inference
          - inference-with-freezing
          - training
        default: all
      dtype:
        description: Data type
        type: choice
        options:
          - all
          - amp_bf16
          - amp_fp16
          - bfloat16
          - float16
          - float32
        default: all
      models:
        description: Run all models or a subset from .github/models/accuracy/{suite}.txt
        type: choice
        options:
          - all
          - subset
        default: all
      check_all_subset_models:
        description: In "subset" mode, keep going after errors
        type: boolean
        default: false
      only_one_model:
        description: Run only this one model
        type: string
        default: ""
      runner_label:
        description: Runner label, keep empty for default
        type: string
        default: ""
      TORCH_COMPILE_DEBUG:
        description: TORCH_COMPILE_DEBUG
        type: string
        default: ""
      run_name:
        description: Custom run name
        type: string
        default: "E2E accuracy"
  schedule:
    # twice a week, on Wednesday and Saturday, at 22:00
    # (GitHub evaluates cron schedules in UTC)
    - cron: "0 22 * * 3,6"

permissions: read-all
jobs:
  # Expands the suite/mode/dtype selection into JSON arrays that drive the
  # test matrix. An empty value (no inputs supplied) or "all" selects every
  # option.
  setup:
    name: Setup
    runs-on: linux
    outputs:
      suite: ${{ steps.set-matrix.outputs.suite }}
      mode: ${{ steps.set-matrix.outputs.mode }}
      dtype: ${{ steps.set-matrix.outputs.dtype }}
    timeout-minutes: 10
    steps:
      - name: Set matrix
        id: set-matrix
        # Inputs reach the script through the environment instead of being
        # pasted into the shell text, so their content is never parsed as code.
        env:
          REQ_SUITE: ${{ inputs.suite }}
          REQ_MODE: ${{ inputs.mode }}
          REQ_DTYPE: ${{ inputs.dtype }}
        run: |
          if [[ -z "$REQ_SUITE" || "$REQ_SUITE" == "all" ]]; then
            suite='["huggingface", "timm_models", "torchbench"]'
          else
            suite="[\"$REQ_SUITE\"]"
          fi
          if [[ -z "$REQ_MODE" || "$REQ_MODE" == "all" ]]; then
            mode='["inference", "inference-with-freezing", "training"]'
          else
            mode="[\"$REQ_MODE\"]"
          fi
          if [[ -z "$REQ_DTYPE" || "$REQ_DTYPE" == "all" ]]; then
            dtype='["amp_bf16", "amp_fp16", "bfloat16", "float16", "float32"]'
          else
            dtype="[\"$REQ_DTYPE\"]"
          fi
          {
            echo "suite=$suite"
            echo "mode=$mode"
            echo "dtype=$dtype"
          } >> "$GITHUB_OUTPUT"
      - name: Print inputs
        # Free-form inputs could contain `$(...)` or backticks; printing them
        # from an environment variable avoids the expansion an unquoted
        # heredoc would perform.
        env:
          INPUTS_JSON: ${{ toJSON(github.event.inputs) }}
        run: |
          printf '%s\n' "$INPUTS_JSON"
      - name: Print setup outputs
        env:
          OUTPUTS_JSON: ${{ toJSON(steps.set-matrix.outputs) }}
        run: |
          printf '%s\n' "$OUTPUTS_JSON"

  # Runs the reusable E2E workflow once per suite/mode/dtype combination.
  run_tests:
    name: Run test matrix
    needs: setup
    strategy:
      matrix:
        suite: ${{ fromJson(needs.setup.outputs.suite) }}
        mode: ${{ fromJson(needs.setup.outputs.mode) }}
        dtype: ${{ fromJson(needs.setup.outputs.dtype) }}
      # Let the remaining combinations finish when one of them fails.
      fail-fast: false
    uses: ./.github/workflows/e2e-reusable.yml
    secrets:
      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
    with:
      # The `|| default` fallbacks cover runs that were started without inputs.
      pytorch_ref: ${{ inputs.pytorch_ref || '' }}
      suite: ${{ matrix.suite }}
      mode: ${{ matrix.mode }}
      test_mode: accuracy
      dtype: ${{ matrix.dtype }}
      models: ${{ inputs.models || 'all' }}
      check_all_subset_models: ${{ inputs.check_all_subset_models || false }}
      only_one_model: ${{ inputs.only_one_model || '' }}
      runner_label: ${{ inputs.runner_label || 'max1100' }}
      TORCH_COMPILE_DEBUG: ${{ inputs.TORCH_COMPILE_DEBUG || '' }}

  # Collects the per-combination reports, aggregates them, and compares them
  # with the pinned torch-xpu-ops reference.
  summary:
    name: Aggregate and check results
    needs: [run_tests, setup]
    runs-on: linux
    # Run even when some matrix jobs failed.
    if: always()
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5
      - name: Install Python
        uses: ./.github/actions/setup-python
        with:
          python_version: "3.10"
          use_pyenv: true
      - name: Download all artifacts
        uses: actions/download-artifact@v6
        with:
          path: separate-reports
          merge-multiple: true
      - name: Run aggregation script
        run: |
          pip install pandas numpy
          ls -la separate-reports
          echo "Local dir"
          ls -la ./
          python scripts/e2e_checks/aggregate_e2e_results.py \
            --input-dir separate-reports \
            --output-dir aggregated-results
      - name: Upload aggregated results
        uses: actions/upload-artifact@v5
        if: ${{ !cancelled() }}
        with:
          name: aggregated-results-${{ github.run_id }}
          path: aggregated-results
          include-hidden-files: true
      - name: Check results against reference
        # Only when the full model list was run (no subset, no single model).
        if: ${{ (inputs.models || 'all') == 'all' && (inputs.only_one_model || '') == '' && !cancelled() }}
        env:
          SUITES_JSON: ${{ needs.setup.outputs.suite }}
          MODES_JSON: ${{ needs.setup.outputs.mode }}
          DTYPES_JSON: ${{ needs.setup.outputs.dtype }}
        run: |
          PYTORCH_XPU_OPS_REF="$(<.github/pins/e2e_reference_torch-xpu-ops.txt)"
          git clone https://github.com/intel/torch-xpu-ops.git
          git -C torch-xpu-ops checkout "$PYTORCH_XPU_OPS_REF"
          ./scripts/e2e_checks/compare_reference.sh \
            separate-reports \
            "./torch-xpu-ops" \
            "$SUITES_JSON" \
            "$MODES_JSON" \
            "$DTYPES_JSON"