Skip to content

E2E accuracy; torchbench; bfloat16; inference #876

E2E accuracy; torchbench; bfloat16; inference

E2E accuracy; torchbench; bfloat16; inference #876

Workflow file for this run

# Workflow header: display name, triggers and token permissions.
name: E2E accuracy
# Manual runs show the caller-supplied name; the expression is empty when no
# inputs are supplied.
run-name: ${{ inputs.run_name }}

on:
  # Manual trigger. Every input has a default, so the form can be submitted
  # unchanged.
  workflow_dispatch:
    inputs:
      pytorch_ref:
        description: PyTorch ref, keep empty for default
        type: string
        default: ""
      # suite / mode / dtype each select one value or "all".
      suite:
        description: Test suite
        type: choice
        options:
          - all
          - huggingface
          - timm_models
          - torchbench
        default: all
      mode:
        description: Inference, inference-with-freezing, or training
        type: choice
        options:
          - all
          - inference
          - inference-with-freezing
          - training
        default: all
      dtype:
        description: Data type
        type: choice
        options:
          - all
          - amp_bf16
          - amp_fp16
          - bfloat16
          - float16
          - float32
        default: all
      models:
        description: Run all models or a subset from .github/models/accuracy/{suite}.txt
        type: choice
        options:
          - all
          - subset
        default: all
      check_all_subset_models:
        description: In "subset" mode, keep going after errors
        type: boolean
        default: false
      only_one_model:
        description: Run only this one model
        type: string
        default: ""
      runner_label:
        description: Runner label, keep empty for default
        type: string
        default: ""
      TORCH_COMPILE_DEBUG:
        description: TORCH_COMPILE_DEBUG
        type: string
        default: ""
      run_name:
        description: Custom run name
        type: string
        default: "E2E accuracy"
  schedule:
    # twice a week, on Wednesday and Saturday
    - cron: "0 22 * * 3,6"

# Read-only token scopes for every job in this workflow.
permissions: read-all
# Three jobs: `setup` builds the matrix axes, `run_tests` fans out over them
# through a reusable workflow, and `summary` aggregates the reports.
jobs:
  # Expands the suite/mode/dtype selections into JSON arrays that feed the
  # `run_tests` matrix.
  setup:
    name: Setup
    runs-on: linux
    outputs:
      suite: ${{ steps.set-matrix.outputs.suite }}
      mode: ${{ steps.set-matrix.outputs.mode }}
      dtype: ${{ steps.set-matrix.outputs.dtype }}
    timeout-minutes: 10
    steps:
      - name: Set matrix
        id: set-matrix
        # Inputs reach the script as environment variables rather than being
        # expanded into the script text, so their values are never parsed as
        # shell code.
        env:
          SELECTED_SUITE: ${{ inputs.suite }}
          SELECTED_MODE: ${{ inputs.mode }}
          SELECTED_DTYPE: ${{ inputs.dtype }}
        run: |
          # An empty value (no inputs supplied) is treated like "all".
          if [[ -z "$SELECTED_SUITE" || "$SELECTED_SUITE" == "all" ]]; then
            suite='["huggingface", "timm_models", "torchbench"]'
          else
            suite="[\"$SELECTED_SUITE\"]"
          fi
          if [[ -z "$SELECTED_MODE" || "$SELECTED_MODE" == "all" ]]; then
            mode='["inference", "inference-with-freezing", "training"]'
          else
            mode="[\"$SELECTED_MODE\"]"
          fi
          if [[ -z "$SELECTED_DTYPE" || "$SELECTED_DTYPE" == "all" ]]; then
            dtype='["amp_bf16", "amp_fp16", "bfloat16", "float16", "float32"]'
          else
            dtype="[\"$SELECTED_DTYPE\"]"
          fi
          {
            echo "suite=$suite"
            echo "mode=$mode"
            echo "dtype=$dtype"
          } >> "$GITHUB_OUTPUT"
      - name: Print inputs
        # Printed from an environment variable: an unquoted heredoc would let
        # the shell expand `$(...)`, backticks and `$VAR` found in free-form
        # inputs.
        env:
          INPUTS_JSON: ${{ toJSON(github.event.inputs) }}
        run: |
          printf '%s\n' "$INPUTS_JSON"
      - name: Print setup outputs
        env:
          OUTPUTS_JSON: ${{ toJSON(steps.set-matrix.outputs) }}
        run: |
          printf '%s\n' "$OUTPUTS_JSON"

  # Calls the reusable E2E workflow once per suite/mode/dtype combination.
  run_tests:
    name: Run test matrix
    needs: setup
    strategy:
      matrix:
        suite: ${{ fromJson(needs.setup.outputs.suite) }}
        mode: ${{ fromJson(needs.setup.outputs.mode) }}
        dtype: ${{ fromJson(needs.setup.outputs.dtype) }}
      # Let the remaining combinations finish when one of them fails.
      fail-fast: false
    uses: ./.github/workflows/e2e-reusable.yml
    secrets:
      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
    with:
      # The `|| <fallback>` forms supply a value when the input is empty.
      pytorch_ref: ${{ inputs.pytorch_ref || '' }}
      suite: ${{ matrix.suite }}
      mode: ${{ matrix.mode }}
      test_mode: accuracy
      dtype: ${{ matrix.dtype }}
      models: ${{ inputs.models || 'all' }}
      check_all_subset_models: ${{ inputs.check_all_subset_models || false }}
      only_one_model: ${{ inputs.only_one_model || '' }}
      runner_label: ${{ inputs.runner_label || 'max1100' }}
      TORCH_COMPILE_DEBUG: ${{ inputs.TORCH_COMPILE_DEBUG || '' }}

  # Merges the per-combination reports and, for full runs, compares them with
  # the pinned torch-xpu-ops reference.
  summary:
    name: Aggregate and check results
    needs: [run_tests, setup]
    runs-on: linux
    # Run even when some matrix jobs failed.
    if: always()
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5
      - name: Install Python
        uses: ./.github/actions/setup-python
        with:
          python_version: "3.10"
          use_pyenv: true
      - name: Download all artifacts
        uses: actions/download-artifact@v6
        with:
          path: separate-reports
          merge-multiple: true
      - name: Run aggregation script
        run: |
          pip install pandas numpy
          ls -la separate-reports
          echo "Local dir"
          ls -la ./
          python scripts/e2e_checks/aggregate_e2e_results.py \
            --input-dir separate-reports \
            --output-dir aggregated-results
      - name: Upload aggregated results
        uses: actions/upload-artifact@v5
        if: ${{ !cancelled() }}
        with:
          name: aggregated-results-${{ github.run_id }}
          path: aggregated-results
          include-hidden-files: true
      - name: Check results against reference
        # Only for full runs: skipped when a model subset or a single model
        # was requested.
        if: ${{ (inputs.models || 'all') == 'all' && (inputs.only_one_model || '') == '' && !cancelled() }}
        # The matrix axes are passed through the environment so the JSON is
        # handed to the script verbatim instead of being spliced into shell
        # text.
        env:
          SUITES_JSON: ${{ needs.setup.outputs.suite }}
          MODES_JSON: ${{ needs.setup.outputs.mode }}
          DTYPES_JSON: ${{ needs.setup.outputs.dtype }}
        run: |
          # The ref to check out is read from a pin file in this repository.
          PYTORCH_XPU_OPS_REF="$(<.github/pins/e2e_reference_torch-xpu-ops.txt)"
          git clone https://github.com/intel/torch-xpu-ops.git
          cd torch-xpu-ops
          git checkout "$PYTORCH_XPU_OPS_REF"
          cd ..
          ./scripts/e2e_checks/compare_reference.sh \
            separate-reports \
            "./torch-xpu-ops" \
            "$SUITES_JSON" \
            "$MODES_JSON" \
            "$DTYPES_JSON"