E2E accuracy; torchbench; bfloat16; inference #876

Workflow file for this run

.github/workflows/e2e-accuracy.yml at 24992d7

	# Workflow title, plus the per-run label taken from the `run_name` dispatch input.
	name: "E2E accuracy"
	run-name: "${{ inputs.run_name }}"

	# Triggers: manual dispatch (with the inputs below) and a twice-weekly schedule.
	on:
	  workflow_dispatch:
	    inputs:
	      # Git ref of PyTorch, forwarded to the reusable test workflow.
	      pytorch_ref:
	        description: PyTorch ref, keep empty for default
	        type: string
	        default: ""
	      # Matrix axis; "all" is expanded into every suite by the `setup` job.
	      suite:
	        description: Test suite
	        type: choice
	        options:
	          - all
	          - huggingface
	          - timm_models
	          - torchbench
	        default: all
	      # Matrix axis; "all" is expanded into every mode by the `setup` job.
	      mode:
	        description: Inference, inference-with-freezing, or training
	        type: choice
	        options:
	          - all
	          - inference
	          - inference-with-freezing
	          - training
	        default: all
	      # Matrix axis; "all" is expanded into every data type by the `setup` job.
	      dtype:
	        description: Data type
	        type: choice
	        options:
	          - all
	          - amp_bf16
	          - amp_fp16
	          - bfloat16
	          - float16
	          - float32
	        default: all
	      models:
	        description: Run all models or a subset from .github/models/accuracy/{suite}.txt
	        type: choice
	        options:
	          - all
	          - subset
	        default: all
	      check_all_subset_models:
	        description: In "subset" mode, keep going after errors
	        type: boolean
	        default: false
	      only_one_model:
	        description: Run only this one model
	        type: string
	        default: ""
	      runner_label:
	        description: Runner label, keep empty for default
	        type: string
	        default: ""
	      TORCH_COMPILE_DEBUG:
	        description: TORCH_COMPILE_DEBUG
	        type: string
	        default: ""
	      # Also used as the workflow's `run-name`.
	      run_name:
	        description: Custom run name
	        type: string
	        default: "E2E accuracy"
	  schedule:
	    # Twice a week, at 22:00 UTC on Wednesday and Saturday.
	    - cron: "0 22 * * 3,6"


	# Token scopes for every job: `read-all` grants read-only access to all permission scopes.
	permissions: read-all

	# Three jobs:
	#   setup     - turns the suite/mode/dtype inputs into JSON arrays for the matrix
	#   run_tests - fans out over that matrix through the reusable e2e workflow
	#   summary   - downloads the run's artifacts, aggregates them and compares
	#               them against the pinned torch-xpu-ops reference
	jobs:
	  setup:
	    name: Setup
	    runs-on: linux
	    outputs:
	      suite: ${{ steps.set-matrix.outputs.suite }}
	      mode: ${{ steps.set-matrix.outputs.mode }}
	      dtype: ${{ steps.set-matrix.outputs.dtype }}
	    timeout-minutes: 10
	    steps:
	      - name: Set matrix
	        id: set-matrix
	        # Inputs reach the script through environment variables instead of
	        # being spliced into the shell text, so their content is never parsed
	        # as shell code.
	        env:
	          SUITE_INPUT: ${{ inputs.suite }}
	          MODE_INPUT: ${{ inputs.mode }}
	          DTYPE_INPUT: ${{ inputs.dtype }}
	        run: |
	          # An empty value or "all" selects every entry of the axis;
	          # any other value becomes a one-element JSON array.
	          if [[ -z "$SUITE_INPUT" || "$SUITE_INPUT" == "all" ]]; then
	            suite='["huggingface", "timm_models", "torchbench"]'
	          else
	            suite="[\"$SUITE_INPUT\"]"
	          fi
	          if [[ -z "$MODE_INPUT" || "$MODE_INPUT" == "all" ]]; then
	            mode='["inference", "inference-with-freezing", "training"]'
	          else
	            mode="[\"$MODE_INPUT\"]"
	          fi
	          if [[ -z "$DTYPE_INPUT" || "$DTYPE_INPUT" == "all" ]]; then
	            dtype='["amp_bf16", "amp_fp16", "bfloat16", "float16", "float32"]'
	          else
	            dtype="[\"$DTYPE_INPUT\"]"
	          fi
	          echo "suite=$suite" >> "$GITHUB_OUTPUT"
	          echo "mode=$mode" >> "$GITHUB_OUTPUT"
	          echo "dtype=$dtype" >> "$GITHUB_OUTPUT"

	      - name: Print inputs
	        # Free-form inputs (run_name, only_one_model, ...) are printed from an
	        # environment variable so that `$(...)` or backticks in them are not
	        # expanded by the shell.
	        env:
	          DISPATCH_INPUTS_JSON: ${{ toJSON(github.event.inputs) }}
	        run: |
	          printf '%s\n' "$DISPATCH_INPUTS_JSON"

	      - name: Print setup outputs
	        env:
	          SETUP_OUTPUTS_JSON: ${{ toJSON(steps.set-matrix.outputs) }}
	        run: |
	          printf '%s\n' "$SETUP_OUTPUTS_JSON"

	  run_tests:
	    name: Run test matrix
	    needs: setup
	    strategy:
	      # One reusable-workflow call per (suite, mode, dtype) combination.
	      matrix:
	        suite: ${{ fromJson(needs.setup.outputs.suite) }}
	        mode: ${{ fromJson(needs.setup.outputs.mode) }}
	        dtype: ${{ fromJson(needs.setup.outputs.dtype) }}
	      # Let the remaining combinations finish when one of them fails.
	      fail-fast: false
	    uses: ./.github/workflows/e2e-reusable.yml
	    secrets:
	      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
	    with:
	      # `|| <default>` supplies a value when the input is empty, e.g. on
	      # scheduled runs, which carry no dispatch inputs.
	      pytorch_ref: ${{ inputs.pytorch_ref || '' }}
	      suite: ${{ matrix.suite }}
	      mode: ${{ matrix.mode }}
	      test_mode: accuracy
	      dtype: ${{ matrix.dtype }}
	      models: ${{ inputs.models || 'all' }}
	      check_all_subset_models: ${{ inputs.check_all_subset_models || false }}
	      only_one_model: ${{ inputs.only_one_model || '' }}
	      runner_label: ${{ inputs.runner_label || 'max1100' }}
	      TORCH_COMPILE_DEBUG: ${{ inputs.TORCH_COMPILE_DEBUG || '' }}

	  summary:
	    name: Aggregate and check results
	    needs: [run_tests, setup]
	    runs-on: linux
	    # Run even when some matrix jobs failed.
	    if: always()
	    steps:
	      - name: Checkout repository
	        uses: actions/checkout@v5

	      - name: Install Python
	        uses: ./.github/actions/setup-python
	        with:
	          # Quoted so YAML does not read the version as the float 3.1.
	          python_version: "3.10"
	          use_pyenv: true

	      - name: Download all artifacts
	        uses: actions/download-artifact@v6
	        with:
	          path: separate-reports
	          merge-multiple: true

	      - name: Run aggregation script
	        run: |
	          pip install pandas numpy
	          ls -la separate-reports
	          echo "Local dir"
	          ls -la ./
	          python scripts/e2e_checks/aggregate_e2e_results.py \
	            --input-dir separate-reports \
	            --output-dir aggregated-results

	      - name: Upload aggregated results
	        uses: actions/upload-artifact@v5
	        if: ${{ !cancelled() }}
	        with:
	          name: aggregated-results-${{ github.run_id }}
	          path: aggregated-results
	          include-hidden-files: true

	      - name: Check results against reference
	        # Only full runs are compared: skipped for "subset" runs and for
	        # single-model runs.
	        if: ${{ (inputs.models || 'all') == 'all' && (inputs.only_one_model || '') == '' && !cancelled() }}
	        # The matrix axes are passed to the script via the environment rather
	        # than being interpolated into the shell text.
	        env:
	          SETUP_SUITE_JSON: ${{ needs.setup.outputs.suite }}
	          SETUP_MODE_JSON: ${{ needs.setup.outputs.mode }}
	          SETUP_DTYPE_JSON: ${{ needs.setup.outputs.dtype }}
	        run: |
	          # Check out torch-xpu-ops at the ref pinned in the repository.
	          PYTORCH_XPU_OPS_REF="$(<.github/pins/e2e_reference_torch-xpu-ops.txt)"
	          git clone https://github.com/intel/torch-xpu-ops.git
	          cd torch-xpu-ops
	          git checkout "$PYTORCH_XPU_OPS_REF"
	          cd ..
	          ./scripts/e2e_checks/compare_reference.sh \
	            separate-reports \
	            "./torch-xpu-ops" \
	            "$SETUP_SUITE_JSON" \
	            "$SETUP_MODE_JSON" \
	            "$SETUP_DTYPE_JSON"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

E2E accuracy; torchbench; bfloat16; inference #876

Workflow file

E2E accuracy; torchbench; bfloat16; inference #876

Uh oh!

Jobs

Run details

Workflow file for this run