Triton benchmarks #3414
name: Triton benchmarks
run-name: ${{ inputs.run_name }}
on:
  workflow_dispatch:
    inputs:
      runner_label:
        description: Runner label, keep empty for default
        type: string
        default: ""
      tag:
        description: Tag for benchmark results
        type: string
        default: "test"
      benchmarking_method:
        description: The method used to obtain performance numbers
        type: choice
        options:
          - ELAPSED_TIME
          - UPSTREAM_PYTORCH_PROFILER
          - PROTON_PROFILER
        default: UPSTREAM_PYTORCH_PROFILER
      verify:
        description: Verify the benchmark results
        type: boolean
        default: true
      run_name:
        description: Run name
        type: string
        default: "Triton benchmarks"
      n_runs:
        description: Number of runs for each benchmark
        type: number
        default: 1
      benchmarks:
        description: JSON list of benchmarks to run. Leave empty to run all benchmarks.
        type: string
        default: ""
      skip_benchmarks:
        description: JSON list of benchmarks to skip
        type: string
        default: "[]"
  # This workflow is also called from workflows triton-benchmarks-*.yml.
  workflow_call:
    inputs:
      runner_label:
        description: Runner label
        type: string
      skip_benchmarks:
        description: JSON list of benchmarks to skip
        type: string
        default: "[]"
# Cancels in-progress PR runs when the PR is updated. Manual runs are never cancelled.
concurrency:
  group: ${{ github.workflow }}-${{ github.event_name == 'workflow_dispatch' && github.run_id || github.event_name == 'pull_request' && (contains(github.event.pull_request.labels.*.name, 'keep-going') && github.run_id || github.event.pull_request.number) || github.ref }}
  cancel-in-progress: true
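# The group expression above resolves as follows:
#   - workflow_dispatch: the run id, so manual runs never share a group and are never cancelled;
#   - pull_request (when called from a PR-triggered workflow): the run id if the PR carries the
#     'keep-going' label, otherwise the PR number, so a newer push cancels the older run;
#   - any other event: the git ref.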
permissions: read-all
env:
  PYTHON_VERSION: "3.10"
  BENCHMARKING_METHOD: ${{ inputs.benchmarking_method || 'UPSTREAM_PYTORCH_PROFILER' }}
  VERIFY: ${{ (github.event_name == 'pull_request' || github.event_name == 'schedule' || inputs.verify) && '1' || '0' }}
  TAG: ${{ inputs.tag || (github.event_name == 'pull_request' && format('pr-{0}', github.event.number)) || (github.event_name == 'schedule' && 'ci') || 'test' }}
  N_RUNS: ${{ inputs.n_runs || '1' }}
  # FIXME: Enable Level Zero v2 loader once it's stable.
  # https://github.com/intel/intel-xpu-backend-for-triton/issues/5572
  UR_LOADER_USE_LEVEL_ZERO_V2: "0"
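  # For reference: TAG resolves to the explicit `tag` input when one is given, to `pr-<number>`
  # for pull requests, to `ci` for scheduled runs, and to `test` otherwise; VERIFY is forced on
  # for pull_request and schedule events regardless of the `verify` input.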
jobs:
  build:
    name: Triton benchmarks
    runs-on:
      - linux
      - ${{ inputs.runner_label || 'max1550' }}
    timeout-minutes: 720
    defaults:
      run:
        shell: bash --noprofile --norc -eo pipefail -c "source /opt/intel/oneapi/setvars.sh > /dev/null; source {0}"
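        # Every `run` step in this job goes through this wrapper, so the oneAPI environment
        # (setvars.sh) is sourced before each step body executes.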
    steps:
      - name: Print inputs
        run: |
          cat <<EOF
          ${{ toJSON(inputs) }}
          EOF
      - name: Checkout repository
        uses: actions/checkout@v6
      - name: Install Python (from pyenv) ${{ env.PYTHON_VERSION }}
        uses: ./.github/actions/setup-pyenv-python
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Identify Python version
        run: |
          PYTHON_VERSION="$(python -c 'import sys; print(f"{sys.version_info[0]}.{sys.version_info[1]}")')"
          echo "PYTHON_VERSION=$PYTHON_VERSION" | tee -a $GITHUB_ENV
      - name: Install Python build dependencies
        run: |
          pip install cmake
      - name: Setup PyTorch
        uses: ./.github/actions/setup-pytorch
      - name: Setup Triton
        uses: ./.github/actions/setup-triton
      - name: Create reports dir
        run: |
          mkdir reports
          echo "REPORTS=$PWD/reports" >> $GITHUB_ENV
      - name: Install benchmarks
        id: install
        run: |
          cd benchmarks
          pip install .
      - name: Build PTI
        run: |
          ./scripts/install-pti.sh --build-level-zero
          PTI_LIBS_DIR=$(python ./scripts/pti_lib.py)
          ls $PTI_LIBS_DIR
          echo "PTI_LIBS_DIR=$PTI_LIBS_DIR" >> $GITHUB_ENV
      - name: Run reproducer
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'fused_softmax.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'fused_softmax.py') }}
        run: |
          export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
          python benchmarks/third_party/vllm/unified_attention_upstream.py
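      # The reproducer above runs only when the benchmark install succeeded, the job has not been
      # cancelled, fused_softmax.py is either listed in `benchmarks` or no list was given, and
      # fused_softmax.py is not listed in `skip_benchmarks`.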