# Test removal of TRITON_INTEL_ENABLE_INSTR_SCHED #789
# Workflow file for this run.
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
# Workflow display name.
name: Triton benchmarks
# Per-run title; taken from the `run_name` dispatch input.
run-name: ${{ inputs.run_name }}
# Triggers: manual dispatch (with inputs), a daily schedule, and pull requests
# against `main` that touch this workflow file or anything under benchmarks/.
on:
  workflow_dispatch:
    inputs:
      # Label of the runner to use; the job supplies its own fallback when empty.
      runner_label:
        description: Runner label, keep empty for default
        type: string
        default: ''
      # Tag attached to the generated benchmark reports.
      tag:
        description: Tag for benchmark results
        type: string
        default: 'test'
      # How performance numbers are collected.
      benchmarking_method:
        description: The method used to obtain performance numbers
        type: choice
        options:
          - PYTORCH_LEGACY_PROFILER_USING_IPEX
          - ELAPSED_TIME
          - UPSTREAM_PYTORCH_PROFILER
        default: PYTORCH_LEGACY_PROFILER_USING_IPEX
      # Feeds the workflow-level `run-name`.
      run_name:
        description: Run name
        type: string
        default: 'Triton benchmarks'

  schedule:
    # Every day at 23:05.
    - cron: '5 23 * * *'

  pull_request:
    branches:
      - main
    paths:
      - .github/workflows/triton-benchmarks.yml
      - benchmarks/**
# Token permissions: read-only for every scope.
permissions: read-all

# Workflow-wide environment, visible to every step.
env:
  # Quoted so the value stays the string "3.10" rather than the float 3.1.
  PYTHON_VERSION: '3.10'
  # Dispatch input if provided; otherwise the legacy IPEX profiler.
  BENCHMARKING_METHOD: ${{ inputs.benchmarking_method || 'PYTORCH_LEGACY_PROFILER_USING_IPEX' }}
  # '1' for any event other than workflow_dispatch, or when the selected method
  # is PYTORCH_LEGACY_PROFILER_USING_IPEX; '0' otherwise.
  USE_IPEX: ${{ github.event_name != 'workflow_dispatch' && '1' || inputs.benchmarking_method == 'PYTORCH_LEGACY_PROFILER_USING_IPEX' && '1' || '0' }}
  # Dispatch input if provided; otherwise 'pr' for pull requests, 'ci' for
  # scheduled runs, and 'test' as the last resort.
  TAG: ${{ inputs.tag || (github.event_name == 'pull_request' && 'pr') || (github.event_name == 'schedule' && 'ci') || 'test' }}
jobs:
  # Single job: builds Triton from source, runs the flash-attention benchmark on
  # the advanced path with and without TRITON_INTEL_ENABLE_INSTR_SCHED, and
  # uploads the resulting reports.
  build:
    name: Triton benchmarks
    runs-on:
      # Runner label from the dispatch input, falling back to 'max1550'.
      - ${{ inputs.runner_label || 'max1550' }}
    timeout-minutes: 720
    defaults:
      run:
        # Every `run` script is sourced after the oneAPI environment is set up.
        shell: bash -noprofile --norc -eo pipefail -c "source /opt/intel/oneapi/setvars.sh > /dev/null; source {0}"
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Load pip cache
        id: pip-cache
        uses: ./.github/actions/load
        with:
          path: $HOME/.cache/pip
          # pip cache per commit id just to minimize network traffic
          key: pip-$PYTHON_VERSION-$GITHUB_SHA

      - name: Install Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install Python build dependencies
        run: |
          pip install wheel cmake

      # Exactly one of the two PyTorch setup steps runs, selected by USE_IPEX.
      - name: Setup PyTorch with IPEX
        if: ${{ env.USE_IPEX == '1' }}
        uses: ./.github/actions/setup-pytorch
        with:
          repository: Stonepia/pytorch

      - name: Setup PyTorch without IPEX
        if: ${{ env.USE_IPEX == '0' }}
        uses: ./.github/actions/setup-pytorch
        with:
          repository: pytorch/pytorch

      - name: Setup IPEX
        if: ${{ env.USE_IPEX == '1' }}
        uses: ./.github/actions/setup-ipex

      - name: Build Triton wheels
        uses: ./.github/actions/setup-triton
        with:
          command: DEBUG=1 python setup.py bdist_wheel

      - name: Install Triton
        run: |
          pip install python/dist/*.whl

      - name: Install benchmark dependencies
        run: |
          pip install matplotlib pandas tabulate

      - name: Create reports dir
        run: |
          mkdir reports
          # Export the absolute reports path for all later steps.
          echo "REPORTS=$PWD/reports" >> "$GITHUB_ENV"

      - name: Install benchmarks
        id: install
        run: |
          cd benchmarks
          python setup.py install

      # The benchmark steps below run whenever the install step succeeded and
      # the run was not cancelled, even if an earlier benchmark step failed.
      - name: Run Triton FA kernel benchmark - advanced path (w/ TRITON_INTEL_ENABLE_INSTR_SCHED)
        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
        run: |
          cd benchmarks/triton_kernels_benchmark
          TRITON_INTEL_ADVANCED_PATH=1 \
          TRITON_INTEL_ENABLE_INSTR_SCHED=1 \
          TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT=1 \
          IGC_VISAOptions=" -enableBCR" \
            python flash_attention_fwd_benchmark.py --reports "$REPORTS"
          # This run gets its own tag suffix and report file: the following
          # step (without the flag) writes attn-triton-advanced-report.csv with
          # the "-adv" tag, and would otherwise overwrite this run's report.
          TAG="${TAG}-adv-instr-sched"
          source ../../scripts/capture-hw-details.sh
          python ../../scripts/build_report.py \
            "$REPORTS/attn-performance.csv" \
            "$REPORTS/attn-triton-advanced-instr-sched-report.csv" \
            --benchmark attn \
            --compiler triton \
            --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" \
            --tflops_col Triton-TFlops \
            --hbm_col "Triton-GB/s" \
            --tag "$TAG"

      - name: Run Triton FA kernel benchmark - advanced path (w/o TRITON_INTEL_ENABLE_INSTR_SCHED)
        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
        run: |
          cd benchmarks/triton_kernels_benchmark
          TRITON_INTEL_ADVANCED_PATH=1 \
          TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT=1 \
          IGC_VISAOptions=" -enableBCR" \
            python flash_attention_fwd_benchmark.py --reports "$REPORTS"
          TAG="${TAG}-adv"
          source ../../scripts/capture-hw-details.sh
          python ../../scripts/build_report.py \
            "$REPORTS/attn-performance.csv" \
            "$REPORTS/attn-triton-advanced-report.csv" \
            --benchmark attn \
            --compiler triton \
            --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" \
            --tflops_col Triton-TFlops \
            --hbm_col "Triton-GB/s" \
            --tag "$TAG"

      # Persist the pip cache only when the load step reported a miss.
      - name: Save pip cache
        if: ${{ steps.pip-cache.outputs.status == 'miss' }}
        uses: ./.github/actions/save
        with:
          path: ${{ steps.pip-cache.outputs.path }}
          dest: ${{ steps.pip-cache.outputs.dest }}

      - name: Upload benchmark reports
        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-reports
          path: reports