Third party benchmarks #351
name: Third party benchmarks

on:
  workflow_dispatch:
    inputs:
      runner_label:
        description: Runner label, keep empty for default
        type: string
        default: ""
      tag:
        description: Tag for benchmark results
        type: string
        default: "test"
      benchmarks:
        description: JSON list of benchmarks to run. Leave empty to run all benchmarks.
        type: string
        default: ""
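        # Benchmark names recognized by the steps below: "sglang", "vllm", "liger",
        # "llama3-1", "launch_micro_benchmarks"; e.g. '["vllm", "liger"]'.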
  schedule:
    # About midnight PST (UTC-8)
    - cron: "5 10 * * *"
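# The workflow can also be started manually; an illustrative GitHub CLI invocation
# (the tag and benchmarks values here are only examples):
#   gh workflow run "Third party benchmarks" -f tag=my-run -f benchmarks='["vllm", "sglang"]'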
permissions: read-all

env:
  PYTHON_VERSION: "3.10"
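  # TAG resolution order: explicit 'tag' input, then 'pr-<number>' for pull requests,
  # then 'ci' for scheduled runs, falling back to 'test'.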
  TAG: ${{ inputs.tag || (github.event_name == 'pull_request' && format('pr-{0}', github.event.number)) || (github.event_name == 'schedule' && 'ci') || 'test' }}
  HF_TOKEN: ${{ secrets.HF_TOKEN || '' }}

jobs:
  build:
    name: Third party benchmarks
    runs-on:
      - linux
      - ${{ inputs.runner_label || 'max1550' }}
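      # Assumption: the default 'max1550' label selects self-hosted runners equipped
      # with an Intel Data Center GPU Max 1550.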
    timeout-minutes: 720
    defaults:
      run:
        shell: bash --noprofile --norc -eo pipefail -c "source /opt/intel/oneapi/setvars.sh > /dev/null; source {0}"
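        # Every `run:` step goes through this shell, so the oneAPI environment
        # (setvars.sh) is sourced before each step's script executes.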
    steps:
      - name: Print inputs
        run: |
          cat <<EOF
          ${{ toJSON(inputs) }}
          EOF
      - name: Checkout repository
        uses: actions/checkout@v6
      - name: Install Python (from pyenv) ${{ env.PYTHON_VERSION }}
        uses: ./.github/actions/setup-pyenv-python
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Identify Python version
        run: |
          PYTHON_VERSION="$(python -c 'import sys; print(f"{sys.version_info[0]}.{sys.version_info[1]}")')"
          echo "PYTHON_VERSION=$PYTHON_VERSION" | tee -a $GITHUB_ENV
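      # Re-exports the interpreter's actual major.minor version through GITHUB_ENV so
      # later steps see the resolved value rather than the requested one.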
      - name: Install Python build dependencies
        run: |
          pip install cmake
      - name: Setup PyTorch
        uses: ./.github/actions/setup-pytorch
      - name: Setup Triton
        uses: ./.github/actions/setup-triton
      - name: Create reports dir
        run: |
          mkdir reports
          echo "REPORTS=$PWD/reports" >> $GITHUB_ENV
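      # Each benchmark below writes its CSVs into $REPORTS; the directory is uploaded
      # as a single artifact at the end of the job.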
      - name: Install benchmark dependencies
        id: install
        run: |
          pip install transformers pandas pytest
      - name: Install benchmarks
        id: install-benchmarks
        run: |
          cd benchmarks
          pip install .
      - name: Run sglang benchmark int8
        if: ${{ steps.install-benchmarks.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'sglang')) }}
        run: |
          source ./scripts/capture-hw-details.sh
          ./scripts/test-triton.sh --install-sglang --skip-pip-install --skip-pytorch-install
          cd benchmarks/third_party/sglang
          python scaled_mm_benchmark.py --reports $REPORTS
          python ../vllm/transform_results.py \
            $REPORTS/scaled_mm_benchmark.csv \
            $REPORTS/scaled-mm-int8-report.csv \
            --tag $TAG \
            --bgroup sglang \
            --benchmark scaled-mm-int8 \
            --param_cols="M,N,K"
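      # The fp8 variant below reuses scaled_mm_benchmark.py, selected via the FP8=1
      # environment variable, and transforms the results into a separate report.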
      - name: Run sglang benchmark with fp8
        if: ${{ steps.install-benchmarks.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'sglang')) }}
        run: |
          source ./scripts/capture-hw-details.sh
          cd benchmarks/third_party/sglang
          FP8="1" python scaled_mm_benchmark.py --reports $REPORTS
          python ../vllm/transform_results.py \
            $REPORTS/scaled_mm_benchmark.csv \
            $REPORTS/scaled-mm-fp8-report.csv \
            --tag $TAG \
            --bgroup sglang \
            --benchmark scaled-mm-fp8 \
            --param_cols="M,N,K"
      - name: Install vllm
        id: install-vllm
        if: ${{ steps.install-benchmarks.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'vllm')) }}
        run: |
          source ./scripts/capture-hw-details.sh
          ./scripts/test-triton.sh --install-vllm --skip-pip-install --skip-pytorch-install
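      # The vllm benchmark steps below run only when this install step succeeded
      # (gated on steps.install-vllm.outcome).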
      - name: Run vllm unified attention bf16
        if: ${{ steps.install-vllm.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'vllm')) }}
        run: |
          source ./scripts/capture-hw-details.sh
          cd benchmarks/third_party/vllm
          python unified_attention_benchmark.py --reports $REPORTS
          python transform_results.py \
            $REPORTS/unified-attention-performance.csv \
            $REPORTS/unified-attention-report.csv \
            --tag $TAG \
            --bgroup "vllm" \
            --benchmark "unified-attn-bf16" \
            --param_cols "q_heads,k_heads,head_size,dtype,qdtype,seq_lens,sliding_window,soft_cap,num_blocks,block_size"
      - name: Run vllm batched moe bf16
        if: ${{ steps.install-vllm.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'vllm')) }}
        run: |
          source ./scripts/capture-hw-details.sh
          cp -r vllm/tests benchmarks/third_party/vllm/tests
          cd benchmarks/third_party/vllm
          python batched_moe_benchmark.py --reports $REPORTS
          python transform_results.py \
            $REPORTS/moe-gemm-performance.csv \
            $REPORTS/moe-gemm-report.csv \
            --tag $TAG \
            --bgroup vllm \
            --benchmark moe-bf16-benchmark \
            --param_cols="num_experts,max_tokens_per_expert,K,N"
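      # Note: the fp8 run below appears to write the same raw CSV name
      # (moe-gemm-performance.csv); only the transformed reports keep distinct names.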
      - name: Run vllm batched moe fp8
        if: ${{ steps.install-vllm.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'vllm')) }}
        run: |
          source ./scripts/capture-hw-details.sh
          cd benchmarks/third_party/vllm
          FP8="1" python batched_moe_benchmark.py --reports $REPORTS
          python transform_results.py \
            $REPORTS/moe-gemm-performance.csv \
            $REPORTS/moe-gemm-fp8-report.csv \
            --tag $TAG \
            --bgroup vllm \
            --benchmark moe-fp8-benchmark \
            --param_cols="num_experts,max_tokens_per_expert,K,N"
      - name: Run Liger-Kernel benchmarks
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'liger')) }}
        run: |
          source ./scripts/capture-hw-details.sh
          ./scripts/test-triton.sh --install-liger --skip-pip-install --skip-pytorch-install
          # Capture the return code so results are still copied on failure
          RET_CODE=0
          bash benchmarks/third_party/liger/run_benchmarks.sh || RET_CODE=$?
          cp Liger-Kernel/benchmark/data/all_benchmark_data.csv $REPORTS/liger-raw.csv
          python benchmarks/third_party/liger/transform.py \
            $REPORTS/liger-raw.csv \
            $REPORTS/liger-report.csv \
            --tag $TAG
          # Return the captured return code at the end
          exit "$RET_CODE"
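      # Assumption: meta-llama/Llama-3.1-8B is a gated checkpoint, so the HF_TOKEN
      # configured at the top of the workflow is needed for the download below.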
      - name: Run e2e Llama 3.1 flex attention performance benchmark
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'llama3-1')) }}
        run: |
          source ./scripts/capture-hw-details.sh
          git clone https://github.com/huggingface/transformers.git
          cd transformers
          git checkout $(<../benchmarks/third_party/e2e-flex_attention/transformers-commit.txt)
          git apply ../benchmarks/third_party/e2e-flex_attention/transformers-patch-for-timing.diff
          git submodule sync
          git submodule update --init --recursive
          pip install -v -e .
          cd ../benchmarks/third_party/e2e-flex_attention
          MODEL_NAME="meta-llama/Llama-3.1-8B"
          MAX_NEW_TOKENS=128
          INPUT_TOKENS=1024
          BATCH_SIZE=1
          python run_llm_inductor_greedy.py -m $MODEL_NAME --max-new-tokens $MAX_NEW_TOKENS --input-tokens $INPUT_TOKENS --num-warmup 2 --num-iter 7 --compile --profile | tee llm.compile.xpu.profile.log
          echo "LLM profiling log is stored into $PWD/llm.compile.xpu.profile.log"
          cp llm.compile.xpu.profile.log $REPORTS/llm.compile.xpu.profile.log
          python transform_results.py $REPORTS/llm.compile.xpu.profile.log $REPORTS/llm-triton-report.csv \
            --tag $TAG \
            --model "$MODEL_NAME" \
            --max-new-tokens $MAX_NEW_TOKENS \
            --batch-size $BATCH_SIZE
      - name: Run launch microbenchmark tests
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'launch_micro_benchmarks')) }}
        run: |
          source scripts/capture-hw-details.sh
          python python/test/microbenchmark/launch_overhead.py --reports $REPORTS
          python benchmarks/third_party/vllm/transform_results.py $REPORTS/launch_overhead_results.csv $REPORTS/launch_overhead-report.csv \
            --tag $TAG \
            --bgroup overhead \
            --benchmark launch-overhead \
            --param_cols="input_type"
      - name: Upload benchmark reports
        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
        uses: actions/upload-artifact@v5
        with:
          name: benchmark-reports
          path: reports