           ./scripts/test-triton.sh --install-sglang --skip-pip-install --skip-pytorch-install
           cd benchmarks/third_party/sglang
           python scaled_mm_benchmark.py --reports $REPORTS
-          python ../vllm/transform_results.py $REPORTS/scaled_mm_benchmark.csv $REPORTS/scaled-mm-int8-report.csv --tag $TAG --benchmark scaled-mm-int8 --param_cols="M,N,K" --bgroup sglang
+          python ../vllm/transform_results.py \
+            $REPORTS/scaled_mm_benchmark.csv \
+            $REPORTS/scaled-mm-int8-report.csv \
+            --tag $TAG \
+            --bgroup sglang \
+            --benchmark scaled-mm-int8 \
+            --param_cols="M,N,K"

       - name: Run sglang benchmark with fp8
         if: ${{ steps.install-benchmarks.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'sglang')) }}
@@ -99,29 +105,68 @@ jobs:

           cd benchmarks/third_party/sglang
           FP8="1" python scaled_mm_benchmark.py --reports $REPORTS
-          python ../vllm/transform_results.py $REPORTS/scaled_mm_benchmark.csv $REPORTS/scaled-mm-fp8-report.csv --tag $TAG --benchmark scaled-mm-fp8 --param_cols="M,N,K" --bgroup sglang
+          python ../vllm/transform_results.py \
+            $REPORTS/scaled_mm_benchmark.csv \
+            $REPORTS/scaled-mm-fp8-report.csv \
+            --tag $TAG \
+            --bgroup sglang \
+            --benchmark scaled-mm-fp8 \
+            --param_cols="M,N,K"

-      - name: Run vllm benchmarks bf16
+      - name: Install vllm
+        id: install-vllm
         if: ${{ steps.install-benchmarks.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'vllm')) }}
         run: |
           source ./scripts/capture-hw-details.sh
-
           ./scripts/test-triton.sh --install-vllm --skip-pip-install --skip-pytorch-install
+
+      - name: Run vllm unified attention bf16
+        if: ${{ steps.install-vllm.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'vllm')) }}
+        run: |
+          source ./scripts/capture-hw-details.sh
+
+          cd benchmarks/third_party/vllm
+          python unified_attention_benchmark.py --reports $REPORTS
+          python transform_results.py \
+            $REPORTS/unified-attention-performance.csv \
+            $REPORTS/unified-attention-report.csv \
+            --tag $TAG \
+            --bgroup "vllm" \
+            --benchmark "unified-attn-bf16" \
+            --param_cols "q_heads,k_heads,head_size,dtype,qdtype,seq_lens,sliding_window,soft_cap,num_blocks,block_size"
+
+      - name: Run vllm batched moe bf16
+        if: ${{ steps.install-vllm.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'vllm')) }}
+        run: |
+          source ./scripts/capture-hw-details.sh
+
           cp -r vllm/tests benchmarks/third_party/vllm/tests

           cd benchmarks/third_party/vllm
           python batched_moe_benchmark.py --reports $REPORTS
-          python transform_results.py $REPORTS/moe-gemm-performance.csv $REPORTS/moe-gemm-report.csv --tag $TAG --benchmark moe-bf16-benchmark --param_cols="num_experts,max_tokens_per_expert,K,N" --bgroup vllm
+          python transform_results.py \
+            $REPORTS/moe-gemm-performance.csv \
+            $REPORTS/moe-gemm-report.csv \
+            --tag $TAG \
+            --bgroup vllm \
+            --benchmark moe-bf16-benchmark \
+            --param_cols="num_experts,max_tokens_per_expert,K,N"


-      - name: Run vllm benchmarks fp8
-        if: ${{ steps.install-benchmarks.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'vllm')) }}
+      - name: Run vllm batched moe fp8
+        if: ${{ steps.install-vllm.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'vllm')) }}
         run: |
           source ./scripts/capture-hw-details.sh

           cd benchmarks/third_party/vllm
           FP8="1" python batched_moe_benchmark.py --reports $REPORTS
-          python transform_results.py $REPORTS/moe-gemm-performance.csv $REPORTS/moe-gemm-fp8-report.csv --tag $TAG --benchmark moe-fp8-benchmark --param_cols="num_experts,max_tokens_per_expert,K,N" --bgroup vllm
+          python transform_results.py \
+            $REPORTS/moe-gemm-performance.csv \
+            $REPORTS/moe-gemm-fp8-report.csv \
+            --tag $TAG \
+            --bgroup vllm \
+            --benchmark moe-fp8-benchmark \
+            --param_cols="num_experts,max_tokens_per_expert,K,N"


       - name: Run Liger-Kernel benchmarks
@@ -136,7 +181,10 @@ jobs:
           bash benchmarks/third_party/liger/run_benchmarks.sh || RET_CODE=$?

           cp Liger-Kernel/benchmark/data/all_benchmark_data.csv $REPORTS/liger-raw.csv
-          python benchmarks/third_party/liger/transform.py $REPORTS/liger-raw.csv $REPORTS/liger-report.csv --tag $TAG
+          python benchmarks/third_party/liger/transform.py \
+            $REPORTS/liger-raw.csv \
+            $REPORTS/liger-report.csv \
+            --tag $TAG

           # Return the captured return code at the end
           exit "$RET_CODE"