Update run_benchmark.sh: set extra_model_args in the vllm (else) branch of run_benchmark

mengniwang95 · web-flow · commit 11f5c5e9fcf7 · 2026-01-30T19:08:41.000+08:00
diff --git a/examples/pytorch/multimodal-modeling/quantization/auto_round/llama4/run_benchmark.sh b/examples/pytorch/multimodal-modeling/quantization/auto_round/llama4/run_benchmark.sh
@@ -67,6 +67,7 @@ function run_benchmark {
 	    extra_model_args="max_model_len=66000,gpu_memory_utilization=0.7"
     else
         model="vllm"
+		extra_model_args="max_model_len=8192,max_num_seqs=1024,max_gen_toks=2048,gpu_memory_utilization=0.7"
     fi
 
     if [[ "${kv_cache_dtype}" == "fp8" ]]; then