@@ -42,6 +42,27 @@ eval_base_model() {
       --output_path $EVAL_OUTPUT_DIR
   fi
 
+  # If vllm without TP failed, try with expert parallel (useful for MoE models)
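+  # (Expert parallelism shards the MoE expert weights across the GPUs instead
+  # of replicating them on every rank, which can avoid the out-of-memory
+  # failures that a single-rank vllm run hits on large MoE checkpoints.)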
+  if [ $? -ne 0 ]; then
+    echo "Evaluation without tensor_parallel failed, retrying with expert parallel (4 GPUs)..."
+    run 4 lm_eval \
+      --model vllm \
+      --model_args pretrained=$model_id,dtype=auto,max_model_len=$max_model_len,add_bos_token=True,enable_expert_parallel=True \
+      --tasks $eval_task \
+      --batch_size auto \
+      --output_path $EVAL_OUTPUT_DIR
+  fi
+
+  # If expert parallel on 4 GPUs also failed, fall back to 2 GPUs
+  if [ $? -ne 0 ]; then
+    echo "Expert-parallel retry on 4 GPUs failed, retrying with expert parallel on 2 GPUs..."
+    run 2 lm_eval \
+      --model vllm \
+      --model_args pretrained=$model_id,dtype=auto,max_model_len=$max_model_len,add_bos_token=True,enable_expert_parallel=True \
+      --tasks $eval_task \
+      --batch_size auto \
+      --output_path $EVAL_OUTPUT_DIR
+  fi
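+  # Note on the fallback chain: in bash, an `if` whose condition is false (and
+  # that has no else branch) exits with status 0, so each `if [ $? -ne 0 ]` gate
+  # fires only when the immediately preceding attempt actually ran and failed.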
65+
4566 # If vllm failed, try hf
4667 if [ $? -ne 0 ]; then
4768 echo " Evaluation with vllm failed, retrying with hf backend..."
@@ -132,7 +153,7 @@ run_and_eval() {
 }
 
 # W4A16
-run_and_eval "llama3_ddp_example.py" 4 "Meta-Llama-3-8B-Instruct-W4A16-G128-DDP4" "gsm8k" 2048 # .7111 .7127
+# run_and_eval "llama3_ddp_example.py" 4 "Meta-Llama-3-8B-Instruct-W4A16-G128-DDP4" "gsm8k" 2048 # .7111 .7127
 # run_and_eval "llama3_ddp_example.py" 1 "Meta-Llama-3-8B-Instruct-W4A16-G128-DDP1" "gsm8k" 2048 # .702 .702
 
 # run_and_eval "qwen3_vl_8b_gptq_int4_ddp_example.py" 4 "Qwen3-VL-8B-Instruct-GPTQ-W4A16-G128-DDP4" "gsm8k" 2048 # .8514 .8476
@@ -150,31 +171,31 @@ run_and_eval "llama3_ddp_example.py" 4 "Meta-Llama-3-8B-Instruct-W4A16-G128-DDP4
 
 
 # NVFP4
-# run_and_eval "llama3_ddp_nvfp4.py" 4 "Meta-Llama-3-8B-Instruct-GPTQ-NVFP4A16-DDP4" "gsm8k" 2048
-# run_and_eval "llama3_ddp_nvfp4.py" 1 "Meta-Llama-3-8B-Instruct-GPTQ-NVFP4A16-DDP1" "gsm8k" 2048
+run_and_eval "llama3_ddp_nvfp4.py" 4 "Meta-Llama-3-8B-Instruct-GPTQ-NVFP4A16-DDP4" "gsm8k" 2048
+run_and_eval "llama3_ddp_nvfp4.py" 1 "Meta-Llama-3-8B-Instruct-GPTQ-NVFP4A16-DDP1" "gsm8k" 2048
 
-# run_and_eval "qwen3_vl_8b_gptq_nvfp4_ddp_example.py" 4 "Qwen3-VL-8B-Instruct-GPTQ-NVFP4A16-DDP4" "gsm8k" 2048
-# run_and_eval "qwen3_vl_8b_gptq_nvfp4_ddp_example.py" 1 "Qwen3-VL-8B-Instruct-GPTQ-NVFP4A16-DDP1" "gsm8k" 2048
+run_and_eval "qwen3_vl_8b_gptq_nvfp4_ddp_example.py" 4 "Qwen3-VL-8B-Instruct-GPTQ-NVFP4A16-DDP4" "gsm8k" 2048
+run_and_eval "qwen3_vl_8b_gptq_nvfp4_ddp_example.py" 1 "Qwen3-VL-8B-Instruct-GPTQ-NVFP4A16-DDP1" "gsm8k" 2048
 
-# run_and_eval "qwen3_30b_moe_gptq_nvfp4_ddp_example.py" 4 "Qwen3-30B-A3B-GPTQ-NVFP4A16-DDP4" "gsm8k" 2048
-# run_and_eval "qwen3_30b_moe_gptq_nvfp4_ddp_example.py" 1 "Qwen3-30B-A3B-GPTQ-NVFP4A16-DDP1" "gsm8k" 2048
+run_and_eval "qwen3_30b_moe_gptq_nvfp4_ddp_example.py" 4 "Qwen3-30B-A3B-GPTQ-NVFP4A16-DDP4" "gsm8k" 2048
+run_and_eval "qwen3_30b_moe_gptq_nvfp4_ddp_example.py" 1 "Qwen3-30B-A3B-GPTQ-NVFP4A16-DDP1" "gsm8k" 2048
 
-# run_and_eval "llama4_gptq_nvfp4_ddp_example.py" 4 "Llama-4-Scout-17B-16E-Instruct-GPTQ-NVFP4A16-DDP4" "gsm8k" 8192
-# run_and_eval "llama4_gptq_nvfp4_ddp_example.py" 1 "Llama-4-Scout-17B-16E-Instruct-GPTQ-NVFP4A16-DDP1" "gsm8k" 8192
+run_and_eval "llama4_gptq_nvfp4_ddp_example.py" 4 "Llama-4-Scout-17B-16E-Instruct-GPTQ-NVFP4A16-DDP4" "gsm8k" 8192
+run_and_eval "llama4_gptq_nvfp4_ddp_example.py" 1 "Llama-4-Scout-17B-16E-Instruct-GPTQ-NVFP4A16-DDP1" "gsm8k" 8192
 
-# run_and_eval "qwen3_vl_235b_moe_nvfp4_ddp_example.py" 8 "Qwen3-VL-235B-A22B-Instruct-GPTQ-NVFP4A16-DDP8" "gsm8k" 2048
-# run_and_eval "qwen3_vl_235b_moe_nvfp4_ddp_example.py" 1 "Qwen3-VL-235B-A22B-Instruct-GPTQ-NVFP4A16-DDP1" "gsm8k" 2048
+run_and_eval "qwen3_vl_235b_moe_nvfp4_ddp_example.py" 8 "Qwen3-VL-235B-A22B-Instruct-GPTQ-NVFP4A16-DDP8" "gsm8k" 2048
+run_and_eval "qwen3_vl_235b_moe_nvfp4_ddp_example.py" 1 "Qwen3-VL-235B-A22B-Instruct-GPTQ-NVFP4A16-DDP1" "gsm8k" 2048
 
 
 # Base model evaluations
 echo "============================================"
 echo "Starting base model evaluations"
 echo "============================================"
 
-# eval_base_model "meta-llama/Meta-Llama-3-8B-Instruct" "gsm8k" 2048
-# eval_base_model "Qwen/Qwen3-VL-8B-Instruct" "gsm8k" 2048
-# eval_base_model "Qwen/Qwen3-30B-A3B" "gsm8k" 2048
-# eval_base_model "meta-llama/Llama-4-Scout-17B-16E-Instruct" "gsm8k" 8192
+# eval_base_model "meta-llama/Meta-Llama-3-8B-Instruct" "gsm8k" 2048 # 0.7513 0.7536
+# eval_base_model "Qwen/Qwen3-VL-8B-Instruct" "gsm8k" 2048 # 0.8560 0.8347
+# eval_base_model "Qwen/Qwen3-30B-A3B" "gsm8k" 2048 # 0.8484 0.8916
+eval_base_model "meta-llama/Llama-4-Scout-17B-16E-Instruct" "gsm8k" 8192
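+# For reference, each eval_base_model call expands to roughly the following
+# (assuming `run N` is this script's launcher helper that exposes N GPUs):
+#   lm_eval --model vllm \
+#     --model_args pretrained=meta-llama/Llama-4-Scout-17B-16E-Instruct,dtype=auto,max_model_len=8192,add_bos_token=True \
+#     --tasks gsm8k --batch_size auto --output_path $EVAL_OUTPUT_DIR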
 
 echo "============================================"
 echo "All runs complete!"