Skip to content

Commit dc55eb0

Browse files
committed
more testing
Summary Signed-off-by: HDCharles <charlesdavidhernandez@gmail.com>
1 parent 610d1c7 commit dc55eb0

File tree

1 file changed

+36
-15
lines changed
  • examples/quantization_w4a16

1 file changed

+36
-15
lines changed

examples/quantization_w4a16/run.sh

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,27 @@ eval_base_model() {
4242
--output_path $EVAL_OUTPUT_DIR
4343
fi
4444

45+
# If vllm without TP failed, try with expert parallel (useful for MoE models)
46+
if [ $? -ne 0 ]; then
47+
echo "Evaluation without tensor_parallel failed, retrying with expert parallel..."
48+
run 4 lm_eval \
49+
--model vllm \
50+
--model_args pretrained=$model_id,dtype=auto,max_model_len=$max_model_len,add_bos_token=True,enable_expert_parallel=True \
51+
--tasks $eval_task \
52+
--batch_size auto \
53+
--output_path $EVAL_OUTPUT_DIR
54+
fi
55+
56+
if [ $? -ne 0 ]; then
57+
echo "Evaluation with expert parallel on 4 GPUs failed, retrying with 2 GPUs..."
58+
run 2 lm_eval \
59+
--model vllm \
60+
--model_args pretrained=$model_id,dtype=auto,max_model_len=$max_model_len,add_bos_token=True,enable_expert_parallel=True \
61+
--tasks $eval_task \
62+
--batch_size auto \
63+
--output_path $EVAL_OUTPUT_DIR
64+
fi
65+
4566
# If vllm failed, try hf
4667
if [ $? -ne 0 ]; then
4768
echo "Evaluation with vllm failed, retrying with hf backend..."
@@ -132,7 +153,7 @@ run_and_eval() {
132153
}
133154

134155
# W4A16
135-
run_and_eval "llama3_ddp_example.py" 4 "Meta-Llama-3-8B-Instruct-W4A16-G128-DDP4" "gsm8k" 2048 # .7111 .7127
156+
# run_and_eval "llama3_ddp_example.py" 4 "Meta-Llama-3-8B-Instruct-W4A16-G128-DDP4" "gsm8k" 2048 # .7111 .7127
136157
# run_and_eval "llama3_ddp_example.py" 1 "Meta-Llama-3-8B-Instruct-W4A16-G128-DDP1" "gsm8k" 2048 # .702 .702
137158

138159
# run_and_eval "qwen3_vl_8b_gptq_int4_ddp_example.py" 4 "Qwen3-VL-8B-Instruct-GPTQ-W4A16-G128-DDP4" "gsm8k" 2048 # .8514 .8476
@@ -150,31 +171,31 @@ run_and_eval "llama3_ddp_example.py" 4 "Meta-Llama-3-8B-Instruct-W4A16-G128-DDP4
150171

151172

152173
# NVFP4
153-
# run_and_eval "llama3_ddp_nvfp4.py" 4 "Meta-Llama-3-8B-Instruct-GPTQ-NVFP4A16-DDP4" "gsm8k" 2048
154-
# run_and_eval "llama3_ddp_nvfp4.py" 1 "Meta-Llama-3-8B-Instruct-GPTQ-NVFP4A16-DDP1" "gsm8k" 2048
174+
run_and_eval "llama3_ddp_nvfp4.py" 4 "Meta-Llama-3-8B-Instruct-GPTQ-NVFP4A16-DDP4" "gsm8k" 2048
175+
run_and_eval "llama3_ddp_nvfp4.py" 1 "Meta-Llama-3-8B-Instruct-GPTQ-NVFP4A16-DDP1" "gsm8k" 2048
155176

156-
# run_and_eval "qwen3_vl_8b_gptq_nvfp4_ddp_example.py" 4 "Qwen3-VL-8B-Instruct-GPTQ-NVFP4A16-DDP4" "gsm8k" 2048
157-
# run_and_eval "qwen3_vl_8b_gptq_nvfp4_ddp_example.py" 1 "Qwen3-VL-8B-Instruct-GPTQ-NVFP4A16-DDP1" "gsm8k" 2048
177+
run_and_eval "qwen3_vl_8b_gptq_nvfp4_ddp_example.py" 4 "Qwen3-VL-8B-Instruct-GPTQ-NVFP4A16-DDP4" "gsm8k" 2048
178+
run_and_eval "qwen3_vl_8b_gptq_nvfp4_ddp_example.py" 1 "Qwen3-VL-8B-Instruct-GPTQ-NVFP4A16-DDP1" "gsm8k" 2048
158179

159-
# run_and_eval "qwen3_30b_moe_gptq_nvfp4_ddp_example.py" 4 "Qwen3-30B-A3B-GPTQ-NVFP4A16-DDP4" "gsm8k" 2048
160-
# run_and_eval "qwen3_30b_moe_gptq_nvfp4_ddp_example.py" 1 "Qwen3-30B-A3B-GPTQ-NVFP4A16-DDP1" "gsm8k" 2048
180+
run_and_eval "qwen3_30b_moe_gptq_nvfp4_ddp_example.py" 4 "Qwen3-30B-A3B-GPTQ-NVFP4A16-DDP4" "gsm8k" 2048
181+
run_and_eval "qwen3_30b_moe_gptq_nvfp4_ddp_example.py" 1 "Qwen3-30B-A3B-GPTQ-NVFP4A16-DDP1" "gsm8k" 2048
161182

162-
# run_and_eval "llama4_gptq_nvfp4_ddp_example.py" 4 "Llama-4-Scout-17B-16E-Instruct-GPTQ-NVFP4A16-DDP4" "gsm8k" 8192
163-
# run_and_eval "llama4_gptq_nvfp4_ddp_example.py" 1 "Llama-4-Scout-17B-16E-Instruct-GPTQ-NVFP4A16-DDP1" "gsm8k" 8192
183+
run_and_eval "llama4_gptq_nvfp4_ddp_example.py" 4 "Llama-4-Scout-17B-16E-Instruct-GPTQ-NVFP4A16-DDP4" "gsm8k" 8192
184+
run_and_eval "llama4_gptq_nvfp4_ddp_example.py" 1 "Llama-4-Scout-17B-16E-Instruct-GPTQ-NVFP4A16-DDP1" "gsm8k" 8192
164185

165-
# run_and_eval "qwen3_vl_235b_moe_nvfp4_ddp_example.py" 8 "Qwen3-VL-235B-A22B-Instruct-GPTQ-NVFP4A16-DDP8" "gsm8k" 2048
166-
# run_and_eval "qwen3_vl_235b_moe_nvfp4_ddp_example.py" 1 "Qwen3-VL-235B-A22B-Instruct-GPTQ-NVFP4A16-DDP1" "gsm8k" 2048
186+
run_and_eval "qwen3_vl_235b_moe_nvfp4_ddp_example.py" 8 "Qwen3-VL-235B-A22B-Instruct-GPTQ-NVFP4A16-DDP8" "gsm8k" 2048
187+
run_and_eval "qwen3_vl_235b_moe_nvfp4_ddp_example.py" 1 "Qwen3-VL-235B-A22B-Instruct-GPTQ-NVFP4A16-DDP1" "gsm8k" 2048
167188

168189

169190
# Base model evaluations
170191
echo "============================================"
171192
echo "Starting base model evaluations"
172193
echo "============================================"
173194

174-
# eval_base_model "meta-llama/Meta-Llama-3-8B-Instruct" "gsm8k" 2048
175-
# eval_base_model "Qwen/Qwen3-VL-8B-Instruct" "gsm8k" 2048
176-
# eval_base_model "Qwen/Qwen3-30B-A3B" "gsm8k" 2048
177-
# eval_base_model "meta-llama/Llama-4-Scout-17B-16E-Instruct" "gsm8k" 8192
195+
# eval_base_model "meta-llama/Meta-Llama-3-8B-Instruct" "gsm8k" 2048 # 0.7513 0.7536
196+
# eval_base_model "Qwen/Qwen3-VL-8B-Instruct" "gsm8k" 2048 # 0.8560 0.8347
197+
# eval_base_model "Qwen/Qwen3-30B-A3B" "gsm8k" 2048 # 0.8484 0.8916
198+
eval_base_model "meta-llama/Llama-4-Scout-17B-16E-Instruct" "gsm8k" 8192
178199

179200
echo "============================================"
180201
echo "All runs complete!"

0 commit comments

Comments
 (0)