Fix Qwen 2.5 VL support in the PTQ examples

DylanChen-NV · DylanChen-NV · commit 923c916e5833 · 2025-08-28T04:34:33.000Z
diff --git a/examples/llm_ptq/example_utils.py b/examples/llm_ptq/example_utils.py
@@ -64,7 +64,7 @@ def get_tokenizer(ckpt_path, trust_remote_code=False, **kwargs):
             ckpt_path, trust_remote_code=trust_remote_code, **kwargs
         )
 
-        if "qwen" in type(tokenizer).__name__.lower():
+        if "qwen" in type(tokenizer).__name__.lower() and 'vl' not in ckpt_path.lower():
             # qwen use token id 151643 as pad and eos tokens
             tokenizer.pad_token = tokenizer.convert_ids_to_tokens(151643)
             tokenizer.eos_token = tokenizer.convert_ids_to_tokens(151643)
diff --git a/examples/vlm_ptq/scripts/huggingface_example.sh b/examples/vlm_ptq/scripts/huggingface_example.sh
@@ -30,10 +30,10 @@ for i in $(env | grep ^PMI_ | cut -d"=" -f 1); do unset -v $i; done
 for i in $(env | grep ^PMIX_ | cut -d"=" -f 1); do unset -v $i; done
 
 case $MODEL_TYPE in
-    llava|phi|vila|mllama)
+    llava|phi|vila|mllama|qwen)
         ;;
     *)
-        echo "Unsupported type argument: Expected one of: [llava, phi, vila, mllama]" >&2
+        echo "Unsupported type argument: Expected one of: [llava, phi, vila, mllama, qwen]" >&2
         exit 1
 esac
 
@@ -91,7 +91,7 @@ fi
 
 BUILD_MAX_OUTPUT_LEN=512
 
-if [ "$MODEL_TYPE" = "llava" ] || [ "$MODEL_TYPE" = "vila" ]; then
+if [ "$MODEL_TYPE" = "llava" ] || [ "$MODEL_TYPE" = "vila" ] || [ "$MODEL_TYPE" = "qwen" ]; then
     BUILD_MAX_BATCH_SIZE=20
 else
     BUILD_MAX_BATCH_SIZE=4
@@ -145,6 +145,10 @@ case "${MODEL_TYPE}" in
         VISUAL_FEATURE=576
         VLM_ARGS=" --max_multimodal_len=$((BUILD_MAX_BATCH_SIZE * VISUAL_FEATURE)) "
         ;;
+    "qwen")
+        VISUAL_FEATURE=1280
+        VLM_ARGS=" --max_multimodal_len=$((BUILD_MAX_BATCH_SIZE * VISUAL_FEATURE)) "
+        ;;
     "mllama")
         PTQ_ARGS+=" --kv_cache_qformat none "
         VLM_ARGS=" --max_encoder_input_len=6404 --skip_run"
@@ -182,6 +186,10 @@ if [[ $TASKS =~ "build" ]] || [[ ! -d "$ENGINE_DIR" ]] || [[ ! $(ls -A $ENGINE_D
         echo "Quantized model config $MODEL_CONFIG exists, skipping the quantization stage"
     fi
 
+    if [ "${MODEL_TYPE}" = "qwen" ]; then
+        cp ${MODEL_PATH}/preprocessor_config.json ${SAVE_PATH}
+    fi
+
     if [ $EXPORT_FORMAT != "tensorrt_llm" ]; then
         echo "Please continue deployment with $EXPORT_FORMAT. Checkpoint export_path: $SAVE_PATH"
         exit 0

Original file line number	Diff line number	Diff line change
`@@ -64,7 +64,7 @@ def get_tokenizer(ckpt_path, trust_remote_code=False, **kwargs):`
`64`	`64`	`ckpt_path, trust_remote_code=trust_remote_code, **kwargs`
`65`	`65`	`)`
`66`	`66`
`67`		`- if "qwen" in type(tokenizer).__name__.lower():`
	`67`	`+ if "qwen" in type(tokenizer).__name__.lower() and 'vl' not in ckpt_path.lower():`
`68`	`68`	`# qwen use token id 151643 as pad and eos tokens`
`69`	`69`	`tokenizer.pad_token = tokenizer.convert_ids_to_tokens(151643)`
`70`	`70`	`tokenizer.eos_token = tokenizer.convert_ids_to_tokens(151643)`