diff --git a/examples/llm_ptq/example_utils.py b/examples/llm_ptq/example_utils.py
index 3ac167db..4167147b 100755
--- a/examples/llm_ptq/example_utils.py
+++ b/examples/llm_ptq/example_utils.py
@@ -56,7 +56,7 @@ def get_tokenizer(ckpt_path, trust_remote_code=False, **kwargs):
         ckpt_path, trust_remote_code=trust_remote_code, **kwargs
     )
 
-    if "qwen" in type(tokenizer).__name__.lower():
+    if "qwen" in type(tokenizer).__name__.lower() and "vl" not in ckpt_path.lower():
         # qwen use token id 151643 as pad and eos tokens
         tokenizer.pad_token = tokenizer.convert_ids_to_tokens(151643)
         tokenizer.eos_token = tokenizer.convert_ids_to_tokens(151643)
diff --git a/examples/vlm_ptq/scripts/huggingface_example.sh b/examples/vlm_ptq/scripts/huggingface_example.sh
index 9bab141d..6578266f 100755
--- a/examples/vlm_ptq/scripts/huggingface_example.sh
+++ b/examples/vlm_ptq/scripts/huggingface_example.sh
@@ -145,6 +145,10 @@ case "${MODEL_TYPE}" in
         VISUAL_FEATURE=576
         VLM_ARGS=" --max_multimodal_len=$((BUILD_MAX_BATCH_SIZE * VISUAL_FEATURE)) "
         ;;
+    "qwen")
+        VISUAL_FEATURE=1280
+        VLM_ARGS=" --max_multimodal_len=$((BUILD_MAX_BATCH_SIZE * VISUAL_FEATURE)) "
+        ;;
     "mllama")
        PTQ_ARGS+=" --kv_cache_qformat none "
        VLM_ARGS=" --max_encoder_input_len=6404 --skip_run"
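
Note on the tokenizer change: the new `and "vl" not in ckpt_path.lower()` condition restricts the pad/eos override (token id 151643) to Qwen text-only checkpoints, leaving Qwen-VL tokenizers with their own special-token configuration. A minimal sketch of the guard's behavior follows; it is illustrative only and not part of the patch, and the helper name and checkpoint paths are hypothetical:

```python
# Illustrative sketch (not part of the patch). The helper name and the
# checkpoint paths are hypothetical, chosen only to show the guard's intent.
def should_override_special_tokens(tokenizer_cls_name: str, ckpt_path: str) -> bool:
    # Override pad/eos with token id 151643 only for Qwen text models;
    # Qwen-VL checkpoints are skipped so their own special tokens are kept.
    return "qwen" in tokenizer_cls_name.lower() and "vl" not in ckpt_path.lower()

assert should_override_special_tokens("QWenTokenizer", "Qwen/Qwen-7B")
assert not should_override_special_tokens("Qwen2Tokenizer", "Qwen/Qwen2-VL-7B-Instruct")
```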