[Bug fix 5528642] [Bug fix 5528695] VLM NVBug fix (#355)

yueshen2016 · web-flow · commit 7d5f63641972 · 2025-09-23T09:02:59.000-07:00
Signed-off-by: Yue <yueshen@nvidia.com>
diff --git a/examples/llm_ptq/hf_ptq.py b/examples/llm_ptq/hf_ptq.py
@@ -316,6 +316,7 @@ def main(args):
                     mtq.quantize(child, disabled_quant_cfg, forward_loop=None)
 
             model = model.language_model
+            model_type = get_model_type(model)
 
     if args.sparsity_fmt != "dense":
         if args.batch_size == 0:
diff --git a/examples/vlm_ptq/scripts/huggingface_example.sh b/examples/vlm_ptq/scripts/huggingface_example.sh
@@ -73,7 +73,7 @@ if [ -n "$KV_CACHE_QUANT" ]; then
     PTQ_ARGS+=" --kv_cache_qformat=$KV_CACHE_QUANT "
 fi
 
-if [ "${MODEL_TYPE}" = "vila" ]; then
+if [[ "${MODEL_NAME,,}" == *"vila"* ]]; then
     # Install required dependency for VILA
     pip install -r ../vlm_ptq/requirements-vila.txt
     # Clone original VILA repo