
Commit c0d1661

make vlm detection more robust in ptq workflow
Signed-off-by: Zhiyu Cheng <[email protected]>
1 parent 29b4cf2 commit c0d1661

1 file changed
examples/llm_ptq/hf_ptq.py

Lines changed: 13 additions & 2 deletions
@@ -25,7 +25,9 @@
 from accelerate.hooks import remove_hook_from_module
 from example_utils import apply_kv_cache_quant, get_model, get_processor, get_tokenizer, is_enc_dec
 from transformers import (
+    AutoConfig,
     AutoModelForCausalLM,
+    AutoProcessor,
     PreTrainedTokenizer,
     PreTrainedTokenizerFast,
     WhisperProcessor,
@@ -567,9 +569,18 @@ def output_decode(generated_ids, input_shape):
 
     export_path = args.export_path
 
-    if hasattr(full_model, "language_model"):
+    # Check for VLMs by looking for vision_config in model config or language_model attribute
+    is_vlm = False
+    try:
+        is_vlm = hasattr(full_model.config, "vision_config") or hasattr(
+            full_model, "language_model"
+        )
+    except Exception:
+        # Fallback to the original check if config access fails
+        is_vlm = hasattr(full_model, "language_model")
+
+    if is_vlm:
         # Save original model config and the preprocessor config to the export path for VLMs.
-        from transformers import AutoConfig, AutoProcessor
 
         print(f"Saving original model and processor configs to {export_path}")
 
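For reference, below is a minimal, self-contained sketch (not part of this commit) of the same two signals the new check relies on, written as a standalone helper. The helper name looks_like_vlm and the use of default-constructed LlavaConfig/LlamaConfig for the demonstration are illustrative assumptions, not code from the repository.

# Hedged sketch: mirrors the detection added in hf_ptq.py as a standalone helper.
# The demonstration below only instantiates config objects, so no checkpoint
# download or weight allocation is needed.
from transformers import LlamaConfig, LlavaConfig


def looks_like_vlm(model) -> bool:
    """Heuristic mirror of the check in hf_ptq.py (illustrative, not the repo's API)."""
    try:
        # Signal 1: multimodal configs (e.g. LLaVA-style) carry a vision_config sub-config.
        # Signal 2: some VLM wrappers expose their text backbone as a language_model submodule.
        return hasattr(model.config, "vision_config") or hasattr(model, "language_model")
    except Exception:
        # Fall back to the original attribute-only check if config access fails.
        return hasattr(model, "language_model")


# Config-level demonstration of the vision_config signal:
print(hasattr(LlavaConfig(), "vision_config"))  # True  -> would be treated as a VLM
print(hasattr(LlamaConfig(), "vision_config"))  # False -> plain text-only LLM

The try/except fallback means that even if accessing full_model.config raises for some model class, the export step behaves exactly as before this commit, since the original language_model attribute check is kept as the last resort.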