Skip to content

Commit 4b6b388

Browse files
committed
update
Signed-off-by: Zhiyu Cheng <[email protected]>
1 parent a11b8bc commit 4b6b388

File tree

2 files changed

+3
-6
lines changed

2 files changed

+3
-6
lines changed

examples/llm_ptq/hf_ptq.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -211,9 +211,6 @@ def main(args):
211211
random.seed(RAND_SEED)
212212
np.random.seed(RAND_SEED)
213213

214-
# Detect if this is a Nemotron VL model
215-
is_nemotron_vl = "nemotron" in args.pyt_ckpt_path.lower() and "vl" in args.pyt_ckpt_path.lower()
216-
217214
# launch a memory monitor to read the currently used GPU memory.
218215
launch_memory_monitor()
219216

@@ -288,6 +285,9 @@ def main(args):
288285

289286
full_model = model
290287

288+
# Detect if this is a Nemotron VL model using model-based detection
289+
is_nemotron_vl = is_multimodal_model(full_model) and "nemotron" in args.pyt_ckpt_path.lower()
290+
291291
if model_type == "mllama":
292292
processor = get_processor(
293293
args.pyt_ckpt_path,

modelopt/torch/export/unified_export_hf.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,6 @@ def _output_hook(module, input, output):
150150
fake_input = torch.ones(
151151
[1, model.config.num_mel_bins, feature_extractor.nb_max_frames], dtype=model.dtype
152152
).to(model.device)
153-
elif is_vl_model:
154-
# For VL models, run optimization on language model component only
155-
print("Detected VL model during export - optimizing language model component")
156153

157154
# Run forward pass so that all modules sharing the same input are collected using forward hook.
158155

0 commit comments

Comments (0)