Move phi4mm input_mode warning earlier in main(), above the sparsity check

cjluo-nv · web-flow · commit 2d98b377f0fe · 2025-09-29T08:36:28.000-07:00
Signed-off-by: Chenjie Luo <108829653+cjluo-nv@users.noreply.github.com>
diff --git a/examples/llm_ptq/hf_ptq.py b/examples/llm_ptq/hf_ptq.py
@@ -328,6 +328,11 @@ def main(args):
             model = model.language_model
             model_type = get_model_type(model)
 
+    if model_type == "phi4mm":
+        warnings.warn(
+            "Please set the default input_mode to InputMode.LANGUAGE before quantizing."
+        )
+
     if args.sparsity_fmt != "dense":
         if args.batch_size == 0:
             # Sparse algorithm takes more GPU memory so we reduce the batch_size by 4.
@@ -478,9 +483,6 @@ def main(args):
                 quant_cfg["quant_cfg"]["*audio*"] = {"enable": False}
                 quant_cfg["quant_cfg"]["*image*"] = {"enable": False}
                 quant_cfg["quant_cfg"]["*vision*"] = {"enable": False}
-                warnings.warn(
-                    "Please set the default input_mode to InputMode.LANGUAGE before quantizing."
-                )
 
         if not model_is_already_quantized or calibration_only:
             # Only run single sample for preview