Move phi4_mm warning earlier in main(), right after the model type is determined (NVIDIA#389)

cjluo-nv · web-flow · commit 17439e653df9 · 2025-09-30T18:24:35.000Z
Signed-off-by: Chenjie Luo <108829653+cjluo-nv@users.noreply.github.com>
diff --git a/examples/llm_ptq/hf_ptq.py b/examples/llm_ptq/hf_ptq.py
@@ -328,6 +328,9 @@ def main(args):
             model = model.language_model
             model_type = get_model_type(model)
 
+    if model_type == "phi4mm":
+        warnings.warn("Please set the default input_mode to InputMode.LANGUAGE before quantizing.")
+
     if args.sparsity_fmt != "dense":
         if args.batch_size == 0:
             # Sparse algorithm takes more GPU memory so we reduce the batch_size by 4.
@@ -478,9 +481,6 @@ def main(args):
                 quant_cfg["quant_cfg"]["*audio*"] = {"enable": False}
                 quant_cfg["quant_cfg"]["*image*"] = {"enable": False}
                 quant_cfg["quant_cfg"]["*vision*"] = {"enable": False}
-                warnings.warn(
-                    "Please set the default input_mode to InputMode.LANGUAGE before quantizing."
-                )
 
         if not model_is_already_quantized or calibration_only:
             # Only run single sample for preview