From 2d98b377f0febf06da21f1deced7c620139d60f8 Mon Sep 17 00:00:00 2001
From: Chenjie Luo <108829653+cjluo-nv@users.noreply.github.com>
Date: Mon, 29 Sep 2025 08:36:28 -0700
Subject: [PATCH 1/2] Move phi4mm input_mode warning earlier in main()

Signed-off-by: Chenjie Luo <108829653+cjluo-nv@users.noreply.github.com>
---
 examples/llm_ptq/hf_ptq.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/examples/llm_ptq/hf_ptq.py b/examples/llm_ptq/hf_ptq.py
index 0ac11f2f5..0ba1e2b3a 100755
--- a/examples/llm_ptq/hf_ptq.py
+++ b/examples/llm_ptq/hf_ptq.py
@@ -328,6 +328,11 @@ def main(args):
             model = model.language_model
             model_type = get_model_type(model)
 
+    if model_type == "phi4mm":
+        warnings.warn(
+            "Please set the default input_mode to InputMode.LANGUAGE before quantizing."
+        )
+
     if args.sparsity_fmt != "dense":
         if args.batch_size == 0:
             # Sparse algorithm takes more GPU memory so we reduce the batch_size by 4.
@@ -478,9 +483,6 @@ def main(args):
                 quant_cfg["quant_cfg"]["*audio*"] = {"enable": False}
                 quant_cfg["quant_cfg"]["*image*"] = {"enable": False}
                 quant_cfg["quant_cfg"]["*vision*"] = {"enable": False}
-                warnings.warn(
-                    "Please set the default input_mode to InputMode.LANGUAGE before quantizing."
-                )
 
         if not model_is_already_quantized or calibration_only:
             # Only run single sample for preview

From 8932e2c125c169c9b32667e23314e9f7ff394c14 Mon Sep 17 00:00:00 2001
From: Chenjie Luo <108829653+cjluo-nv@users.noreply.github.com>
Date: Tue, 30 Sep 2025 10:22:18 -0700
Subject: [PATCH 2/2] Collapse phi4mm warning call onto one line in hf_ptq.py

Signed-off-by: Chenjie Luo <108829653+cjluo-nv@users.noreply.github.com>
---
 examples/llm_ptq/hf_ptq.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/examples/llm_ptq/hf_ptq.py b/examples/llm_ptq/hf_ptq.py
index 0ba1e2b3a..da6761252 100755
--- a/examples/llm_ptq/hf_ptq.py
+++ b/examples/llm_ptq/hf_ptq.py
@@ -329,9 +329,7 @@ def main(args):
             model_type = get_model_type(model)
 
     if model_type == "phi4mm":
-        warnings.warn(
-            "Please set the default input_mode to InputMode.LANGUAGE before quantizing."
-        )
+        warnings.warn("Please set the default input_mode to InputMode.LANGUAGE before quantizing.")
 
     if args.sparsity_fmt != "dense":
         if args.batch_size == 0: