Fix: quantize custom ops only if int8 or fp8 precision is indicated

gcunhase · gcunhase · commit 505f7159cd25 · 2025-09-05T14:49:23.000-04:00
Signed-off-by: Gwena Cunha <gwena.cs@gmail.com>
diff --git a/modelopt/onnx/trt_utils.py b/modelopt/onnx/trt_utils.py
@@ -416,9 +416,10 @@ def interpret_trt_plugins_precision_flag(
             # Will add Q/DQ nodes in the requested I/O indices
             inp_precision_quant = [i for i, p in enumerate(inp_precision) if p in ["int8", "fp8"]]
             out_precision_quant = [i for i, p in enumerate(out_precision) if p in ["int8", "fp8"]]
-            custom_ops_to_quantize[op_type] = {
-                "inp": inp_precision_quant,
-                "out": out_precision_quant,
-            }
+            if inp_precision_quant or out_precision_quant:
+                custom_ops_to_quantize[op_type] = {
+                    "inp": inp_precision_quant,
+                    "out": out_precision_quant,
+                }
 
     return custom_ops_to_cast, custom_ops_to_quantize