coderabbitai fixes

i-riyad · i-riyad · commit 9e7cd86c7f77 · 2025-09-09T01:29:21.000-07:00
Signed-off-by: Riyad Islam <rislam@nvidia.com>
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -5,10 +5,12 @@ Model Optimizer Changelog (Linux)
 ^^^^^^^^^^^^^^^^^
 
 **Deprecations**
+- Deprecated ``quantize_mode`` argument in ``examples/onnx_ptq/evaluate.py`` to support strong typing. Use ``engine_precision`` instead.
 
 **Bug Fixes**
 
 **New Features**
+- ``high_precision_dtype`` defaults to FP16 in ONNX quantization, i.e. quantized output model weights are now FP16 by default.
 
 0.35 (2025-09-04)
 ^^^^^^^^^^^^^^^^^
diff --git a/modelopt/onnx/quantization/qdq_utils.py b/modelopt/onnx/quantization/qdq_utils.py
@@ -527,11 +527,13 @@ def _get_successive_consumers(
         raise ValueError(f"Invalid consumer for {node.name}")
 
     quantized_node = tensor_consumers.get(dq_node.output[0], [None])[0]
-    if quantized_node.op_type == "Cast":
-        quantized_node = tensor_consumers.get(quantized_node.output[0], [None])[0]
-
     if not quantized_node:
         raise ValueError(f"No consumer found for {dq_node.name}")
+    if quantized_node.op_type == "Cast":
+        next_node = tensor_consumers.get(quantized_node.output[0], [None])[0]
+        if not next_node:
+            raise ValueError(f"No consumer found after Cast for {quantized_node.name}")
+        quantized_node = next_node
 
     return dq_node, quantized_node
 
diff --git a/modelopt/torch/_deploy/_runtime/tensorrt/tensorrt_utils.py b/modelopt/torch/_deploy/_runtime/tensorrt/tensorrt_utils.py
@@ -24,7 +24,7 @@
 from modelopt.onnx.utils import get_batch_size
 from modelopt.onnx.utils import get_input_names as get_onnx_input_names
 
-from .constants import TENSORRT_8_MAJOR_VERSION, TRTMode
+from .constants import TENSORRT_8_MAJOR_VERSION
 
 
 def is_trt8():
@@ -131,11 +131,6 @@ def get_output_shapes(
     return output_shapes
 
 
-def validate_precision(precision: str) -> bool:
-    """Returns whether an input precision is in supported set."""
-    return precision in [TRTMode.FLOAT32, TRTMode.FLOAT16, TRTMode.INT8]
-
-
 def calib_data_generator(onnx_bytes: bytes, input_tensors: list[np.ndarray]):
     """The calibation data generator that yields calibration feed_dict to tensorrt."""
     input_names = get_onnx_input_names(onnx.load_from_string(onnx_bytes))
diff --git a/tests/_test_utils/onnx_quantization/utils.py b/tests/_test_utils/onnx_quantization/utils.py
@@ -20,11 +20,11 @@ def _assert_nodes_are_quantized(nodes):
     for node in nodes:
         for inp_idx, inp in enumerate(node.inputs):
             if isinstance(inp, gs.Variable):
-                qnode = node
-                # After quantization, the quantized node can be casted
-                if qnode.i(inp_idx).op == "Cast":
-                    qnode = qnode.i(inp_idx)
-                assert qnode.i(inp_idx).op == "DequantizeLinear", (
-                    f"Input '{inp.name}' of node '{qnode.name}' is not quantized but should be!"
+                producer = node.i(inp_idx)
+                # Quantized path may include a Cast right after DQ
+                if producer and producer.op == "Cast":
+                    producer = producer.i(0)
+                assert producer and producer.op == "DequantizeLinear", (
+                    f"Input '{inp.name}' of node '{node.name}' is not quantized but should be!"
                 )
     return True