Skip to content

Commit 1ec0af0

Browse files
committed
Added CodeRabbit suggestions
Signed-off-by: gcunhase <[email protected]>
1 parent fe837a8 commit 1ec0af0

File tree

4 files changed

+9
-8
lines changed

modelopt/onnx/quantization/__main__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,8 @@ def get_parser() -> argparse.ArgumentParser:
107107
default=[],
108108
nargs="+",
109109
help=(
110-
"A space-separated list of node types to exclude from FP16 conversion. "
111-
"This is only relevant if '--high_precision_dtype != fp32'."
110+
"A space-separated list of node types to exclude from FP16/BF16 conversion. "
111+
"Relevant when --high_precision_dtype is 'fp16' or 'bf16'."
112112
),
113113
)
114114
argparser.add_argument(

modelopt/onnx/quantization/fp8.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ def quantize(
168168
calibration_eps: list[str] = ["cpu", "cuda:0", "trt"],
169169
op_types_to_quantize: list[str] | None = None,
170170
op_types_to_exclude: list[str] | None = None,
171-
op_types_to_exclude_fp16: list[str] = [],
171+
op_types_to_exclude_fp16: list[str] | None = None,
172172
nodes_to_quantize: list[str] | None = None,
173173
nodes_to_exclude: list[str] | None = None,
174174
use_external_data_format: bool = False,
@@ -319,7 +319,7 @@ def quantize(
319319
onnx_model = convert_to_f16(
320320
onnx_model,
321321
keep_io_types=not direct_io_types,
322-
op_block_list=op_types_to_exclude_fp16,
322+
op_block_list=op_types_to_exclude_fp16 or [],
323323
low_precision_type=high_precision_dtype,
324324
trt_plugins=trt_extra_plugin_lib_paths,
325325
)

modelopt/onnx/quantization/int8.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ def quantize(
119119
calibration_eps: list[str] = ["cpu", "cuda:0", "trt"],
120120
op_types_to_quantize: list[str] | None = None,
121121
op_types_to_exclude: list[str] | None = None,
122-
op_types_to_exclude_fp16: list[str] = [],
122+
op_types_to_exclude_fp16: list[str] | None = None,
123123
nodes_to_quantize: list[str] | None = None,
124124
nodes_to_exclude: list[str] | None = None,
125125
use_external_data_format: bool = False,
@@ -280,7 +280,7 @@ def quantize(
280280
onnx_model = convert_to_f16(
281281
onnx_model,
282282
keep_io_types=not direct_io_types,
283-
op_block_list=op_types_to_exclude_fp16,
283+
op_block_list=op_types_to_exclude_fp16 or [],
284284
low_precision_type=high_precision_dtype,
285285
trt_plugins=trt_extra_plugin_lib_paths,
286286
)

modelopt/onnx/quantization/quantize.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -426,8 +426,9 @@ def quantize(
426426
quantize_mode,
427427
)
428428
trt_plugins = update_trt_ep_support(calibration_eps, has_dds_op, has_custom_op, trt_plugins) # type: ignore[arg-type]
429-
op_types_to_exclude_fp16 = op_types_to_exclude_fp16 or []
430-
op_types_to_exclude_fp16.extend(list(custom_ops_to_cast_fp32.keys()))
429+
op_types_to_exclude_fp16 = list(
430+
dict.fromkeys((op_types_to_exclude_fp16 or []) + list(custom_ops_to_cast_fp32.keys()))
431+
)
431432

432433
# Use random scales if calibration data is not supplied
433434
if calibration_data is None:

0 commit comments

Comments (0)