
Commit ae915ea

[OMNIML-2493] AutoCast: Configure target opset from CLI (#519)
## What does this PR do?

New feature.

**Overview:** AutoCast: Allow configuring target opset from CLI and main API.

## Usage

```bash
python -m modelopt.onnx.autocast --onnx_path model.onnx --opset 22
```

## Testing

```bash
pytest tests/unit/onnx/autocast/test_autocast.py -k test_opset
```

## Before your PR is "*Ready for review*"

- **Make sure you read and follow [Contributor guidelines](https://github.com/NVIDIA/TensorRT-Model-Optimizer/blob/main/CONTRIBUTING.md)** and your commits are signed.
- **Is this change backward compatible?**: Yes
- **Did you write any new necessary tests?**: Yes
- **Did you add or update any necessary documentation?**: Yes
- **Did you update [Changelog](https://github.com/NVIDIA/TensorRT-Model-Optimizer/blob/main/CHANGELOG.rst)?**: No - minor API addition

## Additional Information

Signed-off-by: Gal Hubara Agam <[email protected]>
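For reference, here is the Python API counterpart of the CLI usage above. This is a minimal sketch based on the `convert_to_mixed_precision` signature touched in this commit; the file paths are illustrative:

```python
import onnx

from modelopt.onnx.autocast import convert_to_mixed_precision

# Mirrors `python -m modelopt.onnx.autocast --onnx_path model.onnx --opset 22`,
# but through the main API, using the new `opset` argument.
converted_model = convert_to_mixed_precision(
    onnx_path="model.onnx",  # illustrative input path
    low_precision_type="fp16",
    opset=22,  # target opset; may still be raised if certain ops require a higher version
)

# Save the converted model with the standard ONNX API.
onnx.save(converted_model, "model_fp16.onnx")
```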
1 parent 8cf516e commit ae915ea

File tree

5 files changed (+176 -4 lines changed):

- docs/source/guides/8_autocast.rst
- modelopt/onnx/autocast/__main__.py
- modelopt/onnx/autocast/convert.py
- modelopt/onnx/autocast/logging_config.py
- tests/unit/onnx/autocast/test_autocast.py

docs/source/guides/8_autocast.rst

Lines changed: 22 additions & 1 deletion

```diff
@@ -41,6 +41,7 @@ AutoCast can also be used programmatically through its Python API:
         providers=["cpu"], # list of Execution Providers for ONNX-Runtime backend
         trt_plugins=[], # list of TensorRT plugin library paths in .so format
         max_depth_of_reduction=None, # maximum depth of reduction allowed in low precision
+        opset=None, # optional target ONNX opset version (default: 13 for fp16, 22 for bf16)
     )

     # Save the converted model
@@ -55,7 +56,7 @@ AutoCast follows these steps to convert a model:

    - Loads the ONNX model
    - Performs graph sanitization and optimizations
-   - Ensures minimum opset version requirements (22 for BF16, 13 for FP16)
+   - Ensures minimum opset version requirements (22 for BF16, 13 for FP16 by default, or user-specified via ``--opset``)

 #. **Node Classification**:

@@ -135,6 +136,14 @@ Best Practices
    - To also enable the CUDA execution provider, use ``--providers cpu cuda:x``, where ``x`` is your device ID (``x=0`` if your system only has 1 GPU).
    - Use ``--trt_plugins`` to provide the paths to the necessary TensorRT plugin libraries (in ``.so`` format).

+#. **Opset Version Control**
+
+   - Use ``--opset`` to specify a target ONNX opset version for the converted model.
+   - If not specified, AutoCast keeps the existing model's opset, subject to a minimum opset based on precision type (13 for FP16, 22 for BF16).
+   - A warning will be issued if you specify an opset lower than the recommended minimum.
+   - A warning will be issued if you specify an opset lower than the original model's opset, as downgrading opset versions may cause compatibility issues.
+   - The opset may be automatically increased beyond your specified value if certain operations require it (e.g., quantization nodes require opset >= 19).
+
 Limitations and Restrictions
 ----------------------------
 - AutoCast does not yet support quantized models.
@@ -176,3 +185,15 @@ Limit depth of reduction for precision-sensitive operations:
 .. code-block:: bash

    python -m modelopt.onnx.autocast --onnx_path model.onnx --max_depth_of_reduction 1024
+
+Specify a target opset version:
+
+.. code-block:: bash
+
+   python -m modelopt.onnx.autocast --onnx_path model.onnx --opset 19
+
+Convert to BF16 with a specific opset:
+
+.. code-block:: bash
+
+   python -m modelopt.onnx.autocast --onnx_path model.onnx --low_precision_type bf16 --opset 22
```
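As a Python-API companion to the CLI examples added above, here is a small sketch (not part of the commit) showing how to confirm the effective opset of the converted model; `onnx_utils.get_opset_version` is the same helper the new tests use for this check, and the model path is illustrative:

```python
import modelopt.onnx.utils as onnx_utils
from modelopt.onnx.autocast import convert_to_mixed_precision

# Request opset 19; AutoCast may raise it further if certain ops demand a higher version.
converted = convert_to_mixed_precision(
    onnx_path="model.onnx", low_precision_type="fp16", opset=19
)
print("effective opset:", onnx_utils.get_opset_version(converted))  # expected to be >= 19
```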

modelopt/onnx/autocast/__main__.py

Lines changed: 11 additions & 0 deletions

```diff
@@ -175,6 +175,16 @@ def get_parser() -> argparse.ArgumentParser:
             "For example: op_type_1:fp16 op_type_2:[fp16,fp32]:[fp16]."
         ),
     )
+    parser.add_argument(
+        "--opset",
+        type=int,
+        help=(
+            "Target ONNX opset version for the converted model. If not specified, uses default minimum opset "
+            "based on precision type (22 for bf16, 13 for fp16). Note: BF16 requires opset >= 22 for full support. "
+            "The opset may be automatically increased if certain operations (e.g., quantization nodes) require a "
+            "higher version."
+        ),
+    )

     return parser

@@ -207,6 +217,7 @@ def main(argv=None):
         trt_plugins=args.trt_plugins,
         trt_plugins_precision=args.trt_plugins_precision,
         max_depth_of_reduction=args.max_depth_of_reduction,
+        opset=args.opset,
     )

     output_path = args.output_path
```

modelopt/onnx/autocast/convert.py

Lines changed: 29 additions & 1 deletion

```diff
@@ -60,6 +60,7 @@ def convert_to_mixed_precision(
     trt_plugins: list[str] = [],
     trt_plugins_precision: list[str] = [],
     max_depth_of_reduction: int | None = None,
+    opset: int | None = None,
 ) -> onnx.ModelProto:
     """Convert model to mixed precision.

@@ -81,6 +82,9 @@ def convert_to_mixed_precision(
         trt_plugins: List of TensorRT plugin library paths in .so format (compiled shared library).
         trt_plugins_precision: List indicating the precision for each custom op.
         max_depth_of_reduction: Maximum depth of reduction for node classification.
+        opset: Target ONNX opset version. If None, uses default minimum opset based on low_precision_type
+            (22 for bf16, 13 for fp16). The opset may be automatically increased if certain operations
+            require a higher version.

     Returns:
         onnx.ModelProto: The converted mixed precision model.
@@ -89,10 +93,34 @@ def convert_to_mixed_precision(
     model = onnx.load(onnx_path, load_external_data=True)
     assert low_precision_type in ["fp16", "bf16"], "low_precision_type must be either fp16 or bf16"

+    # Get original model's opset version
+    original_opset = onnx_utils.get_opset_version(model)
+
     # Apply graph sanitization and optimizations
     # Opsets < 22 have a very limited support for bfloat16
     # Otherwise, prefer to keep the original opset version unless it's very old
-    min_opset = 22 if low_precision_type == "bf16" else 13
+    if opset is not None:
+        min_opset = opset
+        # Validate opset compatibility
+        if low_precision_type == "bf16" and opset < 22:
+            logger.warning(
+                f"Opset {opset} has limited BF16 support. Recommended minimum opset is 22. "
+                "The conversion may fail or produce unexpected results."
+            )
+        elif low_precision_type == "fp16" and opset < 13:
+            logger.warning(
+                f"Opset {opset} has limited FP16 support. Recommended minimum opset is 13. "
+                "The conversion may fail or produce unexpected results."
+            )
+        # Warn if user-specified opset is lower than original
+        if opset < original_opset:
+            logger.warning(
+                f"Specified opset {opset} is lower than the original model's opset {original_opset}. "
+                "Downgrading opset version may cause compatibility issues or conversion failures."
+            )
+    else:
+        min_opset = 22 if low_precision_type == "bf16" else 13
+
     graph_sanitizer = GraphSanitizer(
         model,
         min_opset,
```
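To summarize the control flow added above, here is a condensed, standalone sketch of how the effective minimum opset is chosen. `choose_min_opset` is a hypothetical helper written only for illustration and is not part of the commit:

```python
import logging

logger = logging.getLogger(__name__)


def choose_min_opset(low_precision_type: str, opset: int | None, original_opset: int) -> int:
    """Illustrative reduction of the opset selection logic in convert_to_mixed_precision."""
    default_min = 22 if low_precision_type == "bf16" else 13  # precision-based minimum
    if opset is None:
        # No user override: fall back to the precision-based default.
        return default_min
    if opset < default_min:
        logger.warning("Opset %d has limited %s support.", opset, low_precision_type.upper())
    if opset < original_opset:
        logger.warning(
            "Specified opset %d is lower than the original model's opset %d.", opset, original_opset
        )
    # The user's value is kept; GraphSanitizer may still raise it later
    # (e.g., quantization nodes require opset >= 19).
    return opset
```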

modelopt/onnx/autocast/logging_config.py

Lines changed: 4 additions & 2 deletions

```diff
@@ -65,8 +65,10 @@ def configure_logging(level=logging.INFO, log_file=None):
         except Exception as e:
             logger.error(f"Failed to setup file logging to {log_file}: {e!s}")

-    # Prevent log messages from propagating to the root logger
-    logger.propagate = False
+    # Allow log messages to propagate to the root logger for testing compatibility
+    # This enables pytest's caplog fixture to capture logs while still maintaining
+    # our custom formatting through the handlers above
+    logger.propagate = True

     # Ensure all child loggers inherit the level setting
     for name in logging.root.manager.loggerDict:
```
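The `logger.propagate = True` change above is what lets pytest's `caplog` fixture, which collects records through a handler installed on the root logger, see AutoCast's warnings in the new tests. A minimal sketch of that pattern follows; the logger name is assumed for illustration:

```python
import logging


def test_warning_reaches_caplog(caplog):
    # Assumed logger name for illustration; any child logger with propagate=True behaves the same.
    logger = logging.getLogger("modelopt.onnx.autocast")
    logger.propagate = True  # with propagate=False, caplog.text would stay empty

    with caplog.at_level(logging.WARNING):
        logger.warning("Opset 11 has limited FP16 support.")

    assert "limited FP16 support" in caplog.text
```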

tests/unit/onnx/autocast/test_autocast.py

Lines changed: 110 additions & 0 deletions

```diff
@@ -25,6 +25,7 @@
 import modelopt.onnx.autocast.utils as utils
 import modelopt.onnx.utils as onnx_utils
 from modelopt.onnx.autocast import convert_to_mixed_precision
+from modelopt.onnx.autocast.__main__ import get_parser, main
 from modelopt.onnx.autocast.logging_config import configure_logging

 configure_logging("DEBUG")
@@ -187,3 +188,112 @@ def test_conv_isinf_conversion(tmp_path, opset_version):
     opset_version = onnx_utils.get_opset_version(converted_model)
     supported_dtype = "float32" if opset_version < 20 else "float16"
     assert assert_input_precision(isinf_nodes, dtype=supported_dtype)
+
+
+@pytest.mark.parametrize("target_opset", [13, 17, 19, 21])
+def test_opset_parameter(temp_model_path, target_opset):
+    """Test that the opset parameter correctly sets the output model's opset version."""
+    # Convert with specific opset
+    converted_model = convert_to_mixed_precision(
+        onnx_path=temp_model_path, low_precision_type="fp16", opset=target_opset
+    )
+
+    # Verify the output model has the correct opset
+    output_opset = onnx_utils.get_opset_version(converted_model)
+    assert output_opset >= target_opset, f"Expected opset >= {target_opset}, got {output_opset}"
+
+    # Validate the model
+    onnx.checker.check_model(converted_model)
+
+
+def test_opset_fp16_warning(temp_model_path, caplog):
+    """Test that a warning is issued when using fp16 with opset < 13."""
+    # Convert with fp16 and very low opset
+    converted_model = convert_to_mixed_precision(
+        onnx_path=temp_model_path, low_precision_type="fp16", opset=11
+    )
+
+    # Check that a warning was logged
+    assert "limited FP16 support" in caplog.text, (
+        "Expected warning about FP16 support with low opset"
+    )
+    assert "Recommended minimum opset is 13" in caplog.text
+
+    # Model should still be created
+    assert isinstance(converted_model, onnx.ModelProto)
+
+
+def test_opset_bf16_warning(temp_model_path, caplog):
+    """Test that a warning is issued when using bf16 with opset < 22."""
+    # Convert with bf16 and low opset
+    converted_model = convert_to_mixed_precision(
+        onnx_path=temp_model_path, low_precision_type="bf16", opset=13
+    )
+
+    # Check that a warning was logged
+    assert "limited BF16 support" in caplog.text, (
+        "Expected warning about BF16 support with low opset"
+    )
+    assert "Recommended minimum opset is 22" in caplog.text
+
+    # Model should still be created
+    assert isinstance(converted_model, onnx.ModelProto)
+
+
+def test_opset_downgrade_warning(temp_model_path, caplog):
+    """Test that a warning is issued when specified opset is lower than original model's opset."""
+    # temp_model_path fixture creates a model with opset 20
+    # Convert with lower opset
+    converted_model = convert_to_mixed_precision(
+        onnx_path=temp_model_path, low_precision_type="fp16", opset=13
+    )
+
+    # Check that a warning was logged about downgrading
+    assert "lower than the original model's opset" in caplog.text, (
+        "Expected warning about downgrading opset"
+    )
+
+    # Model should still be created
+    assert isinstance(converted_model, onnx.ModelProto)
+
+
+def test_opset_cli_argument(temp_model_path, tmp_path):
+    """Test that the --opset CLI argument is properly parsed and used."""
+    # Test the CLI with opset argument
+    output_path = tmp_path / "test_output.onnx"
+    args = [
+        "--onnx_path",
+        temp_model_path,
+        "--output_path",
+        str(output_path),
+        "--opset",
+        "21",
+        "--low_precision_type",
+        "fp16",
+    ]
+
+    result_model = main(args)
+
+    # Verify the output model has the correct opset
+    output_opset = onnx_utils.get_opset_version(result_model)
+    assert output_opset >= 21, f"Expected opset >= 21, got {output_opset}"
+
+    # Verify the file was created
+    assert output_path.exists()
+
+    # Load and validate the saved model
+    saved_model = onnx.load(str(output_path))
+    onnx.checker.check_model(saved_model)
+
+
+def test_opset_parser_argument():
+    """Test that the parser correctly accepts the --opset argument."""
+    parser = get_parser()
+
+    # Test parsing with opset
+    args = parser.parse_args(["--onnx_path", "test.onnx", "--opset", "19"])
+    assert args.opset == 19
+
+    # Test parsing without opset (should be None)
+    args = parser.parse_args(["--onnx_path", "test.onnx"])
+    assert args.opset is None
```
