From 797171321bf387e0be510f23b4e949d91ebed6b6 Mon Sep 17 00:00:00 2001 From: gcunhase <4861122+gcunhase@users.noreply.github.com> Date: Wed, 8 Oct 2025 13:48:33 -0400 Subject: [PATCH 1/8] Ensure that the ONNX IR version is the max supported version (10) Signed-off-by: gcunhase <4861122+gcunhase@users.noreply.github.com> --- modelopt/onnx/trt_utils.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/modelopt/onnx/trt_utils.py b/modelopt/onnx/trt_utils.py index e5a5d9a4a..f7262afa4 100644 --- a/modelopt/onnx/trt_utils.py +++ b/modelopt/onnx/trt_utils.py @@ -36,6 +36,8 @@ except ImportError: TRT_PYTHON_AVAILABLE = False +MAX_IR_VERSION = 10 + def get_custom_layers( onnx_path: str | onnx.ModelProto, @@ -318,11 +320,23 @@ def load_onnx_model( # Infer types and shapes in the graph for ORT compatibility onnx_model = infer_types_shapes_tensorrt(onnx_model, trt_plugins or [], all_tensor_info) + # Enforce IR version = 10 + ir_version_onnx_path = None + if onnx_model.ir_version > MAX_IR_VERSION: + onnx_model.ir_version = MAX_IR_VERSION + ir_version_onnx_path = ( + static_shaped_onnx_path.replace(".onnx", f"_ir{MAX_IR_VERSION}.onnx") + if static_shaped_onnx_path + else onnx_path.replace(".onnx", f"_ir{MAX_IR_VERSION}.onnx") + ) + save_onnx(onnx_model, ir_version_onnx_path, use_external_data_format) + intermediate_generated_files.append(ir_version_onnx_path) # type: ignore[union-attr] + return ( onnx_model, has_custom_op, custom_ops, - static_shaped_onnx_path or onnx_path, + ir_version_onnx_path or static_shaped_onnx_path or onnx_path, use_external_data_format, ) From 260a2eb5269f675049a101ddc0a1a03e63b73815 Mon Sep 17 00:00:00 2001 From: gcunhase <4861122+gcunhase@users.noreply.github.com> Date: Wed, 8 Oct 2025 20:51:49 -0400 Subject: [PATCH 2/8] Add unittest Signed-off-by: gcunhase <4861122+gcunhase@users.noreply.github.com> --- .../onnx_quantization/lib_test_models.py | 57 +++++++++++++++++++ tests/unit/onnx/test_onnx_utils.py | 12 ++++ 2 files changed, 69 insertions(+) diff --git a/tests/_test_utils/onnx_quantization/lib_test_models.py b/tests/_test_utils/onnx_quantization/lib_test_models.py index 0f8df3e61..09183a2b4 100644 --- a/tests/_test_utils/onnx_quantization/lib_test_models.py +++ b/tests/_test_utils/onnx_quantization/lib_test_models.py @@ -555,3 +555,60 @@ def build_convtranspose_conv_residual_model(): onnx.checker.check_model(model_inferred) return model_inferred + + +def build_matmul_relu_model_ir_12(): + # Define your model inputs and outputs + input_names = ["input_0"] + output_names = ["output_0"] + input_shapes = [(1, 1024, 1024)] + output_shapes = [(1, 1024, 16)] + + inputs = [ + helper.make_tensor_value_info(input_name, onnx.TensorProto.FLOAT, input_shape) + for input_name, input_shape in zip(input_names, input_shapes) + ] + outputs = [ + helper.make_tensor_value_info(output_name, onnx.TensorProto.FLOAT, output_shape) + for output_name, output_shape in zip(output_names, output_shapes) + ] + + # Create the ONNX graph with the nodes + nodes = [ + helper.make_node( + op_type="MatMul", + inputs=["input_0", "weights_1"], + outputs=["matmul1_matmul/MatMul:0"], + name="matmul1_matmul/MatMul", + ), + helper.make_node( + op_type="Relu", + inputs=["matmul1_matmul/MatMul:0"], + outputs=["output_0"], + name="relu1_relu/Relu", + ), + ] + + # Create the ONNX initializers + initializers = [ + helper.make_tensor( + name="weights_1", + data_type=onnx.TensorProto.FLOAT, + dims=(1024, 16), + vals=np.random.uniform(low=0.5, high=1.0, size=1024 * 16), + ), + ] + + # Create the ONNX graph with the nodes and initializers + graph = helper.make_graph(nodes, "r1a", inputs, outputs, initializer=initializers) + + # Create the ONNX model + model = helper.make_model(graph) + model.opset_import[0].version = 13 + model.ir_version = 12 + + # Check the ONNX model + model_inferred = onnx.shape_inference.infer_shapes(model) + onnx.checker.check_model(model_inferred) + + return model_inferred diff --git a/tests/unit/onnx/test_onnx_utils.py b/tests/unit/onnx/test_onnx_utils.py index ede97302d..cebe18619 100644 --- a/tests/unit/onnx/test_onnx_utils.py +++ b/tests/unit/onnx/test_onnx_utils.py @@ -18,6 +18,7 @@ import numpy as np import onnx import pytest +from _test_utils.onnx_quantization.lib_test_models import build_matmul_relu_model_ir_12 from _test_utils.torch_model.vision_models import get_tiny_resnet_and_input from onnx.helper import ( make_graph, @@ -28,6 +29,7 @@ make_tensor_value_info, ) +from modelopt.onnx.trt_utils import load_onnx_model from modelopt.onnx.utils import ( get_input_names_from_bytes, get_output_names_from_bytes, @@ -253,3 +255,13 @@ def test_remove_node_extra_training_outputs(): value_info_names = [vi.name for vi in result_model.graph.value_info] assert "saved_mean" not in value_info_names assert "saved_inv_std" not in value_info_names + + +def test_ir_version_support(tmp_path="./"): + model = build_matmul_relu_model_ir_12() + model_path = os.path.join(tmp_path, "test_matmul_relu.onnx") + onnx.save(model, model_path) + model_reload, _, _, _, _ = load_onnx_model(model_path, intermediate_generated_files=[]) + assert model_reload.ir_version == 10, ( + f"The maximum supported IR version is 10, but version {model_reload.ir_version} was detected." + ) From a1e9a8e434618a0b57a3510758e48ebf676811cd Mon Sep 17 00:00:00 2001 From: gcunhase <4861122+gcunhase@users.noreply.github.com> Date: Thu, 9 Oct 2025 16:05:34 -0400 Subject: [PATCH 3/8] nit: fix tmp_path and updated model name Signed-off-by: gcunhase <4861122+gcunhase@users.noreply.github.com> --- tests/_test_utils/onnx_quantization/lib_test_models.py | 2 +- tests/unit/onnx/test_onnx_utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/_test_utils/onnx_quantization/lib_test_models.py b/tests/_test_utils/onnx_quantization/lib_test_models.py index 09183a2b4..b231826ed 100644 --- a/tests/_test_utils/onnx_quantization/lib_test_models.py +++ b/tests/_test_utils/onnx_quantization/lib_test_models.py @@ -600,7 +600,7 @@ def build_matmul_relu_model_ir_12(): ] # Create the ONNX graph with the nodes and initializers - graph = helper.make_graph(nodes, "r1a", inputs, outputs, initializer=initializers) + graph = helper.make_graph(nodes, "matmul_relu", inputs, outputs, initializer=initializers) # Create the ONNX model model = helper.make_model(graph) diff --git a/tests/unit/onnx/test_onnx_utils.py b/tests/unit/onnx/test_onnx_utils.py index cebe18619..9ca9f593a 100644 --- a/tests/unit/onnx/test_onnx_utils.py +++ b/tests/unit/onnx/test_onnx_utils.py @@ -257,7 +257,7 @@ def test_remove_node_extra_training_outputs(): assert "saved_inv_std" not in value_info_names -def test_ir_version_support(tmp_path="./"): +def test_ir_version_support(tmp_path): model = build_matmul_relu_model_ir_12() model_path = os.path.join(tmp_path, "test_matmul_relu.onnx") onnx.save(model, model_path) From da9a2038966ce3c41b2132dc83e0bed847c3ebcd Mon Sep 17 00:00:00 2001 From: gcunhase <4861122+gcunhase@users.noreply.github.com> Date: Thu, 9 Oct 2025 16:09:13 -0400 Subject: [PATCH 4/8] nit: added ir_version as arg in model building function Signed-off-by: gcunhase <4861122+gcunhase@users.noreply.github.com> --- tests/_test_utils/onnx_quantization/lib_test_models.py | 8 +++++--- tests/unit/onnx/test_onnx_utils.py | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/_test_utils/onnx_quantization/lib_test_models.py b/tests/_test_utils/onnx_quantization/lib_test_models.py index b231826ed..25fce6394 100644 --- a/tests/_test_utils/onnx_quantization/lib_test_models.py +++ b/tests/_test_utils/onnx_quantization/lib_test_models.py @@ -557,7 +557,7 @@ def build_convtranspose_conv_residual_model(): return model_inferred -def build_matmul_relu_model_ir_12(): +def build_matmul_relu_model(ir_version=12): # Define your model inputs and outputs input_names = ["input_0"] output_names = ["output_0"] @@ -600,12 +600,14 @@ def build_matmul_relu_model_ir_12(): ] # Create the ONNX graph with the nodes and initializers - graph = helper.make_graph(nodes, "matmul_relu", inputs, outputs, initializer=initializers) + graph = helper.make_graph( + nodes, f"matmul_relu_ir_{ir_version}", inputs, outputs, initializer=initializers + ) # Create the ONNX model model = helper.make_model(graph) model.opset_import[0].version = 13 - model.ir_version = 12 + model.ir_version = ir_version # Check the ONNX model model_inferred = onnx.shape_inference.infer_shapes(model) diff --git a/tests/unit/onnx/test_onnx_utils.py b/tests/unit/onnx/test_onnx_utils.py index 9ca9f593a..8b97c393f 100644 --- a/tests/unit/onnx/test_onnx_utils.py +++ b/tests/unit/onnx/test_onnx_utils.py @@ -18,7 +18,7 @@ import numpy as np import onnx import pytest -from _test_utils.onnx_quantization.lib_test_models import build_matmul_relu_model_ir_12 +from _test_utils.onnx_quantization.lib_test_models import build_matmul_relu_model from _test_utils.torch_model.vision_models import get_tiny_resnet_and_input from onnx.helper import ( make_graph, @@ -258,7 +258,7 @@ def test_remove_node_extra_training_outputs(): def test_ir_version_support(tmp_path): - model = build_matmul_relu_model_ir_12() + model = build_matmul_relu_model(ir_version=12) model_path = os.path.join(tmp_path, "test_matmul_relu.onnx") onnx.save(model, model_path) model_reload, _, _, _, _ = load_onnx_model(model_path, intermediate_generated_files=[]) From db8f48748df492ed654a134fe67ad4ee173d845d Mon Sep 17 00:00:00 2001 From: gcunhase <4861122+gcunhase@users.noreply.github.com> Date: Fri, 10 Oct 2025 10:50:06 -0400 Subject: [PATCH 5/8] Move model building function to test_onnx_utils Signed-off-by: gcunhase <4861122+gcunhase@users.noreply.github.com> --- tests/unit/onnx/test_onnx_utils.py | 62 +++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 2 deletions(-) diff --git a/tests/unit/onnx/test_onnx_utils.py b/tests/unit/onnx/test_onnx_utils.py index 8b97c393f..a58da7002 100644 --- a/tests/unit/onnx/test_onnx_utils.py +++ b/tests/unit/onnx/test_onnx_utils.py @@ -18,7 +18,6 @@ import numpy as np import onnx import pytest -from _test_utils.onnx_quantization.lib_test_models import build_matmul_relu_model from _test_utils.torch_model.vision_models import get_tiny_resnet_and_input from onnx.helper import ( make_graph, @@ -257,8 +256,67 @@ def test_remove_node_extra_training_outputs(): assert "saved_inv_std" not in value_info_names +def _make_matmul_relu_model(ir_version=12): + # Define your model inputs and outputs + input_names = ["input_0"] + output_names = ["output_0"] + input_shapes = [(1, 1024, 1024)] + output_shapes = [(1, 1024, 16)] + + inputs = [ + make_tensor_value_info(input_name, onnx.TensorProto.FLOAT, input_shape) + for input_name, input_shape in zip(input_names, input_shapes) + ] + outputs = [ + make_tensor_value_info(output_name, onnx.TensorProto.FLOAT, output_shape) + for output_name, output_shape in zip(output_names, output_shapes) + ] + + # Create the ONNX graph with the nodes + nodes = [ + make_node( + op_type="MatMul", + inputs=["input_0", "weights_1"], + outputs=["matmul1_matmul/MatMul:0"], + name="matmul1_matmul/MatMul", + ), + make_node( + op_type="Relu", + inputs=["matmul1_matmul/MatMul:0"], + outputs=["output_0"], + name="relu1_relu/Relu", + ), + ] + + # Create the ONNX initializers + initializers = [ + make_tensor( + name="weights_1", + data_type=onnx.TensorProto.FLOAT, + dims=(1024, 16), + vals=np.random.uniform(low=0.5, high=1.0, size=1024 * 16), + ), + ] + + # Create the ONNX graph with the nodes and initializers + graph = make_graph( + nodes, f"matmul_relu_ir_{ir_version}", inputs, outputs, initializer=initializers + ) + + # Create the ONNX model + model = make_model(graph) + model.opset_import[0].version = 13 + model.ir_version = ir_version + + # Check the ONNX model + model_inferred = onnx.shape_inference.infer_shapes(model) + onnx.checker.check_model(model_inferred) + + return model_inferred + + def test_ir_version_support(tmp_path): - model = build_matmul_relu_model(ir_version=12) + model = _make_matmul_relu_model(ir_version=12) model_path = os.path.join(tmp_path, "test_matmul_relu.onnx") onnx.save(model, model_path) model_reload, _, _, _, _ = load_onnx_model(model_path, intermediate_generated_files=[]) From f167a5d28419c1495994ebea2011e49622a98d7a Mon Sep 17 00:00:00 2001 From: gcunhase <4861122+gcunhase@users.noreply.github.com> Date: Tue, 14 Oct 2025 15:04:39 -0400 Subject: [PATCH 6/8] Add model checker Signed-off-by: gcunhase <4861122+gcunhase@users.noreply.github.com> --- modelopt/onnx/trt_utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modelopt/onnx/trt_utils.py b/modelopt/onnx/trt_utils.py index f7262afa4..2c2f05f9d 100644 --- a/modelopt/onnx/trt_utils.py +++ b/modelopt/onnx/trt_utils.py @@ -332,6 +332,9 @@ def load_onnx_model( save_onnx(onnx_model, ir_version_onnx_path, use_external_data_format) intermediate_generated_files.append(ir_version_onnx_path) # type: ignore[union-attr] + # Check that the model is valid + onnx.checker.check_model(onnx_model) + return ( onnx_model, has_custom_op, From e5dcb0d58ce904c6bdfdba76e45c628598fb7a69 Mon Sep 17 00:00:00 2001 From: gcunhase <4861122+gcunhase@users.noreply.github.com> Date: Thu, 16 Oct 2025 12:49:32 -0400 Subject: [PATCH 7/8] Remove build model from quant model zoo Signed-off-by: gcunhase <4861122+gcunhase@users.noreply.github.com> --- .../onnx_quantization/lib_test_models.py | 59 ------------------- 1 file changed, 59 deletions(-) diff --git a/tests/_test_utils/onnx_quantization/lib_test_models.py b/tests/_test_utils/onnx_quantization/lib_test_models.py index 25fce6394..0f8df3e61 100644 --- a/tests/_test_utils/onnx_quantization/lib_test_models.py +++ b/tests/_test_utils/onnx_quantization/lib_test_models.py @@ -555,62 +555,3 @@ def build_convtranspose_conv_residual_model(): onnx.checker.check_model(model_inferred) return model_inferred - - -def build_matmul_relu_model(ir_version=12): - # Define your model inputs and outputs - input_names = ["input_0"] - output_names = ["output_0"] - input_shapes = [(1, 1024, 1024)] - output_shapes = [(1, 1024, 16)] - - inputs = [ - helper.make_tensor_value_info(input_name, onnx.TensorProto.FLOAT, input_shape) - for input_name, input_shape in zip(input_names, input_shapes) - ] - outputs = [ - helper.make_tensor_value_info(output_name, onnx.TensorProto.FLOAT, output_shape) - for output_name, output_shape in zip(output_names, output_shapes) - ] - - # Create the ONNX graph with the nodes - nodes = [ - helper.make_node( - op_type="MatMul", - inputs=["input_0", "weights_1"], - outputs=["matmul1_matmul/MatMul:0"], - name="matmul1_matmul/MatMul", - ), - helper.make_node( - op_type="Relu", - inputs=["matmul1_matmul/MatMul:0"], - outputs=["output_0"], - name="relu1_relu/Relu", - ), - ] - - # Create the ONNX initializers - initializers = [ - helper.make_tensor( - name="weights_1", - data_type=onnx.TensorProto.FLOAT, - dims=(1024, 16), - vals=np.random.uniform(low=0.5, high=1.0, size=1024 * 16), - ), - ] - - # Create the ONNX graph with the nodes and initializers - graph = helper.make_graph( - nodes, f"matmul_relu_ir_{ir_version}", inputs, outputs, initializer=initializers - ) - - # Create the ONNX model - model = helper.make_model(graph) - model.opset_import[0].version = 13 - model.ir_version = ir_version - - # Check the ONNX model - model_inferred = onnx.shape_inference.infer_shapes(model) - onnx.checker.check_model(model_inferred) - - return model_inferred From b57d6cde76692acc4c95b927f55dbdc84bf9516c Mon Sep 17 00:00:00 2001 From: gcunhase <4861122+gcunhase@users.noreply.github.com> Date: Mon, 20 Oct 2025 14:41:56 -0400 Subject: [PATCH 8/8] nit: check if array is None Signed-off-by: gcunhase <4861122+gcunhase@users.noreply.github.com> --- modelopt/onnx/trt_utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/modelopt/onnx/trt_utils.py b/modelopt/onnx/trt_utils.py index 2c2f05f9d..fe01d672f 100644 --- a/modelopt/onnx/trt_utils.py +++ b/modelopt/onnx/trt_utils.py @@ -298,7 +298,8 @@ def load_onnx_model( static_shaped_onnx_path = onnx_path.replace(".onnx", "_static.onnx") save_onnx(onnx_model, static_shaped_onnx_path, use_external_data_format) - intermediate_generated_files.append(static_shaped_onnx_path) # type: ignore[union-attr] + if intermediate_generated_files is not None: + intermediate_generated_files.append(static_shaped_onnx_path) if TRT_PYTHON_AVAILABLE and platform.system() != "Windows": # Check if there's a custom TensorRT op in the ONNX model. If so, make it ORT compatible by adding @@ -330,7 +331,8 @@ def load_onnx_model( else onnx_path.replace(".onnx", f"_ir{MAX_IR_VERSION}.onnx") ) save_onnx(onnx_model, ir_version_onnx_path, use_external_data_format) - intermediate_generated_files.append(ir_version_onnx_path) # type: ignore[union-attr] + if intermediate_generated_files is not None: + intermediate_generated_files.append(ir_version_onnx_path) # Check that the model is valid onnx.checker.check_model(onnx_model)