8 changes: 7 additions & 1 deletion CHANGELOGS.rst
@@ -4,7 +4,13 @@ Change Logs
0.7.0
+++++

* :pr:`143`: compares intermediate results
* :pr:`144`: support for second inputs with different dimension,
rename test_helper into validate,
support ``interpolate_pos_encoding`` for ``VitModel``,
update model builder helpers for this PR
`Use ONNX IR for model builder
<https://github.com/microsoft/onnxruntime-genai/pull/1416>`_
* :pr:`143`: compares intermediate results,

0.6.3
+++++
2 changes: 1 addition & 1 deletion _doc/api/torch_models/index.rst
@@ -7,7 +7,7 @@ onnx_diagnostic.torch_models

hghub/index
llms
test_helper
validate

.. automodule:: onnx_diagnostic.torch_models
:members:
7 changes: 0 additions & 7 deletions _doc/api/torch_models/test_helper.rst

This file was deleted.

7 changes: 7 additions & 0 deletions _doc/api/torch_models/validate.rst
@@ -0,0 +1,7 @@

onnx_diagnostic.torch_models.validate
=====================================

.. automodule:: onnx_diagnostic.torch_models.validate
:members:
:no-undoc-members:
4 changes: 2 additions & 2 deletions _doc/cmds/validate.rst
@@ -4,7 +4,7 @@
===================================================

The command line is a wrapper around function
:func:`onnx_diagnostic.torch_models.test_helper.validate_model`.
:func:`onnx_diagnostic.torch_models.validate.validate_model`.

Description
+++++++++++
@@ -110,7 +110,7 @@ Run onnxruntime fusions

This option runs `transformers optimizations <https://onnxruntime.ai/docs/performance/transformers-optimization.html>`_
implemented in :epkg:`onnxruntime`. The list of supported ``model_type`` can be found in the documentation
of function :func:`onnx_diagnostic.torch_models.test_helper.run_ort_fusion`.
of function :func:`onnx_diagnostic.torch_models.validate.run_ort_fusion`.

.. code-block::

2 changes: 1 addition & 1 deletion _unittests/ut_helpers/test_doc_helper.py
@@ -56,7 +56,7 @@ def test_custom_doc_kernels_layer_normalization(self):
)
expected = torch_sess.run(None, feeds)
got = torch_sess_custom.run(None, feeds)
self.assertEqualAny(expected, got)
self.assertEqualAny(expected, got, atol=1e-3)

def test_custom_doc_kernels_matmul(self):
model = oh.make_model(
28 changes: 6 additions & 22 deletions _unittests/ut_helpers/test_model_builder_helper.py
@@ -1,4 +1,3 @@
import os
import unittest
from onnx_diagnostic.ext_test_case import (
ExtTestCase,
@@ -48,32 +47,17 @@ def test_model_builder_id(self):
cache_dir=folder,
verbose=1,
)
self.assertGreater(len(onnx_model.nodes), 5)
self.assertGreater(onnx_model.model.graph.num_nodes(), 5)
model_name = save_model_builder(onnx_model, folder, verbose=1)
self.assertExists(model_name)

proto = save_model_builder(onnx_model, verbose=1)
import onnxruntime

onnxruntime.InferenceSession(
proto.SerializeToString(), providers=["CPUExecutionProvider"]
)

# We need to start again.
onnx_model = create_model_builder(
data["configuration"],
data["model"],
precision="fp32",
execution_provider="cpu",
cache_dir=folder,
verbose=1,
)
save_model_builder(onnx_model, folder, verbose=1)
model_name = os.path.join(folder, "model.onnx")
self.assertExists(model_name)

feeds = make_feeds(proto, data["inputs"], use_numpy=True)
sess = onnxruntime.InferenceSession(model_name, providers=["CPUExecutionProvider"])
del data["inputs"]["position_ids"]
feeds = make_feeds([i.name for i in sess.get_inputs()], data["inputs"], use_numpy=True)
expected = data["model"](**data["inputs"])

sess = onnxruntime.InferenceSession(model_name, providers=["CPUExecutionProvider"])
try:
got = sess.run(None, feeds)
except onnxruntime.capi.onnxruntime_pybind11_state.InvalidArgument as e:
2 changes: 1 addition & 1 deletion _unittests/ut_torch_models/test_validate_models.py
@@ -8,7 +8,7 @@
requires_experimental,
requires_transformers,
)
from onnx_diagnostic.torch_models.test_helper import validate_model
from onnx_diagnostic.torch_models.validate import validate_model


class TestValidateModel(ExtTestCase):
@@ -11,7 +11,7 @@
requires_onnxscript,
requires_transformers,
)
from onnx_diagnostic.torch_models.test_helper import (
from onnx_diagnostic.torch_models.validate import (
get_inputs_for_task,
validate_model,
filter_inputs,
@@ -21,7 +21,7 @@
from onnx_diagnostic.tasks import supported_tasks


class TestTestHelper(ExtTestCase):
class TestValidateWholeModels(ExtTestCase):
def test_get_inputs_for_task(self):
fcts = supported_tasks()
for task in self.subloop(sorted(fcts)):
@@ -221,14 +221,39 @@ def test_validate_model_modelbuilder(self):
do_run=True,
verbose=10,
exporter="modelbuilder",
dump_folder="dump_test_validate_model_onnx_dynamo",
dump_folder="dump_test_validate_model_modelbuilder",
)
self.assertIsInstance(summary, dict)
self.assertIsInstance(data, dict)
self.assertLess(summary["disc_onnx_ort_run_abs"], 1e-4)
onnx_filename = data["onnx_filename"]
self.assertExists(onnx_filename)

@requires_torch("2.7")
@hide_stdout()
@ignore_warnings(FutureWarning)
@requires_transformers("4.51")
def test_validate_model_vit_model(self):
mid = "ydshieh/tiny-random-ViTForImageClassification"
summary, data = validate_model(
mid,
do_run=True,
verbose=10,
exporter="onnx-dynamo",
dump_folder="dump_test_validate_model_onnx_dynamo",
inputs2=True,
)
self.assertIsInstance(summary, dict)
self.assertIsInstance(data, dict)
self.assertLess(summary["disc_onnx_ort_run_abs"], 1e-3)
self.assertLess(summary["disc_onnx_ort_run2_abs"], 1e-3)
self.assertEqual("dict(pixel_values:A1s2x3x30x30)", summary["run_feeds_inputs"])
self.assertEqual("dict(pixel_values:A1s3x3x31x31)", summary["run_feeds_inputs2"])
self.assertEqual("#1[A1s2x2]", summary["run_output_inputs"])
self.assertEqual("#1[A1s3x2]", summary["run_output_inputs2"])
onnx_filename = data["onnx_filename"]
self.assertExists(onnx_filename)


if __name__ == "__main__":
unittest.main(verbosity=2)
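
A note on the compact shape strings asserted above: they come from
``string_type`` in ``onnx_diagnostic.helpers``. Assuming the notation is
unchanged, ``A`` marks a numpy array, the digit is the ONNX dtype code
(1 = float32), ``sAxBxC`` is the shape, and ``#1[...]`` a one-element list.
A minimal sketch:

    import numpy as np
    from onnx_diagnostic.helpers import string_type

    feeds = {"pixel_values": np.zeros((2, 3, 30, 30), dtype=np.float32)}
    # expected to print: dict(pixel_values:A1s2x3x30x30)
    print(string_type(feeds, with_shape=True))
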
10 changes: 9 additions & 1 deletion onnx_diagnostic/_command_lines_parser.py
@@ -373,6 +373,13 @@ def get_parser_validate() -> ArgumentParser:
action=BooleanOptionalAction,
help="validate the trained model (requires downloading)",
)
parser.add_argument(
"--inputs2",
default=True,
action=BooleanOptionalAction,
help="if run is on, the command lines validates the model on a "
"second set of inputs to check the exported model supports dynamism",
)
parser.add_argument(
"--runtime",
choices=["onnxruntime", "torch", "ref"],
@@ -440,7 +447,7 @@ def get_parser_validate() -> ArgumentParser:

def _cmd_validate(argv: List[Any]):
from .helpers import string_type
from .torch_models.test_helper import get_inputs_for_task, validate_model
from .torch_models.validate import get_inputs_for_task, validate_model
from .tasks import supported_tasks

parser = get_parser_validate()
@@ -492,6 +499,7 @@ def _cmd_validate(argv: List[Any]):
runtime=args.runtime,
repeat=args.repeat,
warmup=args.warmup,
inputs2=args.inputs2,
)
print("")
print("-- summary --")
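
The new ``--inputs2`` flag maps directly onto the ``inputs2`` argument of
``validate_model``; a sketch of the Python equivalent, mirroring the new unit
test above:

    from onnx_diagnostic.torch_models.validate import validate_model

    summary, data = validate_model(
        "ydshieh/tiny-random-ViTForImageClassification",
        do_run=True,            # execute both the torch and the exported model
        exporter="onnx-dynamo",
        inputs2=True,           # replay on a second input set to check dynamism
    )
    # discrepancies for both input sets are reported in the summary
    print(summary["disc_onnx_ort_run_abs"], summary["disc_onnx_ort_run2_abs"])
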
127 changes: 54 additions & 73 deletions onnx_diagnostic/helpers/model_builder_helper.py
@@ -3,9 +3,9 @@
import requests
import sys
from pathlib import Path
from typing import Any, Optional
from typing import Any, Optional, Union
from urllib.parse import urlparse
from onnx import helper, save_model, external_data_helper, ModelProto
from onnx import ModelProto, TensorProto

CACHE_SUBDIR = "onnx-diagnostic"

@@ -114,87 +114,58 @@ def _make_model(self, model, verbose: int = 0):
self.make_lm_head(module)


def save_model_builder(self, out_dir: Optional[str] = "", verbose: int = 0) -> ModelProto:
def save_model_builder(
self, out_dir: Optional[str] = "", verbose: int = 0
) -> Union[str, ModelProto]:
"""
Saves a model created by function :func:`create_model_builder`.
If out_dir is empty or not specified, the function still returns the
generated model.
"""
if verbose:
print(f"[save_model_builder] Saving ONNX model in {out_dir}")

# Create ONNX model
model = helper.make_model(
opset_imports=[
self.clear_field(
helper.make_operatorsetid("", 21 if self.quant_attrs["use_qdq"] else 14),
"domain",
),
helper.make_operatorsetid("com.microsoft", 1),
],
ir_version=7,
producer_name="onnxruntime-genai",
producer_version="0.0.0",
graph=self.make_graph(
name="main_graph",
inputs=self.inputs,
outputs=self.outputs,
initializer=self.initializers,
value_info=self.value_infos,
nodes=self.nodes,
),
)

# Load external data into ONNX model
external_data_helper.load_external_data_for_model(model, self.cache_dir)

# Delete external data files on disk before re-saving
for path in os.listdir(self.cache_dir):
if path.endswith(".bin"):
os.remove(os.path.join(self.cache_dir, path))
import onnx_ir

# Delete temporary cache dir if empty
# if len(os.listdir(self.cache_dir)) == 0:
# os.rmdir(self.cache_dir)
if verbose:
print(f"[save_model_builder] Saving ONNX model in {out_dir!r}")

# Quantize ONNX model to desired precision
# Skip quantizing `MatMul` in `DequantizeLinear --> Transpose --> MatMul` path
already_quantized_in_qdq_format = (
self.quant_type is not None and self.quant_attrs["use_qdq"]
) # Skip quantizing `MatMul` in `DequantizeLinear --> Transpose --> MatMul` path
if self.onnx_dtype == "int4" and not already_quantized_in_qdq_format:
model = self.to_int4(model)
)
model = (
self.to_int4()
if self.onnx_dtype in {onnx_ir.DataType.INT4, onnx_ir.DataType.UINT4}
and not already_quantized_in_qdq_format
else self.model
)
model.graph.sort()
if not out_dir:
return onnx_ir.to_proto(model)

# Save ONNX model with only one external data file and delete any existing duplicate copies
if out_dir:
out_path = os.path.join(out_dir, self.filename)
data_path = os.path.join(out_dir, os.path.basename(out_path) + ".data")
if os.path.exists(out_path):
if verbose:
print(f"[save_model_builder] Overwriting {out_path!r}")
os.remove(out_path)
if os.path.exists(data_path):
if verbose:
print(f"[save_model_builder] Overwriting {data_path!r}")
os.remove(data_path)
out_path = os.path.join(out_dir, self.filename)
data_path = os.path.join(out_dir, os.path.basename(out_path) + ".data")

if out_dir:
location = os.path.basename(data_path)
if os.path.exists(location):
os.remove(location)
# Save ONNX model with only one external data file and delete any existing duplicate copies
out_path = os.path.join(out_dir, self.filename)
data_path = os.path.join(out_dir, os.path.basename(out_path) + ".data")
if os.path.exists(out_path):
if verbose:
print(f"[save_model_builder] out_path={out_path!r}")
print(f"[save_model_builder] location={location!r}")
save_model(
model,
out_path,
save_as_external_data=True,
all_tensors_to_one_file=True,
location=location,
size_threshold=1024,
convert_attribute=False,
)
return None
return model
print(f"[save_model_builder] Overwriting {out_path!r}")
os.remove(out_path)
if os.path.exists(data_path):
if verbose:
print(f"[save_model_builder] Overwriting {data_path!r}")
os.remove(data_path)

onnx_ir.save(
model,
out_path,
external_data=os.path.basename(data_path),
size_threshold_bytes=2**10,
)
if verbose:
print(f"[save_model_builder] saved in {out_dir!r}")

return out_path


def create_model_builder(
@@ -335,13 +306,23 @@ def _post(onnx_model):
for c in remove:
delattr(config, c)

onnx_model = cls(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
convert = {
"fp32": TensorProto.FLOAT,
"fp16": TensorProto.FLOAT16,
"bfp16": TensorProto.BFLOAT16,
}
assert (
precision in convert
), f"Unexpected value for precision={precision!r}, should be in {convert}"
onnx_model = cls(
config, io_dtype, convert[precision], execution_provider, cache_dir, extra_options
)

if post:
post(onnx_model)
_make_model(onnx_model, model, verbose=verbose)

assert onnx_model.nodes, (
assert onnx_model.model, (
f"No node in the model, io_dtype={io_dtype!r}, "
f"precision={precision!r}, execution_provider={execution_provider!r}, "
f"extra_options={extra_options!r}, cache_dir={cache_dir!r}, "
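
After this refactor ``save_model_builder`` no longer assembles a ``ModelProto``
by hand: it relies on :epkg:`onnx_ir` and either returns the proto (empty
``out_dir``) or writes the model plus a single external-data file and returns
the path. A sketch of both modes, mirroring the updated unit test (``data`` and
``folder`` as set up there):

    onnx_model = create_model_builder(
        data["configuration"],
        data["model"],
        precision="fp32",
        execution_provider="cpu",
        cache_dir=folder,
        verbose=1,
    )
    proto = save_model_builder(onnx_model, verbose=1)         # -> ModelProto
    path = save_model_builder(onnx_model, folder, verbose=1)  # -> saved path
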
5 changes: 4 additions & 1 deletion onnx_diagnostic/tasks/image_classification.py
@@ -52,7 +52,7 @@ def get_inputs(
input_width, int
), f"Unexpected type for input_width {type(input_width)}{config}"
assert isinstance(
input_width, int
input_height, int
), f"Unexpected type for input_height {type(input_height)}{config}"

shapes = {
@@ -67,6 +67,9 @@
-1, 1
),
)
if model.__class__.__name__ == "ViTForImageClassification":
inputs["interpolate_pos_encoding"] = True
shapes["interpolate_pos_encoding"] = None # type: ignore[assignment]
res = dict(inputs=inputs, dynamic_shapes=shapes)
if add_second_input:
res["inputs2"] = get_inputs(
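
``interpolate_pos_encoding=True`` is what lets the second input set use a
different resolution (30x30 vs 31x31 in the test above): ViT interpolates its
position embeddings instead of requiring the original image size. A small
standalone check using the standard :epkg:`transformers` API, with the tiny
model id taken from the test:

    import torch
    from transformers import ViTForImageClassification

    model = ViTForImageClassification.from_pretrained(
        "ydshieh/tiny-random-ViTForImageClassification"
    )
    x = torch.rand(1, 3, 31, 31)  # not the resolution the model was built for
    logits = model(pixel_values=x, interpolate_pos_encoding=True).logits
    print(logits.shape)  # torch.Size([1, 2]) for this tiny 2-label model
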