sdpython · sdpython · Mar 30, 2025 · Mar 30, 2025 · Mar 30, 2025 · Mar 30, 2025
diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
@@ -4,6 +4,8 @@ Change Logs
 0.3.0
 +++++
 
+* :pr:`26`: creates a folder ``helpers`` to gather all the functions
+  used in many places
 * :pr:`25`: improve patches for DynamicCache
   (issue with register_pytree_flatten_spec being deprecated)
 * :pr:`24`: dummy inputs for ``text2text-generation``, add new function

diff --git a/README.rst b/README.rst
@@ -98,7 +98,7 @@ Snapshot of usefuls tools
 .. code-block:: python
 
         import onnx
-        from onnx_diagnostic.helpers import onnx_dtype_name
+        from onnx_diagnostic.helpers.onnx_helper import onnx_dtype_name
 
         itype = onnx.TensorProto.BFLOAT16
         print(onnx_dtype_name(itype))

diff --git a/_doc/api/args.rst b/_doc/api/args.rst
diff --git a/_doc/api/cache_helpers.rst b/_doc/api/cache_helpers.rst
diff --git a/_doc/api/helpers.rst b/_doc/api/helpers.rst
diff --git a/_doc/api/helpers/args_helper.rst b/_doc/api/helpers/args_helper.rst
@@ -0,0 +1,7 @@
+
+onnx_diagnostic.helpers.args_helper
+===================================
+
+.. automodule:: onnx_diagnostic.helpers.args_helper
+    :members:
+    :no-undoc-members:
diff --git a/_doc/api/helpers/cache_helper.rst b/_doc/api/helpers/cache_helper.rst
@@ -0,0 +1,7 @@
+
+onnx_diagnostic.helpers.cache_helper
+====================================
+
+.. automodule:: onnx_diagnostic.helpers.cache_helper
+    :members:
+    :no-undoc-members:
diff --git a/_doc/api/helpers/helper.rst b/_doc/api/helpers/helper.rst
@@ -0,0 +1,7 @@
+
+onnx_diagnostic.helpers.helper
+==============================
+
+.. automodule:: onnx_diagnostic.helpers.helper
+    :no-undoc-members:
+    :exclude-members: max_diff, string_diff, string_sig, string_type
diff --git a/_doc/api/helpers/index.rst b/_doc/api/helpers/index.rst
@@ -0,0 +1,26 @@
+
+onnx_diagnostic.helpers
+=======================
+
+.. toctree::
+    :maxdepth: 1
+    :caption: submodules
+
+    args_helper
+    cache_helper
+    helper
+    onnx_helper
+    ort_session
+    torch_test_helper
+
+.. autofunction:: onnx_diagnostic.helpers.max_diff
+
+.. autofunction:: onnx_diagnostic.helpers.string_diff
+
+.. autofunction:: onnx_diagnostic.helpers.string_sig
+
+.. autofunction:: onnx_diagnostic.helpers.string_type
+
+.. automodule:: onnx_diagnostic.helpers
+    :members:
+    :no-undoc-members:
diff --git a/_doc/api/helpers/onnx_helper.rst b/_doc/api/helpers/onnx_helper.rst
@@ -0,0 +1,7 @@
+
+onnx_diagnostic.helpers.onnx_helper
+===================================
+
+.. automodule:: onnx_diagnostic.helpers.onnx_helper
+    :members:
+    :no-undoc-members:
diff --git a/_doc/api/helpers/ort_session.rst b/_doc/api/helpers/ort_session.rst
@@ -0,0 +1,7 @@
+
+onnx_diagnostic.helpers.ort_session
+===================================
+
+.. automodule:: onnx_diagnostic.helpers.ort_session
+    :members:
+    :no-undoc-members:
diff --git a/_doc/api/helpers/torch_test_helper.rst b/_doc/api/helpers/torch_test_helper.rst
@@ -0,0 +1,7 @@
+
+onnx_diagnostic.helpers.torch_test_helper
+=========================================
+
+.. automodule:: onnx_diagnostic.helpers.torch_test_helper
+    :members:
+    :no-undoc-members:
diff --git a/_doc/api/index.rst b/_doc/api/index.rst
@@ -8,22 +8,17 @@ API of onnx_diagnostic
     :caption: submodules
 
     export/index
+    helpers/index
     reference/index
     torch_export_patches/index
     torch_models/index
+    torch_onnx/index
 
 .. toctree::
     :maxdepth: 1
     :caption: modules
 
-    args
-    cache_helpers
     ext_test_case
-    helpers
-    onnx_tools
-    ort_session
-    torch_test_helper
-
 
 .. automodule:: onnx_diagnostic
     :members:

diff --git a/_doc/api/onnx_tools.rst b/_doc/api/onnx_tools.rst
diff --git a/_doc/api/ort_session.rst b/_doc/api/ort_session.rst
diff --git a/_doc/api/torch_onnx/index.rst b/_doc/api/torch_onnx/index.rst
@@ -0,0 +1,12 @@
+onnx_diagnostic.torch_onnx
+==========================
+
+.. toctree::
+    :maxdepth: 1
+    :caption: submodules
+
+    sbs
+
+.. automodule:: onnx_diagnostic.torch_onnx
+    :members:
+    :no-undoc-members:
diff --git a/_doc/api/torch_onnx/sbs.rst b/_doc/api/torch_onnx/sbs.rst
@@ -0,0 +1,8 @@
+
+onnx_diagnostic.torch_onnx.sbs
+==============================
+
+.. automodule:: onnx_diagnostic.torch_onnx.sbs
+    :members:
+    :no-undoc-members:
+
diff --git a/_doc/api/torch_test_helper.rst b/_doc/api/torch_test_helper.rst
diff --git a/_doc/examples/plot_export_tiny_llm.py b/_doc/examples/plot_export_tiny_llm.py
@@ -31,8 +31,8 @@
 import transformers
 from onnx_diagnostic import doc
 from onnx_diagnostic.helpers import string_type
+from onnx_diagnostic.helpers.torch_test_helper import steel_forward
 from onnx_diagnostic.torch_models.llms import get_tiny_llm
-from onnx_diagnostic.torch_test_helper import steel_forward
 
 
 MODEL_NAME = "arnir0/Tiny-LLM"
@@ -77,7 +77,7 @@ def _forward_(*args, _f=None, **kwargs):
 model.forward = keep_model_forward
 
 # %%
-# Another syntax with :func:`onnx_diagnostic.torch_test_helper.steel_forward`.
+# Another syntax with :func:`onnx_diagnostic.helpers.torch_test_helper.steel_forward`.
 
 with steel_forward(model):
     model.generate(inputs, max_length=50, temperature=1, top_k=50, top_p=0.95, do_sample=True)

diff --git a/_doc/examples/plot_export_tiny_llm_patched.py b/_doc/examples/plot_export_tiny_llm_patched.py
@@ -67,7 +67,7 @@
 import torch
 import transformers
 from onnx_diagnostic import doc
-from onnx_diagnostic.cache_helpers import is_cache_dynamic_registered
+from onnx_diagnostic.helpers.cache_helper import is_cache_dynamic_registered
 from onnx_diagnostic.helpers import string_type
 from onnx_diagnostic.torch_export_patches import bypass_export_some_errors
 from onnx_diagnostic.torch_models.llms import get_tiny_llm

diff --git a/_doc/examples/plot_export_tiny_phi2.py b/_doc/examples/plot_export_tiny_phi2.py
@@ -0,0 +1,159 @@
+"""
+.. _l-plot-export_tiny_phi2:
+
+Untrained microsoft/phi-2
+=========================
+
+:epkg:`microsoft/phi-2` is not a big model but it is still quite big
+when it comes to writing unit tests. Function
+:func:`onnx_diagnostic.torch_models.hghub.get_untrained_model_with_inputs`
+can be used to create a reduced untrained version of a model coming from
+:epkg:`HuggingFace`. It downloads the configuration from the website
+but creates a dummy model with 1 or 2 hidden layers in order to reduce
+the size and get a fast execution. The goal is usually to test
+the export or to compare performance. The relevance does not matter.
+
+Create the dummy model
+++++++++++++++++++++++
+"""
+
+import copy
+import pprint
+import warnings
+import torch
+import onnxruntime
+from onnx_diagnostic import doc
+from onnx_diagnostic.helpers import max_diff, string_diff, string_type
+from onnx_diagnostic.helpers.cache_helper import is_cache_dynamic_registered
+from onnx_diagnostic.helpers.ort_session import make_feeds
+from onnx_diagnostic.torch_export_patches import bypass_export_some_errors
+from onnx_diagnostic.torch_models.hghub import (
+    get_untrained_model_with_inputs,
+)
+
+warnings.simplefilter("ignore")
+
+# another tiny id: arnir0/Tiny-LLM
+data = get_untrained_model_with_inputs("microsoft/phi-2")
+untrained_model, inputs, dynamic_shapes, config, size, n_weights = (
+    data["model"],
+    data["inputs"],
+    data["dynamic_shapes"],
+    data["configuration"],
+    data["size"],
+    data["n_weights"],
+)
+
+print(f"model {size / 2**10:1.3f} Kb with {n_weights} parameters.")
+# %%
+# The original model has 2.7 billion parameters. It was divided by more than 10.
+# Let's see the configuration.
+print(config)
+
+
+# %%
+# Inputs:
+
+print(string_type(inputs, with_shape=True))
+
+# %%
+# With min/max values.
+print(string_type(inputs, with_shape=True, with_min_max=True))
+
+# %%
+# And the dynamic shapes
+pprint.pprint(dynamic_shapes)
+
+# %%
+# We execute the model to produce expected outputs.
+expected = untrained_model(**copy.deepcopy(inputs))
+print(f"expected: {string_type(expected, with_shape=True, with_min_max=True)}")
+
+
+# %%
+# Export
+# ++++++
+
+
+with bypass_export_some_errors(patch_transformers=True) as modificator:
+
+    # Unnecessary steps but useful in case of an error
+    # We check the cache is registered.
+    assert is_cache_dynamic_registered()
+
+    # We check there are no discrepancies when the cache is applied.
+    d = max_diff(expected, untrained_model(**copy.deepcopy(inputs)))
+    assert (
+        d["abs"] < 1e-5
+    ), f"The model with patches produces different outputs: {string_diff(d)}"
+
+    # Then we export.
+    ep = torch.export.export(
+        untrained_model,
+        (),
+        kwargs=modificator(copy.deepcopy(inputs)),
+        dynamic_shapes=dynamic_shapes,
+        strict=False,  # mandatory for torch==2.6
+    )
+
+    # We check the exported program produces the same results as well.
+    d = max_diff(expected, ep.module()(**copy.deepcopy(inputs)))
+    assert d["abs"] < 1e-5, f"The exported model different outputs: {string_diff(d)}"
+
+# %%
+# Export to ONNX
+# ++++++++++++++
+#
+# The export works. We can export to ONNX now.
+# Patches are still needed because the export
+# applies :meth:`torch.export.ExportedProgram.run_decompositions`
+# which may export local pieces of the model again.
+
+with bypass_export_some_errors(patch_transformers=True):
+    epo = torch.onnx.export(
+        ep, (), kwargs=copy.deepcopy(inputs), dynamic_shapes=dynamic_shapes, dynamo=True
+    )
+
+# %%
+# We can save it.
+epo.save("plot_export_tiny_phi2.onnx", external_data=True)
+
+# Or directly get the :class:`onnx.ModelProto`.
+onx = epo.model_proto
+
+
+# %%
+# Discrepancies
+# +++++++++++++
+#
+# Then we check the conversion to ONNX.
+# Let's make sure the ONNX model produces the same outputs.
+# It takes flattened inputs.
+
+feeds = make_feeds(onx, copy.deepcopy(inputs), use_numpy=True, copy=True)
+
+print(f"torch inputs: {string_type(inputs)}")
+print(f"onxrt inputs: {string_type(feeds)}")
+
+# %%
+# We then create a :class:`onnxruntime.InferenceSession`.
+
+sess = onnxruntime.InferenceSession(
+    onx.SerializeToString(), providers=["CPUExecutionProvider"]
+)
+
+# %%
+# Let's run.
+got = sess.run(None, feeds)
+
+# %%
+# And finally the discrepancies.
+
+diff = max_diff(expected, got, flatten=True)
+print(f"onnx discrepancies: {string_diff(diff)}")
+
+# %%
+# It looks good.
+
+# %%
+doc.plot_legend("untrained smaller\nmicrosoft/phi-2", "torch.onnx.export", "green")
diff --git a/_doc/examples/plot_export_with_dynamic_cache.py b/_doc/examples/plot_export_with_dynamic_cache.py
@@ -24,7 +24,7 @@
 import pprint
 import torch
 from onnx_diagnostic import doc
-from onnx_diagnostic.cache_helpers import make_dynamic_cache
+from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache
 from onnx_diagnostic.helpers import string_type
 from onnx_diagnostic.export import ModelInputs
 

diff --git a/_doc/examples/plot_failing_model_extract.py b/_doc/examples/plot_failing_model_extract.py
@@ -26,8 +26,8 @@
 import onnx.helper as oh
 import onnxruntime
 from onnx_diagnostic import doc
-from onnx_diagnostic.helpers import from_array_extended
-from onnx_diagnostic.ort_session import investigate_onnxruntime_issue
+from onnx_diagnostic.helpers.onnx_helper import from_array_extended
+from onnx_diagnostic.helpers.ort_session import investigate_onnxruntime_issue
 
 TFLOAT = onnx.TensorProto.FLOAT