
Commit 849049b

Improves investigate_onnxruntime_issue
1 parent cd111f5 commit 849049b

6 files changed: +197 -16 lines


CHANGELOGS.rst

Lines changed: 5 additions & 0 deletions
@@ -1,6 +1,11 @@
 Change Logs
 ===========

+0.2.0
++++++
+
+* :pr:`7`: improves function ``investigate_onnxruntime_issue``
+
 0.1.0
 +++++

_doc/examples/plot_export_tiny_llm.py

Lines changed: 37 additions & 6 deletions
@@ -1,23 +1,33 @@
 """
 .. _l-plot-tiny-llm-export:

-Export LLM with dynamic shapes
-==============================
+Steal method forward to guess the dynamic shapes
+================================================
+
+Inputs are always dynamic with LLMs, which is why dynamic shapes
+need to be specified when an LLM is exported with :func:`torch.export.export`.
+Most of the examples on :epkg:`HuggingFace` use method
+:meth:`transformers.GenerationMixin.generate` but we only want to
+export the model and its method ``forward``.
+
+This example shows how to guess the inputs of this method even though the model
+is executed through method ``generate``.

 We focus on the model
 `Tiny-LLM <https://huggingface.co/arnir0/Tiny-LLM>`_.
 To avoid downloading any weights, we write a function creating a
 random model based on the same architecture.

-Guess the cache dimension
-+++++++++++++++++++++++++
+Steal the forward method
+++++++++++++++++++++++++

 The first step is to guess the dummy inputs.
 Let's use the true model for that.
 We use the dummy example from the model page.
 """

 import copy
+import pprint
 import torch
 import transformers
 from onnx_diagnostic.helpers import string_type
@@ -64,8 +74,13 @@ def _forward_(*args, _f=None, **kwargs):
 model.forward = keep_model_forward

 # %%
-# The model creation
-# ++++++++++++++++++
+# Untrained model
+# +++++++++++++++
+#
+# This part can be skipped if you are only interested in exporting
+# the original model. It is useful to create a unit test to ensure
+# a specific architecture can be exported despite the many changes
+# brought to :epkg:`torch` or :epkg:`transformers`.
 #
 # Let's create an untrained model using the config file provided
 # `config.json <https://huggingface.co/arnir0/Tiny-LLM/blob/main/config.json>`_
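The mechanics of building the untrained model are not shown in this hunk; presumably they follow the usual transformers pattern of instantiating from a config instead of downloading weights. A hedged sketch of that pattern (not the example's exact code):

import transformers

# build an architecture-only model: same config as Tiny-LLM, random weights
config = transformers.AutoConfig.from_pretrained("arnir0/Tiny-LLM")
untrained = transformers.AutoModelForCausalLM.from_config(config)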
@@ -126,6 +141,22 @@ def _forward_(*args, _f=None, **kwargs):
 # ++++++++++++++++++++++++++
 #
 # Let's use the same dummy inputs but we use the downloaded model.
+# Dummy inputs and dynamic shapes are created by function
+# :func:`onnx_diagnostic.torch_models.llms.get_tiny_llm`.
+
+data = get_tiny_llm()
+inputs, dynamic_shapes = data["inputs"], data["dynamic_shapes"]
+
+# %%
+# Let's print the inputs.
+
+print(string_type(inputs, with_shape=True))
+
+# %% Let's print the dynamic shapes
+pprint.pprint(dynamic_shapes)
+
+# %%
+# And let's finally export.

 try:
     ep = torch.export.export(model, (), kwargs=cloned_inputs, dynamic_shapes=dynamic_shapes)
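The renamed sections make the technique explicit: instead of exporting through ``generate``, the example temporarily swaps ``model.forward`` for a spy that records the exact arguments ``generate`` passes to it, then restores the original method (the ``_forward_(*args, _f=None, **kwargs)`` wrapper visible in the hunk headers plays that role). A minimal sketch of the trick, assuming the Tiny-LLM checkpoint is reachable; ``stored_inputs`` and ``forward_spy`` are illustrative names, not the example's exact code:

import transformers

model = transformers.AutoModelForCausalLM.from_pretrained("arnir0/Tiny-LLM")
tokenizer = transformers.AutoTokenizer.from_pretrained("arnir0/Tiny-LLM")

stored_inputs = []  # every (args, kwargs) that generate() sends to forward()
keep_model_forward = model.forward

def forward_spy(*args, **kwargs):
    stored_inputs.append((args, kwargs))
    return keep_model_forward(*args, **kwargs)

model.forward = forward_spy
prompt = tokenizer("Continue: it rains...", return_tensors="pt")
model.generate(**prompt, max_new_tokens=2)
model.forward = keep_model_forward  # always restore the original method

# the recorded kwargs (input_ids, attention_mask, past_key_values, ...) are
# the dummy inputs torch.export.export needs, cache included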
_doc/examples/plot_failing_model_extract.py

Lines changed: 93 additions & 0 deletions
@@ -0,0 +1,93 @@
+"""
+.. _l-plot-failing-model-extract:
+
+Find where a model fails by running submodels
+=============================================
+
+Let's assume :epkg:`onnxruntime` crashes without telling why or where.
+The first thing to do is to locate where. For that, we extract every submodel
+starting from the inputs and running the first *n* nodes of the model.
+The model is likely to fail for some *n*. Then the failing node is known.
+
+A failing model
++++++++++++++++
+
+The issue here is an operator ``Cast`` trying to convert a result
+into a non-existing type.
+"""
+
+import numpy as np
+import onnx
+import onnx.helper as oh
+import onnxruntime
+from onnx_diagnostic.helpers import from_array_extended
+from onnx_diagnostic.ort_session import investigate_onnxruntime_issue
+
+TFLOAT = onnx.TensorProto.FLOAT
+
+model = oh.make_model(
+    oh.make_graph(
+        [
+            oh.make_node("Mul", ["X", "Y"], ["xy"], name="n0"),
+            oh.make_node("Sigmoid", ["xy"], ["sy"], name="n1"),
+            oh.make_node("Add", ["sy", "one"], ["C"], name="n2"),
+            oh.make_node("Cast", ["C"], ["X999"], to=999, name="failing"),
+            oh.make_node("CastLike", ["X999", "Y"], ["Z"], name="n4"),
+        ],
+        "nd",
+        [
+            oh.make_tensor_value_info("X", TFLOAT, ["a", "b", "c"]),
+            oh.make_tensor_value_info("Y", TFLOAT, ["a", "b", "c"]),
+        ],
+        [oh.make_tensor_value_info("Z", TFLOAT, ["a", "b", "c"])],
+        [from_array_extended(np.array([1], dtype=np.float32), name="one")],
+    ),
+    opset_imports=[oh.make_opsetid("", 18)],
+    ir_version=9,
+)
+
+# %%
+# We check it is failing.
+
+try:
+    onnxruntime.InferenceSession(model.SerializeToString(), providers=["CPUExecutionProvider"])
+except onnxruntime.capi.onnxruntime_pybind11_state.Fail as e:
+    print(e)
+
+
+# %%
+# Shape inference
+# +++++++++++++++
+#
+# Building submodels requires knowing the output types.
+# We run shape inference on the model.
+shaped_model = onnx.shape_inference.infer_shapes(model)
+
+
+# %%
+# Looping over the nodes
+# ++++++++++++++++++++++
+#
+#
+
+failing = investigate_onnxruntime_issue(shaped_model, providers="cpu", verbose=1, quiet=True)
+
+# %%
+# Let's print the failing node.
+print(failing)
+
+
+# %%
+# Detect an issue with shape inference
+# ++++++++++++++++++++++++++++++++++++
+#
+# We could have caught the error sooner by asking shape inference
+# to raise an exception if one node could not be processed.
+# It means either the node is a custom node
+# and shape inference has no way to guess the output type and shape
+# for this node, or shape inference failed.
+
+try:
+    onnx.shape_inference.infer_shapes(model, strict_mode=True)
+except onnx.onnx_cpp2py_export.shape_inference.InferenceError as e:
+    print(e)
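The whole example reduces to one loop: if the full model crashes, run the submodel ending at each node's outputs until one fails. A rough sketch of that search, assuming shape inference already ran so every intermediate result is typed; ``find_failing_node`` is a hypothetical helper, not the library's implementation:

import onnx
import onnx.utils
import onnxruntime

def find_failing_node(shaped_model: onnx.ModelProto):
    # the graph inputs are reused as inputs of every submodel
    input_names = [i.name for i in shaped_model.graph.input]
    extractor = onnx.utils.Extractor(shaped_model)
    for i, node in enumerate(shaped_model.graph.node):
        # submodel running nodes 0..i; shape inference supplies the output types
        submodel = extractor.extract_model(input_names, list(node.output))
        try:
            onnxruntime.InferenceSession(
                submodel.SerializeToString(), providers=["CPUExecutionProvider"]
            )
        except Exception:
            return i, node  # the first node onnxruntime refuses
    return None

On the model above this stops at the ``Cast`` node with ``to=999``.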

_doc/index.rst

Lines changed: 5 additions & 3 deletions
@@ -45,8 +45,11 @@ Source are `sdpython/onnx-diagnostic
 CHANGELOGS
 license

+**Enlightening Examples**

-**Some usefuls tools**
+* :ref:`l-plot-tiny-llm-export`
+
+**Some Useful Tools**

 .. code-block:: python

@@ -135,7 +138,6 @@ Size of the package:
 gr = df[["dir", "ext", "lines", "chars"]].groupby(["ext", "dir"]).sum()
 print(gr)

-Older versions
-++++++++++++++
+**Older versions**

 * `0.1.0 <../v0.1.0/index.html>`_

_unittests/ut_xrun_doc/test_ort_session.py

Lines changed: 11 additions & 0 deletions
@@ -182,6 +182,17 @@ def test_investigate_onnxruntime_issue_torch(self):
             dump_filename="test_investigate_onnxruntime_issue_torch.onnx",
         )

+    @hide_stdout()
+    def test_investigate_onnxruntime_issue_torch_quiet(self):
+        model, feeds, _expected = self._get_model()
+        investigate_onnxruntime_issue(
+            model,
+            feeds=feeds,
+            verbose=10,
+            dump_filename="test_investigate_onnxruntime_issue_torch.onnx",
+            quiet=True,
+        )
+
     @hide_stdout()
     def test_investigate_onnxruntime_issue_numpy(self):
         model, feeds, _expected = self._get_model()

onnx_diagnostic/ort_session.py

Lines changed: 46 additions & 7 deletions
@@ -408,6 +408,7 @@ def investigate_onnxruntime_issue(
     verbose: int = 0,
     dump_filename: Optional[str] = None,
     infer_shapes: bool = True,
+    quiet: bool = False,
 ):
     """
     Investigates a crashing model. It tries every node until
@@ -433,6 +434,8 @@ def investigate_onnxruntime_issue(
     :param verbose: verbosity level
     :param dump_filename: if not None, the function dumps the last model run
     :param infer_shapes: run shape inference
+    :param quiet: if True, catches the exception and returns the failing node,
+        if False, lets the exception be raised

     The most simple use:

@@ -531,7 +534,19 @@ def investigate_onnxruntime_issue(
                 f"{', '.join(node.output)}"
             )
         e = onnx.utils.Extractor(onx)
-        extracted = e.extract_model(input_names, node.output)
+        if quiet:
+            try:
+                extracted = e.extract_model(input_names, node.output)
+            except Exception as e:
+                if verbose > 0:
+                    print(
+                        f"[investigate_onnxruntime_issue] cannot extract "
+                        f"model at node {i} due to {e}"
+                    )
+                return node
+        else:
+            extracted = e.extract_model(input_names, node.output)
+
         if dump_filename:
             if verbose > 1:
                 print(f"[investigate_onnxruntime_issue] save into {dump_filename}")
@@ -540,11 +555,11 @@ def investigate_onnxruntime_issue(
         if verbose > 1:
             print("[investigate_onnxruntime_issue] create the session")

-        if onnx_to_session:
-            sess = onnx_to_session(onx)
-        else:
-            sess = cls(
-                extracted,
+        def _make_session(proto):
+            if onnx_to_session:
+                return onnx_to_session(proto)
+            return cls(
+                proto,
                 session_options=session_options,
                 providers=providers,
                 nvtx=nvtx,
@@ -557,6 +572,19 @@ def investigate_onnxruntime_issue(
                 use_training_api=use_training_api,
             )

+        if quiet:
+            try:
+                sess = _make_session(extracted)
+            except Exception as e:
+                if verbose > 0:
+                    print(
+                        f"[investigate_onnxruntime_issue] cannot create session "
+                        f"at node {i} due to {e}"
+                    )
+                return node
+        else:
+            sess = _make_session(extracted)
+
         if not feeds:
             if verbose > 1:
                 print("[investigate_onnxruntime_issue] session created")
@@ -565,7 +593,18 @@ def investigate_onnxruntime_issue(
         if verbose > 1:
             print("[investigate_onnxruntime_issue] running session")

-        sess.run(None, feeds)
+        if quiet:
+            try:
+                sess.run(None, feeds)
+            except Exception as e:
+                if verbose > 0:
+                    print(
+                        f"[investigate_onnxruntime_issue] cannot run session "
+                        f"at node {i} due to {e}"
+                    )
+                return node
+        else:
+            sess.run(None, feeds)

     if verbose > 0:
         print("[investigate_onnxruntime_issue] done.")
