resolve conflicts

titaiwangms · titaiwangms · commit 5c55755d25fc · 2025-09-22T20:29:41.000Z
diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
@@ -1,6 +1,12 @@
 Change Logs
 ===========
 
+0.7.12
+++++++
+
+* :pr:`227`: better support for ``model_id//pretrained``, adds an estimated speedup statistic when running the validate command
+* :pr:`226`: fix input order for models created with modelbuilder
+
 0.7.11
 ++++++
 
diff --git a/_doc/conf.py b/_doc/conf.py
@@ -122,6 +122,7 @@ def linkcode_resolve(domain, info):
     ("py:class", "CacheProcessor"),
     ("py:class", "default=sklearn.utils.metadata_routing.UNCHANGED"),
     ("py:class", "diffusers.models.unets.unet_2d_condition.UNet2DConditionOutput"),
+    ("py:class", "MambaCache"),
     ("py:class", "ModelProto"),
     ("py:class", "Model"),
     ("py:class", "Module"),
diff --git a/_doc/index.rst b/_doc/index.rst
@@ -239,9 +239,8 @@ The function replaces dynamic dimensions defined as strings by
 Older versions
 ==============
 
+* `0.7.12 <../v0.7.12/index.html>`_
 * `0.7.11 <../v0.7.11/index.html>`_
-* `0.7.10 <../v0.7.10/index.html>`_
-* `0.7.9 <../v0.7.9/index.html>`_
 * `0.6.3 <../v0.6.3/index.html>`_
 * `0.5.0 <../v0.5.0/index.html>`_
 * `0.4.4 <../v0.4.4/index.html>`_
diff --git a/_unittests/ut_torch_models/test_validate_whole_models.py b/_unittests/ut_torch_models/test_validate_whole_models.py
@@ -195,17 +195,18 @@ def test_k_filter_inputs(self):
     @ignore_warnings(FutureWarning)
     @requires_transformers("4.51")
     def test_l_validate_model_modelbuilder(self):
-        mid = "arnir0/Tiny-LLM"
+        mid = "microsoft/phi-2"
         summary, data = validate_model(
             mid,
             do_run=True,
             verbose=10,
             exporter="modelbuilder",
             dump_folder="dump_test/validate_model_modelbuilder",
+            patch=True,
         )
         self.assertIsInstance(summary, dict)
         self.assertIsInstance(data, dict)
-        self.assertLess(summary["disc_onnx_ort_run_abs"], 1e-4)
+        self.assertLess(summary["disc_onnx_ort_run_abs"], 3e-2)
         onnx_filename = data["onnx_filename"]
         self.assertExists(onnx_filename)
 
diff --git a/onnx_diagnostic/__init__.py b/onnx_diagnostic/__init__.py
@@ -3,5 +3,5 @@
 Functions, classes to dig into a model when this one is right, slow, wrong...
 """
 
-__version__ = "0.7.11"
+__version__ = "0.7.12"
 __author__ = "Xavier Dupré"
diff --git a/onnx_diagnostic/helpers/rt_helper.py b/onnx_diagnostic/helpers/rt_helper.py
@@ -3,7 +3,6 @@
 import onnx
 import torch
 from .helper import string_type, flatten_object
-from .onnx_helper import dtype_to_tensor_dtype
 from .cache_helper import is_cache_dynamic_registered
 
 
@@ -23,6 +22,7 @@ def make_feeds(
     use_numpy: bool = False,
     copy: bool = False,
     check_flatten: bool = True,
+    is_modelbuilder: bool = False,
 ) -> Dict[str, Union[torch.Tensor, np.ndarray]]:
     """
     Serializes the inputs to produce feeds expected
@@ -35,10 +35,15 @@ def make_feeds(
         by ``OrtValue``
     :param check_flatten: if True, checks the ``torch.utils._pytree.tree_flatten``
         returns the same number of outputs
+    :param is_modelbuilder: if True, the exporter is ModelBuilder, and we need to reorder
+        the past_key_values inputs to match the expected order, and get rid of position_ids.
     :return: feeds dictionary
     """
-    # position_ids is a special case because ModelBuilder does not usually use it.
-    # We use types to detect the best inputs.
+    # NOTE: position_ids is a special case because ModelBuilder does not usually use it,
+    # because it's fused into rotary embedding in GQA.
+    if is_modelbuilder and isinstance(inputs, dict):
+        inputs.pop("position_ids", None)  # Drop 'position_ids' if present; no error if absent.
+
     flat = flatten_object(inputs, drop_keys=True)
     assert (
         not check_flatten
@@ -76,39 +81,6 @@ def make_feeds(
         f"\n-- inputs={string_type(inputs, with_shape=True)}"
         f"\n-- names={names}"
     )
-    if len(names) < len(flat) and (
-        isinstance(proto, onnx.ModelProto) or hasattr(proto, "get_inputs")
-    ):
-
-        typed_names = (
-            [(i.name, i.type.tensor_type.elem_type) for i in proto.graph.input]
-            if isinstance(proto, onnx.ModelProto)
-            else [(i.name, name_type_to_onnx_dtype(i.type)) for i in proto.get_inputs()]
-        )
-
-        new_flat = []
-        pos = 0
-        for _name, dtype in typed_names:
-            assert isinstance(
-                dtype, int
-            ), f"Unexpected value for dtype={dtype!r}, type(proto)={type(proto)}"
-            itype = dtype_to_tensor_dtype(flat[pos].dtype)
-            while dtype != itype:
-                pos += 1
-                if pos >= len(flat):
-                    break
-                itype = dtype_to_tensor_dtype(flat[pos].dtype)
-            if pos >= len(flat):
-                break
-            new_flat.append(flat[pos])
-            pos += 1
-        assert len(new_flat) == len(names), (
-            f"Unable to align expected input {names} with the given input, "
-            f"type(proto)={type(proto)}"
-            f"\n-- inputs: {string_type(inputs, with_shape=True)}"
-            f"\n-- typed_names: {typed_names}"
-        )
-        flat = new_flat
 
     if copy:
         flat = [t.copy() if hasattr(t, "copy") else t.clone() for t in flat]
@@ -122,4 +94,49 @@ def make_feeds(
         elif isinstance(i, float):
             i = np.array(i, dtype=np.float32)
         new_flat.append(i)
+
+    # NOTE: model builder has a different order for past_key_values
+    #       we need to reorder them to match the expected order
+    if is_modelbuilder:
+        # We assume the cache inputs have "past_key_values" in their names
+        # when the exporter is modelbuilder
+        non_past_kv_input_names = [n for n in names if "past_key_values" not in n]
+        past_kv_names = [n for n in names if "past_key_values" in n]
+        reorder_past_kv_names = reorder_modelbuilder_cache_to_torch(past_kv_names)
+        names = non_past_kv_input_names + reorder_past_kv_names
     return dict(zip(names, new_flat))
+
+
+def reorder_modelbuilder_cache_to_torch(past_kv: List[Any]) -> List[Any]:
+    """
+    Reorders the past_kvs of ModelBuilder to match the order
+    expected by PyTorch exported models.
+
+    .. note::
+        This function can take either the names or the actual tensors
+        as long as they are in a list.
+
+    Conceptually,
+
+    From::
+
+        [past_key_values.0.key, past_key_values.0.value,
+        past_key_values.1.key, past_key_values.1.value, ...]
+
+    To::
+
+        [past_key_values.0.key, past_key_values.1.key,
+        ..., past_key_values.0.value, past_key_values.1.value, ...]
+
+    :param past_kv: list of flattened inputs
+    :return: reordered list of flattened inputs
+    """
+    total_len = len(past_kv)
+    if total_len % 2 != 0:
+        raise ValueError("The length of past_key_values should be even.")
+    keys = []
+    values = []
+    for i in range(0, total_len, 2):
+        keys.append(past_kv[i])
+        values.append(past_kv[i + 1])
+    return keys + values
diff --git a/onnx_diagnostic/torch_models/hghub/model_inputs.py b/onnx_diagnostic/torch_models/hghub/model_inputs.py
@@ -189,7 +189,7 @@ def get_untrained_model_with_inputs(
                     f"subfolder={subfolder!r}"
                 )
             model = transformers.AutoModel.from_pretrained(
-                model_id, subfolder=subfolder, trust_remote_code=True, **mkwargs
+                model_id, subfolder=subfolder or "", trust_remote_code=True, **mkwargs
             )
             if verbose:
                 print(
diff --git a/onnx_diagnostic/torch_models/validate.py b/onnx_diagnostic/torch_models/validate.py
@@ -11,7 +11,7 @@
 from ..export import CoupleInputsDynamicShapes
 from ..helpers import max_diff, string_type, string_diff
 from ..helpers.helper import flatten_object
-from ..helpers.rt_helper import make_feeds
+from ..helpers.rt_helper import make_feeds, reorder_modelbuilder_cache_to_torch
 from ..helpers.torch_helper import to_any, torch_deepcopy
 from ..helpers.cache_helper import flatten_unflatten_for_dynamic_shapes
 from ..tasks import random_input_kwargs
@@ -264,14 +264,18 @@ def shrink_config(cfg: Dict[str, Any]) -> Dict[str, Any]:
     return new_cfg
 
 
-def _preprocess_model_id(model_id, subfolder):
+def _preprocess_model_id(
+    model_id: str, subfolder: Optional[str], same_as_pretrained: bool, use_pretrained: bool
+) -> Tuple[str, Optional[str], bool, bool]:
     if subfolder or "//" not in model_id:
-        return model_id, subfolder
+        return model_id, subfolder, same_as_pretrained, use_pretrained
     spl = model_id.split("//")
+    if spl[-1] == "pretrained":
+        return _preprocess_model_id("//".join(spl[:-1]), "", True, True)
     if spl[-1] in {"transformer", "vae"}:
         # known subfolder
-        return "//".join(spl[:-1]), spl[-1]
-    return model_id, subfolder
+        return "//".join(spl[:-1]), spl[-1], same_as_pretrained, use_pretrained
+    return model_id, subfolder, same_as_pretrained, use_pretrained
 
 
 def validate_model(
@@ -384,7 +388,12 @@ def validate_model(
     if ``runtime == 'ref'``,
     ``orteval10`` increases the verbosity.
     """
-    model_id, subfolder = _preprocess_model_id(model_id, subfolder)
+    model_id, subfolder, same_as_pretrained, use_pretrained = _preprocess_model_id(
+        model_id,
+        subfolder,
+        same_as_pretrained=same_as_pretrained,
+        use_pretrained=use_pretrained,
+    )
     if isinstance(patch, bool):
         patch_kwargs = (
             dict(patch_transformers=True, patch_diffusers=True, patch=True)
@@ -812,6 +821,8 @@ def validate_model(
                 )
                 summary.update(summary_valid)
 
+    _compute_final_statistics(summary)
+
     if verbose:
         print("[validate_model] -- done (final)")
     if dump_stats:
@@ -824,15 +835,24 @@ def validate_model(
 def compute_statistics(onnx_filename: str) -> Dict[str, Union[float, int]]:
     """Computes some statistics on the model itself."""
     onx = onnx.load(onnx_filename, load_external_data=False)
+    cache_functions = {(f.domain, f.name): f for f in onx.functions}
+    local_domains = set(f.domain for f in onx.functions)
 
     def node_iter(proto):
         if isinstance(proto, onnx.ModelProto):
-            yield from node_iter(proto.graph)
             for f in proto.functions:
                 yield from node_iter(f)
+            yield from node_iter(proto.graph)
         elif isinstance(proto, (onnx.FunctionProto, onnx.GraphProto)):
             for node in proto.node:
                 yield node
+
+                # Let's inline the function
+                key = node.domain, node.op_type
+                if key in cache_functions:
+                    yield from node_iter(cache_functions[key])
+
+                # Let's continue
                 for att in node.attribute:
                     if att.type == onnx.AttributeProto.GRAPH:
                         yield from node_iter(att.g)
@@ -850,6 +870,11 @@ def node_iter(proto):
             n_nodes += 1
             if proto.op_type != "Constant":
                 n_nodes_nocst += 1
+            if proto.domain in local_domains:
+                key = "n_node_local_function"
+                if key not in counts:
+                    counts[key] = 0
+                counts[key] += 1
         else:
             key = f"n_node_initializer_{proto.data_type}"
 
@@ -1298,7 +1323,13 @@ def _mk(key, flavour=flavour):
             print(
                 f"[validate_onnx_model] inputs={string_type(data[k_input], with_shape=True)}"
             )
-        feeds = make_feeds(sess, data[k_input], use_numpy=True, check_flatten=False)
+        feeds = make_feeds(
+            sess,
+            data[k_input],
+            use_numpy=True,
+            check_flatten=False,
+            is_modelbuilder=data["exporter"] == "modelbuilder",
+        )
         if verbose:
             print(f"[validate_onnx_model] ort inputs={string_type(feeds, with_shape=True)}")
         summary[_mk(f"onnx_ort_inputs{suffix}")] = string_type(feeds, with_shape=True)
@@ -1318,6 +1349,13 @@ def _mk(key, flavour=flavour):
             repeat=repeat,
             warmup=warmup,
         )
+        # NOTE: modelbuilder uses a different order for past_kv outputs
+        if data["exporter"] == "modelbuilder":
+            logits = got[:1]
+            past_key_values = got[1:]
+            reorder_past_key_values = reorder_modelbuilder_cache_to_torch(past_key_values)
+            got = logits + reorder_past_key_values
+
         if f"ERR_{_mk(f'time_onnx_ort_run{suffix}')}" in summary:
             return summary, data
 
@@ -1358,7 +1396,7 @@ def call_torch_export_onnx(
     :return: two dictionaries, one with some metrics,
         another one with whatever the function produces
     """
-    available = {None, "", "ir", "os_ort"}
+    available = {None, "", "ir", "os_ort", "ir+default"}
     assert (
         optimization in available
     ), f"unexpected value for optimization={optimization}, available={available}"
@@ -1448,11 +1486,31 @@ def call_torch_export_onnx(
         print(epo)
         print("[call_torch_export_onnx] -- End of ONNXProgram")
 
-    if optimization in {"ir", "os_ort"}:
+    if optimization in {"ir", "os_ort", "ir+default"}:
         if verbose:
             print(f"[call_torch_export_onnx] starts optimization={optimization!r}...")
         if optimization == "ir":
             label, f_optim = "export_onnx_opt_ir", (lambda epo=epo: epo.optimize())
+        elif optimization == "ir+default":
+            import onnxscript
+            from experimental_experiment.xbuilder import GraphBuilder, OptimizationOptions
+
+            def _ir_default_opt(epo):
+                onnxscript.optimizer.optimize_ir(epo.model)
+                onx = epo.model_proto
+                # not very efficient
+                gr = GraphBuilder(
+                    onx,
+                    infer_shapes_options=True,
+                    optimization_options=OptimizationOptions(patterns="default"),
+                )
+                cont = gr.to_onnx(large_model=True)
+                epo.model = cont.to_ir()
+
+            label, f_optim = "export_onnx_opt_ir_default", (
+                lambda epo=epo: _ir_default_opt(epo)
+            )
+
         else:
             import onnxscript
             import onnxscript.rewriter.ort_fusions as ort_fusions
@@ -1851,3 +1909,21 @@ def run_ort_fusion(
         f"opt_ort_{model_type}_duration": duration,
         f"opt_ort_{model_type}_duration_save": d,
     }, {f"opt_ort_{model_type}": output_path}
+
+
+def _compute_final_statistics(summary: Dict[str, Any]):
+    """
+    Updates the dictionary of statistics in place. It adds:
+
+    - speedup
+    """
+    stats = {}
+    if (
+        "time_run_latency" in summary
+        and "time_run_onnx_ort_latency" in summary
+        and summary["time_run_onnx_ort_latency"] > 0
+    ):
+        stats["stat_estimated_speedup_ort"] = (
+            summary["time_run_latency"] / summary["time_run_onnx_ort_latency"]
+        )
+    summary.update(stats)

Original file line number	Diff line number	Diff line change
`@@ -189,7 +189,7 @@ def get_untrained_model_with_inputs(`
`189`	`189`	`f"subfolder={subfolder!r}"`
`190`	`190`	`)`
`191`	`191`	`model = transformers.AutoModel.from_pretrained(`
`192`		`- model_id, subfolder=subfolder, trust_remote_code=True, **mkwargs`
	`192`	`+ model_id, subfolder=subfolder or "", trust_remote_code=True, **mkwargs`
`193`	`193`	`)`
`194`	`194`	`if verbose:`
`195`	`195`	`print(`