better

xadupre · xadupre · commit e8d2d12e5145 · 2025-11-17T15:29:48.000Z
diff --git a/_unittests/ut_tasks/try_export.py b/_unittests/ut_tasks/try_export.py
@@ -115,6 +115,15 @@ def _config_reduction(config, task):
             verbose=1,
             stop_if_static=2,
         ):
+            if exporter == "onnx-dynamo":
+                # The exported program in ONNXProgram cannot be restored.
+                ep2 = torch.export.export(
+                    model.visual,
+                    (),
+                    kwargs=export_inputs,
+                    dynamic_shapes=self.use_dyn_not_str(dynamic_shapes),
+                )
+                torch.export.save(ep2, f"{fileep}.backup.pt2")
             to_onnx(
                 model.visual,
                 kwargs=export_inputs,
@@ -127,7 +136,10 @@ def _config_reduction(config, task):
                 optimize=True,
             )
 
-        pt2_file = f"{fileep}.ep.pt2"
+        pt2_files = [f"{fileep}.backup.pt2", f"{fileep}.ep.pt2", f"{fileep}.pt2"]
+        pt2_file = [f for f in pt2_files if os.path.exists(f)]
+        assert pt2_file, f"Unable to find an existing file among {pt2_files}"
+        pt2_file = pt2_file[0]
         # self.assertExists(pt2_file)
         # ep = torch.export.load(pt2_file)
         # diff = self.max_diff(ep.module()(**export_inputs), model.visual(**export_inputs))
diff --git a/onnx_diagnostic/export/api.py b/onnx_diagnostic/export/api.py
@@ -112,6 +112,10 @@ def to_onnx(
             ort_fusions.optimize_for_ort(epo.model)
         if filename:
             epo.save(filename, external_data=True)
+        if save_ep:
+            if isinstance(save_ep, tuple):
+                save_ep = save_ep[0]
+            torch.export.save(epo.exported_program, f"{save_ep}.pt2")
         return epo
 
     if exporter == "modelbuilder":
diff --git a/onnx_diagnostic/ext_test_case.py b/onnx_diagnostic/ext_test_case.py
@@ -1301,7 +1301,7 @@ def assert_onnx_disc(
 
                 ep = torch.export.load(ep)
             ep_inputs = copy.deepcopy(inputs) if copy_inputs else inputs
-            ep_model = ep.module()
+            ep_model = ep.module()  # type: ignore[union-attr]
             ep_expected = (
                 ep_model(*copy.deepcopy(ep_inputs))
                 if isinstance(ep_inputs, tuple)
@@ -1356,6 +1356,11 @@ def max_diff(self, *args, **kwargs):
 
         return max_diff(*args, **kwargs)
 
+    def use_dyn_not_str(self, *args, **kwargs):
+        from onnx_diagnostic.torch_export_patches.patch_inputs import use_dyn_not_str
+
+        return use_dyn_not_str(*args, **kwargs)
+
     def subloop(self, *args, verbose: int = 0):
         "Loops over elements and calls :meth:`unittests.TestCase.subTest`."
         if len(args) == 1:
diff --git a/onnx_diagnostic/helpers/ort_session.py b/onnx_diagnostic/helpers/ort_session.py
@@ -134,7 +134,13 @@ def __init__(
 
         self.sess = sess
         self.input_names = [i.name for i in sess.get_inputs()]
+        assert (
+            "" not in self.input_names
+        ), f"Input name cannot be empty but input_names={self.input_names}"
         self.output_names = [i.name for i in sess.get_outputs()]
+        assert (
+            "" not in self.output_names
+        ), f"Output name cannot be empty but output_names={self.output_names}"
         self.input_shapes = [i.shape for i in sess.get_inputs()]
         self.output_shapes = [i.shape for i in sess.get_outputs()]
         self.input_types = [i.type for i in sess.get_inputs()]
@@ -497,6 +503,7 @@ def run_dlpack(
         values = ORTC.OrtValueVector()
         device = -1
         for k, v in feeds.items():
+            assert k != "", f"Input cannot be empty but feeds names={list(feeds)}"
             device = max(device, v.get_device())
             assert hasattr(v, "__dlpack__"), f"class {type(v)} should be serialized"
             if not v.is_contiguous():
diff --git a/onnx_diagnostic/reference/ort_evaluator.py b/onnx_diagnostic/reference/ort_evaluator.py
@@ -564,18 +564,14 @@ def _run(self, node: NodeProto, inputs: List[Any], results: Dict[str, Any]) -> L
             onx, sess = self._get_sess(node, inputs)
             self._cache[key] = onx, sess
 
-        feeds = dict(zip(node.input, inputs))
-        if "" in feeds:
-            cls = None
-            for k, v in feeds.items():
-                if k != "":
-                    cls = v.__class__
-                    break
-            assert (
-                cls is not None
-            ), f"Unable to get input class (array or tensor), feeds={string_type(feeds)}"
-            feeds[""] = cls([0])
-
+        feeds = {}
+        for i, val in zip(node.input, inputs):
+            if i == "":
+                assert (
+                    val is None
+                ), f"input name={i!r} but val={string_type(val, with_shape=True)}"
+                continue
+            feeds[i] = val
         assert hasattr(sess, "run"), f"Missing method run for type {type(sess)}"
         outputs = list(sess.run(None, feeds))
         assert isinstance(outputs, list), f"Unexpected type for outputs {type(outputs)}"
diff --git a/onnx_diagnostic/torch_onnx/sbs.py b/onnx_diagnostic/torch_onnx/sbs.py
@@ -567,10 +567,11 @@ def _loop_cmp(
             print(f"[run_aligned-nx] +inp: {inp.name}: {string_type(v, **str_kws)}")
 
     placeholders = {node.name for node in ep.graph.nodes if node.op == "placeholder"}
-    ep_state_dict = {**ep.state_dict, **dict(ep.named_buffers())}
+    ep_state_dict = {**ep.state_dict, **dict(ep.named_buffers(), **ep.tensor_constants)}
     placeholders_to_state_dict = {
         **{f"p_{name.replace('.', '_')}": name for name in ep.state_dict},
         **{f"b_{name.replace('.', '_')}": name for name, _ in ep.named_buffers()},
+        **{f"c_{name.replace('.', '_')}": name for name in ep.tensor_constants},
     }
     for n in onnx_results:
         if n not in placeholders:
@@ -588,6 +589,7 @@ def _loop_cmp(
     else:
         loop = list(enumerate(ep_graph_nodes))
 
+    already_run = set()
     ep_durations = {}
     yielded_nodes = 0
     max_abs = 0
@@ -641,8 +643,8 @@ def _loop_cmp(
                 yield record
             else:
                 assert node.name in placeholders_to_state_dict, (
-                    f"Unable to find placeholder {node.name!r} in "
-                    f"{sorted(placeholders_to_state_dict)}"
+                    f"Unable to find placeholder {node.name!r} (node.op={node.op!r}), "
+                    f"existing: {sorted(placeholders_to_state_dict)}"
                 )
                 torch_results[node.name] = ep_state_dict[placeholders_to_state_dict[node.name]]
                 if verbose > 1:
@@ -683,6 +685,8 @@ def _loop_cmp(
             continue
 
         for i_onnx in range(last_position, max_pos + 1):
+            if i_onnx in already_run:
+                continue
             node = onx.graph.node[i_onnx]
             if verbose > 1:
                 print(
@@ -695,9 +699,16 @@ def _loop_cmp(
                     f"mapped {yielded_nodes} maxabs {max_abs:1.5f}"
                 )
             ref = run_cls(node, **run_cls_kwargs)
-            feeds = {k: onnx_results[k] for k in node.input}
+            feeds = {k: onnx_results[k] for k in node.input if k}
+            assert "" not in feeds, f"Unexpected feeds={string_type(feeds, **str_kws)}"
             begin = time.perf_counter()
-            res = ref.run(None, feeds)  # type: ignore[attr-defined]
+            try:
+                res = ref.run(None, feeds)  # type: ignore[attr-defined]
+            except Exception as e:
+                raise RuntimeError(
+                    f"Unable to run node {node.op_type}, domain={node.domain} "
+                    f"with inputs={node.input}, feeds={string_type(feeds, **str_kws)}"
+                ) from e
             duration = time.perf_counter() - begin
             assert (
                 not has_cuda
@@ -748,6 +759,7 @@ def _loop_cmp(
                     if tmp.err_abs is not None:
                         max_abs = max(max_abs, tmp.err_abs)
                     yield tmp
+            already_run.add(i_onnx)
 
         last_position = max_pos + 1
 
@@ -758,14 +770,17 @@ def _loop_cmp(
             f"to {len(onx.graph.node)}"
         )
     for i_onnx in range(last_position, len(onx.graph.node)):
+        if i_onnx in already_run:
+            continue
         node = onx.graph.node[i_onnx]
         if verbose > 1:
             print(
                 f"[run_aligned] run onx.graph.node[{i_onnx}]: "
                 f"{node.op_type}({', '.join(node.input)}) -> {', '.join(node.output)}"
             )
         ref = run_cls(node, **run_cls_kwargs)
-        feeds = {k: onnx_results[k] for k in node.input}
+        feeds = {k: onnx_results[k] for k in node.input if k}
+        assert "" not in feeds, f"Unexpected feeds={string_type(feeds, **str_kws)}"
         begin = time.perf_counter()
         res = ref.run(None, feeds)  # type: ignore[attr-defined]
         duration = time.perf_counter() - begin
@@ -800,6 +815,8 @@ def _loop_cmp(
                 if tmp.err_abs is not None:
                     max_abs = max(max_abs, tmp.err_abs)
                 yield tmp
+        already_run.add(i_onnx)
+
     if verbose:
         print(f"[run_aligned] done with {yielded_nodes} mapped nodes")
         print(f"[run_aligned] max absolution error={max_abs}")