changes

xadupre · xadupre · commit 0e9b867b47f8 · 2025-06-03T00:01:35.000+02:00
diff --git a/onnx_diagnostic/reference/torch_evaluator.py b/onnx_diagnostic/reference/torch_evaluator.py
@@ -44,6 +44,7 @@ class TorchOnnxEvaluator:
     :param providers: where to run the model
     :param opsets: needed if proto is a graph
     :param functions: known local functions
+    :param verbose: verbosity level, any value > 0 prints the executed kernels and the values
 
     The class holds the following attributes:
 
@@ -56,6 +57,7 @@ class TorchOnnxEvaluator:
     * `last_used`: contains the list of intermediate results,
        to remove after every node execution,
        this avoid the memory to grow too much
+    * `functions`: local functions, maps `(domain, name)` to the evaluator running the function
 
     The class is not multithreaded. `runtime_info` gets updated
     by the the class. The list of available kernels is returned by function
@@ -68,12 +70,15 @@ def __init__(
         providers: Tuple[str, ...] = ("CPUExecutionProvider",),
         opsets: Optional[Dict[str, int]] = None,
         local_functions: Optional[Dict[Tuple[str, str], "TorchOnnxEvaluator"]] = None,
+        verbose: int = 0,
     ):
+        # verbose=0 (the default) is valid and means silent, it must not be asserted to be truthy
         self.providers = providers
         self.constants: Dict[str, torch.Tensor] = {}
         self.kernels: List[Optional[torch_ops.OpRun]] = []
         self.functions = local_functions.copy() if local_functions else {}
         self.CPU = torch.tensor([0]).to("cpu").device
+        self.verbose = verbose
         if "CUDAExecutionProvider" in providers:
             self.CUDA = torch.tensor([0]).to("cuda").device
             self.default_device = self.CUDA
@@ -87,8 +92,11 @@ def __init__(
             assert not proto.graph.sparse_initializer, "sparse_initializer not support yet"
             self.opsets = {d.domain: d.version for d in proto.opset_import}
             for f in proto.functions:
-                self.functions[f.domain, f.name] = TorchOnnxEvaluator(
-                    f, providers=providers, local_functions=self.functions
+                self.functions[f.domain, f.name] = self.__class__(
+                    f,
+                    providers=providers,
+                    local_functions=self.functions,
+                    verbose=self.verbose,
                 )
             self._build_initializers(proto.graph.initializer)
             self._build_initializers(proto.graph.node)
@@ -206,22 +214,36 @@ def run(
             if not r.has_value:
                 r.set_value(
                     torch_ops.OpRunValue(
-                        v.to(self.CUDA) if r.is_shape and self.on_cuda else v, True
+                        v.to(self.CUDA) if not r.is_shape and self.on_cuda else v,
+                        is_constant=True,
+                        may_cpu=len(v.shape) == 1 and v.numel() < 8 and v.dtype == torch.int64,
                     )
                 )
+            if self.verbose:
+                print(f"+C {r.name}: {r.string_type()}")
 
         # inputs
         for k, v in feeds.items():
             r = self.runtime_info[k]
             r.set_value(
                 torch_ops.OpRunValue(
-                    v.to(self.CUDA) if r.is_shape and self.on_cuda else v, False
+                    v.to(self.CUDA) if not r.is_shape and self.on_cuda else v,
+                    is_constant=False,
+                    may_cpu=len(v.shape) == 1 and v.numel() < 8 and v.dtype == torch.int64,
                 )
             )
+            if self.verbose:
+                print(f"+I {r.name}: {r.string_type()}")
 
         # node execution
         for it, kernel in enumerate(self.kernels):
             if kernel is not None:
+                if self.verbose:
+                    print(
+                        f"{kernel.__class__.__name__}"
+                        f"({', '.join(kernel.input)}) -> "
+                        f"{', '.join(kernel.output)}"
+                    )
                 # kernel execution
                 inputs = [(self.runtime_info[i].value if i else None) for i in kernel.input]
                 if kernel.has_subgraphs():
@@ -236,26 +258,42 @@ def run(
                     )
                     for name, t in zip(kernel.output, res):
                         self.runtime_info[name].set_value(t)
+                    if self.verbose:
+                        for name in kernel.output:
+                            print(f"+R {name}: {self.runtime_info[name].string_type()}")
                 else:
                     assert isinstance(
                         res, torch_ops.OpRunValue
                     ), f"Unexpected output type {type(res)} for kernel {type(kernel)}."
                     self.runtime_info[kernel.output[0]].set_value(res)
+                    if self.verbose:
+                        print(
+                            f"+R {kernel.output[0]}: "
+                            f"{self.runtime_info[kernel.output[0]].string_type()}"
+                        )
 
             # free intermediate results
             for name in self.last_used[it]:
                 self.runtime_info[name].clean_value()
+                if self.verbose:
+                    print(f"- clean {name}")
 
         assert all(
             self.runtime_info[o].value is not None for o in outputs
         ), "Not implemented yet when one output is None."
         fres = [self.runtime_info[o].value.tensor for o in outputs]  # type: ignore[union-attr]
+        if self.verbose:
+            print(f"++ outputs {', '.join(outputs)}")
 
         # clean previous execution
         for k in feeds:
             self.runtime_info[k].clean_value()
+            if self.verbose:
+                print(f"- clean {k}")
         for o in outputs:
             self.runtime_info[o].clean_value()
+            if self.verbose:
+                print(f"- clean {o}")
 
         if use_numpy:
             return [None if a is None else a.detach().cpu().numpy() for a in fres]
@@ -285,7 +323,9 @@ def run_with_values(
             if not r.has_value:
                 r.set_value(
                     torch_ops.OpRunValue(
-                        v.to(self.CUDA) if r.is_shape and self.on_cuda else v, True
+                        v.to(self.CUDA) if r.is_shape is False and self.on_cuda else v,
+                        is_constant=True,
+                        may_cpu=len(v.shape) == 1 and v.numel() < 8 and v.dtype == torch.int64,
                     )
                 )
 
diff --git a/onnx_diagnostic/reference/torch_ops/_op_run.py b/onnx_diagnostic/reference/torch_ops/_op_run.py
@@ -11,15 +11,32 @@ class OpRunValue:
 
     :param tensor: torch.Tensor
     :param is_constant: is it a constant
+    :param may_cpu: if True, a 1D int64 tensor with less than 8 elements
+        which is not on CPU (``get_device() >= 0``) is moved to CPU
     """
 
     __slots__ = ("cached", "is_constant", "tensor")
 
-    def __init__(self, tensor, is_constant: bool = False):
-        self.tensor = tensor
+    def __init__(self, tensor, is_constant: bool = False, may_cpu: bool = False):
+        self.tensor = (
+            tensor.cpu()
+            if may_cpu
+            and len(tensor.shape) == 1
+            and tensor.numel() < 8
+            and tensor.dtype == torch.int64
+            and tensor.get_device() >= 0
+            else tensor
+        )
         self.is_constant = is_constant
         self.cached: Optional[Tuple[int, ...]] = None
 
+    def string_type(self) -> str:
+        "Returns information about the value as a string, ``CST(...)`` for a constant."
+        s = string_type(self.tensor, with_shape=True, with_min_max=True, with_device=True)
+        if self.is_constant:
+            return f"CST({s})"
+        return s
+
     def __repr__(self) -> str:
         "usual"
         if self.is_constant:
@@ -42,6 +59,19 @@ def dtype(self):
     def _tensor_as_tuple_int(self) -> Tuple[int, ...]:
         return tuple(map(int, self.tensor))
 
+    def numel(self) -> int:
+        "Returns the number of elements, 0 if there is no tensor."
+        return 0 if self.tensor is None else self.tensor.numel()
+
+    def get_device(self) -> int:
+        "Returns the device id, -1 if there is no tensor."
+        return -1 if self.tensor is None else self.tensor.get_device()
+
+    @property
+    def device(self):
+        "Returns the device of the tensor, -1 (an integer, not a device) if there is no tensor."
+        return -1 if self.tensor is None else self.tensor.device
+
     @property
     def as_tuple_int(self) -> Tuple[int, ...]:
         "value as int"
diff --git a/onnx_diagnostic/reference/torch_ops/control_flow.py b/onnx_diagnostic/reference/torch_ops/control_flow.py
@@ -28,6 +28,7 @@ def __init__(
                     providers=parent.providers,
                     opsets=parent.opsets,
                     local_functions=parent.functions,
+                    verbose=parent.verbose,
                 )
                 setattr(self, att.name, rt)
 
diff --git a/onnx_diagnostic/reference/torch_ops/generator_ops.py b/onnx_diagnostic/reference/torch_ops/generator_ops.py
@@ -1,11 +1,35 @@
+from typing import Optional
+import onnx
 import torch
 from . import OpRun, OpRunValue
 
 
 class Range_11(OpRun):
     """Range"""
 
+    @classmethod
+    def device_dependent(cls) -> bool:
+        """
+        Returns True if the kernel needs a device to be efficiently initialized.
+        """
+        return True
+
+    def __init__(
+        self,
+        node: onnx.NodeProto,
+        version: Optional[int] = None,
+        device: Optional[torch.device] = None,
+    ):
+        super().__init__(node, version)
+        self.device = device  # kept to create the output of run on this device
+
     def run(self, starts: OpRunValue, limit: OpRunValue, delta: OpRunValue) -> OpRunValue:
         return OpRunValue(
-            torch.arange(starts.tensor, limit.tensor, delta.tensor, dtype=starts.dtype)
+            torch.arange(
+                starts.tensor,
+                limit.tensor,
+                delta.tensor,
+                dtype=starts.dtype,
+                device=self.device,  # device given to the constructor, may be None
+            )
         )
diff --git a/onnx_diagnostic/torch_models/test_helper.py b/onnx_diagnostic/torch_models/test_helper.py
@@ -968,7 +968,11 @@ def _mk(key):
             )
         )
         if runtime == "onnxruntime"
-        else (lambda model, providers: TorchOnnxEvaluator(model, providers=providers))
+        else (
+            lambda model, providers: TorchOnnxEvaluator(
+                model, providers=providers, verbose=max(verbose - 1, 0)
+            )
+        )
     )
     sess = _quiet_or_not_quiet(
         quiet,
diff --git a/onnx_diagnostic/torch_onnx/runtime_info.py b/onnx_diagnostic/torch_onnx/runtime_info.py
@@ -80,7 +80,11 @@ def __repr__(self) -> str:
             if v is not None:
                 ad[att] = v
         if self.value is not None:
-            ad["value"] = string_type(self.value, with_shape=True)
+            ad["value"] = (
+                self.value.string_type()
+                if hasattr(self.value, "string_type")
+                else string_type(self.value, with_shape=True)
+            )
         msg = ", ".join(
             f"{name}={t.to_str()}" if hasattr(t, "to_str") else f"{name}={t}"
             for name, t in ad.items()
@@ -92,9 +96,29 @@ def has_value(self) -> bool:
         "Tells if value is specified."
         return self.value is not None
 
+    def string_type(self) -> str:
+        "Returns a string describing the value."
+        rows = []
+        if self.shape is not None:
+            rows.append(f"shape={self.shape}")
+        if self.is_shape is not None:
+            rows.append(f"is_shape={self.is_shape}")
+        if self.device is not None:
+            rows.append(f"device={self.device}")
+        text = f", {', '.join(rows)}" if rows else ""
+        if self.value is None:
+            return (
+                f"RuntimeValue(name={self.name!r}{text}"
+                f", dtype={self.dtype}, kind={self.kind})"
+            )
+        return (
+            f"RuntimeValue(name={self.name!r}, "
+            f"kind={self.kind}{text}, value={self.value.string_type()})"
+        )
+
     def set_value(self, value: torch.Tensor):
         """Sets the value."""
         assert value is not None, "Use clean_value to set a value to None"
         self.value = value
         if self.dtype:
             assert (

Original file line number	Diff line number	Diff line change
`@@ -28,6 +28,7 @@ def __init__(`
`28`	`28`	`providers=parent.providers,`
`29`	`29`	`opsets=parent.opsets,`
`30`	`30`	`local_functions=parent.functions,`
	`31`	`+ verbose=parent.verbose,`
`31`	`32`	`)`
`32`	`33`	`setattr(self, att.name, rt)`
`33`	`34`
Original file line number	Diff line number	Diff line change
`@@ -968,7 +968,11 @@ def _mk(key):`
`968`	`968`	`)`
`969`	`969`	`)`
`970`	`970`	`if runtime == "onnxruntime"`
`971`		`- else (lambda model, providers: TorchOnnxEvaluator(model, providers=providers))`
	`971`	`+ else (`
	`972`	`+ lambda model, providers: TorchOnnxEvaluator(`
	`973`	`+ model, providers=providers, verbose=max(verbose - 1, 0)`
	`974`	`+ )`
	`975`	`+ )`
`972`	`976`	`)`
`973`	`977`	`sess = _quiet_or_not_quiet(`
`974`	`978`	`quiet,`