Commit 8f444df ("patch")
1 parent 43aed2b
3 files changed (+43, -25 lines)

CHANGELOGS.rst: 1 addition & 0 deletions

@@ -4,6 +4,7 @@ Change Logs
 0.8.3
 +++++
 
+* :pr:`323`: drops torch 2.8 on CI
 * :pr:`322`: support rerunning onnx kernels with torch intermediate results in side-by-side
 * :pr:`314`: fix modelbuilder download needed after this change https://github.com/microsoft/onnxruntime-genai/pull/1862
 * :pr:`311`: use custom and local function to use PackedMultiHeadAttention from onnxruntime

_unittests/ut_tasks/try_export.py: 28 additions & 7 deletions

@@ -1,4 +1,5 @@
 import os
+import time
 import unittest
 import torch
 from onnx_diagnostic.ext_test_case import ExtTestCase, never_test, ignore_warnings
@@ -45,6 +46,7 @@ def test_imagetext2text_qwen_2_5_vl_instruct_visual(self):
         EXPORTER=custom \\
         python _unittests/ut_tasks/try_export.py -k qwen_2_5_vl_instruct_visual
         """
+        begin = time.perf_counter()
         device = os.environ.get("TESTDEVICE", "cpu")
         dtype = os.environ.get("TESTDTYPE", "float32")
         torch_dtype = {
@@ -87,13 +89,18 @@ def _config_reduction(config, task):
         )
         model = data["model"]
 
+        print(f"-- MODEL LOADED IN {time.perf_counter() - begin}")
+        begin = time.perf_counter()
         model = model.to(device).to(getattr(torch, dtype))
+        print(f"-- MODEL MOVED IN {time.perf_counter() - begin}")
 
         print(f"-- config._attn_implementation={model.config._attn_implementation}")
         print(f"-- model.dtype={model.dtype}")
         print(f"-- model.device={model.device}")
+        begin = time.perf_counter()
         processor = AutoProcessor.from_pretrained(model_id, use_fast=True)
         print(f"-- processor={type(processor)}")
+        print(f"-- PROCESSOR LOADED IN {time.perf_counter() - begin}")
 
         big_inputs = dict(
             hidden_states=torch.rand((14308, 1176), dtype=torch_dtype).to(device),
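
The timing hunks above all follow one pattern: reset begin = time.perf_counter() before a stage, then print the elapsed time after it. A minimal sketch of the same measurement factored into a reusable helper; this helper is an illustration only, not part of the commit, and assumes nothing beyond the standard library:

    import time
    from contextlib import contextmanager

    @contextmanager
    def timed(label: str):
        # Print wall-clock time for the enclosed block, mirroring the
        # "-- MODEL LOADED IN ..." messages added in this commit.
        begin = time.perf_counter()
        try:
            yield
        finally:
            print(f"-- {label} IN {time.perf_counter() - begin}")

    # usage:
    # with timed("MODEL LOADED"):
    #     model = load_model()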
@@ -104,14 +111,19 @@ def _config_reduction(config, task):
             hidden_states=torch.rand((1292, 1176), dtype=torch_dtype).to(device),
             grid_thw=torch.tensor([[1, 34, 38]], dtype=torch.int64).to(device),
         )
-        print("-- save inputs")
-        torch.save(big_inputs, self.get_dump_file("qwen_2_5_vl_instruct_visual.inputs.big.pt"))
-        torch.save(inputs, self.get_dump_file("qwen_2_5_vl_instruct_visual.inputs.pt"))
+        if not self.unit_test_going():
+            print("-- save inputs")
+            torch.save(
+                big_inputs, self.get_dump_file("qwen_2_5_vl_instruct_visual.inputs.big.pt")
+            )
+            torch.save(inputs, self.get_dump_file("qwen_2_5_vl_instruct_visual.inputs.pt"))
 
         print(f"-- inputs: {self.string_type(inputs, with_shape=True)}")
         # this is too long
         model_to_export = model.visual if hasattr(model, "visual") else model.model.visual
+        begin = time.perf_counter()
         expected = model_to_export(**inputs)
+        print(f"-- MODEL RUN IN {time.perf_counter() - begin}")
         print(f"-- expected: {self.string_type(expected, with_shape=True)}")
 
         filename = self.get_dump_file(
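
The new guard skips the torch.save dumps whenever self.unit_test_going() reports that the script runs inside a unit test; the method is defined in onnx_diagnostic/ext_test_case.py (third file of this commit) and delegates to a module-level function. A sketch of what such an environment-driven flag typically looks like; the variable name UNITTEST_GOING below is a hypothetical stand-in, not taken from the source:

    import os

    def unit_test_going() -> bool:
        # Hypothetical implementation: read a flag from the environment so CI
        # can shorten tests; the real function lives in onnx_diagnostic.
        return os.environ.get("UNITTEST_GOING", "0").lower() in ("1", "true")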
@@ -126,6 +138,7 @@ def _config_reduction(config, task):
         )
 
         # fake_inputs = make_fake_with_dynamic_dimensions(inputs, dynamic_shapes)[0]
+        begin = time.perf_counter()
         export_inputs = inputs
         print()
         with torch_export_patches(
@@ -148,14 +161,21 @@ def _config_reduction(config, task):
                 onnx_plugs=PLUGS,
             )
 
+        print(f"-- MODEL CONVERTED IN {time.perf_counter() - begin}")
+
         pt2_files = [f"{fileep}.backup.pt2", f"{fileep}.ep.pt2", f"{fileep}.pt2"]
-        pt2_file = [f for f in pt2_files if os.path.exists(f)]
-        assert pt2_file, f"Unable to find an existing file among {pt2_files}"
-        pt2_file = pt2_file[0]
+        pt2_files = [f for f in pt2_files if os.path.exists(f)]
+        assert (
+            self.unit_test_going() or pt2_files
+        ), f"Unable to find an existing file among {pt2_files!r}"
+        pt2_file = (
+            (pt2_files[0] if pt2_files else None) if not self.unit_test_going() else None
+        )
         # self.assertExists(pt2_file)
         # ep = torch.export.load(pt2_file)
         # diff = self.max_diff(ep.module()(**export_inputs), model.visual(**export_inputs))
         # print("----------- diff", diff)
+        begin = time.perf_counter()
         self.assert_onnx_disc(
             f"test_imagetext2text_qwen_2_5_vl_instruct_visual.{device}.{dtype}.{exporter}",
             filename,
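
The rewritten pt2_file selection above no longer fails when no .pt2 file exists during a unit-test run: the assert is disarmed and pt2_file falls back to None. A standalone sketch of the same logic, with names of my own choosing rather than the commit's:

    import os
    from typing import List, Optional

    def pick_pt2_file(candidates: List[str], testing: bool) -> Optional[str]:
        # First existing candidate, or None while unit tests run; the assert
        # only fires outside a test run, as in the commit.
        existing = [f for f in candidates if os.path.exists(f)]
        assert testing or existing, f"Unable to find an existing file among {candidates!r}"
        return existing[0] if existing and not testing else None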
@@ -171,9 +191,10 @@ def _config_reduction(config, task):
             atol=0.02,
             rtol=10,
             ort_optimized_graph=False,
-            # ep=pt2_file,
+            ep=pt2_file,
             expected=expected,
         )
+        print(f"-- MODEL VERIFIED IN {time.perf_counter() - begin}")
         if self.unit_test_going():
             self.clean_dump()

onnx_diagnostic/ext_test_case.py: 14 additions & 18 deletions

@@ -743,15 +743,15 @@ class ExtTestCase(unittest.TestCase):
     _warns: List[Tuple[str, int, Warning]] = []
     _todos: List[Tuple[Callable, str]] = []
 
-    def unit_test_going(self):
+    def unit_test_going(self) -> bool:
         """
         Tells the script that it is running from a unit test.
         Avoids unit tests being very long.
         """
         return unit_test_going()
 
     @property
-    def verbose(self):
+    def verbose(self) -> int:
         "Returns the value of the environment variable ``VERBOSE``."
         return int(os.environ.get("VERBOSE", "0"))
 
@@ -776,13 +776,13 @@ def todo(cls, f: Callable, msg: str):
         cls._todos.append((f, msg))
 
     @classmethod
-    def ort(cls):
+    def ort(cls) -> unittest.__class__:
         import onnxruntime
 
         return onnxruntime
 
     @classmethod
-    def to_onnx(self, *args, **kwargs):
+    def to_onnx(self, *args, **kwargs) -> "ModelProto":  # noqa: F821
         from experimental_experiment.torch_interpreter import to_onnx
 
         return to_onnx(*args, **kwargs)
@@ -823,12 +823,7 @@ def clean_dump(self, folder: str = "dump_test"):
         elif os.path.isdir(item_path):
             shutil.rmtree(item_path)
 
-    def dump_onnx(
-        self,
-        name: str,
-        proto: Any,
-        folder: Optional[str] = None,
-    ) -> str:
+    def dump_onnx(self, name: str, proto: Any, folder: Optional[str] = None) -> str:
         """Dumps an onnx file."""
         fullname = self.get_dump_file(name, folder=folder)
         with open(fullname, "wb") as f:
@@ -1111,7 +1106,9 @@ def assertAlmostEqual(
             value = numpy.array(value).astype(expected.dtype)
         self.assertEqualArray(expected, value, atol=atol, rtol=rtol)
 
-    def check_ort(self, onx: "onnx.ModelProto") -> bool:  # noqa: F821
+    def check_ort(
+        self, onx: "onnx.ModelProto"  # noqa: F821
+    ) -> "onnxruntime.InferenceSession":  # noqa: F821
         from onnxruntime import InferenceSession
 
         return InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
@@ -1154,7 +1151,7 @@ def assertEndsWith(self, suffix: str, full: str):
         if not full.endswith(suffix):
             raise AssertionError(f"suffix={suffix!r} does not end string {full!r}.")
 
-    def capture(self, fct: Callable):
+    def capture(self, fct: Callable) -> Tuple[Any, str, str]:
         """
         Runs a function and captures standard output and error.
@@ -1250,7 +1247,7 @@ def assert_onnx_disc(
             proto, onnx.ModelProto
         ), f"Unexpected type {type(proto)} for proto"
         name = self.dump_onnx(name, proto)
-        if verbose:
+        if verbose and not self.unit_test_going():
             print(f"[{vname}] file size {os.stat(name).st_size // 2**10:1.3f} kb")
         if verbose:
             print(f"[{vname}] make feeds {string_type(inputs, **kws)}")
@@ -1262,15 +1259,14 @@ def assert_onnx_disc(
         feeds = make_feeds(proto, inputs, use_numpy=True, copy=True)
         import onnxruntime
 
-        if verbose:
-            print(f"[{vname}] create onnxruntime.InferenceSession")
         options = onnxruntime.SessionOptions()
         if ort_optimized_graph:
             options.optimized_model_filepath = f"{name}.optort.onnx"
+        providers = kwargs.get("providers", ["CPUExecutionProvider"])
+        if verbose:
+            print(f"[{vname}] create onnxruntime.InferenceSession with {providers}")
         sess = onnxruntime.InferenceSession(
-            proto.SerializeToString(),
-            options,
-            providers=kwargs.get("providers", ["CPUExecutionProvider"]),
+            proto.SerializeToString(), options, providers=providers
         )
         if verbose:
             print(f"[{vname}] run ort feeds {string_type(feeds, **kws)}")
