 from onnx_diagnostic.ext_test_case import (
     ExtTestCase,
     requires_cuda,
+    requires_onnxruntime,
     requires_transformers,
     requires_torch,
     ignore_warnings,
@@ -555,7 +556,7 @@ def _get_seqlen(cls) -> torch.Tensor:
 
     @unittest.skipIf(not patch_qwen2_5, "Qwen25 not part of this transformers")
     @requires_cuda()
-    def test_plug_packed_multi_head_attention_qwen25_packed_float16(self):
+    def test_plug_multi_head_attention_qwen25_packed_float16(self):
         from onnx_diagnostic.torch_export_patches.patches._patch_transformers_qwen2_5 import (
             qwen_sdpa_attention_packed_versatile,
         )
@@ -583,8 +584,9 @@ def test_plug_packed_multi_head_attention_qwen25_packed_float16(self):
         self.assertEqualArray(results.eager_outputs[0], results.onnx_outputs[0], atol=0.01)
         self.assertLess(results.diffs[0]["abs"], 0.01)
 
+    @requires_onnxruntime("1.24")
     @unittest.skipIf(not patch_qwen2_5, "Qwen25 not part of this transformers")
-    def test_plug_packed_multi_head_attention_qwen25_loopmha_float16(self):
+    def test_plug_multi_head_attention_qwen25_loopmha_float16(self):
         from onnx_diagnostic.torch_export_patches.patches._patch_transformers_qwen2_5 import (
             qwen_sdpa_attention_loopmha_versatile,
         )
@@ -617,8 +619,9 @@ def test_plug_packed_multi_head_attention_qwen25_loopmha_float16(self):
         self.assertEqualArray(results.eager_outputs[0], results.onnx_outputs[0], atol=0.01)
         self.assertLess(results.diffs[0]["abs"], 0.01)
 
+    @requires_onnxruntime("1.24")
     @unittest.skipIf(not patch_qwen2_5, "Qwen25 not part of this transformers")
-    def test_plug_packed_multi_head_attention_qwen25_loopmha_float32(self):
+    def test_plug_multi_head_attention_qwen25_loopmha_float32(self):
         from onnx_diagnostic.torch_export_patches.patches._patch_transformers_qwen2_5 import (
             qwen_sdpa_attention_loopmha_versatile,
         )
@@ -651,8 +654,9 @@ def test_plug_packed_multi_head_attention_qwen25_loopmha_float32(self):
         self.assertEqualArray(results.eager_outputs[0], results.onnx_outputs[0], atol=1e-5)
         self.assertLess(results.diffs[0]["abs"], 1e-5)
 
+    @requires_onnxruntime("1.24")
     @unittest.skipIf(not patch_qwen2_5, "Qwen25 not part of this transformers")
-    def test_plug_packed_multi_head_attention_qwen25_loopa24_float16(self):
+    def test_plug_multi_head_attention_qwen25_loopa24_float16(self):
         from onnx_diagnostic.torch_export_patches.patches._patch_transformers_qwen2_5 import (
             qwen_sdpa_attention_loopa24_versatile,
         )
@@ -678,8 +682,9 @@ def test_plug_packed_multi_head_attention_qwen25_loopa24_float16(self):
         self.assertEqualArray(results.eager_outputs[0], results.onnx_outputs[0], atol=0.005)
         self.assertLess(results.diffs[0]["abs"], 0.005)
 
+    @requires_onnxruntime("1.24")
     @unittest.skipIf(not patch_qwen2_5, "Qwen25 not part of this transformers")
-    def test_plug_packed_multi_head_attention_qwen25_loopa24_float32(self):
+    def test_plug_multi_head_attention_qwen25_loopa24_float32(self):
         from onnx_diagnostic.torch_export_patches.patches._patch_transformers_qwen2_5 import (
             qwen_sdpa_attention_loopa24_versatile,
         )