1414
1515class TestTasksImageTextToText (ExtTestCase ):
1616 @hide_stdout ()
17- @requires_transformers ("4.53 " )
17+ @requires_transformers ("4.56 " )
1818 @requires_torch ("2.7.99" )
1919 def test_image_text_to_text_idefics (self ):
2020 mid = "HuggingFaceM4/tiny-random-idefics"
2121 data = get_untrained_model_with_inputs (mid , verbose = 1 , add_second_input = True )
2222 self .assertEqual (data ["task" ], "image-text-to-text" )
2323 model , inputs , ds = data ["model" ], data ["inputs" ], data ["dynamic_shapes" ]
24- model (** torch_deepcopy (inputs ))
24+ expected = model (** torch_deepcopy (inputs ))
2525 model (** data ["inputs2" ])
2626 with torch_export_patches (patch_transformers = True , verbose = 10 , patch_torch = False ):
27- torch .export .export (
27+ ep = torch .export .export (
2828 model , (), kwargs = inputs , dynamic_shapes = use_dyn_not_str (ds ), strict = False
2929 )
30+ # The conversion does not work. Tolerance is set to 1.
31+ self .assertEqualAny (expected , ep .module ()(** inputs ), atol = 1 )
3032
3133 @hide_stdout ()
3234 @requires_transformers ("5.0.99" )
@@ -44,12 +46,13 @@ def test_image_text_to_text_tiny_gemma3(self):
4446 # self.assertIn((data["size"], data["n_weights"]), [(17248576, 4312144)])
4547 model , inputs , ds = data ["model" ], data ["inputs" ], data ["dynamic_shapes" ]
4648 print ("--" , self .string_type (data ["inputs" ], with_shape = True ))
47- model (** torch_deepcopy (inputs ))
49+ expected = model (** torch_deepcopy (inputs ))
4850 model (** data ["inputs2" ])
4951 with torch_export_patches (patch_transformers = True , verbose = 10 ):
50- torch .export .export (
52+ ep = torch .export .export (
5153 model , (), kwargs = inputs , dynamic_shapes = use_dyn_not_str (ds ), strict = False
5254 )
55+ self .assertEqualAny (expected , ep .module ()(** inputs ))
5356
5457 @hide_stdout ()
5558 @requires_transformers ("4.56.99" )
@@ -72,11 +75,13 @@ def test_image_text_to_text_gemma3_4b_it(self):
7275 model , inputs , ds = data ["model" ], data ["inputs" ], data ["dynamic_shapes" ]
7376 # inputs.pop("attention_mask")
7477 # ds.pop("attention_mask")
75- model (** torch_deepcopy (inputs ))
78+ expected = model (** torch_deepcopy (inputs ))
7679 with torch_export_patches (patch_transformers = True , verbose = 10 ):
77- torch .export .export (
80+ ep = torch .export .export (
7881 model , (), kwargs = inputs , dynamic_shapes = use_dyn_not_str (ds ), strict = False
7982 )
83+ # The conversion does not work. Tolerance is set to 1.
84+ self .assertEqualAny (expected , ep .module ()(** inputs ))
8085
8186 @hide_stdout ()
8287 @requires_transformers ("5.0.99" )
@@ -93,12 +98,13 @@ def test_image_text_to_text_zai_glm(self):
9398 self .assertEqual (data ["task" ], "image-text-to-text" )
9499 model , inputs , ds = data ["model" ], data ["inputs" ], data ["dynamic_shapes" ]
95100 print ("--" , self .string_type (data ["inputs" ], with_shape = True ))
96- model (** torch_deepcopy (inputs ))
101+ expected = model (** torch_deepcopy (inputs ))
97102 model (** data ["inputs2" ])
98103 with torch_export_patches (patch_transformers = True , verbose = 10 ):
99- torch .export .export (
104+ ep = torch .export .export (
100105 model , (), kwargs = inputs , dynamic_shapes = use_dyn_not_str (ds ), strict = False
101106 )
107+ self .assertEqualAny (expected , ep .module ()(** inputs ))
102108
103109
104110if __name__ == "__main__" :
0 commit comments