@@ -14,7 +14,9 @@ class TestTryExportHuggingFaceHubModel(ExtTestCase):
1414 @ignore_warnings (UserWarning )
1515 def test_imagetext2text_qwen_2_5_vl_instruct_visual (self ):
1616 """
17- clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k qwen_2_5
17+ clear&&NEVERTEST=1 python _unittests/ut_tasks/try_export.py -k qwen_2_5
18+
19+ possible prefix: ``TESTDEVICE=cuda TESTDTYPE=float16 EXPORTER=onnx-dynamo``
1820
1921 ::
2022
@@ -33,6 +35,15 @@ def test_imagetext2text_qwen_2_5_vl_instruct_visual(self):
3335 return_dict:bool
3436 )
3537 """
38+ device = os .environ .get ("TESTDEVICE" , "cpu" )
39+ dtype = os .environ .get ("TESTDTYPE" , "float32" )
40+ torch_dtype = {
41+ "float16" : torch .float16 ,
42+ "bfloat16" : torch .bfloat16 ,
43+ "float32" : torch .float32 ,
44+ }[dtype ]
45+ exporter = os .environ .get ("EXPORTER" , "custom" )
46+
3647 from transformers import AutoModel , AutoProcessor
3748
3849 # model_id = "Qwen/Qwen2.5-VL-7B-Instruct"
@@ -57,28 +68,28 @@ def _config_reduction(config, task):
5768 )
5869 model = data ["model" ]
5970
60- model = model .to ("cpu" ).to (torch . float32 )
71+ model = model .to (device ).to (getattr ( torch , dtype ) )
6172
73+ print (f"-- model.dtype={ model .dtype } " )
6274 print (f"-- model.device={ model .device } " )
6375 processor = AutoProcessor .from_pretrained (model_id , use_fast = True )
6476 print (f"-- processor={ type (processor )} " )
6577
6678 inputs = dict (
67- hidden_states = torch .rand ((1292 , 1176 ), dtype = torch . float32 ),
68- grid_thw = torch .tensor ([[1 , 34 , 38 ]], dtype = torch .int64 ),
79+ hidden_states = torch .rand ((1292 , 1176 ), dtype = torch_dtype ). to ( device ),
80+ grid_thw = torch .tensor ([[1 , 34 , 38 ]], dtype = torch .int64 ). to ( device ) ,
6981 )
7082
7183 print (f"-- inputs: { self .string_type (inputs , with_shape = True )} " )
7284 # this is too long
73- # expected = model.visual(**inputs)
74- # print(f"-- expected: {self.string_type(expected, with_shape=True)}")
85+ expected = model .visual (** inputs )
86+ print (f"-- expected: { self .string_type (expected , with_shape = True )} " )
7587
76- exporter = "custom" # "onnx-dynamo"
7788 filename = self .get_dump_file (
78- f"test_imagetext2text_qwen_2_5_vl_instruct_visual.{ exporter } .onnx"
89+ f"test_imagetext2text_qwen_2_5_vl_instruct_visual.{ device } . { dtype } . { exporter } .onnx"
7990 )
8091 fileep = self .get_dump_file (
81- f"test_imagetext2text_qwen_2_5_vl_instruct_visual.{ exporter } .graph"
92+ f"test_imagetext2text_qwen_2_5_vl_instruct_visual.{ device } . { dtype } . { exporter } .graph"
8293 )
8394 dynamic_shapes = dict (
8495 hidden_states = {0 : "hidden_width" , 1 : "hidden_height" },
@@ -103,8 +114,27 @@ def _config_reduction(config, task):
103114 exporter = exporter ,
104115 verbose = 1 ,
105116 save_ep = fileep ,
117+ target_opset = 22 ,
118+ optimize = True ,
106119 )
107120
121+ self .assert_onnx_disc (
122+ f"test_imagetext2text_qwen_2_5_vl_instruct_visual.{ device } .{ dtype } .{ exporter } " ,
123+ filename ,
124+ model .visual ,
125+ export_inputs ,
126+ verbose = 1 ,
127+ providers = (
128+ ["CUDAExecutionProvider" , "CPUExecutionProvider" ]
129+ if device == "cuda"
130+ else ["CPUExecutionProvider" ]
131+ ),
132+ use_ort = True ,
133+ atol = 0.02 ,
134+ rtol = 10 ,
135+ ort_optimized_graph = False ,
136+ )
137+
108138
109139if __name__ == "__main__" :
110140 unittest .main (verbosity = 2 )
0 commit comments