1- import os
21import unittest
32from typing import Optional
43import numpy as np
4+ import onnx
5+ import onnx .helper as oh
56from onnx import TensorProto
67from onnx .helper import (
7- make_graph ,
8- make_model ,
98 make_node ,
109 make_tensor_value_info ,
1110 make_opsetid ,
1211)
13- from onnx_diagnostic .ext_test_case import ExtTestCase
12+ from onnx_diagnostic .helpers import from_array_extended
13+ from onnx_diagnostic .ext_test_case import ExtTestCase , has_cuda
1414from onnx_diagnostic .reference import ExtendedReferenceEvaluator
1515
1616
@@ -23,28 +23,28 @@ def _range(self, *shape, bias: Optional[float] = None):
2323 return x .reshape (tuple (shape )).astype (np .float32 )
2424
def test_fused_matmul(self):
    """Check ``com.microsoft.FusedMatMul`` with no attributes set.

    A one-node model is evaluated with ``ExtendedReferenceEvaluator`` and
    the result is compared with NumPy's ``@`` product of the same operands.
    """
    float_type = TensorProto.FLOAT
    node = oh.make_node("FusedMatMul", ["X", "Y"], ["Z"], domain="com.microsoft")
    graph = oh.make_graph(
        [node],
        "name",
        [
            oh.make_tensor_value_info("X", float_type, None),
            oh.make_tensor_value_info("Y", float_type, None),
        ],
        [oh.make_tensor_value_info("Z", float_type, None)],
    )
    model = oh.make_model(
        graph,
        opset_imports=[oh.make_opsetid("", 18), oh.make_opsetid("com.microsoft", 1)],
    )
    evaluator = ExtendedReferenceEvaluator(model)
    # The 2x2 matrix [[0, 1], [2, 3]] is fed as both operands.
    mat = np.arange(4).reshape(-1, 2)
    outputs = evaluator.run(None, {"X": mat, "Y": mat})
    self.assertEqualArray(mat @ mat, outputs[0])
4242
4343 def test_fused_matmul11 (self ):
44- model = make_model (
45- make_graph (
44+ model = oh . make_model (
45+ oh . make_graph (
4646 [
47- make_node (
47+ oh . make_node (
4848 "FusedMatMul" ,
4949 ["X" , "Y" ],
5050 ["Z" ],
@@ -55,30 +55,30 @@ def test_fused_matmul11(self):
5555 ],
5656 "name" ,
5757 [
58- make_tensor_value_info ("X" , TensorProto .FLOAT , None ),
59- make_tensor_value_info ("Y" , TensorProto .FLOAT , None ),
58+ oh . make_tensor_value_info ("X" , TensorProto .FLOAT , None ),
59+ oh . make_tensor_value_info ("Y" , TensorProto .FLOAT , None ),
6060 ],
6161 [make_tensor_value_info ("Z" , TensorProto .FLOAT , None )],
6262 ),
63- opset_imports = [make_opsetid ("" , 18 ), make_opsetid ("com.microsoft" , 1 )],
63+ opset_imports = [make_opsetid ("" , 18 ), oh . make_opsetid ("com.microsoft" , 1 )],
6464 )
6565 ref = ExtendedReferenceEvaluator (model )
6666 a = np .arange (4 ).reshape (- 1 , 2 )
6767 got = ref .run (None , {"X" : a , "Y" : a })
6868 self .assertEqualArray (a .T @ a .T , got [0 ])
6969
7070 def test_memcpy (self ):
71- model = make_model (
72- make_graph (
71+ model = oh . make_model (
72+ oh . make_graph (
7373 [
74- make_node ("MemcpyToHost" , ["X" ], ["Z" ]),
75- make_node ("MemcpyFromHost" , ["X" ], ["Z" ]),
74+ oh . make_node ("MemcpyToHost" , ["X" ], ["Z" ]),
75+ oh . make_node ("MemcpyFromHost" , ["X" ], ["Z" ]),
7676 ],
7777 "name" ,
7878 [make_tensor_value_info ("X" , TensorProto .FLOAT , None )],
7979 [make_tensor_value_info ("Z" , TensorProto .FLOAT , None )],
8080 ),
81- opset_imports = [make_opsetid ("" , 18 ), make_opsetid ("com.microsoft" , 1 )],
81+ opset_imports = [make_opsetid ("" , 18 ), oh . make_opsetid ("com.microsoft" , 1 )],
8282 ir_version = 9 ,
8383 )
8484 a = np .arange (4 ).reshape (- 1 , 2 ).astype (np .float32 )
@@ -90,10 +90,10 @@ def test_quick_gelu(self):
9090 from onnxruntime import InferenceSession
9191
9292 for alpha in [0.0 , 2.0 ]:
93- model = make_model (
94- make_graph (
93+ model = oh . make_model (
94+ oh . make_graph (
9595 [
96- make_node (
96+ oh . make_node (
9797 "QuickGelu" ,
9898 ["X" ],
9999 ["Z" ],
@@ -105,7 +105,7 @@ def test_quick_gelu(self):
105105 [make_tensor_value_info ("X" , TensorProto .FLOAT , None )],
106106 [make_tensor_value_info ("Z" , TensorProto .FLOAT , None )],
107107 ),
108- opset_imports = [make_opsetid ("" , 18 ), make_opsetid ("com.microsoft" , 1 )],
108+ opset_imports = [make_opsetid ("" , 18 ), oh . make_opsetid ("com.microsoft" , 1 )],
109109 ir_version = 9 ,
110110 )
111111 sess = InferenceSession (
@@ -118,10 +118,10 @@ def test_quick_gelu(self):
118118 self .assertEqualArray (expected [0 ], got [0 ])
119119
120120 def test_scatter_elements (self ):
121- model = make_model (
122- make_graph (
121+ model = oh . make_model (
122+ oh . make_graph (
123123 [
124- make_node (
124+ oh . make_node (
125125 "ScatterElements" ,
126126 ["data" , "indices" , "updates" ],
127127 ["Z" ],
@@ -131,9 +131,9 @@ def test_scatter_elements(self):
131131 ],
132132 "name" ,
133133 [
134- make_tensor_value_info ("data" , TensorProto .FLOAT , None ),
135- make_tensor_value_info ("indices" , TensorProto .INT64 , None ),
136- make_tensor_value_info ("updates" , TensorProto .FLOAT , None ),
134+ oh . make_tensor_value_info ("data" , TensorProto .FLOAT , None ),
135+ oh . make_tensor_value_info ("indices" , TensorProto .INT64 , None ),
136+ oh . make_tensor_value_info ("updates" , TensorProto .FLOAT , None ),
137137 ],
138138 [make_tensor_value_info ("Z" , TensorProto .FLOAT , None )],
139139 ),
@@ -152,10 +152,10 @@ def test_scatter_elements(self):
152152 def test_skip_layer_normalization_nobias (self ):
153153 import onnxruntime
154154
155- model = make_model (
156- make_graph (
155+ model = oh . make_model (
156+ oh . make_graph (
157157 [
158- make_node (
158+ oh . make_node (
159159 "SkipLayerNormalization" ,
160160 ["x" , "skip" , "beta" , "gamma" ],
161161 ["Z" ],
@@ -165,14 +165,14 @@ def test_skip_layer_normalization_nobias(self):
165165 ],
166166 "name" ,
167167 [
168- make_tensor_value_info ("x" , TensorProto .FLOAT , ["a" , "b" , "c" ]),
169- make_tensor_value_info ("skip" , TensorProto .FLOAT , ["a" , "b" , "c" ]),
170- make_tensor_value_info ("beta" , TensorProto .FLOAT , ["c" ]),
171- make_tensor_value_info ("gamma" , TensorProto .FLOAT , ["c" ]),
168+ oh . make_tensor_value_info ("x" , TensorProto .FLOAT , ["a" , "b" , "c" ]),
169+ oh . make_tensor_value_info ("skip" , TensorProto .FLOAT , ["a" , "b" , "c" ]),
170+ oh . make_tensor_value_info ("beta" , TensorProto .FLOAT , ["c" ]),
171+ oh . make_tensor_value_info ("gamma" , TensorProto .FLOAT , ["c" ]),
172172 ],
173173 [make_tensor_value_info ("Z" , TensorProto .FLOAT , None )],
174174 ),
175- opset_imports = [make_opsetid ("" , 18 ), make_opsetid ("com.microsoft" , 1 )],
175+ opset_imports = [make_opsetid ("" , 18 ), oh . make_opsetid ("com.microsoft" , 1 )],
176176 ir_version = 10 ,
177177 )
178178 feeds = dict (
@@ -193,10 +193,10 @@ def test_skip_layer_normalization_nobias(self):
193193 def test_skip_layer_normalization_bias (self ):
194194 import onnxruntime
195195
196- model = make_model (
197- make_graph (
196+ model = oh . make_model (
197+ oh . make_graph (
198198 [
199- make_node (
199+ oh . make_node (
200200 "SkipLayerNormalization" ,
201201 ["x" , "skip" , "beta" , "gamma" , "bias" ],
202202 ["Z" ],
@@ -206,15 +206,15 @@ def test_skip_layer_normalization_bias(self):
206206 ],
207207 "name" ,
208208 [
209- make_tensor_value_info ("x" , TensorProto .FLOAT , ["a" , "b" , "c" ]),
210- make_tensor_value_info ("skip" , TensorProto .FLOAT , ["a" , "b" , "c" ]),
211- make_tensor_value_info ("beta" , TensorProto .FLOAT , ["c" ]),
212- make_tensor_value_info ("gamma" , TensorProto .FLOAT , ["c" ]),
213- make_tensor_value_info ("bias" , TensorProto .FLOAT , ["c" ]),
209+ oh . make_tensor_value_info ("x" , TensorProto .FLOAT , ["a" , "b" , "c" ]),
210+ oh . make_tensor_value_info ("skip" , TensorProto .FLOAT , ["a" , "b" , "c" ]),
211+ oh . make_tensor_value_info ("beta" , TensorProto .FLOAT , ["c" ]),
212+ oh . make_tensor_value_info ("gamma" , TensorProto .FLOAT , ["c" ]),
213+ oh . make_tensor_value_info ("bias" , TensorProto .FLOAT , ["c" ]),
214214 ],
215215 [make_tensor_value_info ("Z" , TensorProto .FLOAT , None )],
216216 ),
217- opset_imports = [make_opsetid ("" , 18 ), make_opsetid ("com.microsoft" , 1 )],
217+ opset_imports = [make_opsetid ("" , 18 ), oh . make_opsetid ("com.microsoft" , 1 )],
218218 ir_version = 10 ,
219219 )
220220 feeds = dict (
@@ -233,10 +233,159 @@ def test_skip_layer_normalization_bias(self):
233233 self .assertEqual (len (expected ), len (got ))
234234 self .assertEqualArrayAny (expected , got , atol = 1e-3 )
235235
236- def test_attention (self ):
237- path = os .path .join (
238- os .path .dirname (__file__ ), "data" , "test_attention_pattern_1_4d_cpu.onnx"
def _get_model_attention(self) -> onnx.ModelProto:
    """Build an in-memory model around one ``com.microsoft`` ``Attention`` node.

    The graph concatenates separate q/k/v projection weights (axis 1) and
    biases (axis 0) into the packed tensors given to ``Attention``, and
    converts the boolean input ``expand_1`` into an int32 tensor for the
    node's fourth input (Unsqueeze, Cast to int64, Cast to int32).

    The projection weights are drawn from a generator with a fixed seed,
    so every call returns the same model and a numerical mismatch seen in
    a test run can be reproduced.

    :return: the model, built with ``ir_version=10``
    """
    # Obtained with:
    # python -m onnx_array_api translate -a onnx-short -m <model.onnx>
    hidden = 1024
    num_heads = 16
    seq = "(s1-1)//8+1"  # symbolic sequence dimension, shared by several inputs
    prefix = "encoder.encoders.0.self_attn.linear_"
    float_type = onnx.TensorProto.FLOAT
    int64_type = onnx.TensorProto.INT64

    opset_imports = [
        oh.make_opsetid("pkg.onnxscript.torch_lib.common", 1),
        oh.make_opsetid("", 18),
        oh.make_opsetid("pkg.onnxscript.torch_lib", 1),
        oh.make_opsetid("pkg.torch.__subgraph__", 1),
        oh.make_opsetid("com.microsoft", 1),
    ]

    # Local, seeded generator: deterministic weights without touching
    # NumPy's global random state.
    rng = np.random.default_rng(0)
    initializers = []
    # Order is q.weight, q.bias, k.weight, k.bias, v.weight, v.bias.
    for proj in ("q", "k", "v"):
        for kind, shape in (("weight", (hidden, hidden)), ("bias", (hidden,))):
            initializers.append(
                from_array_extended(
                    rng.standard_normal(shape).astype(np.float32),
                    name=f"{prefix}{proj}.{kind}",
                )
            )
    initializers.append(from_array_extended(np.array(1, dtype=np.int64), name="dim_0_7"))

    inputs = [
        oh.make_tensor_value_info("layer_norm_1", float_type, shape=("s0", seq, hidden)),
        oh.make_tensor_value_info("expand_1", onnx.TensorProto.BOOL, shape=("s0", seq, seq)),
        oh.make_tensor_value_info("unsqueeze_9", float_type, shape=(1, num_heads, seq, seq)),
        # The val_* inputs are declared but no node below consumes them;
        # they are kept so the model's declared inputs stay unchanged.
        oh.make_tensor_value_info("val_104", int64_type, shape=(4,)),
        oh.make_tensor_value_info("val_112", int64_type, shape=(4,)),
        oh.make_tensor_value_info("val_120", int64_type, shape=(4,)),
        oh.make_tensor_value_info("val_132", int64_type, shape=(3,)),
    ]
    outputs = [oh.make_tensor_value_info("view_3", float_type, shape=("s0", seq, hidden))]

    packed_weight = f"{prefix}q.weight_qkv"
    packed_bias = f"{prefix}q.bias_bias"
    nodes = [
        oh.make_node("Unsqueeze", ["expand_1", "dim_0_7"], ["unsqueeze_6"]),
        oh.make_node(
            "Cast", ["unsqueeze_6"], ["convert_element_type_default"], to=int64_type
        ),
        oh.make_node(
            "Concat",
            [f"{prefix}{proj}.weight" for proj in ("q", "k", "v")],
            [packed_weight],
            axis=1,
        ),
        oh.make_node(
            "Concat",
            [f"{prefix}{proj}.bias" for proj in ("q", "k", "v")],
            [packed_bias],
            axis=0,
        ),
        oh.make_node(
            "Cast",
            ["convert_element_type_default"],
            ["convert_element_type_default_int32"],
            to=onnx.TensorProto.INT32,
        ),
        oh.make_node(
            "Attention",
            [
                "layer_norm_1",
                packed_weight,
                packed_bias,
                "convert_element_type_default_int32",
                "",  # fifth input left empty (optional input skipped)
                "unsqueeze_9",
            ],
            ["view_3"],
            domain="com.microsoft",
            num_heads=num_heads,
        ),
    ]

    graph = oh.make_graph(
        nodes,
        "experiment",
        inputs,
        outputs,
        initializers,
        sparse_initializer=[],
    )
    return oh.make_model(graph, functions=[], opset_imports=opset_imports, ir_version=10)
385+
386+ def test_attention (self ):
387+ model = self ._get_model_attention ()
388+ path = self .dump_onnx ("test_attention.onnx" , model )
240389 ref = ExtendedReferenceEvaluator (path )
241390 feeds = {
242391 "layer_norm_1" : self ._range (2 , 8 , 1024 ), # s0,(s1-1)//8+1,1024
@@ -250,7 +399,18 @@ def test_attention(self):
250399 [2 , 8 , 1024 ], dtype = np .int64
251400 ), # s0,CeilToInt(IntTrueDiv(s1, 8)),1024
252401 }
253- ref .run (None , feeds )
402+ got = ref .run (None , feeds )
403+
404+ if not has_cuda ():
405+ return
406+ import onnxruntime
407+
408+ sess = onnxruntime .InferenceSession (
409+ model .SerializeToString (),
410+ providers = ["CUDAExecutionProvider" , "CPUExecutionProvider" ],
411+ )
412+ expected = sess .run (None , feeds )
413+ self .assertEqualArrayAny (expected , got , atol = 1 )
254414
255415
256416if __name__ == "__main__" :
0 commit comments