1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2
2
; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+dotprod < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3
- ; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+dotprod -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4
-
5
- ; CHECK-GI: warning: Instruction selection used fallback path for test_vdot_u32
6
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_u32
7
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_s32
8
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_s32
9
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_u32_zero
10
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_u32_zero
11
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_s32_zero
12
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_s32_zero
13
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_lane_u32
14
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_lane_u32
15
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_laneq_u32
16
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_laneq_u32
17
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_lane_u32_zero
18
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_lane_u32_zero
19
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_laneq_u32_zero
20
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_laneq_u32_zero
21
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_lane_s32
22
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_lane_s32
23
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_laneq_s32
24
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_laneq_s32
25
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_lane_s32_zero
26
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_lane_s32_zero
27
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_laneq_s32_zero
28
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_laneq_s32_zero
3
+ ; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+dotprod -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
29
4
30
5
declare <2 x i32 > @llvm.aarch64.neon.udot.v2i32.v8i8 (<2 x i32 >, <8 x i8 >, <8 x i8 >)
31
6
declare <4 x i32 > @llvm.aarch64.neon.udot.v4i32.v16i8 (<4 x i32 >, <16 x i8 >, <16 x i8 >)
@@ -74,43 +49,71 @@ entry:
74
49
75
50
76
51
define <2 x i32 > @test_vdot_u32_zero (<2 x i32 > %a , <8 x i8 > %b , <8 x i8 > %c ) #0 {
77
- ; CHECK-LABEL: test_vdot_u32_zero:
78
- ; CHECK: // %bb.0: // %entry
79
- ; CHECK-NEXT: udot v0.2s, v1.8b, v2.8b
80
- ; CHECK-NEXT: ret
52
+ ; CHECK-SD-LABEL: test_vdot_u32_zero:
53
+ ; CHECK-SD: // %bb.0: // %entry
54
+ ; CHECK-SD-NEXT: udot v0.2s, v1.8b, v2.8b
55
+ ; CHECK-SD-NEXT: ret
56
+ ;
57
+ ; CHECK-GI-LABEL: test_vdot_u32_zero:
58
+ ; CHECK-GI: // %bb.0: // %entry
59
+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
60
+ ; CHECK-GI-NEXT: udot v3.2s, v1.8b, v2.8b
61
+ ; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s
62
+ ; CHECK-GI-NEXT: ret
81
63
entry:
82
64
%vdot1.i = call <2 x i32 > @llvm.aarch64.neon.udot.v2i32.v8i8 (<2 x i32 > zeroinitializer , <8 x i8 > %b , <8 x i8 > %c ) #2
83
65
%ret = add <2 x i32 > %vdot1.i , %a
84
66
ret <2 x i32 > %ret
85
67
}
86
68
87
69
define <4 x i32 > @test_vdotq_u32_zero (<4 x i32 > %a , <16 x i8 > %b , <16 x i8 > %c ) #0 {
88
- ; CHECK-LABEL: test_vdotq_u32_zero:
89
- ; CHECK: // %bb.0: // %entry
90
- ; CHECK-NEXT: udot v0.4s, v1.16b, v2.16b
91
- ; CHECK-NEXT: ret
70
+ ; CHECK-SD-LABEL: test_vdotq_u32_zero:
71
+ ; CHECK-SD: // %bb.0: // %entry
72
+ ; CHECK-SD-NEXT: udot v0.4s, v1.16b, v2.16b
73
+ ; CHECK-SD-NEXT: ret
74
+ ;
75
+ ; CHECK-GI-LABEL: test_vdotq_u32_zero:
76
+ ; CHECK-GI: // %bb.0: // %entry
77
+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
78
+ ; CHECK-GI-NEXT: udot v3.4s, v1.16b, v2.16b
79
+ ; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s
80
+ ; CHECK-GI-NEXT: ret
92
81
entry:
93
82
%vdot1.i = call <4 x i32 > @llvm.aarch64.neon.udot.v4i32.v16i8 (<4 x i32 > zeroinitializer , <16 x i8 > %b , <16 x i8 > %c ) #2
94
83
%ret = add <4 x i32 > %vdot1.i , %a
95
84
ret <4 x i32 > %ret
96
85
}
97
86
98
87
define <2 x i32 > @test_vdot_s32_zero (<2 x i32 > %a , <8 x i8 > %b , <8 x i8 > %c ) #0 {
99
- ; CHECK-LABEL: test_vdot_s32_zero:
100
- ; CHECK: // %bb.0: // %entry
101
- ; CHECK-NEXT: sdot v0.2s, v1.8b, v2.8b
102
- ; CHECK-NEXT: ret
88
+ ; CHECK-SD-LABEL: test_vdot_s32_zero:
89
+ ; CHECK-SD: // %bb.0: // %entry
90
+ ; CHECK-SD-NEXT: sdot v0.2s, v1.8b, v2.8b
91
+ ; CHECK-SD-NEXT: ret
92
+ ;
93
+ ; CHECK-GI-LABEL: test_vdot_s32_zero:
94
+ ; CHECK-GI: // %bb.0: // %entry
95
+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
96
+ ; CHECK-GI-NEXT: sdot v3.2s, v1.8b, v2.8b
97
+ ; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s
98
+ ; CHECK-GI-NEXT: ret
103
99
entry:
104
100
%vdot1.i = call <2 x i32 > @llvm.aarch64.neon.sdot.v2i32.v8i8 (<2 x i32 > zeroinitializer , <8 x i8 > %b , <8 x i8 > %c ) #2
105
101
%ret = add <2 x i32 > %vdot1.i , %a
106
102
ret <2 x i32 > %ret
107
103
}
108
104
109
105
define <4 x i32 > @test_vdotq_s32_zero (<4 x i32 > %a , <16 x i8 > %b , <16 x i8 > %c ) #0 {
110
- ; CHECK-LABEL: test_vdotq_s32_zero:
111
- ; CHECK: // %bb.0: // %entry
112
- ; CHECK-NEXT: sdot v0.4s, v1.16b, v2.16b
113
- ; CHECK-NEXT: ret
106
+ ; CHECK-SD-LABEL: test_vdotq_s32_zero:
107
+ ; CHECK-SD: // %bb.0: // %entry
108
+ ; CHECK-SD-NEXT: sdot v0.4s, v1.16b, v2.16b
109
+ ; CHECK-SD-NEXT: ret
110
+ ;
111
+ ; CHECK-GI-LABEL: test_vdotq_s32_zero:
112
+ ; CHECK-GI: // %bb.0: // %entry
113
+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
114
+ ; CHECK-GI-NEXT: sdot v3.4s, v1.16b, v2.16b
115
+ ; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s
116
+ ; CHECK-GI-NEXT: ret
114
117
entry:
115
118
%vdot1.i = call <4 x i32 > @llvm.aarch64.neon.sdot.v4i32.v16i8 (<4 x i32 > zeroinitializer , <16 x i8 > %b , <16 x i8 > %c ) #2
116
119
%ret = add <4 x i32 > %vdot1.i , %a
@@ -174,11 +177,19 @@ entry:
174
177
175
178
176
179
define <2 x i32 > @test_vdot_lane_u32_zero (<2 x i32 > %a , <8 x i8 > %b , <8 x i8 > %c ) {
177
- ; CHECK-LABEL: test_vdot_lane_u32_zero:
178
- ; CHECK: // %bb.0: // %entry
179
- ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
180
- ; CHECK-NEXT: udot v0.2s, v1.8b, v2.4b[1]
181
- ; CHECK-NEXT: ret
180
+ ; CHECK-SD-LABEL: test_vdot_lane_u32_zero:
181
+ ; CHECK-SD: // %bb.0: // %entry
182
+ ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
183
+ ; CHECK-SD-NEXT: udot v0.2s, v1.8b, v2.4b[1]
184
+ ; CHECK-SD-NEXT: ret
185
+ ;
186
+ ; CHECK-GI-LABEL: test_vdot_lane_u32_zero:
187
+ ; CHECK-GI: // %bb.0: // %entry
188
+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
189
+ ; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
190
+ ; CHECK-GI-NEXT: udot v3.2s, v1.8b, v2.4b[1]
191
+ ; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s
192
+ ; CHECK-GI-NEXT: ret
182
193
entry:
183
194
%.cast = bitcast <8 x i8 > %c to <2 x i32 >
184
195
%shuffle = shufflevector <2 x i32 > %.cast , <2 x i32 > undef , <2 x i32 > <i32 1 , i32 1 >
@@ -189,11 +200,19 @@ entry:
189
200
}
190
201
191
202
define <4 x i32 > @test_vdotq_lane_u32_zero (<4 x i32 > %a , <16 x i8 > %b , <8 x i8 > %c ) {
192
- ; CHECK-LABEL: test_vdotq_lane_u32_zero:
193
- ; CHECK: // %bb.0: // %entry
194
- ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
195
- ; CHECK-NEXT: udot v0.4s, v1.16b, v2.4b[1]
196
- ; CHECK-NEXT: ret
203
+ ; CHECK-SD-LABEL: test_vdotq_lane_u32_zero:
204
+ ; CHECK-SD: // %bb.0: // %entry
205
+ ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
206
+ ; CHECK-SD-NEXT: udot v0.4s, v1.16b, v2.4b[1]
207
+ ; CHECK-SD-NEXT: ret
208
+ ;
209
+ ; CHECK-GI-LABEL: test_vdotq_lane_u32_zero:
210
+ ; CHECK-GI: // %bb.0: // %entry
211
+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
212
+ ; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
213
+ ; CHECK-GI-NEXT: udot v3.4s, v1.16b, v2.4b[1]
214
+ ; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s
215
+ ; CHECK-GI-NEXT: ret
197
216
entry:
198
217
%.cast = bitcast <8 x i8 > %c to <2 x i32 >
199
218
%shuffle = shufflevector <2 x i32 > %.cast , <2 x i32 > undef , <4 x i32 > <i32 1 , i32 1 , i32 1 , i32 1 >
@@ -204,10 +223,17 @@ entry:
204
223
}
205
224
206
225
define <2 x i32 > @test_vdot_laneq_u32_zero (<2 x i32 > %a , <8 x i8 > %b , <16 x i8 > %c ) {
207
- ; CHECK-LABEL: test_vdot_laneq_u32_zero:
208
- ; CHECK: // %bb.0: // %entry
209
- ; CHECK-NEXT: udot v0.2s, v1.8b, v2.4b[1]
210
- ; CHECK-NEXT: ret
226
+ ; CHECK-SD-LABEL: test_vdot_laneq_u32_zero:
227
+ ; CHECK-SD: // %bb.0: // %entry
228
+ ; CHECK-SD-NEXT: udot v0.2s, v1.8b, v2.4b[1]
229
+ ; CHECK-SD-NEXT: ret
230
+ ;
231
+ ; CHECK-GI-LABEL: test_vdot_laneq_u32_zero:
232
+ ; CHECK-GI: // %bb.0: // %entry
233
+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
234
+ ; CHECK-GI-NEXT: udot v3.2s, v1.8b, v2.4b[1]
235
+ ; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s
236
+ ; CHECK-GI-NEXT: ret
211
237
entry:
212
238
%.cast = bitcast <16 x i8 > %c to <4 x i32 >
213
239
%shuffle = shufflevector <4 x i32 > %.cast , <4 x i32 > undef , <2 x i32 > <i32 1 , i32 1 >
@@ -218,10 +244,17 @@ entry:
218
244
}
219
245
220
246
define <4 x i32 > @test_vdotq_laneq_u32_zero (<4 x i32 > %a , <16 x i8 > %b , <16 x i8 > %c ) {
221
- ; CHECK-LABEL: test_vdotq_laneq_u32_zero:
222
- ; CHECK: // %bb.0: // %entry
223
- ; CHECK-NEXT: udot v0.4s, v1.16b, v2.4b[1]
224
- ; CHECK-NEXT: ret
247
+ ; CHECK-SD-LABEL: test_vdotq_laneq_u32_zero:
248
+ ; CHECK-SD: // %bb.0: // %entry
249
+ ; CHECK-SD-NEXT: udot v0.4s, v1.16b, v2.4b[1]
250
+ ; CHECK-SD-NEXT: ret
251
+ ;
252
+ ; CHECK-GI-LABEL: test_vdotq_laneq_u32_zero:
253
+ ; CHECK-GI: // %bb.0: // %entry
254
+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
255
+ ; CHECK-GI-NEXT: udot v3.4s, v1.16b, v2.4b[1]
256
+ ; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s
257
+ ; CHECK-GI-NEXT: ret
225
258
entry:
226
259
%.cast = bitcast <16 x i8 > %c to <4 x i32 >
227
260
%shuffle = shufflevector <4 x i32 > %.cast , <4 x i32 > undef , <4 x i32 > <i32 1 , i32 1 , i32 1 , i32 1 >
@@ -288,11 +321,19 @@ entry:
288
321
289
322
290
323
define <2 x i32 > @test_vdot_lane_s32_zero (<2 x i32 > %a , <8 x i8 > %b , <8 x i8 > %c ) {
291
- ; CHECK-LABEL: test_vdot_lane_s32_zero:
292
- ; CHECK: // %bb.0: // %entry
293
- ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
294
- ; CHECK-NEXT: sdot v0.2s, v1.8b, v2.4b[1]
295
- ; CHECK-NEXT: ret
324
+ ; CHECK-SD-LABEL: test_vdot_lane_s32_zero:
325
+ ; CHECK-SD: // %bb.0: // %entry
326
+ ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
327
+ ; CHECK-SD-NEXT: sdot v0.2s, v1.8b, v2.4b[1]
328
+ ; CHECK-SD-NEXT: ret
329
+ ;
330
+ ; CHECK-GI-LABEL: test_vdot_lane_s32_zero:
331
+ ; CHECK-GI: // %bb.0: // %entry
332
+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
333
+ ; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
334
+ ; CHECK-GI-NEXT: sdot v3.2s, v1.8b, v2.4b[1]
335
+ ; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s
336
+ ; CHECK-GI-NEXT: ret
296
337
entry:
297
338
%.cast = bitcast <8 x i8 > %c to <2 x i32 >
298
339
%shuffle = shufflevector <2 x i32 > %.cast , <2 x i32 > undef , <2 x i32 > <i32 1 , i32 1 >
@@ -303,11 +344,19 @@ entry:
303
344
}
304
345
305
346
define <4 x i32 > @test_vdotq_lane_s32_zero (<4 x i32 > %a , <16 x i8 > %b , <8 x i8 > %c ) {
306
- ; CHECK-LABEL: test_vdotq_lane_s32_zero:
307
- ; CHECK: // %bb.0: // %entry
308
- ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
309
- ; CHECK-NEXT: sdot v0.4s, v1.16b, v2.4b[1]
310
- ; CHECK-NEXT: ret
347
+ ; CHECK-SD-LABEL: test_vdotq_lane_s32_zero:
348
+ ; CHECK-SD: // %bb.0: // %entry
349
+ ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
350
+ ; CHECK-SD-NEXT: sdot v0.4s, v1.16b, v2.4b[1]
351
+ ; CHECK-SD-NEXT: ret
352
+ ;
353
+ ; CHECK-GI-LABEL: test_vdotq_lane_s32_zero:
354
+ ; CHECK-GI: // %bb.0: // %entry
355
+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
356
+ ; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
357
+ ; CHECK-GI-NEXT: sdot v3.4s, v1.16b, v2.4b[1]
358
+ ; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s
359
+ ; CHECK-GI-NEXT: ret
311
360
entry:
312
361
%.cast = bitcast <8 x i8 > %c to <2 x i32 >
313
362
%shuffle = shufflevector <2 x i32 > %.cast , <2 x i32 > undef , <4 x i32 > <i32 1 , i32 1 , i32 1 , i32 1 >
@@ -318,10 +367,17 @@ entry:
318
367
}
319
368
320
369
define <2 x i32 > @test_vdot_laneq_s32_zero (<2 x i32 > %a , <8 x i8 > %b , <16 x i8 > %c ) {
321
- ; CHECK-LABEL: test_vdot_laneq_s32_zero:
322
- ; CHECK: // %bb.0: // %entry
323
- ; CHECK-NEXT: sdot v0.2s, v1.8b, v2.4b[1]
324
- ; CHECK-NEXT: ret
370
+ ; CHECK-SD-LABEL: test_vdot_laneq_s32_zero:
371
+ ; CHECK-SD: // %bb.0: // %entry
372
+ ; CHECK-SD-NEXT: sdot v0.2s, v1.8b, v2.4b[1]
373
+ ; CHECK-SD-NEXT: ret
374
+ ;
375
+ ; CHECK-GI-LABEL: test_vdot_laneq_s32_zero:
376
+ ; CHECK-GI: // %bb.0: // %entry
377
+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
378
+ ; CHECK-GI-NEXT: sdot v3.2s, v1.8b, v2.4b[1]
379
+ ; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s
380
+ ; CHECK-GI-NEXT: ret
325
381
entry:
326
382
%.cast = bitcast <16 x i8 > %c to <4 x i32 >
327
383
%shuffle = shufflevector <4 x i32 > %.cast , <4 x i32 > undef , <2 x i32 > <i32 1 , i32 1 >
@@ -332,10 +388,17 @@ entry:
332
388
}
333
389
334
390
define <4 x i32 > @test_vdotq_laneq_s32_zero (<4 x i32 > %a , <16 x i8 > %b , <16 x i8 > %c ) {
335
- ; CHECK-LABEL: test_vdotq_laneq_s32_zero:
336
- ; CHECK: // %bb.0: // %entry
337
- ; CHECK-NEXT: sdot v0.4s, v1.16b, v2.4b[1]
338
- ; CHECK-NEXT: ret
391
+ ; CHECK-SD-LABEL: test_vdotq_laneq_s32_zero:
392
+ ; CHECK-SD: // %bb.0: // %entry
393
+ ; CHECK-SD-NEXT: sdot v0.4s, v1.16b, v2.4b[1]
394
+ ; CHECK-SD-NEXT: ret
395
+ ;
396
+ ; CHECK-GI-LABEL: test_vdotq_laneq_s32_zero:
397
+ ; CHECK-GI: // %bb.0: // %entry
398
+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
399
+ ; CHECK-GI-NEXT: sdot v3.4s, v1.16b, v2.4b[1]
400
+ ; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s
401
+ ; CHECK-GI-NEXT: ret
339
402
entry:
340
403
%.cast = bitcast <16 x i8 > %c to <4 x i32 >
341
404
%shuffle = shufflevector <4 x i32 > %.cast , <4 x i32 > undef , <4 x i32 > <i32 1 , i32 1 , i32 1 , i32 1 >
0 commit comments