11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+dotprod < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3- ; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+dotprod -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4-
5- ; CHECK-GI: warning: Instruction selection used fallback path for test_vdot_u32
6- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_u32
7- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_s32
8- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_s32
9- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_u32_zero
10- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_u32_zero
11- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_s32_zero
12- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_s32_zero
13- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_lane_u32
14- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_lane_u32
15- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_laneq_u32
16- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_laneq_u32
17- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_lane_u32_zero
18- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_lane_u32_zero
19- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_laneq_u32_zero
20- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_laneq_u32_zero
21- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_lane_s32
22- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_lane_s32
23- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_laneq_s32
24- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_laneq_s32
25- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_lane_s32_zero
26- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_lane_s32_zero
27- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_laneq_s32_zero
28- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_laneq_s32_zero
3+ ; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+dotprod -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
294
305declare <2 x i32 > @llvm.aarch64.neon.udot.v2i32.v8i8 (<2 x i32 >, <8 x i8 >, <8 x i8 >)
316declare <4 x i32 > @llvm.aarch64.neon.udot.v4i32.v16i8 (<4 x i32 >, <16 x i8 >, <16 x i8 >)
@@ -74,43 +49,71 @@ entry:
7449
7550
; test_vdot_u32_zero: 64-bit unsigned dot product with a zero accumulator.
; The IR calls @llvm.aarch64.neon.udot.v2i32.v8i8 with a zeroinitializer
; accumulator and then adds %a to the result with a separate `add`.
; Expected output per check prefix:
;   CHECK-SD (RUN line without -global-isel): a single `udot` that accumulates
;     directly into v0, i.e. the add is folded into the accumulator operand.
;   CHECK-GI (RUN line with -global-isel): `movi` zeroes v3, `udot` accumulates
;     into v3, and a separate vector `add` produces v0 (the add is not folded).
7651define <2 x i32 > @test_vdot_u32_zero (<2 x i32 > %a , <8 x i8 > %b , <8 x i8 > %c ) #0 {
77- ; CHECK-LABEL: test_vdot_u32_zero:
78- ; CHECK: // %bb.0: // %entry
79- ; CHECK-NEXT: udot v0.2s, v1.8b, v2.8b
80- ; CHECK-NEXT: ret
52+ ; CHECK-SD-LABEL: test_vdot_u32_zero:
53+ ; CHECK-SD: // %bb.0: // %entry
54+ ; CHECK-SD-NEXT: udot v0.2s, v1.8b, v2.8b
55+ ; CHECK-SD-NEXT: ret
56+ ;
57+ ; CHECK-GI-LABEL: test_vdot_u32_zero:
58+ ; CHECK-GI: // %bb.0: // %entry
59+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
60+ ; CHECK-GI-NEXT: udot v3.2s, v1.8b, v2.8b
61+ ; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s
62+ ; CHECK-GI-NEXT: ret
8163entry:
8264 %vdot1.i = call <2 x i32 > @llvm.aarch64.neon.udot.v2i32.v8i8 (<2 x i32 > zeroinitializer , <8 x i8 > %b , <8 x i8 > %c ) #2
8365 %ret = add <2 x i32 > %vdot1.i , %a
8466 ret <2 x i32 > %ret
8567}
8668
; test_vdotq_u32_zero: 128-bit unsigned dot product with a zero accumulator.
; The IR calls @llvm.aarch64.neon.udot.v4i32.v16i8 with a zeroinitializer
; accumulator and then adds %a to the result with a separate `add`.
; Expected output per check prefix:
;   CHECK-SD (RUN line without -global-isel): a single `udot` on .4s/.16b that
;     accumulates directly into v0.
;   CHECK-GI (RUN line with -global-isel): `movi` zeroes v3, `udot` accumulates
;     into v3, and a separate vector `add` produces v0.
8769define <4 x i32 > @test_vdotq_u32_zero (<4 x i32 > %a , <16 x i8 > %b , <16 x i8 > %c ) #0 {
88- ; CHECK-LABEL: test_vdotq_u32_zero:
89- ; CHECK: // %bb.0: // %entry
90- ; CHECK-NEXT: udot v0.4s, v1.16b, v2.16b
91- ; CHECK-NEXT: ret
70+ ; CHECK-SD-LABEL: test_vdotq_u32_zero:
71+ ; CHECK-SD: // %bb.0: // %entry
72+ ; CHECK-SD-NEXT: udot v0.4s, v1.16b, v2.16b
73+ ; CHECK-SD-NEXT: ret
74+ ;
75+ ; CHECK-GI-LABEL: test_vdotq_u32_zero:
76+ ; CHECK-GI: // %bb.0: // %entry
77+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
78+ ; CHECK-GI-NEXT: udot v3.4s, v1.16b, v2.16b
79+ ; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s
80+ ; CHECK-GI-NEXT: ret
9281entry:
9382 %vdot1.i = call <4 x i32 > @llvm.aarch64.neon.udot.v4i32.v16i8 (<4 x i32 > zeroinitializer , <16 x i8 > %b , <16 x i8 > %c ) #2
9483 %ret = add <4 x i32 > %vdot1.i , %a
9584 ret <4 x i32 > %ret
9685}
9786
; test_vdot_s32_zero: 64-bit signed dot product with a zero accumulator.
; The IR calls @llvm.aarch64.neon.sdot.v2i32.v8i8 with a zeroinitializer
; accumulator and then adds %a to the result with a separate `add`.
; Expected output per check prefix:
;   CHECK-SD (RUN line without -global-isel): a single `sdot` that accumulates
;     directly into v0.
;   CHECK-GI (RUN line with -global-isel): `movi` zeroes v3, `sdot` accumulates
;     into v3, and a separate vector `add` produces v0.
9887define <2 x i32 > @test_vdot_s32_zero (<2 x i32 > %a , <8 x i8 > %b , <8 x i8 > %c ) #0 {
99- ; CHECK-LABEL: test_vdot_s32_zero:
100- ; CHECK: // %bb.0: // %entry
101- ; CHECK-NEXT: sdot v0.2s, v1.8b, v2.8b
102- ; CHECK-NEXT: ret
88+ ; CHECK-SD-LABEL: test_vdot_s32_zero:
89+ ; CHECK-SD: // %bb.0: // %entry
90+ ; CHECK-SD-NEXT: sdot v0.2s, v1.8b, v2.8b
91+ ; CHECK-SD-NEXT: ret
92+ ;
93+ ; CHECK-GI-LABEL: test_vdot_s32_zero:
94+ ; CHECK-GI: // %bb.0: // %entry
95+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
96+ ; CHECK-GI-NEXT: sdot v3.2s, v1.8b, v2.8b
97+ ; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s
98+ ; CHECK-GI-NEXT: ret
10399entry:
104100 %vdot1.i = call <2 x i32 > @llvm.aarch64.neon.sdot.v2i32.v8i8 (<2 x i32 > zeroinitializer , <8 x i8 > %b , <8 x i8 > %c ) #2
105101 %ret = add <2 x i32 > %vdot1.i , %a
106102 ret <2 x i32 > %ret
107103}
108104
109105define <4 x i32 > @test_vdotq_s32_zero (<4 x i32 > %a , <16 x i8 > %b , <16 x i8 > %c ) #0 {
110- ; CHECK-LABEL: test_vdotq_s32_zero:
111- ; CHECK: // %bb.0: // %entry
112- ; CHECK-NEXT: sdot v0.4s, v1.16b, v2.16b
113- ; CHECK-NEXT: ret
106+ ; CHECK-SD-LABEL: test_vdotq_s32_zero:
107+ ; CHECK-SD: // %bb.0: // %entry
108+ ; CHECK-SD-NEXT: sdot v0.4s, v1.16b, v2.16b
109+ ; CHECK-SD-NEXT: ret
110+ ;
111+ ; CHECK-GI-LABEL: test_vdotq_s32_zero:
112+ ; CHECK-GI: // %bb.0: // %entry
113+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
114+ ; CHECK-GI-NEXT: sdot v3.4s, v1.16b, v2.16b
115+ ; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s
116+ ; CHECK-GI-NEXT: ret
114117entry:
115118 %vdot1.i = call <4 x i32 > @llvm.aarch64.neon.sdot.v4i32.v16i8 (<4 x i32 > zeroinitializer , <16 x i8 > %b , <16 x i8 > %c ) #2
116119 %ret = add <4 x i32 > %vdot1.i , %a
@@ -174,11 +177,19 @@ entry:
174177
175178
176179define <2 x i32 > @test_vdot_lane_u32_zero (<2 x i32 > %a , <8 x i8 > %b , <8 x i8 > %c ) {
177- ; CHECK-LABEL: test_vdot_lane_u32_zero:
178- ; CHECK: // %bb.0: // %entry
179- ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
180- ; CHECK-NEXT: udot v0.2s, v1.8b, v2.4b[1]
181- ; CHECK-NEXT: ret
180+ ; CHECK-SD-LABEL: test_vdot_lane_u32_zero:
181+ ; CHECK-SD: // %bb.0: // %entry
182+ ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
183+ ; CHECK-SD-NEXT: udot v0.2s, v1.8b, v2.4b[1]
184+ ; CHECK-SD-NEXT: ret
185+ ;
186+ ; CHECK-GI-LABEL: test_vdot_lane_u32_zero:
187+ ; CHECK-GI: // %bb.0: // %entry
188+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
189+ ; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
190+ ; CHECK-GI-NEXT: udot v3.2s, v1.8b, v2.4b[1]
191+ ; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s
192+ ; CHECK-GI-NEXT: ret
182193entry:
183194 %.cast = bitcast <8 x i8 > %c to <2 x i32 >
184195 %shuffle = shufflevector <2 x i32 > %.cast , <2 x i32 > undef , <2 x i32 > <i32 1 , i32 1 >
@@ -189,11 +200,19 @@ entry:
189200}
190201
191202define <4 x i32 > @test_vdotq_lane_u32_zero (<4 x i32 > %a , <16 x i8 > %b , <8 x i8 > %c ) {
192- ; CHECK-LABEL: test_vdotq_lane_u32_zero:
193- ; CHECK: // %bb.0: // %entry
194- ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
195- ; CHECK-NEXT: udot v0.4s, v1.16b, v2.4b[1]
196- ; CHECK-NEXT: ret
203+ ; CHECK-SD-LABEL: test_vdotq_lane_u32_zero:
204+ ; CHECK-SD: // %bb.0: // %entry
205+ ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
206+ ; CHECK-SD-NEXT: udot v0.4s, v1.16b, v2.4b[1]
207+ ; CHECK-SD-NEXT: ret
208+ ;
209+ ; CHECK-GI-LABEL: test_vdotq_lane_u32_zero:
210+ ; CHECK-GI: // %bb.0: // %entry
211+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
212+ ; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
213+ ; CHECK-GI-NEXT: udot v3.4s, v1.16b, v2.4b[1]
214+ ; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s
215+ ; CHECK-GI-NEXT: ret
197216entry:
198217 %.cast = bitcast <8 x i8 > %c to <2 x i32 >
199218 %shuffle = shufflevector <2 x i32 > %.cast , <2 x i32 > undef , <4 x i32 > <i32 1 , i32 1 , i32 1 , i32 1 >
@@ -204,10 +223,17 @@ entry:
204223}
205224
206225define <2 x i32 > @test_vdot_laneq_u32_zero (<2 x i32 > %a , <8 x i8 > %b , <16 x i8 > %c ) {
207- ; CHECK-LABEL: test_vdot_laneq_u32_zero:
208- ; CHECK: // %bb.0: // %entry
209- ; CHECK-NEXT: udot v0.2s, v1.8b, v2.4b[1]
210- ; CHECK-NEXT: ret
226+ ; CHECK-SD-LABEL: test_vdot_laneq_u32_zero:
227+ ; CHECK-SD: // %bb.0: // %entry
228+ ; CHECK-SD-NEXT: udot v0.2s, v1.8b, v2.4b[1]
229+ ; CHECK-SD-NEXT: ret
230+ ;
231+ ; CHECK-GI-LABEL: test_vdot_laneq_u32_zero:
232+ ; CHECK-GI: // %bb.0: // %entry
233+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
234+ ; CHECK-GI-NEXT: udot v3.2s, v1.8b, v2.4b[1]
235+ ; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s
236+ ; CHECK-GI-NEXT: ret
211237entry:
212238 %.cast = bitcast <16 x i8 > %c to <4 x i32 >
213239 %shuffle = shufflevector <4 x i32 > %.cast , <4 x i32 > undef , <2 x i32 > <i32 1 , i32 1 >
@@ -218,10 +244,17 @@ entry:
218244}
219245
220246define <4 x i32 > @test_vdotq_laneq_u32_zero (<4 x i32 > %a , <16 x i8 > %b , <16 x i8 > %c ) {
221- ; CHECK-LABEL: test_vdotq_laneq_u32_zero:
222- ; CHECK: // %bb.0: // %entry
223- ; CHECK-NEXT: udot v0.4s, v1.16b, v2.4b[1]
224- ; CHECK-NEXT: ret
247+ ; CHECK-SD-LABEL: test_vdotq_laneq_u32_zero:
248+ ; CHECK-SD: // %bb.0: // %entry
249+ ; CHECK-SD-NEXT: udot v0.4s, v1.16b, v2.4b[1]
250+ ; CHECK-SD-NEXT: ret
251+ ;
252+ ; CHECK-GI-LABEL: test_vdotq_laneq_u32_zero:
253+ ; CHECK-GI: // %bb.0: // %entry
254+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
255+ ; CHECK-GI-NEXT: udot v3.4s, v1.16b, v2.4b[1]
256+ ; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s
257+ ; CHECK-GI-NEXT: ret
225258entry:
226259 %.cast = bitcast <16 x i8 > %c to <4 x i32 >
227260 %shuffle = shufflevector <4 x i32 > %.cast , <4 x i32 > undef , <4 x i32 > <i32 1 , i32 1 , i32 1 , i32 1 >
@@ -288,11 +321,19 @@ entry:
288321
289322
290323define <2 x i32 > @test_vdot_lane_s32_zero (<2 x i32 > %a , <8 x i8 > %b , <8 x i8 > %c ) {
291- ; CHECK-LABEL: test_vdot_lane_s32_zero:
292- ; CHECK: // %bb.0: // %entry
293- ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
294- ; CHECK-NEXT: sdot v0.2s, v1.8b, v2.4b[1]
295- ; CHECK-NEXT: ret
324+ ; CHECK-SD-LABEL: test_vdot_lane_s32_zero:
325+ ; CHECK-SD: // %bb.0: // %entry
326+ ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
327+ ; CHECK-SD-NEXT: sdot v0.2s, v1.8b, v2.4b[1]
328+ ; CHECK-SD-NEXT: ret
329+ ;
330+ ; CHECK-GI-LABEL: test_vdot_lane_s32_zero:
331+ ; CHECK-GI: // %bb.0: // %entry
332+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
333+ ; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
334+ ; CHECK-GI-NEXT: sdot v3.2s, v1.8b, v2.4b[1]
335+ ; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s
336+ ; CHECK-GI-NEXT: ret
296337entry:
297338 %.cast = bitcast <8 x i8 > %c to <2 x i32 >
298339 %shuffle = shufflevector <2 x i32 > %.cast , <2 x i32 > undef , <2 x i32 > <i32 1 , i32 1 >
@@ -303,11 +344,19 @@ entry:
303344}
304345
305346define <4 x i32 > @test_vdotq_lane_s32_zero (<4 x i32 > %a , <16 x i8 > %b , <8 x i8 > %c ) {
306- ; CHECK-LABEL: test_vdotq_lane_s32_zero:
307- ; CHECK: // %bb.0: // %entry
308- ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
309- ; CHECK-NEXT: sdot v0.4s, v1.16b, v2.4b[1]
310- ; CHECK-NEXT: ret
347+ ; CHECK-SD-LABEL: test_vdotq_lane_s32_zero:
348+ ; CHECK-SD: // %bb.0: // %entry
349+ ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
350+ ; CHECK-SD-NEXT: sdot v0.4s, v1.16b, v2.4b[1]
351+ ; CHECK-SD-NEXT: ret
352+ ;
353+ ; CHECK-GI-LABEL: test_vdotq_lane_s32_zero:
354+ ; CHECK-GI: // %bb.0: // %entry
355+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
356+ ; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
357+ ; CHECK-GI-NEXT: sdot v3.4s, v1.16b, v2.4b[1]
358+ ; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s
359+ ; CHECK-GI-NEXT: ret
311360entry:
312361 %.cast = bitcast <8 x i8 > %c to <2 x i32 >
313362 %shuffle = shufflevector <2 x i32 > %.cast , <2 x i32 > undef , <4 x i32 > <i32 1 , i32 1 , i32 1 , i32 1 >
@@ -318,10 +367,17 @@ entry:
318367}
319368
320369define <2 x i32 > @test_vdot_laneq_s32_zero (<2 x i32 > %a , <8 x i8 > %b , <16 x i8 > %c ) {
321- ; CHECK-LABEL: test_vdot_laneq_s32_zero:
322- ; CHECK: // %bb.0: // %entry
323- ; CHECK-NEXT: sdot v0.2s, v1.8b, v2.4b[1]
324- ; CHECK-NEXT: ret
370+ ; CHECK-SD-LABEL: test_vdot_laneq_s32_zero:
371+ ; CHECK-SD: // %bb.0: // %entry
372+ ; CHECK-SD-NEXT: sdot v0.2s, v1.8b, v2.4b[1]
373+ ; CHECK-SD-NEXT: ret
374+ ;
375+ ; CHECK-GI-LABEL: test_vdot_laneq_s32_zero:
376+ ; CHECK-GI: // %bb.0: // %entry
377+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
378+ ; CHECK-GI-NEXT: sdot v3.2s, v1.8b, v2.4b[1]
379+ ; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s
380+ ; CHECK-GI-NEXT: ret
325381entry:
326382 %.cast = bitcast <16 x i8 > %c to <4 x i32 >
327383 %shuffle = shufflevector <4 x i32 > %.cast , <4 x i32 > undef , <2 x i32 > <i32 1 , i32 1 >
@@ -332,10 +388,17 @@ entry:
332388}
333389
334390define <4 x i32 > @test_vdotq_laneq_s32_zero (<4 x i32 > %a , <16 x i8 > %b , <16 x i8 > %c ) {
335- ; CHECK-LABEL: test_vdotq_laneq_s32_zero:
336- ; CHECK: // %bb.0: // %entry
337- ; CHECK-NEXT: sdot v0.4s, v1.16b, v2.4b[1]
338- ; CHECK-NEXT: ret
391+ ; CHECK-SD-LABEL: test_vdotq_laneq_s32_zero:
392+ ; CHECK-SD: // %bb.0: // %entry
393+ ; CHECK-SD-NEXT: sdot v0.4s, v1.16b, v2.4b[1]
394+ ; CHECK-SD-NEXT: ret
395+ ;
396+ ; CHECK-GI-LABEL: test_vdotq_laneq_s32_zero:
397+ ; CHECK-GI: // %bb.0: // %entry
398+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
399+ ; CHECK-GI-NEXT: sdot v3.4s, v1.16b, v2.4b[1]
400+ ; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s
401+ ; CHECK-GI-NEXT: ret
339402entry:
340403 %.cast = bitcast <16 x i8 > %c to <4 x i32 >
341404 %shuffle = shufflevector <4 x i32 > %.cast , <4 x i32 > undef , <4 x i32 > <i32 1 , i32 1 , i32 1 , i32 1 >
0 commit comments