Skip to content

Commit 44f0079

Browse files
davemgreen authored and krishna2803 committed
[AArch64][GlobalISel] Lower udot/sdot intrinsics to G_UDOT/G_SDOT
This allows them to be selected using the same pathways as normal lowering. USDOT is not handled yet as we do not yet have a node for it.
1 parent a7a936c commit 44f0079

File tree

2 files changed

+151
-78
lines changed

2 files changed

+151
-78
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1650,6 +1650,12 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
16501650
MI.eraseFromParent();
16511651
return true;
16521652
};
1653+
auto LowerTriOp = [&MI, &MIB](unsigned Opcode) {
1654+
MIB.buildInstr(Opcode, {MI.getOperand(0)},
1655+
{MI.getOperand(2), MI.getOperand(3), MI.getOperand(4)});
1656+
MI.eraseFromParent();
1657+
return true;
1658+
};
16531659

16541660
Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
16551661
switch (IntrinsicID) {
@@ -1828,6 +1834,10 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
18281834
return LowerBinOp(TargetOpcode::G_USUBSAT);
18291835
break;
18301836
}
1837+
case Intrinsic::aarch64_neon_udot:
1838+
return LowerTriOp(AArch64::G_UDOT);
1839+
case Intrinsic::aarch64_neon_sdot:
1840+
return LowerTriOp(AArch64::G_SDOT);
18311841

18321842
case Intrinsic::vector_reverse:
18331843
// TODO: Add support for vector_reverse

llvm/test/CodeGen/AArch64/neon-dot-product.ll

Lines changed: 141 additions & 78 deletions
Original file line number | Diff line number | Diff line change
@@ -1,31 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+dotprod < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3-
; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+dotprod -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4-
5-
; CHECK-GI: warning: Instruction selection used fallback path for test_vdot_u32
6-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_u32
7-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_s32
8-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_s32
9-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_u32_zero
10-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_u32_zero
11-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_s32_zero
12-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_s32_zero
13-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_lane_u32
14-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_lane_u32
15-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_laneq_u32
16-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_laneq_u32
17-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_lane_u32_zero
18-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_lane_u32_zero
19-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_laneq_u32_zero
20-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_laneq_u32_zero
21-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_lane_s32
22-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_lane_s32
23-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_laneq_s32
24-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_laneq_s32
25-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_lane_s32_zero
26-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_lane_s32_zero
27-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdot_laneq_s32_zero
28-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vdotq_laneq_s32_zero
3+
; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+dotprod -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
294

305
declare <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32>, <8 x i8>, <8 x i8>)
316
declare <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>)
@@ -74,43 +49,71 @@ entry:
7449

7550

7651
define <2 x i32> @test_vdot_u32_zero(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) #0 {
77-
; CHECK-LABEL: test_vdot_u32_zero:
78-
; CHECK: // %bb.0: // %entry
79-
; CHECK-NEXT: udot v0.2s, v1.8b, v2.8b
80-
; CHECK-NEXT: ret
52+
; CHECK-SD-LABEL: test_vdot_u32_zero:
53+
; CHECK-SD: // %bb.0: // %entry
54+
; CHECK-SD-NEXT: udot v0.2s, v1.8b, v2.8b
55+
; CHECK-SD-NEXT: ret
56+
;
57+
; CHECK-GI-LABEL: test_vdot_u32_zero:
58+
; CHECK-GI: // %bb.0: // %entry
59+
; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
60+
; CHECK-GI-NEXT: udot v3.2s, v1.8b, v2.8b
61+
; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s
62+
; CHECK-GI-NEXT: ret
8163
entry:
8264
%vdot1.i = call <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32> zeroinitializer, <8 x i8> %b, <8 x i8> %c) #2
8365
%ret = add <2 x i32> %vdot1.i, %a
8466
ret <2 x i32> %ret
8567
}
8668

8769
define <4 x i32> @test_vdotq_u32_zero(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) #0 {
88-
; CHECK-LABEL: test_vdotq_u32_zero:
89-
; CHECK: // %bb.0: // %entry
90-
; CHECK-NEXT: udot v0.4s, v1.16b, v2.16b
91-
; CHECK-NEXT: ret
70+
; CHECK-SD-LABEL: test_vdotq_u32_zero:
71+
; CHECK-SD: // %bb.0: // %entry
72+
; CHECK-SD-NEXT: udot v0.4s, v1.16b, v2.16b
73+
; CHECK-SD-NEXT: ret
74+
;
75+
; CHECK-GI-LABEL: test_vdotq_u32_zero:
76+
; CHECK-GI: // %bb.0: // %entry
77+
; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
78+
; CHECK-GI-NEXT: udot v3.4s, v1.16b, v2.16b
79+
; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s
80+
; CHECK-GI-NEXT: ret
9281
entry:
9382
%vdot1.i = call <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32> zeroinitializer, <16 x i8> %b, <16 x i8> %c) #2
9483
%ret = add <4 x i32> %vdot1.i, %a
9584
ret <4 x i32> %ret
9685
}
9786

9887
define <2 x i32> @test_vdot_s32_zero(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) #0 {
99-
; CHECK-LABEL: test_vdot_s32_zero:
100-
; CHECK: // %bb.0: // %entry
101-
; CHECK-NEXT: sdot v0.2s, v1.8b, v2.8b
102-
; CHECK-NEXT: ret
88+
; CHECK-SD-LABEL: test_vdot_s32_zero:
89+
; CHECK-SD: // %bb.0: // %entry
90+
; CHECK-SD-NEXT: sdot v0.2s, v1.8b, v2.8b
91+
; CHECK-SD-NEXT: ret
92+
;
93+
; CHECK-GI-LABEL: test_vdot_s32_zero:
94+
; CHECK-GI: // %bb.0: // %entry
95+
; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
96+
; CHECK-GI-NEXT: sdot v3.2s, v1.8b, v2.8b
97+
; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s
98+
; CHECK-GI-NEXT: ret
10399
entry:
104100
%vdot1.i = call <2 x i32> @llvm.aarch64.neon.sdot.v2i32.v8i8(<2 x i32> zeroinitializer, <8 x i8> %b, <8 x i8> %c) #2
105101
%ret = add <2 x i32> %vdot1.i, %a
106102
ret <2 x i32> %ret
107103
}
108104

109105
define <4 x i32> @test_vdotq_s32_zero(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) #0 {
110-
; CHECK-LABEL: test_vdotq_s32_zero:
111-
; CHECK: // %bb.0: // %entry
112-
; CHECK-NEXT: sdot v0.4s, v1.16b, v2.16b
113-
; CHECK-NEXT: ret
106+
; CHECK-SD-LABEL: test_vdotq_s32_zero:
107+
; CHECK-SD: // %bb.0: // %entry
108+
; CHECK-SD-NEXT: sdot v0.4s, v1.16b, v2.16b
109+
; CHECK-SD-NEXT: ret
110+
;
111+
; CHECK-GI-LABEL: test_vdotq_s32_zero:
112+
; CHECK-GI: // %bb.0: // %entry
113+
; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
114+
; CHECK-GI-NEXT: sdot v3.4s, v1.16b, v2.16b
115+
; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s
116+
; CHECK-GI-NEXT: ret
114117
entry:
115118
%vdot1.i = call <4 x i32> @llvm.aarch64.neon.sdot.v4i32.v16i8(<4 x i32> zeroinitializer, <16 x i8> %b, <16 x i8> %c) #2
116119
%ret = add <4 x i32> %vdot1.i, %a
@@ -174,11 +177,19 @@ entry:
174177

175178

176179
define <2 x i32> @test_vdot_lane_u32_zero(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) {
177-
; CHECK-LABEL: test_vdot_lane_u32_zero:
178-
; CHECK: // %bb.0: // %entry
179-
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
180-
; CHECK-NEXT: udot v0.2s, v1.8b, v2.4b[1]
181-
; CHECK-NEXT: ret
180+
; CHECK-SD-LABEL: test_vdot_lane_u32_zero:
181+
; CHECK-SD: // %bb.0: // %entry
182+
; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
183+
; CHECK-SD-NEXT: udot v0.2s, v1.8b, v2.4b[1]
184+
; CHECK-SD-NEXT: ret
185+
;
186+
; CHECK-GI-LABEL: test_vdot_lane_u32_zero:
187+
; CHECK-GI: // %bb.0: // %entry
188+
; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
189+
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
190+
; CHECK-GI-NEXT: udot v3.2s, v1.8b, v2.4b[1]
191+
; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s
192+
; CHECK-GI-NEXT: ret
182193
entry:
183194
%.cast = bitcast <8 x i8> %c to <2 x i32>
184195
%shuffle = shufflevector <2 x i32> %.cast, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -189,11 +200,19 @@ entry:
189200
}
190201

191202
define <4 x i32> @test_vdotq_lane_u32_zero(<4 x i32> %a, <16 x i8> %b, <8 x i8> %c) {
192-
; CHECK-LABEL: test_vdotq_lane_u32_zero:
193-
; CHECK: // %bb.0: // %entry
194-
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
195-
; CHECK-NEXT: udot v0.4s, v1.16b, v2.4b[1]
196-
; CHECK-NEXT: ret
203+
; CHECK-SD-LABEL: test_vdotq_lane_u32_zero:
204+
; CHECK-SD: // %bb.0: // %entry
205+
; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
206+
; CHECK-SD-NEXT: udot v0.4s, v1.16b, v2.4b[1]
207+
; CHECK-SD-NEXT: ret
208+
;
209+
; CHECK-GI-LABEL: test_vdotq_lane_u32_zero:
210+
; CHECK-GI: // %bb.0: // %entry
211+
; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
212+
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
213+
; CHECK-GI-NEXT: udot v3.4s, v1.16b, v2.4b[1]
214+
; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s
215+
; CHECK-GI-NEXT: ret
197216
entry:
198217
%.cast = bitcast <8 x i8> %c to <2 x i32>
199218
%shuffle = shufflevector <2 x i32> %.cast, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -204,10 +223,17 @@ entry:
204223
}
205224

206225
define <2 x i32> @test_vdot_laneq_u32_zero(<2 x i32> %a, <8 x i8> %b, <16 x i8> %c) {
207-
; CHECK-LABEL: test_vdot_laneq_u32_zero:
208-
; CHECK: // %bb.0: // %entry
209-
; CHECK-NEXT: udot v0.2s, v1.8b, v2.4b[1]
210-
; CHECK-NEXT: ret
226+
; CHECK-SD-LABEL: test_vdot_laneq_u32_zero:
227+
; CHECK-SD: // %bb.0: // %entry
228+
; CHECK-SD-NEXT: udot v0.2s, v1.8b, v2.4b[1]
229+
; CHECK-SD-NEXT: ret
230+
;
231+
; CHECK-GI-LABEL: test_vdot_laneq_u32_zero:
232+
; CHECK-GI: // %bb.0: // %entry
233+
; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
234+
; CHECK-GI-NEXT: udot v3.2s, v1.8b, v2.4b[1]
235+
; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s
236+
; CHECK-GI-NEXT: ret
211237
entry:
212238
%.cast = bitcast <16 x i8> %c to <4 x i32>
213239
%shuffle = shufflevector <4 x i32> %.cast, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -218,10 +244,17 @@ entry:
218244
}
219245

220246
define <4 x i32> @test_vdotq_laneq_u32_zero(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) {
221-
; CHECK-LABEL: test_vdotq_laneq_u32_zero:
222-
; CHECK: // %bb.0: // %entry
223-
; CHECK-NEXT: udot v0.4s, v1.16b, v2.4b[1]
224-
; CHECK-NEXT: ret
247+
; CHECK-SD-LABEL: test_vdotq_laneq_u32_zero:
248+
; CHECK-SD: // %bb.0: // %entry
249+
; CHECK-SD-NEXT: udot v0.4s, v1.16b, v2.4b[1]
250+
; CHECK-SD-NEXT: ret
251+
;
252+
; CHECK-GI-LABEL: test_vdotq_laneq_u32_zero:
253+
; CHECK-GI: // %bb.0: // %entry
254+
; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
255+
; CHECK-GI-NEXT: udot v3.4s, v1.16b, v2.4b[1]
256+
; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s
257+
; CHECK-GI-NEXT: ret
225258
entry:
226259
%.cast = bitcast <16 x i8> %c to <4 x i32>
227260
%shuffle = shufflevector <4 x i32> %.cast, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -288,11 +321,19 @@ entry:
288321

289322

290323
define <2 x i32> @test_vdot_lane_s32_zero(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) {
291-
; CHECK-LABEL: test_vdot_lane_s32_zero:
292-
; CHECK: // %bb.0: // %entry
293-
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
294-
; CHECK-NEXT: sdot v0.2s, v1.8b, v2.4b[1]
295-
; CHECK-NEXT: ret
324+
; CHECK-SD-LABEL: test_vdot_lane_s32_zero:
325+
; CHECK-SD: // %bb.0: // %entry
326+
; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
327+
; CHECK-SD-NEXT: sdot v0.2s, v1.8b, v2.4b[1]
328+
; CHECK-SD-NEXT: ret
329+
;
330+
; CHECK-GI-LABEL: test_vdot_lane_s32_zero:
331+
; CHECK-GI: // %bb.0: // %entry
332+
; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
333+
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
334+
; CHECK-GI-NEXT: sdot v3.2s, v1.8b, v2.4b[1]
335+
; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s
336+
; CHECK-GI-NEXT: ret
296337
entry:
297338
%.cast = bitcast <8 x i8> %c to <2 x i32>
298339
%shuffle = shufflevector <2 x i32> %.cast, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -303,11 +344,19 @@ entry:
303344
}
304345

305346
define <4 x i32> @test_vdotq_lane_s32_zero(<4 x i32> %a, <16 x i8> %b, <8 x i8> %c) {
306-
; CHECK-LABEL: test_vdotq_lane_s32_zero:
307-
; CHECK: // %bb.0: // %entry
308-
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
309-
; CHECK-NEXT: sdot v0.4s, v1.16b, v2.4b[1]
310-
; CHECK-NEXT: ret
347+
; CHECK-SD-LABEL: test_vdotq_lane_s32_zero:
348+
; CHECK-SD: // %bb.0: // %entry
349+
; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
350+
; CHECK-SD-NEXT: sdot v0.4s, v1.16b, v2.4b[1]
351+
; CHECK-SD-NEXT: ret
352+
;
353+
; CHECK-GI-LABEL: test_vdotq_lane_s32_zero:
354+
; CHECK-GI: // %bb.0: // %entry
355+
; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
356+
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
357+
; CHECK-GI-NEXT: sdot v3.4s, v1.16b, v2.4b[1]
358+
; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s
359+
; CHECK-GI-NEXT: ret
311360
entry:
312361
%.cast = bitcast <8 x i8> %c to <2 x i32>
313362
%shuffle = shufflevector <2 x i32> %.cast, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -318,10 +367,17 @@ entry:
318367
}
319368

320369
define <2 x i32> @test_vdot_laneq_s32_zero(<2 x i32> %a, <8 x i8> %b, <16 x i8> %c) {
321-
; CHECK-LABEL: test_vdot_laneq_s32_zero:
322-
; CHECK: // %bb.0: // %entry
323-
; CHECK-NEXT: sdot v0.2s, v1.8b, v2.4b[1]
324-
; CHECK-NEXT: ret
370+
; CHECK-SD-LABEL: test_vdot_laneq_s32_zero:
371+
; CHECK-SD: // %bb.0: // %entry
372+
; CHECK-SD-NEXT: sdot v0.2s, v1.8b, v2.4b[1]
373+
; CHECK-SD-NEXT: ret
374+
;
375+
; CHECK-GI-LABEL: test_vdot_laneq_s32_zero:
376+
; CHECK-GI: // %bb.0: // %entry
377+
; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
378+
; CHECK-GI-NEXT: sdot v3.2s, v1.8b, v2.4b[1]
379+
; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s
380+
; CHECK-GI-NEXT: ret
325381
entry:
326382
%.cast = bitcast <16 x i8> %c to <4 x i32>
327383
%shuffle = shufflevector <4 x i32> %.cast, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -332,10 +388,17 @@ entry:
332388
}
333389

334390
define <4 x i32> @test_vdotq_laneq_s32_zero(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) {
335-
; CHECK-LABEL: test_vdotq_laneq_s32_zero:
336-
; CHECK: // %bb.0: // %entry
337-
; CHECK-NEXT: sdot v0.4s, v1.16b, v2.4b[1]
338-
; CHECK-NEXT: ret
391+
; CHECK-SD-LABEL: test_vdotq_laneq_s32_zero:
392+
; CHECK-SD: // %bb.0: // %entry
393+
; CHECK-SD-NEXT: sdot v0.4s, v1.16b, v2.4b[1]
394+
; CHECK-SD-NEXT: ret
395+
;
396+
; CHECK-GI-LABEL: test_vdotq_laneq_s32_zero:
397+
; CHECK-GI: // %bb.0: // %entry
398+
; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
399+
; CHECK-GI-NEXT: sdot v3.4s, v1.16b, v2.4b[1]
400+
; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s
401+
; CHECK-GI-NEXT: ret
339402
entry:
340403
%.cast = bitcast <16 x i8> %c to <4 x i32>
341404
%shuffle = shufflevector <4 x i32> %.cast, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>

0 commit comments

Comments
 (0)