Skip to content

Commit 3a561bc

Browse files
authored
[AArch64] Add tests for commutable [usp]mull, [us]addl, [us]abdl (#152512)
Precommit tests for PR #152158.
1 parent 229ab5a commit 3a561bc

File tree

4 files changed

+192
-10
lines changed

4 files changed

+192
-10
lines changed

llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll

Lines changed: 50 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,30 @@ entry:
7070
ret <2 x i64> %add.i
7171
}
7272

73+
; Sign-extends %a and %b to <8 x i16>, adds them in both operand orders, and
; stores the two sums to %c and to %c + 16 bytes.
; The SD check lines expect a single saddl whose result is stored twice
; (stp q0, q0); the GI check lines expect two saddl with swapped operands.
define void @test_commutable_vaddl_s8(<8 x i8> %a, <8 x i8> %b, ptr %c) {
74+
; CHECK-SD-LABEL: test_commutable_vaddl_s8:
75+
; CHECK-SD: // %bb.0: // %entry
76+
; CHECK-SD-NEXT: saddl v0.8h, v0.8b, v1.8b
77+
; CHECK-SD-NEXT: stp q0, q0, [x0]
78+
; CHECK-SD-NEXT: ret
79+
;
80+
; CHECK-GI-LABEL: test_commutable_vaddl_s8:
81+
; CHECK-GI: // %bb.0: // %entry
82+
; CHECK-GI-NEXT: saddl v2.8h, v0.8b, v1.8b
83+
; CHECK-GI-NEXT: saddl v0.8h, v1.8b, v0.8b
84+
; CHECK-GI-NEXT: stp q2, q0, [x0]
85+
; CHECK-GI-NEXT: ret
86+
entry:
87+
%vmovl.i.i = sext <8 x i8> %a to <8 x i16>
88+
%vmovl.i2.i = sext <8 x i8> %b to <8 x i16>
89+
%add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i
90+
store <8 x i16> %add.i, ptr %c
91+
; Same addition with the two operands swapped.
%add.i2 = add <8 x i16> %vmovl.i2.i, %vmovl.i.i
92+
%c.gep.1 = getelementptr i8, ptr %c, i64 16
93+
store <8 x i16> %add.i2, ptr %c.gep.1
94+
ret void
95+
}
96+
7397
define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) {
7498
; CHECK-LABEL: test_vaddl_u8:
7599
; CHECK: // %bb.0: // %entry
@@ -106,6 +130,30 @@ entry:
106130
ret <2 x i64> %add.i
107131
}
108132

133+
; Zero-extends %a and %b to <8 x i16>, adds them in both operand orders, and
; stores the two sums to %c and to %c + 16 bytes.
; The SD check lines expect a single uaddl whose result is stored twice
; (stp q0, q0); the GI check lines expect two uaddl with swapped operands.
define void @test_commutable_vaddl_u8(<8 x i8> %a, <8 x i8> %b, ptr %c) {
134+
; CHECK-SD-LABEL: test_commutable_vaddl_u8:
135+
; CHECK-SD: // %bb.0: // %entry
136+
; CHECK-SD-NEXT: uaddl v0.8h, v0.8b, v1.8b
137+
; CHECK-SD-NEXT: stp q0, q0, [x0]
138+
; CHECK-SD-NEXT: ret
139+
;
140+
; CHECK-GI-LABEL: test_commutable_vaddl_u8:
141+
; CHECK-GI: // %bb.0: // %entry
142+
; CHECK-GI-NEXT: uaddl v2.8h, v0.8b, v1.8b
143+
; CHECK-GI-NEXT: uaddl v0.8h, v1.8b, v0.8b
144+
; CHECK-GI-NEXT: stp q2, q0, [x0]
145+
; CHECK-GI-NEXT: ret
146+
entry:
147+
%vmovl.i.i = zext <8 x i8> %a to <8 x i16>
148+
%vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
149+
%add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i
150+
store <8 x i16> %add.i, ptr %c
151+
; Same addition with the two operands swapped.
%add.i2 = add <8 x i16> %vmovl.i2.i, %vmovl.i.i
152+
%c.gep.1 = getelementptr i8, ptr %c, i64 16
153+
store <8 x i16> %add.i2, ptr %c.gep.1
154+
ret void
155+
}
156+
109157
define <8 x i16> @test_vaddl_a8(<8 x i8> %a, <8 x i8> %b) {
110158
; CHECK-SD-LABEL: test_vaddl_a8:
111159
; CHECK-SD: // %bb.0: // %entry
@@ -2892,9 +2940,9 @@ define <8 x i16> @cmplx_mul_combined_re_im(<8 x i16> noundef %a, i64 %scale.coer
28922940
; CHECK-GI-LABEL: cmplx_mul_combined_re_im:
28932941
; CHECK-GI: // %bb.0: // %entry
28942942
; CHECK-GI-NEXT: lsr x9, x0, #16
2895-
; CHECK-GI-NEXT: adrp x8, .LCPI196_0
2943+
; CHECK-GI-NEXT: adrp x8, .LCPI198_0
28962944
; CHECK-GI-NEXT: rev32 v4.8h, v0.8h
2897-
; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI196_0]
2945+
; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI198_0]
28982946
; CHECK-GI-NEXT: fmov d1, x9
28992947
; CHECK-GI-NEXT: dup v2.8h, v1.h[0]
29002948
; CHECK-GI-NEXT: sqneg v1.8h, v2.8h

llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1608,6 +1608,19 @@ define <16 x i8> @poly_mulv16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
16081608
ret <16 x i8> %prod
16091609
}
16101610

1611+
; Calls llvm.aarch64.neon.pmul.v16i8 on (%lhs, %rhs) and on (%rhs, %lhs) and
; returns the vector add of the two results.
; The check lines expect two pmul instructions with swapped operands followed
; by an add.
define <16 x i8> @commutable_poly_mul(<16 x i8> %lhs, <16 x i8> %rhs) {
1612+
; CHECK-LABEL: commutable_poly_mul:
1613+
; CHECK: // %bb.0:
1614+
; CHECK-NEXT: pmul v2.16b, v0.16b, v1.16b
1615+
; CHECK-NEXT: pmul v0.16b, v1.16b, v0.16b
1616+
; CHECK-NEXT: add v0.16b, v2.16b, v0.16b
1617+
; CHECK-NEXT: ret
1618+
%1 = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
1619+
%2 = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %rhs, <16 x i8> %lhs)
1620+
%3 = add <16 x i8> %1, %2
1621+
ret <16 x i8> %3
1622+
}
1623+
16111624
declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>)
16121625
declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>)
16131626
declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>)

llvm/test/CodeGen/AArch64/arm64-vabs.ll

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,36 @@ define <2 x i64> @sabdl2d(ptr %A, ptr %B) nounwind {
4444
ret <2 x i64> %tmp4
4545
}
4646

47+
; Loads <8 x i8> vectors from %A and %B, calls llvm.aarch64.neon.sabd.v8i8 on
; them in both operand orders, zero-extends each result to <8 x i16>, and
; stores both results to %C.
; NOTE(review): %tmp7 (%C + 16 bytes) is computed but never used, so the second
; store overwrites the first one at %C. The SD check lines accordingly expect
; only one sabdl and one str. The second store was presumably meant to target
; %tmp7, as the other commutable tests in this change store to their GEP --
; confirm, and regenerate the check lines if the store is changed.
define void @commutable_sabdl(ptr %A, ptr %B, ptr %C) nounwind {
48+
; CHECK-SD-LABEL: commutable_sabdl:
49+
; CHECK-SD: // %bb.0:
50+
; CHECK-SD-NEXT: ldr d0, [x0]
51+
; CHECK-SD-NEXT: ldr d1, [x1]
52+
; CHECK-SD-NEXT: sabdl.8h v0, v1, v0
53+
; CHECK-SD-NEXT: str q0, [x2]
54+
; CHECK-SD-NEXT: ret
55+
;
56+
; CHECK-GI-LABEL: commutable_sabdl:
57+
; CHECK-GI: // %bb.0:
58+
; CHECK-GI-NEXT: ldr d0, [x0]
59+
; CHECK-GI-NEXT: ldr d1, [x1]
60+
; CHECK-GI-NEXT: sabdl.8h v2, v0, v1
61+
; CHECK-GI-NEXT: sabdl.8h v0, v1, v0
62+
; CHECK-GI-NEXT: str q2, [x2]
63+
; CHECK-GI-NEXT: str q0, [x2]
64+
; CHECK-GI-NEXT: ret
65+
%tmp1 = load <8 x i8>, ptr %A
66+
%tmp2 = load <8 x i8>, ptr %B
67+
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
68+
%tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
69+
store <8 x i16> %tmp4, ptr %C
70+
; Same intrinsic call with the two operands swapped.
%tmp5 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp1)
71+
%tmp6 = zext <8 x i8> %tmp5 to <8 x i16>
72+
%tmp7 = getelementptr i8, ptr %C, i64 16
73+
; NOTE(review): stores to %C, not %tmp7 -- see the note above the define.
store <8 x i16> %tmp6, ptr %C
74+
ret void
75+
}
76+
4777
define <8 x i16> @sabdl2_8h(ptr %A, ptr %B) nounwind {
4878
; CHECK-SD-LABEL: sabdl2_8h:
4979
; CHECK-SD: // %bb.0:
@@ -155,6 +185,36 @@ define <2 x i64> @uabdl2d(ptr %A, ptr %B) nounwind {
155185
ret <2 x i64> %tmp4
156186
}
157187

188+
; Loads <8 x i8> vectors from %A and %B, calls llvm.aarch64.neon.uabd.v8i8 on
; them in both operand orders, zero-extends each result to <8 x i16>, and
; stores both results to %C.
; NOTE(review): %tmp7 (%C + 16 bytes) is computed but never used, so the second
; store overwrites the first one at %C. The SD check lines accordingly expect
; only one uabdl and one str. The second store was presumably meant to target
; %tmp7, as the other commutable tests in this change store to their GEP --
; confirm, and regenerate the check lines if the store is changed.
define void @commutable_uabdl(ptr %A, ptr %B, ptr %C) nounwind {
189+
; CHECK-SD-LABEL: commutable_uabdl:
190+
; CHECK-SD: // %bb.0:
191+
; CHECK-SD-NEXT: ldr d0, [x0]
192+
; CHECK-SD-NEXT: ldr d1, [x1]
193+
; CHECK-SD-NEXT: uabdl.8h v0, v1, v0
194+
; CHECK-SD-NEXT: str q0, [x2]
195+
; CHECK-SD-NEXT: ret
196+
;
197+
; CHECK-GI-LABEL: commutable_uabdl:
198+
; CHECK-GI: // %bb.0:
199+
; CHECK-GI-NEXT: ldr d0, [x0]
200+
; CHECK-GI-NEXT: ldr d1, [x1]
201+
; CHECK-GI-NEXT: uabdl.8h v2, v0, v1
202+
; CHECK-GI-NEXT: uabdl.8h v0, v1, v0
203+
; CHECK-GI-NEXT: str q2, [x2]
204+
; CHECK-GI-NEXT: str q0, [x2]
205+
; CHECK-GI-NEXT: ret
206+
%tmp1 = load <8 x i8>, ptr %A
207+
%tmp2 = load <8 x i8>, ptr %B
208+
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
209+
%tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
210+
store <8 x i16> %tmp4, ptr %C
211+
; Same intrinsic call with the two operands swapped.
%tmp5 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp1)
212+
%tmp6 = zext <8 x i8> %tmp5 to <8 x i16>
213+
%tmp7 = getelementptr i8, ptr %C, i64 16
214+
; NOTE(review): stores to %C, not %tmp7 -- see the note above the define.
store <8 x i16> %tmp6, ptr %C
215+
ret void
216+
}
217+
158218
define <8 x i16> @uabdl2_8h(ptr %A, ptr %B) nounwind {
159219
; CHECK-SD-LABEL: uabdl2_8h:
160220
; CHECK-SD: // %bb.0:

llvm/test/CodeGen/AArch64/arm64-vmul.ll

Lines changed: 69 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
44

55
; CHECK-GI: warning: Instruction selection used fallback path for pmull8h
6+
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for commutable_pmull8h
67
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_1s
78
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2s
89
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_4s
@@ -78,6 +79,21 @@ define <2 x i64> @smull2d(ptr %A, ptr %B) nounwind {
7879
ret <2 x i64> %tmp3
7980
}
8081

82+
; Calls llvm.aarch64.neon.smull.v2i64 on (%A, %B) and on (%B, %A) and stores
; the two results to %C and to %C + 16 bytes.
; The check lines expect two smull instructions with swapped operands and a
; single stp of both results.
define void @commutable_smull(<2 x i32> %A, <2 x i32> %B, ptr %C) {
83+
; CHECK-LABEL: commutable_smull:
84+
; CHECK: // %bb.0:
85+
; CHECK-NEXT: smull v2.2d, v0.2s, v1.2s
86+
; CHECK-NEXT: smull v0.2d, v1.2s, v0.2s
87+
; CHECK-NEXT: stp q2, q0, [x0]
88+
; CHECK-NEXT: ret
89+
%1 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %A, <2 x i32> %B)
90+
%2 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %B, <2 x i32> %A)
91+
store <2 x i64> %1, ptr %C
92+
%3 = getelementptr i8, ptr %C, i64 16
93+
store <2 x i64> %2, ptr %3
94+
ret void
95+
}
96+
8197
declare <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
8298
declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
8399
declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
@@ -121,6 +137,21 @@ define <2 x i64> @umull2d(ptr %A, ptr %B) nounwind {
121137
ret <2 x i64> %tmp3
122138
}
123139

140+
; Calls llvm.aarch64.neon.umull.v2i64 on (%A, %B) and on (%B, %A) and stores
; the two results to %C and to %C + 16 bytes.
; The check lines expect two umull instructions with swapped operands and a
; single stp of both results.
define void @commutable_umull(<2 x i32> %A, <2 x i32> %B, ptr %C) {
141+
; CHECK-LABEL: commutable_umull:
142+
; CHECK: // %bb.0:
143+
; CHECK-NEXT: umull v2.2d, v0.2s, v1.2s
144+
; CHECK-NEXT: umull v0.2d, v1.2s, v0.2s
145+
; CHECK-NEXT: stp q2, q0, [x0]
146+
; CHECK-NEXT: ret
147+
%1 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %A, <2 x i32> %B)
148+
%2 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %B, <2 x i32> %A)
149+
store <2 x i64> %1, ptr %C
150+
%3 = getelementptr i8, ptr %C, i64 16
151+
store <2 x i64> %2, ptr %3
152+
ret void
153+
}
154+
124155
declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
125156
declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
126157
declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
@@ -212,6 +243,21 @@ define <8 x i16> @pmull8h(ptr %A, ptr %B) nounwind {
212243
ret <8 x i16> %tmp3
213244
}
214245

246+
; Calls llvm.aarch64.neon.pmull.v8i16 on (%A, %B) and on (%B, %A) and stores
; the two results to %C and to %C + 16 bytes.
; The check lines expect two pmull instructions with swapped operands and a
; single stp of both results.
define void @commutable_pmull8h(<8 x i8> %A, <8 x i8> %B, ptr %C) {
247+
; CHECK-LABEL: commutable_pmull8h:
248+
; CHECK: // %bb.0:
249+
; CHECK-NEXT: pmull v2.8h, v0.8b, v1.8b
250+
; CHECK-NEXT: pmull v0.8h, v1.8b, v0.8b
251+
; CHECK-NEXT: stp q2, q0, [x0]
252+
; CHECK-NEXT: ret
253+
%1 = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %A, <8 x i8> %B)
254+
%2 = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %B, <8 x i8> %A)
255+
store <8 x i16> %1, ptr %C
256+
; Byte offset uses an i64 index, matching the smull/umull tests in this file.
%3 = getelementptr i8, ptr %C, i64 16
257+
store <8 x i16> %2, ptr %3
258+
ret void
259+
}
260+
215261
declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
216262

217263
define <4 x i16> @sqdmulh_4h(ptr %A, ptr %B) nounwind {
@@ -487,10 +533,10 @@ define void @smlal2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2,
487533
; CHECK-GI-LABEL: smlal2d_chain_with_constant:
488534
; CHECK-GI: // %bb.0:
489535
; CHECK-GI-NEXT: mvn v3.8b, v2.8b
490-
; CHECK-GI-NEXT: adrp x8, .LCPI27_0
536+
; CHECK-GI-NEXT: adrp x8, .LCPI30_0
491537
; CHECK-GI-NEXT: smull v1.2d, v1.2s, v3.2s
492538
; CHECK-GI-NEXT: smlal v1.2d, v0.2s, v2.2s
493-
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI27_0]
539+
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI30_0]
494540
; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
495541
; CHECK-GI-NEXT: str q0, [x0]
496542
; CHECK-GI-NEXT: ret
@@ -566,8 +612,8 @@ define void @smlsl2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2,
566612
;
567613
; CHECK-GI-LABEL: smlsl2d_chain_with_constant:
568614
; CHECK-GI: // %bb.0:
569-
; CHECK-GI-NEXT: adrp x8, .LCPI31_0
570-
; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI31_0]
615+
; CHECK-GI-NEXT: adrp x8, .LCPI34_0
616+
; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI34_0]
571617
; CHECK-GI-NEXT: smlsl v3.2d, v0.2s, v2.2s
572618
; CHECK-GI-NEXT: mvn v0.8b, v2.8b
573619
; CHECK-GI-NEXT: smlsl v3.2d, v1.2s, v0.2s
@@ -829,10 +875,10 @@ define void @umlal2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2,
829875
; CHECK-GI-LABEL: umlal2d_chain_with_constant:
830876
; CHECK-GI: // %bb.0:
831877
; CHECK-GI-NEXT: mvn v3.8b, v2.8b
832-
; CHECK-GI-NEXT: adrp x8, .LCPI43_0
878+
; CHECK-GI-NEXT: adrp x8, .LCPI46_0
833879
; CHECK-GI-NEXT: umull v1.2d, v1.2s, v3.2s
834880
; CHECK-GI-NEXT: umlal v1.2d, v0.2s, v2.2s
835-
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI43_0]
881+
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI46_0]
836882
; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
837883
; CHECK-GI-NEXT: str q0, [x0]
838884
; CHECK-GI-NEXT: ret
@@ -908,8 +954,8 @@ define void @umlsl2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2,
908954
;
909955
; CHECK-GI-LABEL: umlsl2d_chain_with_constant:
910956
; CHECK-GI: // %bb.0:
911-
; CHECK-GI-NEXT: adrp x8, .LCPI47_0
912-
; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI47_0]
957+
; CHECK-GI-NEXT: adrp x8, .LCPI50_0
958+
; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI50_0]
913959
; CHECK-GI-NEXT: umlsl v3.2d, v0.2s, v2.2s
914960
; CHECK-GI-NEXT: mvn v0.8b, v2.8b
915961
; CHECK-GI-NEXT: umlsl v3.2d, v1.2s, v0.2s
@@ -3222,6 +3268,21 @@ define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind {
32223268
ret <16 x i8> %val
32233269
}
32243270

3271+
; Calls llvm.aarch64.neon.pmull64 on (%l, %r) and on (%r, %l) and returns the
; vector add of the two results.
; The check lines expect both scalars to be moved into vector registers with
; fmov, two pmull instructions with swapped operands, and an add.
define <16 x i8> @test_commutable_pmull_64(i64 %l, i64 %r) nounwind {
3272+
; CHECK-LABEL: test_commutable_pmull_64:
3273+
; CHECK: // %bb.0:
3274+
; CHECK-NEXT: fmov d0, x1
3275+
; CHECK-NEXT: fmov d1, x0
3276+
; CHECK-NEXT: pmull v2.1q, v1.1d, v0.1d
3277+
; CHECK-NEXT: pmull v0.1q, v0.1d, v1.1d
3278+
; CHECK-NEXT: add v0.16b, v2.16b, v0.16b
3279+
; CHECK-NEXT: ret
3280+
%1 = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r)
3281+
%2 = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %r, i64 %l)
3282+
%3 = add <16 x i8> %1, %2
3283+
ret <16 x i8> %3
3284+
}
3285+
32253286
declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64)
32263287

32273288
define <1 x i64> @test_mul_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) nounwind {

0 commit comments

Comments
 (0)