Skip to content

Commit f3a58f2

Browse files
committed
[AArch64] Add tests for sinking exts into muls (NFC)
1 parent a6385a3 commit f3a58f2

File tree

1 file changed

+248
-0
lines changed

1 file changed

+248
-0
lines changed
Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
3+
4+
; mul_splat_sext_v8i16: sign-extend first, then splat.
; Loads <8 x i8> from %x, sign-extends it to <8 x i16>, and broadcasts lane 3
; of the widened vector. The loop multiplies that splat by a sign-extended
; <8 x i8> load from %y and accumulates the products into an <8 x i16> sum,
; which is returned.
; In the expected assembly below, the splat is performed on the narrow byte
; lanes (dup ... v1.b[3]) ahead of the loop, and the loop body needs only a
; single widening multiply-accumulate (smlal) with no separate extend.
; NOTE(review): the unused second shufflevector operand is undef; consider
; switching to poison and regenerating the assertions - TODO confirm that the
; generated code is unchanged.
define <8 x i16> @mul_splat_sext_v8i16(ptr %x, ptr %y) {
5+
; CHECK-LABEL: mul_splat_sext_v8i16:
6+
; CHECK: // %bb.0: // %entry
7+
; CHECK-NEXT: ldr d1, [x0]
8+
; CHECK-NEXT: movi v0.2d, #0000000000000000
9+
; CHECK-NEXT: mov x8, xzr
10+
; CHECK-NEXT: dup v1.8b, v1.b[3]
11+
; CHECK-NEXT: .LBB0_1: // %l1
12+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
13+
; CHECK-NEXT: ldr d2, [x1, x8]
14+
; CHECK-NEXT: add x8, x8, #4
15+
; CHECK-NEXT: cmp w8, #4
16+
; CHECK-NEXT: smlal v0.8h, v2.8b, v1.8b
17+
; CHECK-NEXT: b.eq .LBB0_1
18+
; CHECK-NEXT: // %bb.2: // %l2
19+
; CHECK-NEXT: ret
20+
entry:
21+
%x.val = load <8 x i8>, ptr %x
22+
%x.ext = sext <8 x i8> %x.val to <8 x i16>
23+
; Every mask index is 3, so %a is lane 3 of %x.ext in all eight lanes.
%a = shufflevector <8 x i16> %x.ext, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
24+
br label %l1
25+
26+
l1:
27+
; %p is the iteration counter, %q the running sum (starts at zero).
%p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
28+
%q = phi <8 x i16> [ zeroinitializer, %entry ], [ %c, %l1 ]
29+
; The GEP is over i8, so each iteration reads 8 bytes at byte offset 4 * %p.
%y.idx = mul nuw nsw i32 %p, 4
30+
%y.ptr = getelementptr i8, ptr %y, i32 %y.idx
31+
%y.val = load <8 x i8>, ptr %y.ptr
32+
%y.ext = sext <8 x i8> %y.val to <8 x i16>
33+
%b = mul <8 x i16> %y.ext, %a
34+
%c = add <8 x i16> %q, %b
35+
%pa = add i32 %p, 1
36+
; %p equals 0 only on the first pass, so the back edge is taken exactly once
; and the loop body runs twice.
%c1 = icmp eq i32 %p, 0
37+
br i1 %c1, label %l1, label %l2
38+
39+
l2:
40+
ret <8 x i16> %c
41+
}
42+
43+
; mul_splat_sext_v4i32: sign-extend first, then splat.
; Loads <4 x i16> from %x, sign-extends it to <4 x i32>, and broadcasts lane 3
; of the widened vector. The loop multiplies that splat by a sign-extended
; <4 x i16> load from %y and accumulates the products into a <4 x i32> sum,
; which is returned.
; In the expected assembly below there is no separate dup or extend: the loop
; uses the by-element widening multiply-accumulate (smlal ... v1.h[3]), reading
; lane 3 straight from the narrow loaded value.
; NOTE(review): the unused second shufflevector operand is undef; consider
; switching to poison and regenerating the assertions - TODO confirm that the
; generated code is unchanged.
define <4 x i32> @mul_splat_sext_v4i32(ptr %x, ptr %y) {
44+
; CHECK-LABEL: mul_splat_sext_v4i32:
45+
; CHECK: // %bb.0: // %entry
46+
; CHECK-NEXT: movi v0.2d, #0000000000000000
47+
; CHECK-NEXT: ldr d1, [x0]
48+
; CHECK-NEXT: mov x8, xzr
49+
; CHECK-NEXT: .LBB1_1: // %l1
50+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
51+
; CHECK-NEXT: ldr d2, [x1, x8]
52+
; CHECK-NEXT: add x8, x8, #8
53+
; CHECK-NEXT: cmp w8, #8
54+
; CHECK-NEXT: smlal v0.4s, v2.4h, v1.h[3]
55+
; CHECK-NEXT: b.eq .LBB1_1
56+
; CHECK-NEXT: // %bb.2: // %l2
57+
; CHECK-NEXT: ret
58+
entry:
59+
%x.val = load <4 x i16>, ptr %x
60+
%x.ext = sext <4 x i16> %x.val to <4 x i32>
61+
; Every mask index is 3, so %a is lane 3 of %x.ext in all four lanes.
%a = shufflevector <4 x i32> %x.ext, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
62+
br label %l1
63+
64+
l1:
65+
; %p is the iteration counter, %q the running sum (starts at zero).
%p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
66+
%q = phi <4 x i32> [ zeroinitializer, %entry ], [ %c, %l1 ]
67+
; The GEP is over i16, so element index 4 * %p is byte offset 8 * %p.
%y.idx = mul nuw nsw i32 %p, 4
68+
%y.ptr = getelementptr i16, ptr %y, i32 %y.idx
69+
%y.val = load <4 x i16>, ptr %y.ptr
70+
%y.ext = sext <4 x i16> %y.val to <4 x i32>
71+
%b = mul <4 x i32> %y.ext, %a
72+
%c = add <4 x i32> %q, %b
73+
%pa = add i32 %p, 1
74+
; %p equals 0 only on the first pass, so the back edge is taken exactly once
; and the loop body runs twice.
%c1 = icmp eq i32 %p, 0
75+
br i1 %c1, label %l1, label %l2
76+
77+
l2:
78+
ret <4 x i32> %c
79+
}
80+
81+
; mul_splat_sext_v2i64: sign-extend first, then splat.
; Loads <2 x i32> from %x, sign-extends it to <2 x i64>, and broadcasts lane 1
; of the widened vector. The loop multiplies that splat by a sign-extended
; <2 x i32> load from %y and accumulates the products into a <2 x i64> sum,
; which is returned.
; In the expected assembly below there is no separate dup or extend: the loop
; uses the by-element widening multiply-accumulate (smlal ... v1.s[1]), reading
; lane 1 straight from the narrow loaded value.
; NOTE(review): the unused second shufflevector operand is undef; consider
; switching to poison and regenerating the assertions - TODO confirm that the
; generated code is unchanged.
define <2 x i64> @mul_splat_sext_v2i64(ptr %x, ptr %y) {
82+
; CHECK-LABEL: mul_splat_sext_v2i64:
83+
; CHECK: // %bb.0: // %entry
84+
; CHECK-NEXT: movi v0.2d, #0000000000000000
85+
; CHECK-NEXT: ldr d1, [x0]
86+
; CHECK-NEXT: mov x8, xzr
87+
; CHECK-NEXT: .LBB2_1: // %l1
88+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
89+
; CHECK-NEXT: ldr d2, [x1, x8]
90+
; CHECK-NEXT: add x8, x8, #16
91+
; CHECK-NEXT: cmp w8, #16
92+
; CHECK-NEXT: smlal v0.2d, v2.2s, v1.s[1]
93+
; CHECK-NEXT: b.eq .LBB2_1
94+
; CHECK-NEXT: // %bb.2: // %l2
95+
; CHECK-NEXT: ret
96+
entry:
97+
%x.val = load <2 x i32>, ptr %x
98+
%x.ext = sext <2 x i32> %x.val to <2 x i64>
99+
; Both mask indices are 1, so %a is lane 1 of %x.ext in both lanes.
%a = shufflevector <2 x i64> %x.ext, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
100+
br label %l1
101+
102+
l1:
103+
; %p is the iteration counter, %q the running sum (starts at zero).
%p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
104+
%q = phi <2 x i64> [ zeroinitializer, %entry ], [ %c, %l1 ]
105+
; The GEP is over i32, so element index 4 * %p is byte offset 16 * %p.
%y.idx = mul nuw nsw i32 %p, 4
106+
%y.ptr = getelementptr i32, ptr %y, i32 %y.idx
107+
%y.val = load <2 x i32>, ptr %y.ptr
108+
%y.ext = sext <2 x i32> %y.val to <2 x i64>
109+
%b = mul <2 x i64> %y.ext, %a
110+
%c = add <2 x i64> %q, %b
111+
%pa = add i32 %p, 1
112+
; %p equals 0 only on the first pass, so the back edge is taken exactly once
; and the loop body runs twice.
%c1 = icmp eq i32 %p, 0
113+
br i1 %c1, label %l1, label %l2
114+
115+
l2:
116+
ret <2 x i64> %c
117+
}
118+
119+
; mul_sext_splat_v8i16: splat first, then sign-extend.
; Loads <8 x i8> from %x, broadcasts lane 3 while still narrow, and only then
; sign-extends the splat to <8 x i16>. The loop multiplies that value by a
; sign-extended <8 x i8> load from %y and accumulates the products into an
; <8 x i16> sum, which is returned.
; In the expected assembly below no widening multiply is formed: the splat is
; extended with sshll ahead of the loop, and the loop body extends the loaded
; operand with its own sshll before a full-width mla.
; NOTE(review): the unused second shufflevector operand is undef; consider
; switching to poison and regenerating the assertions - TODO confirm that the
; generated code is unchanged.
define <8 x i16> @mul_sext_splat_v8i16(ptr %x, ptr %y) {
120+
; CHECK-LABEL: mul_sext_splat_v8i16:
121+
; CHECK: // %bb.0: // %entry
122+
; CHECK-NEXT: ldr d0, [x0]
123+
; CHECK-NEXT: mov x8, xzr
124+
; CHECK-NEXT: dup v1.8b, v0.b[3]
125+
; CHECK-NEXT: movi v0.2d, #0000000000000000
126+
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
127+
; CHECK-NEXT: .LBB3_1: // %l1
128+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
129+
; CHECK-NEXT: ldr d2, [x1, x8]
130+
; CHECK-NEXT: add x8, x8, #4
131+
; CHECK-NEXT: cmp w8, #4
132+
; CHECK-NEXT: sshll v2.8h, v2.8b, #0
133+
; CHECK-NEXT: mla v0.8h, v2.8h, v1.8h
134+
; CHECK-NEXT: b.eq .LBB3_1
135+
; CHECK-NEXT: // %bb.2: // %l2
136+
; CHECK-NEXT: ret
137+
entry:
138+
%x.val = load <8 x i8>, ptr %x
139+
; Every mask index is 3, so %x.spt is lane 3 of %x.val in all eight lanes.
%x.spt = shufflevector <8 x i8> %x.val, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
140+
%a = sext <8 x i8> %x.spt to <8 x i16>
141+
br label %l1
142+
143+
l1:
144+
; %p is the iteration counter, %q the running sum (starts at zero).
%p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
145+
%q = phi <8 x i16> [ zeroinitializer, %entry ], [ %c, %l1 ]
146+
; The GEP is over i8, so each iteration reads 8 bytes at byte offset 4 * %p.
%y.idx = mul nuw nsw i32 %p, 4
147+
%y.ptr = getelementptr i8, ptr %y, i32 %y.idx
148+
%y.val = load <8 x i8>, ptr %y.ptr
149+
%y.ext = sext <8 x i8> %y.val to <8 x i16>
150+
%b = mul <8 x i16> %y.ext, %a
151+
%c = add <8 x i16> %q, %b
152+
%pa = add i32 %p, 1
153+
; %p equals 0 only on the first pass, so the back edge is taken exactly once
; and the loop body runs twice.
%c1 = icmp eq i32 %p, 0
154+
br i1 %c1, label %l1, label %l2
155+
156+
l2:
157+
ret <8 x i16> %c
158+
}
159+
160+
; mul_sext_splat_v4i32: splat first, then sign-extend.
; Loads <4 x i16> from %x, broadcasts lane 3 while still narrow, and only then
; sign-extends the splat to <4 x i32>. The loop multiplies that value by a
; sign-extended <4 x i16> load from %y and accumulates the products into a
; <4 x i32> sum, which is returned.
; In the expected assembly below no widening multiply is formed: the splat is
; extended with sshll ahead of the loop, and the loop body extends the loaded
; operand with its own sshll before a full-width mla.
; NOTE(review): the unused second shufflevector operand is undef; consider
; switching to poison and regenerating the assertions - TODO confirm that the
; generated code is unchanged.
define <4 x i32> @mul_sext_splat_v4i32(ptr %x, ptr %y) {
161+
; CHECK-LABEL: mul_sext_splat_v4i32:
162+
; CHECK: // %bb.0: // %entry
163+
; CHECK-NEXT: ldr d0, [x0]
164+
; CHECK-NEXT: mov x8, xzr
165+
; CHECK-NEXT: dup v1.4h, v0.h[3]
166+
; CHECK-NEXT: movi v0.2d, #0000000000000000
167+
; CHECK-NEXT: sshll v1.4s, v1.4h, #0
168+
; CHECK-NEXT: .LBB4_1: // %l1
169+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
170+
; CHECK-NEXT: ldr d2, [x1, x8]
171+
; CHECK-NEXT: add x8, x8, #8
172+
; CHECK-NEXT: cmp w8, #8
173+
; CHECK-NEXT: sshll v2.4s, v2.4h, #0
174+
; CHECK-NEXT: mla v0.4s, v2.4s, v1.4s
175+
; CHECK-NEXT: b.eq .LBB4_1
176+
; CHECK-NEXT: // %bb.2: // %l2
177+
; CHECK-NEXT: ret
178+
entry:
179+
%x.val = load <4 x i16>, ptr %x
180+
; Every mask index is 3, so %x.spt is lane 3 of %x.val in all four lanes.
%x.spt = shufflevector <4 x i16> %x.val, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
181+
%a = sext <4 x i16> %x.spt to <4 x i32>
182+
br label %l1
183+
184+
l1:
185+
; %p is the iteration counter, %q the running sum (starts at zero).
%p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
186+
%q = phi <4 x i32> [ zeroinitializer, %entry ], [ %c, %l1 ]
187+
; The GEP is over i16, so element index 4 * %p is byte offset 8 * %p.
%y.idx = mul nuw nsw i32 %p, 4
188+
%y.ptr = getelementptr i16, ptr %y, i32 %y.idx
189+
%y.val = load <4 x i16>, ptr %y.ptr
190+
%y.ext = sext <4 x i16> %y.val to <4 x i32>
191+
%b = mul <4 x i32> %y.ext, %a
192+
%c = add <4 x i32> %q, %b
193+
%pa = add i32 %p, 1
194+
; %p equals 0 only on the first pass, so the back edge is taken exactly once
; and the loop body runs twice.
%c1 = icmp eq i32 %p, 0
195+
br i1 %c1, label %l1, label %l2
196+
197+
l2:
198+
ret <4 x i32> %c
199+
}
200+
201+
; mul_sext_splat_v2i64: splat first, then sign-extend.
; Loads <2 x i32> from %x, broadcasts lane 1 while still narrow, and only then
; sign-extends the splat to <2 x i64>. The loop multiplies that value by a
; sign-extended <2 x i32> load from %y and accumulates the products into a
; <2 x i64> sum, which is returned.
; In the expected assembly below the multiply is not done as a vector
; operation: both operands are extended with sshll, each lane is moved to a
; general-purpose register, multiplied with a scalar smull, and the two
; products are reassembled into a vector before the vector add.
; NOTE(review): the unused second shufflevector operand is undef; consider
; switching to poison and regenerating the assertions - TODO confirm that the
; generated code is unchanged.
define <2 x i64> @mul_sext_splat_v2i64(ptr %x, ptr %y) {
202+
; CHECK-LABEL: mul_sext_splat_v2i64:
203+
; CHECK: // %bb.0: // %entry
204+
; CHECK-NEXT: ldr d0, [x0]
205+
; CHECK-NEXT: mov x8, xzr
206+
; CHECK-NEXT: dup v0.2s, v0.s[1]
207+
; CHECK-NEXT: sshll v1.2d, v0.2s, #0
208+
; CHECK-NEXT: movi v0.2d, #0000000000000000
209+
; CHECK-NEXT: mov x9, v1.d[1]
210+
; CHECK-NEXT: fmov x10, d1
211+
; CHECK-NEXT: .LBB5_1: // %l1
212+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
213+
; CHECK-NEXT: ldr d1, [x1, x8]
214+
; CHECK-NEXT: add x8, x8, #16
215+
; CHECK-NEXT: cmp w8, #16
216+
; CHECK-NEXT: sshll v1.2d, v1.2s, #0
217+
; CHECK-NEXT: fmov x12, d1
218+
; CHECK-NEXT: mov x11, v1.d[1]
219+
; CHECK-NEXT: smull x12, w12, w10
220+
; CHECK-NEXT: smull x11, w11, w9
221+
; CHECK-NEXT: fmov d1, x12
222+
; CHECK-NEXT: mov v1.d[1], x11
223+
; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
224+
; CHECK-NEXT: b.eq .LBB5_1
225+
; CHECK-NEXT: // %bb.2: // %l2
226+
; CHECK-NEXT: ret
227+
entry:
228+
%x.val = load <2 x i32>, ptr %x
229+
; Both mask indices are 1, so %x.spt is lane 1 of %x.val in both lanes.
%x.spt = shufflevector <2 x i32> %x.val, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
230+
%a = sext <2 x i32> %x.spt to <2 x i64>
231+
br label %l1
232+
233+
l1:
234+
; %p is the iteration counter, %q the running sum (starts at zero).
%p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
235+
%q = phi <2 x i64> [ zeroinitializer, %entry ], [ %c, %l1 ]
236+
; The GEP is over i32, so element index 4 * %p is byte offset 16 * %p.
%y.idx = mul nuw nsw i32 %p, 4
237+
%y.ptr = getelementptr i32, ptr %y, i32 %y.idx
238+
%y.val = load <2 x i32>, ptr %y.ptr
239+
%y.ext = sext <2 x i32> %y.val to <2 x i64>
240+
%b = mul <2 x i64> %y.ext, %a
241+
%c = add <2 x i64> %q, %b
242+
%pa = add i32 %p, 1
243+
; %p equals 0 only on the first pass, so the back edge is taken exactly once
; and the loop body runs twice.
%c1 = icmp eq i32 %p, 0
244+
br i1 %c1, label %l1, label %l2
245+
246+
l2:
247+
ret <2 x i64> %c
248+
}

0 commit comments

Comments
 (0)