Skip to content

Commit 62302b5

Browse files
davemgreen authored and kcloudy0717 committed
[AArch64] Add tests for umulh. NFC
1 parent b12c8fc commit 62302b5

File tree

2 files changed

+535
-20
lines changed

2 files changed

+535
-20
lines changed

llvm/test/CodeGen/AArch64/sve-int-mulh-pred.ll

Lines changed: 268 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
33

44
;
55
; SMULH
66
;
77

8-
define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
8+
define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
99
; CHECK-LABEL: smulh_i8:
1010
; CHECK: // %bb.0:
1111
; CHECK-NEXT: ptrue p0.b
@@ -19,7 +19,7 @@ define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
1919
ret <vscale x 16 x i8> %tr
2020
}
2121

22-
define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
22+
define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
2323
; CHECK-LABEL: smulh_i16:
2424
; CHECK: // %bb.0:
2525
; CHECK-NEXT: ptrue p0.h
@@ -33,7 +33,7 @@ define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %
3333
ret <vscale x 8 x i16> %tr
3434
}
3535

36-
define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
36+
define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
3737
; CHECK-LABEL: smulh_i32:
3838
; CHECK: // %bb.0:
3939
; CHECK-NEXT: ptrue p0.s
@@ -47,7 +47,7 @@ define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
4747
ret <vscale x 4 x i32> %tr
4848
}
4949

50-
define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
50+
define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
5151
; CHECK-LABEL: smulh_i64:
5252
; CHECK: // %bb.0:
5353
; CHECK-NEXT: ptrue p0.d
@@ -65,7 +65,7 @@ define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
6565
; UMULH
6666
;
6767

68-
define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
68+
define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
6969
; CHECK-LABEL: umulh_i8:
7070
; CHECK: // %bb.0:
7171
; CHECK-NEXT: ptrue p0.b
@@ -79,7 +79,7 @@ define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
7979
ret <vscale x 16 x i8> %tr
8080
}
8181

82-
define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
82+
define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
8383
; CHECK-LABEL: umulh_i16:
8484
; CHECK: // %bb.0:
8585
; CHECK-NEXT: ptrue p0.h
@@ -93,7 +93,7 @@ define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %
9393
ret <vscale x 8 x i16> %tr
9494
}
9595

96-
define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
96+
define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
9797
; CHECK-LABEL: umulh_i32:
9898
; CHECK: // %bb.0:
9999
; CHECK-NEXT: ptrue p0.s
@@ -107,7 +107,7 @@ define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
107107
ret <vscale x 4 x i32> %tr
108108
}
109109

110-
define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
110+
define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
111111
; CHECK-LABEL: umulh_i64:
112112
; CHECK: // %bb.0:
113113
; CHECK-NEXT: ptrue p0.d
@@ -121,4 +121,262 @@ define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
121121
ret <vscale x 2 x i64> %tr
122122
}
123123

124-
attributes #0 = { "target-features"="+sve" }
124+
125+
; Fixed-length 128-bit vectors
126+
127+
; Signed multiply-high on <16 x i8>: both operands are sign-extended to i16,
; multiplied, shifted right by 8 and truncated back to i8, so each result lane
; is the high byte of the 16-bit product. A logical shift is sufficient here
; because the trunc discards the bits where lshr and ashr would differ.
; The CHECK lines expect a widening smull2/smull pair followed by one uzp2.
define <16 x i8> @smulh_v16i8(<16 x i8> %a, <16 x i8> %b) {
128+
; CHECK-LABEL: smulh_v16i8:
129+
; CHECK: // %bb.0:
130+
; CHECK-NEXT: smull2 v2.8h, v0.16b, v1.16b
131+
; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b
132+
; CHECK-NEXT: uzp2 v0.16b, v0.16b, v2.16b
133+
; CHECK-NEXT: ret
134+
%1 = sext <16 x i8> %a to <16 x i16>
135+
%2 = sext <16 x i8> %b to <16 x i16>
136+
%mul = mul <16 x i16> %1, %2
137+
%shr = lshr <16 x i16> %mul, splat(i16 8)
138+
%tr = trunc <16 x i16> %shr to <16 x i8>
139+
ret <16 x i8> %tr
140+
}
141+
142+
; Signed multiply-high on <8 x i16>: both operands are sign-extended to i32,
; multiplied, shifted right by 16 and truncated back to i16, so each result
; lane is the high half of the 32-bit product. A logical shift is sufficient
; because the trunc discards the bits where lshr and ashr would differ.
; The CHECK lines expect a widening smull2/smull pair followed by one uzp2.
define <8 x i16> @smulh_v8i16(<8 x i16> %a, <8 x i16> %b) {
143+
; CHECK-LABEL: smulh_v8i16:
144+
; CHECK: // %bb.0:
145+
; CHECK-NEXT: smull2 v2.4s, v0.8h, v1.8h
146+
; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h
147+
; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
148+
; CHECK-NEXT: ret
149+
%1 = sext <8 x i16> %a to <8 x i32>
150+
%2 = sext <8 x i16> %b to <8 x i32>
151+
%mul = mul <8 x i32> %1, %2
152+
%shr = lshr <8 x i32> %mul, splat(i32 16)
153+
%tr = trunc <8 x i32> %shr to <8 x i16>
154+
ret <8 x i16> %tr
155+
}
156+
157+
; Signed multiply-high on <4 x i32>: both operands are sign-extended to i64,
; multiplied, shifted right by 32 and truncated back to i32, so each result
; lane is the high half of the 64-bit product. A logical shift is sufficient
; because the trunc discards the bits where lshr and ashr would differ.
; The CHECK lines expect a widening smull2/smull pair followed by one uzp2.
define <4 x i32> @smulh_v4i32(<4 x i32> %a, <4 x i32> %b) {
158+
; CHECK-LABEL: smulh_v4i32:
159+
; CHECK: // %bb.0:
160+
; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s
161+
; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
162+
; CHECK-NEXT: uzp2 v0.4s, v0.4s, v2.4s
163+
; CHECK-NEXT: ret
164+
%1 = sext <4 x i32> %a to <4 x i64>
165+
%2 = sext <4 x i32> %b to <4 x i64>
166+
%mul = mul <4 x i64> %1, %2
167+
%shr = lshr <4 x i64> %mul, splat(i64 32)
168+
%tr = trunc <4 x i64> %shr to <4 x i32>
169+
ret <4 x i32> %tr
170+
}
171+
172+
; Signed multiply-high on <2 x i64>: both operands are sign-extended to i128,
; multiplied, shifted right by 64 and truncated back to i64, so each result
; lane is the high half of the 128-bit product. A logical shift is sufficient
; because the trunc discards the bits where lshr and ashr would differ.
; The CHECK lines expect both lanes to be moved into general-purpose
; registers, multiplied with two scalar smulh instructions, and then
; reassembled into v0.
define <2 x i64> @smulh_v2i64(<2 x i64> %a, <2 x i64> %b) {
173+
; CHECK-LABEL: smulh_v2i64:
174+
; CHECK: // %bb.0:
175+
; CHECK-NEXT: mov x8, v0.d[1]
176+
; CHECK-NEXT: mov x9, v1.d[1]
177+
; CHECK-NEXT: fmov x10, d0
178+
; CHECK-NEXT: fmov x11, d1
179+
; CHECK-NEXT: smulh x10, x10, x11
180+
; CHECK-NEXT: smulh x8, x8, x9
181+
; CHECK-NEXT: fmov d0, x10
182+
; CHECK-NEXT: fmov d1, x8
183+
; CHECK-NEXT: mov v0.d[1], v1.d[0]
184+
; CHECK-NEXT: ret
185+
%1 = sext <2 x i64> %a to <2 x i128>
186+
%2 = sext <2 x i64> %b to <2 x i128>
187+
%mul = mul <2 x i128> %1, %2
188+
%shr = lshr <2 x i128> %mul, splat(i128 64)
189+
%tr = trunc <2 x i128> %shr to <2 x i64>
190+
ret <2 x i64> %tr
191+
}
192+
193+
; Unsigned multiply-high on <16 x i8>: both operands are zero-extended to i16,
; multiplied, logically shifted right by 8 and truncated back to i8, so each
; result lane is the high byte of the 16-bit product.
; The CHECK lines expect a widening umull2/umull pair followed by one uzp2.
define <16 x i8> @umulh_v16i8(<16 x i8> %a, <16 x i8> %b) {
194+
; CHECK-LABEL: umulh_v16i8:
195+
; CHECK: // %bb.0:
196+
; CHECK-NEXT: umull2 v2.8h, v0.16b, v1.16b
197+
; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b
198+
; CHECK-NEXT: uzp2 v0.16b, v0.16b, v2.16b
199+
; CHECK-NEXT: ret
200+
%1 = zext <16 x i8> %a to <16 x i16>
201+
%2 = zext <16 x i8> %b to <16 x i16>
202+
%mul = mul <16 x i16> %1, %2
203+
%shr = lshr <16 x i16> %mul, splat(i16 8)
204+
%tr = trunc <16 x i16> %shr to <16 x i8>
205+
ret <16 x i8> %tr
206+
}
207+
208+
; Unsigned multiply-high on <8 x i16>: both operands are zero-extended to i32,
; multiplied, logically shifted right by 16 and truncated back to i16, so each
; result lane is the high half of the 32-bit product.
; The CHECK lines expect a widening umull2/umull pair followed by one uzp2.
define <8 x i16> @umulh_v8i16(<8 x i16> %a, <8 x i16> %b) {
209+
; CHECK-LABEL: umulh_v8i16:
210+
; CHECK: // %bb.0:
211+
; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h
212+
; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
213+
; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
214+
; CHECK-NEXT: ret
215+
%1 = zext <8 x i16> %a to <8 x i32>
216+
%2 = zext <8 x i16> %b to <8 x i32>
217+
%mul = mul <8 x i32> %1, %2
218+
%shr = lshr <8 x i32> %mul, splat(i32 16)
219+
%tr = trunc <8 x i32> %shr to <8 x i16>
220+
ret <8 x i16> %tr
221+
}
222+
223+
; Unsigned multiply-high on <4 x i32>: both operands are zero-extended to i64,
; multiplied, logically shifted right by 32 and truncated back to i32, so each
; result lane is the high half of the 64-bit product.
; The CHECK lines expect a widening umull2/umull pair followed by one uzp2.
define <4 x i32> @umulh_v4i32(<4 x i32> %a, <4 x i32> %b) {
224+
; CHECK-LABEL: umulh_v4i32:
225+
; CHECK: // %bb.0:
226+
; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s
227+
; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
228+
; CHECK-NEXT: uzp2 v0.4s, v0.4s, v2.4s
229+
; CHECK-NEXT: ret
230+
%1 = zext <4 x i32> %a to <4 x i64>
231+
%2 = zext <4 x i32> %b to <4 x i64>
232+
%mul = mul <4 x i64> %1, %2
233+
%shr = lshr <4 x i64> %mul, splat(i64 32)
234+
%tr = trunc <4 x i64> %shr to <4 x i32>
235+
ret <4 x i32> %tr
236+
}
237+
238+
; Unsigned multiply-high on <2 x i64>: both operands are zero-extended to
; i128, multiplied, logically shifted right by 64 and truncated back to i64,
; so each result lane is the high half of the 128-bit product.
; The CHECK lines expect both lanes to be moved into general-purpose
; registers, multiplied with two scalar umulh instructions, and then
; reassembled into v0.
define <2 x i64> @umulh_v2i64(<2 x i64> %a, <2 x i64> %b) {
239+
; CHECK-LABEL: umulh_v2i64:
240+
; CHECK: // %bb.0:
241+
; CHECK-NEXT: mov x8, v0.d[1]
242+
; CHECK-NEXT: mov x9, v1.d[1]
243+
; CHECK-NEXT: fmov x10, d0
244+
; CHECK-NEXT: fmov x11, d1
245+
; CHECK-NEXT: umulh x10, x10, x11
246+
; CHECK-NEXT: umulh x8, x8, x9
247+
; CHECK-NEXT: fmov d0, x10
248+
; CHECK-NEXT: fmov d1, x8
249+
; CHECK-NEXT: mov v0.d[1], v1.d[0]
250+
; CHECK-NEXT: ret
251+
%1 = zext <2 x i64> %a to <2 x i128>
252+
%2 = zext <2 x i64> %b to <2 x i128>
253+
%mul = mul <2 x i128> %1, %2
254+
%shr = lshr <2 x i128> %mul, splat(i128 64)
255+
%tr = trunc <2 x i128> %shr to <2 x i64>
256+
ret <2 x i64> %tr
257+
}
258+
259+
260+
261+
; Fixed-length 64-bit vectors
262+
263+
; Signed multiply-high on <8 x i8>: both operands are sign-extended to i16,
; multiplied, shifted right by 8 and truncated back to i8, so each result lane
; is the high byte of the 16-bit product. A logical shift is sufficient
; because the trunc discards the bits where lshr and ashr would differ.
; The CHECK lines expect one widening smull followed by a narrowing shrn #8.
define <8 x i8> @smulh_v8i8(<8 x i8> %a, <8 x i8> %b) {
264+
; CHECK-LABEL: smulh_v8i8:
265+
; CHECK: // %bb.0:
266+
; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b
267+
; CHECK-NEXT: shrn v0.8b, v0.8h, #8
268+
; CHECK-NEXT: ret
269+
%1 = sext <8 x i8> %a to <8 x i16>
270+
%2 = sext <8 x i8> %b to <8 x i16>
271+
%mul = mul <8 x i16> %1, %2
272+
%shr = lshr <8 x i16> %mul, splat(i16 8)
273+
%tr = trunc <8 x i16> %shr to <8 x i8>
274+
ret <8 x i8> %tr
275+
}
276+
277+
; Signed multiply-high on <4 x i16>: both operands are sign-extended to i32,
; multiplied, shifted right by 16 and truncated back to i16, so each result
; lane is the high half of the 32-bit product. A logical shift is sufficient
; because the trunc discards the bits where lshr and ashr would differ.
; The CHECK lines expect one widening smull followed by a narrowing shrn #16.
define <4 x i16> @smulh_v4i16(<4 x i16> %a, <4 x i16> %b) {
278+
; CHECK-LABEL: smulh_v4i16:
279+
; CHECK: // %bb.0:
280+
; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h
281+
; CHECK-NEXT: shrn v0.4h, v0.4s, #16
282+
; CHECK-NEXT: ret
283+
%1 = sext <4 x i16> %a to <4 x i32>
284+
%2 = sext <4 x i16> %b to <4 x i32>
285+
%mul = mul <4 x i32> %1, %2
286+
%shr = lshr <4 x i32> %mul, splat(i32 16)
287+
%tr = trunc <4 x i32> %shr to <4 x i16>
288+
ret <4 x i16> %tr
289+
}
290+
291+
; Signed multiply-high on <2 x i32>: both operands are sign-extended to i64,
; multiplied, shifted right by 32 and truncated back to i32, so each result
; lane is the high half of the 64-bit product. A logical shift is sufficient
; because the trunc discards the bits where lshr and ashr would differ.
; The CHECK lines expect one widening smull followed by a narrowing shrn #32.
define <2 x i32> @smulh_v2i32(<2 x i32> %a, <2 x i32> %b) {
292+
; CHECK-LABEL: smulh_v2i32:
293+
; CHECK: // %bb.0:
294+
; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
295+
; CHECK-NEXT: shrn v0.2s, v0.2d, #32
296+
; CHECK-NEXT: ret
297+
%1 = sext <2 x i32> %a to <2 x i64>
298+
%2 = sext <2 x i32> %b to <2 x i64>
299+
%mul = mul <2 x i64> %1, %2
300+
%shr = lshr <2 x i64> %mul, splat(i64 32)
301+
%tr = trunc <2 x i64> %shr to <2 x i32>
302+
ret <2 x i32> %tr
303+
}
304+
305+
; Signed multiply-high on <1 x i64>: both operands are sign-extended to i128,
; multiplied, shifted right by 64 and truncated back to i64, so the result is
; the high half of the 128-bit product. A logical shift is sufficient because
; the trunc discards the bits where lshr and ashr would differ.
; The CHECK lines expect the single lane to be moved to general-purpose
; registers with fmov, multiplied with one scalar smulh, and moved back to d0.
define <1 x i64> @smulh_v1i64(<1 x i64> %a, <1 x i64> %b) {
306+
; CHECK-LABEL: smulh_v1i64:
307+
; CHECK: // %bb.0:
308+
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
309+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
310+
; CHECK-NEXT: fmov x8, d0
311+
; CHECK-NEXT: fmov x9, d1
312+
; CHECK-NEXT: smulh x8, x8, x9
313+
; CHECK-NEXT: fmov d0, x8
314+
; CHECK-NEXT: ret
315+
%1 = sext <1 x i64> %a to <1 x i128>
316+
%2 = sext <1 x i64> %b to <1 x i128>
317+
%mul = mul <1 x i128> %1, %2
318+
%shr = lshr <1 x i128> %mul, splat(i128 64)
319+
%tr = trunc <1 x i128> %shr to <1 x i64>
320+
ret <1 x i64> %tr
321+
}
322+
323+
; Unsigned multiply-high on <8 x i8>: both operands are zero-extended to i16,
; multiplied, logically shifted right by 8 and truncated back to i8, so each
; result lane is the high byte of the 16-bit product.
; The CHECK lines expect one widening umull followed by a narrowing shrn #8.
define <8 x i8> @umulh_v8i8(<8 x i8> %a, <8 x i8> %b) {
324+
; CHECK-LABEL: umulh_v8i8:
325+
; CHECK: // %bb.0:
326+
; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b
327+
; CHECK-NEXT: shrn v0.8b, v0.8h, #8
328+
; CHECK-NEXT: ret
329+
%1 = zext <8 x i8> %a to <8 x i16>
330+
%2 = zext <8 x i8> %b to <8 x i16>
331+
%mul = mul <8 x i16> %1, %2
332+
%shr = lshr <8 x i16> %mul, splat(i16 8)
333+
%tr = trunc <8 x i16> %shr to <8 x i8>
334+
ret <8 x i8> %tr
335+
}
336+
337+
; Unsigned multiply-high on <4 x i16>: both operands are zero-extended to i32,
; multiplied, logically shifted right by 16 and truncated back to i16, so each
; result lane is the high half of the 32-bit product.
; The CHECK lines expect one widening umull followed by a narrowing shrn #16.
define <4 x i16> @umulh_v4i16(<4 x i16> %a, <4 x i16> %b) {
338+
; CHECK-LABEL: umulh_v4i16:
339+
; CHECK: // %bb.0:
340+
; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
341+
; CHECK-NEXT: shrn v0.4h, v0.4s, #16
342+
; CHECK-NEXT: ret
343+
%1 = zext <4 x i16> %a to <4 x i32>
344+
%2 = zext <4 x i16> %b to <4 x i32>
345+
%mul = mul <4 x i32> %1, %2
346+
%shr = lshr <4 x i32> %mul, splat(i32 16)
347+
%tr = trunc <4 x i32> %shr to <4 x i16>
348+
ret <4 x i16> %tr
349+
}
350+
351+
; Unsigned multiply-high on <2 x i32>: both operands are zero-extended to i64,
; multiplied, logically shifted right by 32 and truncated back to i32, so each
; result lane is the high half of the 64-bit product.
; The CHECK lines expect one widening umull followed by a narrowing shrn #32.
define <2 x i32> @umulh_v2i32(<2 x i32> %a, <2 x i32> %b) {
352+
; CHECK-LABEL: umulh_v2i32:
353+
; CHECK: // %bb.0:
354+
; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
355+
; CHECK-NEXT: shrn v0.2s, v0.2d, #32
356+
; CHECK-NEXT: ret
357+
%1 = zext <2 x i32> %a to <2 x i64>
358+
%2 = zext <2 x i32> %b to <2 x i64>
359+
%mul = mul <2 x i64> %1, %2
360+
%shr = lshr <2 x i64> %mul, splat(i64 32)
361+
%tr = trunc <2 x i64> %shr to <2 x i32>
362+
ret <2 x i32> %tr
363+
}
364+
365+
; Unsigned multiply-high on <1 x i64>: both operands are zero-extended to
; i128, multiplied, logically shifted right by 64 and truncated back to i64,
; so the result is the high half of the 128-bit product.
; The CHECK lines expect the single lane to be moved to general-purpose
; registers with fmov, multiplied with one scalar umulh, and moved back to d0.
define <1 x i64> @umulh_v1i64(<1 x i64> %a, <1 x i64> %b) {
366+
; CHECK-LABEL: umulh_v1i64:
367+
; CHECK: // %bb.0:
368+
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
369+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
370+
; CHECK-NEXT: fmov x8, d0
371+
; CHECK-NEXT: fmov x9, d1
372+
; CHECK-NEXT: umulh x8, x8, x9
373+
; CHECK-NEXT: fmov d0, x8
374+
; CHECK-NEXT: ret
375+
%1 = zext <1 x i64> %a to <1 x i128>
376+
%2 = zext <1 x i64> %b to <1 x i128>
377+
%mul = mul <1 x i128> %1, %2
378+
%shr = lshr <1 x i128> %mul, splat(i128 64)
379+
%tr = trunc <1 x i128> %shr to <1 x i64>
380+
ret <1 x i64> %tr
381+
}
382+

0 commit comments

Comments
 (0)