Skip to content

Commit 5e31eef

Browse files
committed
[AArch64] Add test coverage for some aba/abal cases (NFC)
1 parent 8edb5b4 commit 5e31eef

File tree

1 file changed

+262
-0
lines changed

1 file changed

+262
-0
lines changed

llvm/test/CodeGen/AArch64/neon-saba.ll

Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,268 @@ define <8 x i8> @saba_sabd_8b(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
174174
ret <8 x i8> %add
175175
}
176176

177+
; SABA from ADD(SABD(X, ZEROS))
178+
179+
; Test: add(sabd(%b, zeroinitializer), %a) on <4 x i32>, using the
; llvm.aarch64.neon.sabd intrinsic with an all-zero second operand.
; SD-prefixed assertions expect an abs of v1 followed by a plain add;
; GI-prefixed assertions expect a zeroing movi into v2 feeding a saba.
; NOTE(review): SD/GI presumably correspond to SelectionDAG and GlobalISel RUN
; lines, which are outside this view - confirm.
define <4 x i32> @saba_sabd_zeros_4s(<4 x i32> %a, <4 x i32> %b) #0 {
180+
; CHECK-SD-LABEL: saba_sabd_zeros_4s:
181+
; CHECK-SD: // %bb.0:
182+
; CHECK-SD-NEXT: abs v1.4s, v1.4s
183+
; CHECK-SD-NEXT: add v0.4s, v1.4s, v0.4s
184+
; CHECK-SD-NEXT: ret
185+
;
186+
; CHECK-GI-LABEL: saba_sabd_zeros_4s:
187+
; CHECK-GI: // %bb.0:
188+
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
189+
; CHECK-GI-NEXT: saba v0.4s, v1.4s, v2.4s
190+
; CHECK-GI-NEXT: ret
191+
%sabd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %b, <4 x i32> zeroinitializer)
192+
%add = add <4 x i32> %sabd, %a
193+
ret <4 x i32> %add
194+
}
195+
196+
; Test: add(sabd(%b, zeroinitializer), %a) on <2 x i32>, using the
; llvm.aarch64.neon.sabd intrinsic with an all-zero second operand.
; SD-prefixed assertions expect an abs of v1 followed by a plain add;
; GI-prefixed assertions expect a zeroing movi into v2 feeding a saba.
; NOTE(review): SD/GI presumably correspond to SelectionDAG and GlobalISel RUN
; lines, which are outside this view - confirm.
define <2 x i32> @saba_sabd_zeros_2s(<2 x i32> %a, <2 x i32> %b) #0 {
197+
; CHECK-SD-LABEL: saba_sabd_zeros_2s:
198+
; CHECK-SD: // %bb.0:
199+
; CHECK-SD-NEXT: abs v1.2s, v1.2s
200+
; CHECK-SD-NEXT: add v0.2s, v1.2s, v0.2s
201+
; CHECK-SD-NEXT: ret
202+
;
203+
; CHECK-GI-LABEL: saba_sabd_zeros_2s:
204+
; CHECK-GI: // %bb.0:
205+
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
206+
; CHECK-GI-NEXT: saba v0.2s, v1.2s, v2.2s
207+
; CHECK-GI-NEXT: ret
208+
%sabd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> zeroinitializer)
209+
%add = add <2 x i32> %sabd, %a
210+
ret <2 x i32> %add
211+
}
212+
213+
; Test: add(sabd(%b, zeroinitializer), %a) on <8 x i16>, using the
; llvm.aarch64.neon.sabd intrinsic with an all-zero second operand.
; SD-prefixed assertions expect an abs of v1 followed by a plain add;
; GI-prefixed assertions expect a zeroing movi into v2 feeding a saba.
; NOTE(review): SD/GI presumably correspond to SelectionDAG and GlobalISel RUN
; lines, which are outside this view - confirm.
define <8 x i16> @saba_sabd_zeros_8h(<8 x i16> %a, <8 x i16> %b) #0 {
214+
; CHECK-SD-LABEL: saba_sabd_zeros_8h:
215+
; CHECK-SD: // %bb.0:
216+
; CHECK-SD-NEXT: abs v1.8h, v1.8h
217+
; CHECK-SD-NEXT: add v0.8h, v1.8h, v0.8h
218+
; CHECK-SD-NEXT: ret
219+
;
220+
; CHECK-GI-LABEL: saba_sabd_zeros_8h:
221+
; CHECK-GI: // %bb.0:
222+
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
223+
; CHECK-GI-NEXT: saba v0.8h, v1.8h, v2.8h
224+
; CHECK-GI-NEXT: ret
225+
%sabd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %b, <8 x i16> zeroinitializer)
226+
%add = add <8 x i16> %sabd, %a
227+
ret <8 x i16> %add
228+
}
229+
230+
; Test: add(sabd(%b, zeroinitializer), %a) on <4 x i16>, using the
; llvm.aarch64.neon.sabd intrinsic with an all-zero second operand.
; SD-prefixed assertions expect an abs of v1 followed by a plain add;
; GI-prefixed assertions expect a zeroing movi into v2 feeding a saba.
; NOTE(review): SD/GI presumably correspond to SelectionDAG and GlobalISel RUN
; lines, which are outside this view - confirm.
define <4 x i16> @saba_sabd_zeros_4h(<4 x i16> %a, <4 x i16> %b) #0 {
231+
; CHECK-SD-LABEL: saba_sabd_zeros_4h:
232+
; CHECK-SD: // %bb.0:
233+
; CHECK-SD-NEXT: abs v1.4h, v1.4h
234+
; CHECK-SD-NEXT: add v0.4h, v1.4h, v0.4h
235+
; CHECK-SD-NEXT: ret
236+
;
237+
; CHECK-GI-LABEL: saba_sabd_zeros_4h:
238+
; CHECK-GI: // %bb.0:
239+
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
240+
; CHECK-GI-NEXT: saba v0.4h, v1.4h, v2.4h
241+
; CHECK-GI-NEXT: ret
242+
%sabd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> zeroinitializer)
243+
%add = add <4 x i16> %sabd, %a
244+
ret <4 x i16> %add
245+
}
246+
247+
; Test: add(sabd(%b, zeroinitializer), %a) on <16 x i8>, using the
; llvm.aarch64.neon.sabd intrinsic with an all-zero second operand.
; SD-prefixed assertions expect an abs of v1 followed by a plain add;
; GI-prefixed assertions expect a zeroing movi into v2 feeding a saba.
; NOTE(review): SD/GI presumably correspond to SelectionDAG and GlobalISel RUN
; lines, which are outside this view - confirm.
define <16 x i8> @saba_sabd_zeros_16b(<16 x i8> %a, <16 x i8> %b) #0 {
248+
; CHECK-SD-LABEL: saba_sabd_zeros_16b:
249+
; CHECK-SD: // %bb.0:
250+
; CHECK-SD-NEXT: abs v1.16b, v1.16b
251+
; CHECK-SD-NEXT: add v0.16b, v1.16b, v0.16b
252+
; CHECK-SD-NEXT: ret
253+
;
254+
; CHECK-GI-LABEL: saba_sabd_zeros_16b:
255+
; CHECK-GI: // %bb.0:
256+
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
257+
; CHECK-GI-NEXT: saba v0.16b, v1.16b, v2.16b
258+
; CHECK-GI-NEXT: ret
259+
%sabd = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %b, <16 x i8> zeroinitializer)
260+
%add = add <16 x i8> %sabd, %a
261+
ret <16 x i8> %add
262+
}
263+
264+
; Test: add(sabd(%b, zeroinitializer), %a) on <8 x i8>, using the
; llvm.aarch64.neon.sabd intrinsic with an all-zero second operand.
; SD-prefixed assertions expect an abs of v1 followed by a plain add;
; GI-prefixed assertions expect a zeroing movi into v2 feeding a saba.
; NOTE(review): SD/GI presumably correspond to SelectionDAG and GlobalISel RUN
; lines, which are outside this view - confirm.
define <8 x i8> @saba_sabd_zeros_8b(<8 x i8> %a, <8 x i8> %b) #0 {
265+
; CHECK-SD-LABEL: saba_sabd_zeros_8b:
266+
; CHECK-SD: // %bb.0:
267+
; CHECK-SD-NEXT: abs v1.8b, v1.8b
268+
; CHECK-SD-NEXT: add v0.8b, v1.8b, v0.8b
269+
; CHECK-SD-NEXT: ret
270+
;
271+
; CHECK-GI-LABEL: saba_sabd_zeros_8b:
272+
; CHECK-GI: // %bb.0:
273+
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
274+
; CHECK-GI-NEXT: saba v0.8b, v1.8b, v2.8b
275+
; CHECK-GI-NEXT: ret
276+
%sabd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> zeroinitializer)
277+
%add = add <8 x i8> %sabd, %a
278+
ret <8 x i8> %add
279+
}
280+
281+
; Test: add(%a, llvm.abs(%b)) on <4 x i32>. The abs call passes i1 true, which
; per the LLVM LangRef makes an INT_MIN element produce poison.
; A single common prefix is used here, so the expected output (abs of v1, then
; add into v0) is the same for every RUN configuration that shares that prefix.
define <4 x i32> @saba_abs_zeros_4s(<4 x i32> %a, <4 x i32> %b) #0 {
282+
; CHECK-LABEL: saba_abs_zeros_4s:
283+
; CHECK: // %bb.0:
284+
; CHECK-NEXT: abs v1.4s, v1.4s
285+
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
286+
; CHECK-NEXT: ret
287+
%abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %b, i1 true)
288+
%add = add <4 x i32> %a, %abs
289+
ret <4 x i32> %add
290+
}
291+
292+
; Test: add(%a, llvm.abs(%b)) on <2 x i32>. The abs call passes i1 true, which
; per the LLVM LangRef makes an INT_MIN element produce poison.
; A single common prefix is used here, so the expected output (abs of v1, then
; add into v0) is the same for every RUN configuration that shares that prefix.
define <2 x i32> @saba_abs_zeros_2s(<2 x i32> %a, <2 x i32> %b) #0 {
293+
; CHECK-LABEL: saba_abs_zeros_2s:
294+
; CHECK: // %bb.0:
295+
; CHECK-NEXT: abs v1.2s, v1.2s
296+
; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
297+
; CHECK-NEXT: ret
298+
%abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %b, i1 true)
299+
%add = add <2 x i32> %a, %abs
300+
ret <2 x i32> %add
301+
}
302+
303+
; Test: add(%a, llvm.abs(%b)) on <8 x i16>. The abs call passes i1 true, which
; per the LLVM LangRef makes an INT_MIN element produce poison.
; A single common prefix is used here, so the expected output (abs of v1, then
; add into v0) is the same for every RUN configuration that shares that prefix.
define <8 x i16> @saba_abs_zeros_8h(<8 x i16> %a, <8 x i16> %b) #0 {
304+
; CHECK-LABEL: saba_abs_zeros_8h:
305+
; CHECK: // %bb.0:
306+
; CHECK-NEXT: abs v1.8h, v1.8h
307+
; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
308+
; CHECK-NEXT: ret
309+
%abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %b, i1 true)
310+
%add = add <8 x i16> %a, %abs
311+
ret <8 x i16> %add
312+
}
313+
314+
; Test: add(%a, llvm.abs(%b)) on <4 x i16>. The abs call passes i1 true, which
; per the LLVM LangRef makes an INT_MIN element produce poison.
; A single common prefix is used here, so the expected output (abs of v1, then
; add into v0) is the same for every RUN configuration that shares that prefix.
define <4 x i16> @saba_abs_zeros_4h(<4 x i16> %a, <4 x i16> %b) #0 {
315+
; CHECK-LABEL: saba_abs_zeros_4h:
316+
; CHECK: // %bb.0:
317+
; CHECK-NEXT: abs v1.4h, v1.4h
318+
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
319+
; CHECK-NEXT: ret
320+
%abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %b, i1 true)
321+
%add = add <4 x i16> %a, %abs
322+
ret <4 x i16> %add
323+
}
324+
325+
; Test: add(%a, llvm.abs(%b)) on <16 x i8>. The abs call passes i1 true, which
; per the LLVM LangRef makes an INT_MIN element produce poison.
; A single common prefix is used here, so the expected output (abs of v1, then
; add into v0) is the same for every RUN configuration that shares that prefix.
define <16 x i8> @saba_abs_zeros_16b(<16 x i8> %a, <16 x i8> %b) #0 {
326+
; CHECK-LABEL: saba_abs_zeros_16b:
327+
; CHECK: // %bb.0:
328+
; CHECK-NEXT: abs v1.16b, v1.16b
329+
; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
330+
; CHECK-NEXT: ret
331+
%abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %b, i1 true)
332+
%add = add <16 x i8> %a, %abs
333+
ret <16 x i8> %add
334+
}
335+
336+
; Test: add(%a, llvm.abs(%b)) on <8 x i8>. The abs call passes i1 true, which
; per the LLVM LangRef makes an INT_MIN element produce poison.
; A single common prefix is used here, so the expected output (abs of v1, then
; add into v0) is the same for every RUN configuration that shares that prefix.
define <8 x i8> @saba_abs_zeros_8b(<8 x i8> %a, <8 x i8> %b) #0 {
337+
; CHECK-LABEL: saba_abs_zeros_8b:
338+
; CHECK: // %bb.0:
339+
; CHECK-NEXT: abs v1.8b, v1.8b
340+
; CHECK-NEXT: add v0.8b, v0.8b, v1.8b
341+
; CHECK-NEXT: ret
342+
%abs = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %b, i1 true)
343+
%add = add <8 x i8> %a, %abs
344+
ret <8 x i8> %add
345+
}
346+
347+
; SABAL from ADD(ZEXT(SABD(X, ZEROS)))
348+
349+
; Test: add(zext(sabd(%b, zeroinitializer)), %a), widening <2 x i32> to
; <2 x i64> before the accumulate.
; SD-prefixed assertions expect an abs of v1 followed by a widening uaddw;
; GI-prefixed assertions expect a zeroing movi into v2 feeding a sabal.
; NOTE(review): SD/GI presumably correspond to SelectionDAG and GlobalISel RUN
; lines, which are outside this view - confirm.
define <2 x i64> @sabal_sabd_zeros_2s(<2 x i64> %a, <2 x i32> %b) #0 {
350+
; CHECK-SD-LABEL: sabal_sabd_zeros_2s:
351+
; CHECK-SD: // %bb.0:
352+
; CHECK-SD-NEXT: abs v1.2s, v1.2s
353+
; CHECK-SD-NEXT: uaddw v0.2d, v0.2d, v1.2s
354+
; CHECK-SD-NEXT: ret
355+
;
356+
; CHECK-GI-LABEL: sabal_sabd_zeros_2s:
357+
; CHECK-GI: // %bb.0:
358+
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
359+
; CHECK-GI-NEXT: sabal v0.2d, v1.2s, v2.2s
360+
; CHECK-GI-NEXT: ret
361+
%sabd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> zeroinitializer)
362+
%sabd.zext = zext <2 x i32> %sabd to <2 x i64>
363+
%add = add <2 x i64> %sabd.zext, %a
364+
ret <2 x i64> %add
365+
}
366+
367+
; Test: add(zext(sabd(%b, zeroinitializer)), %a), widening <4 x i16> to
; <4 x i32> before the accumulate.
; SD-prefixed assertions expect an abs of v1 followed by a widening uaddw;
; GI-prefixed assertions expect a zeroing movi into v2 feeding a sabal.
; NOTE(review): SD/GI presumably correspond to SelectionDAG and GlobalISel RUN
; lines, which are outside this view - confirm.
define <4 x i32> @sabal_sabd_zeros_4h(<4 x i32> %a, <4 x i16> %b) #0 {
368+
; CHECK-SD-LABEL: sabal_sabd_zeros_4h:
369+
; CHECK-SD: // %bb.0:
370+
; CHECK-SD-NEXT: abs v1.4h, v1.4h
371+
; CHECK-SD-NEXT: uaddw v0.4s, v0.4s, v1.4h
372+
; CHECK-SD-NEXT: ret
373+
;
374+
; CHECK-GI-LABEL: sabal_sabd_zeros_4h:
375+
; CHECK-GI: // %bb.0:
376+
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
377+
; CHECK-GI-NEXT: sabal v0.4s, v1.4h, v2.4h
378+
; CHECK-GI-NEXT: ret
379+
%sabd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> zeroinitializer)
380+
%sabd.zext = zext <4 x i16> %sabd to <4 x i32>
381+
%add = add <4 x i32> %sabd.zext, %a
382+
ret <4 x i32> %add
383+
}
384+
385+
; Test: add(zext(sabd(%b, zeroinitializer)), %a), widening <8 x i8> to
; <8 x i16> before the accumulate.
; SD-prefixed assertions expect an abs of v1 followed by a widening uaddw;
; GI-prefixed assertions expect a zeroing movi into v2 feeding a sabal.
; NOTE(review): SD/GI presumably correspond to SelectionDAG and GlobalISel RUN
; lines, which are outside this view - confirm.
define <8 x i16> @sabal_sabd_zeros_8b(<8 x i16> %a, <8 x i8> %b) #0 {
386+
; CHECK-SD-LABEL: sabal_sabd_zeros_8b:
387+
; CHECK-SD: // %bb.0:
388+
; CHECK-SD-NEXT: abs v1.8b, v1.8b
389+
; CHECK-SD-NEXT: uaddw v0.8h, v0.8h, v1.8b
390+
; CHECK-SD-NEXT: ret
391+
;
392+
; CHECK-GI-LABEL: sabal_sabd_zeros_8b:
393+
; CHECK-GI: // %bb.0:
394+
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
395+
; CHECK-GI-NEXT: sabal v0.8h, v1.8b, v2.8b
396+
; CHECK-GI-NEXT: ret
397+
%sabd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> zeroinitializer)
398+
%sabd.zext = zext <8 x i8> %sabd to <8 x i16>
399+
%add = add <8 x i16> %sabd.zext, %a
400+
ret <8 x i16> %add
401+
}
402+
403+
; Test: add(%a, zext(llvm.abs(%b))), widening <2 x i32> to <2 x i64>. The abs
; call passes i1 true, which per the LLVM LangRef makes an INT_MIN element
; produce poison.
; A single common prefix is used here; the expected output is an abs of v1
; followed by a widening uaddw into v0.
define <2 x i64> @sabal_abs_zeros_2s(<2 x i64> %a, <2 x i32> %b) #0 {
404+
; CHECK-LABEL: sabal_abs_zeros_2s:
405+
; CHECK: // %bb.0:
406+
; CHECK-NEXT: abs v1.2s, v1.2s
407+
; CHECK-NEXT: uaddw v0.2d, v0.2d, v1.2s
408+
; CHECK-NEXT: ret
409+
%abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %b, i1 true)
410+
%abs.zext = zext <2 x i32> %abs to <2 x i64>
411+
%add = add <2 x i64> %a, %abs.zext
412+
ret <2 x i64> %add
413+
}
414+
415+
; Test: add(%a, zext(llvm.abs(%b))), widening <4 x i16> to <4 x i32>. The abs
; call passes i1 true, which per the LLVM LangRef makes an INT_MIN element
; produce poison.
; A single common prefix is used here; the expected output is an abs of v1
; followed by a widening uaddw into v0.
define <4 x i32> @sabal_abs_zeros_4h(<4 x i32> %a, <4 x i16> %b) #0 {
416+
; CHECK-LABEL: sabal_abs_zeros_4h:
417+
; CHECK: // %bb.0:
418+
; CHECK-NEXT: abs v1.4h, v1.4h
419+
; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h
420+
; CHECK-NEXT: ret
421+
%abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %b, i1 true)
422+
%abs.zext = zext <4 x i16> %abs to <4 x i32>
423+
%add = add <4 x i32> %a, %abs.zext
424+
ret <4 x i32> %add
425+
}
426+
427+
; Test: add(%a, zext(llvm.abs(%b))), widening <8 x i8> to <8 x i16>. The abs
; call passes i1 true, which per the LLVM LangRef makes an INT_MIN element
; produce poison.
; A single common prefix is used here; the expected output is an abs of v1
; followed by a widening uaddw into v0.
define <8 x i16> @sabal_abs_zeros_8b(<8 x i16> %a, <8 x i8> %b) #0 {
428+
; CHECK-LABEL: sabal_abs_zeros_8b:
429+
; CHECK: // %bb.0:
430+
; CHECK-NEXT: abs v1.8b, v1.8b
431+
; CHECK-NEXT: uaddw v0.8h, v0.8h, v1.8b
432+
; CHECK-NEXT: ret
433+
%abs = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %b, i1 true)
434+
%abs.zext = zext <8 x i8> %abs to <8 x i16>
435+
%add = add <8 x i16> %a, %abs.zext
436+
ret <8 x i16> %add
437+
}
438+
177439
; Declarations for the llvm.abs intrinsic overloads called by the tests in this
; file: (vector operand, i1 flag). Only the first few declarations are visible
; in this diff context; the list presumably continues for the other vector
; types - confirm against the full file.
declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
178440
declare <2 x i32> @llvm.abs.v2i32(<2 x i32>, i1)
179441
declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)

0 commit comments

Comments
 (0)