@@ -12,9 +12,9 @@ define <4 x i32> @saba_abs_4s(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
1212;
1313; CHECK-GI-LABEL: saba_abs_4s:
1414; CHECK-GI: // %bb.0:
15+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
1516; CHECK-GI-NEXT: sub v1.4s, v1.4s, v2.4s
16- ; CHECK-GI-NEXT: abs v1.4s, v1.4s
17- ; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
17+ ; CHECK-GI-NEXT: saba v0.4s, v1.4s, v3.4s
1818; CHECK-GI-NEXT: ret
1919 %sub = sub nsw <4 x i32 > %b , %c
2020 %abs = call <4 x i32 > @llvm.abs.v4i32 (<4 x i32 > %sub , i1 true )
@@ -30,9 +30,9 @@ define <2 x i32> @saba_abs_2s(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
3030;
3131; CHECK-GI-LABEL: saba_abs_2s:
3232; CHECK-GI: // %bb.0:
33+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
3334; CHECK-GI-NEXT: sub v1.2s, v1.2s, v2.2s
34- ; CHECK-GI-NEXT: abs v1.2s, v1.2s
35- ; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
35+ ; CHECK-GI-NEXT: saba v0.2s, v1.2s, v3.2s
3636; CHECK-GI-NEXT: ret
3737 %sub = sub nsw <2 x i32 > %b , %c
3838 %abs = call <2 x i32 > @llvm.abs.v2i32 (<2 x i32 > %sub , i1 true )
@@ -48,9 +48,9 @@ define <8 x i16> @saba_abs_8h(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
4848;
4949; CHECK-GI-LABEL: saba_abs_8h:
5050; CHECK-GI: // %bb.0:
51+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
5152; CHECK-GI-NEXT: sub v1.8h, v1.8h, v2.8h
52- ; CHECK-GI-NEXT: abs v1.8h, v1.8h
53- ; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
53+ ; CHECK-GI-NEXT: saba v0.8h, v1.8h, v3.8h
5454; CHECK-GI-NEXT: ret
5555 %sub = sub nsw <8 x i16 > %b , %c
5656 %abs = call <8 x i16 > @llvm.abs.v8i16 (<8 x i16 > %sub , i1 true )
@@ -66,9 +66,9 @@ define <4 x i16> @saba_abs_4h(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
6666;
6767; CHECK-GI-LABEL: saba_abs_4h:
6868; CHECK-GI: // %bb.0:
69+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
6970; CHECK-GI-NEXT: sub v1.4h, v1.4h, v2.4h
70- ; CHECK-GI-NEXT: abs v1.4h, v1.4h
71- ; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h
71+ ; CHECK-GI-NEXT: saba v0.4h, v1.4h, v3.4h
7272; CHECK-GI-NEXT: ret
7373 %sub = sub nsw <4 x i16 > %b , %c
7474 %abs = call <4 x i16 > @llvm.abs.v4i16 (<4 x i16 > %sub , i1 true )
@@ -84,9 +84,9 @@ define <16 x i8> @saba_abs_16b(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
8484;
8585; CHECK-GI-LABEL: saba_abs_16b:
8686; CHECK-GI: // %bb.0:
87+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
8788; CHECK-GI-NEXT: sub v1.16b, v1.16b, v2.16b
88- ; CHECK-GI-NEXT: abs v1.16b, v1.16b
89- ; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b
89+ ; CHECK-GI-NEXT: saba v0.16b, v1.16b, v3.16b
9090; CHECK-GI-NEXT: ret
9191 %sub = sub nsw <16 x i8 > %b , %c
9292 %abs = call <16 x i8 > @llvm.abs.v16i8 (<16 x i8 > %sub , i1 true )
@@ -102,9 +102,9 @@ define <8 x i8> @saba_abs_8b(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
102102;
103103; CHECK-GI-LABEL: saba_abs_8b:
104104; CHECK-GI: // %bb.0:
105+ ; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
105106; CHECK-GI-NEXT: sub v1.8b, v1.8b, v2.8b
106- ; CHECK-GI-NEXT: abs v1.8b, v1.8b
107- ; CHECK-GI-NEXT: add v0.8b, v0.8b, v1.8b
107+ ; CHECK-GI-NEXT: saba v0.8b, v1.8b, v3.8b
108108; CHECK-GI-NEXT: ret
109109 %sub = sub nsw <8 x i8 > %b , %c
110110 %abs = call <8 x i8 > @llvm.abs.v8i8 (<8 x i8 > %sub , i1 true )
@@ -174,6 +174,214 @@ define <8 x i8> @saba_sabd_8b(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
174174 ret <8 x i8 > %add
175175}
176176
177+ ; SABA from ADD(SABD(X, ZEROS)) and from ADD(ABS(X))
178+
; Test: add(sabd(%b, zeroinitializer), %a) on <4 x i32>.
; The CHECK lines expect the zero vector to be materialized with movi into v2
; and the sabd + add pair to be emitted as one `saba v0.4s, v1.4s, v2.4s`.
; Presumably %a arrives in v0 and %b in v1 (AAPCS64 vector argument order).
179+ define <4 x i32 > @saba_sabd_zeros_4s (<4 x i32 > %a , <4 x i32 > %b ) #0 {
180+ ; CHECK-LABEL: saba_sabd_zeros_4s:
181+ ; CHECK: // %bb.0:
182+ ; CHECK-NEXT: movi v2.2d, #0000000000000000
183+ ; CHECK-NEXT: saba v0.4s, v1.4s, v2.4s
184+ ; CHECK-NEXT: ret
185+ %sabd = call <4 x i32 > @llvm.aarch64.neon.sabd.v4i32 (<4 x i32 > %b , <4 x i32 > zeroinitializer )
186+ %add = add <4 x i32 > %sabd , %a
187+ ret <4 x i32 > %add
188+ }
189+
; Test: add(sabd(%b, zeroinitializer), %a) on <2 x i32> (64-bit vector).
; The CHECK lines expect a movi-zeroed v2 followed by one
; `saba v0.2s, v1.2s, v2.2s` instead of a separate sabd and add.
190+ define <2 x i32 > @saba_sabd_zeros_2s (<2 x i32 > %a , <2 x i32 > %b ) #0 {
191+ ; CHECK-LABEL: saba_sabd_zeros_2s:
192+ ; CHECK: // %bb.0:
193+ ; CHECK-NEXT: movi v2.2d, #0000000000000000
194+ ; CHECK-NEXT: saba v0.2s, v1.2s, v2.2s
195+ ; CHECK-NEXT: ret
196+ %sabd = call <2 x i32 > @llvm.aarch64.neon.sabd.v2i32 (<2 x i32 > %b , <2 x i32 > zeroinitializer )
197+ %add = add <2 x i32 > %sabd , %a
198+ ret <2 x i32 > %add
199+ }
200+
; Test: add(sabd(%b, zeroinitializer), %a) on <8 x i16>.
; The CHECK lines expect a movi-zeroed v2 followed by one
; `saba v0.8h, v1.8h, v2.8h` instead of a separate sabd and add.
201+ define <8 x i16 > @saba_sabd_zeros_8h (<8 x i16 > %a , <8 x i16 > %b ) #0 {
202+ ; CHECK-LABEL: saba_sabd_zeros_8h:
203+ ; CHECK: // %bb.0:
204+ ; CHECK-NEXT: movi v2.2d, #0000000000000000
205+ ; CHECK-NEXT: saba v0.8h, v1.8h, v2.8h
206+ ; CHECK-NEXT: ret
207+ %sabd = call <8 x i16 > @llvm.aarch64.neon.sabd.v8i16 (<8 x i16 > %b , <8 x i16 > zeroinitializer )
208+ %add = add <8 x i16 > %sabd , %a
209+ ret <8 x i16 > %add
210+ }
211+
; Test: add(sabd(%b, zeroinitializer), %a) on <4 x i16> (64-bit vector).
; The CHECK lines expect a movi-zeroed v2 followed by one
; `saba v0.4h, v1.4h, v2.4h` instead of a separate sabd and add.
212+ define <4 x i16 > @saba_sabd_zeros_4h (<4 x i16 > %a , <4 x i16 > %b ) #0 {
213+ ; CHECK-LABEL: saba_sabd_zeros_4h:
214+ ; CHECK: // %bb.0:
215+ ; CHECK-NEXT: movi v2.2d, #0000000000000000
216+ ; CHECK-NEXT: saba v0.4h, v1.4h, v2.4h
217+ ; CHECK-NEXT: ret
218+ %sabd = call <4 x i16 > @llvm.aarch64.neon.sabd.v4i16 (<4 x i16 > %b , <4 x i16 > zeroinitializer )
219+ %add = add <4 x i16 > %sabd , %a
220+ ret <4 x i16 > %add
221+ }
222+
; Test: add(sabd(%b, zeroinitializer), %a) on <16 x i8>.
; The CHECK lines expect a movi-zeroed v2 followed by one
; `saba v0.16b, v1.16b, v2.16b` instead of a separate sabd and add.
223+ define <16 x i8 > @saba_sabd_zeros_16b (<16 x i8 > %a , <16 x i8 > %b ) #0 {
224+ ; CHECK-LABEL: saba_sabd_zeros_16b:
225+ ; CHECK: // %bb.0:
226+ ; CHECK-NEXT: movi v2.2d, #0000000000000000
227+ ; CHECK-NEXT: saba v0.16b, v1.16b, v2.16b
228+ ; CHECK-NEXT: ret
229+ %sabd = call <16 x i8 > @llvm.aarch64.neon.sabd.v16i8 (<16 x i8 > %b , <16 x i8 > zeroinitializer )
230+ %add = add <16 x i8 > %sabd , %a
231+ ret <16 x i8 > %add
232+ }
233+
; Test: add(sabd(%b, zeroinitializer), %a) on <8 x i8> (64-bit vector).
; The CHECK lines expect a movi-zeroed v2 followed by one
; `saba v0.8b, v1.8b, v2.8b` instead of a separate sabd and add.
234+ define <8 x i8 > @saba_sabd_zeros_8b (<8 x i8 > %a , <8 x i8 > %b ) #0 {
235+ ; CHECK-LABEL: saba_sabd_zeros_8b:
236+ ; CHECK: // %bb.0:
237+ ; CHECK-NEXT: movi v2.2d, #0000000000000000
238+ ; CHECK-NEXT: saba v0.8b, v1.8b, v2.8b
239+ ; CHECK-NEXT: ret
240+ %sabd = call <8 x i8 > @llvm.aarch64.neon.sabd.v8i8 (<8 x i8 > %b , <8 x i8 > zeroinitializer )
241+ %add = add <8 x i8 > %sabd , %a
242+ ret <8 x i8 > %add
243+ }
244+
; Test: add(%a, abs(%b)) on <4 x i32>, with no explicit zero operand in the IR.
; The CHECK lines expect the abs + add pair to be emitted as
; `saba v0.4s, v1.4s, v2.4s` against a movi-zeroed v2.
; The `i1 true` argument to llvm.abs makes an INT_MIN input poison (per LangRef).
245+ define <4 x i32 > @saba_abs_zeros_4s (<4 x i32 > %a , <4 x i32 > %b ) #0 {
246+ ; CHECK-LABEL: saba_abs_zeros_4s:
247+ ; CHECK: // %bb.0:
248+ ; CHECK-NEXT: movi v2.2d, #0000000000000000
249+ ; CHECK-NEXT: saba v0.4s, v1.4s, v2.4s
250+ ; CHECK-NEXT: ret
251+ %abs = call <4 x i32 > @llvm.abs.v4i32 (<4 x i32 > %b , i1 true )
252+ %add = add <4 x i32 > %a , %abs
253+ ret <4 x i32 > %add
254+ }
255+
; Test: add(%a, abs(%b)) on <2 x i32> (64-bit vector).
; The CHECK lines expect the abs + add pair to be emitted as
; `saba v0.2s, v1.2s, v2.2s` against a movi-zeroed v2.
; The `i1 true` argument to llvm.abs makes an INT_MIN input poison (per LangRef).
256+ define <2 x i32 > @saba_abs_zeros_2s (<2 x i32 > %a , <2 x i32 > %b ) #0 {
257+ ; CHECK-LABEL: saba_abs_zeros_2s:
258+ ; CHECK: // %bb.0:
259+ ; CHECK-NEXT: movi v2.2d, #0000000000000000
260+ ; CHECK-NEXT: saba v0.2s, v1.2s, v2.2s
261+ ; CHECK-NEXT: ret
262+ %abs = call <2 x i32 > @llvm.abs.v2i32 (<2 x i32 > %b , i1 true )
263+ %add = add <2 x i32 > %a , %abs
264+ ret <2 x i32 > %add
265+ }
266+
; Test: add(%a, abs(%b)) on <8 x i16>.
; The CHECK lines expect the abs + add pair to be emitted as
; `saba v0.8h, v1.8h, v2.8h` against a movi-zeroed v2.
; The `i1 true` argument to llvm.abs makes an INT_MIN input poison (per LangRef).
267+ define <8 x i16 > @saba_abs_zeros_8h (<8 x i16 > %a , <8 x i16 > %b ) #0 {
268+ ; CHECK-LABEL: saba_abs_zeros_8h:
269+ ; CHECK: // %bb.0:
270+ ; CHECK-NEXT: movi v2.2d, #0000000000000000
271+ ; CHECK-NEXT: saba v0.8h, v1.8h, v2.8h
272+ ; CHECK-NEXT: ret
273+ %abs = call <8 x i16 > @llvm.abs.v8i16 (<8 x i16 > %b , i1 true )
274+ %add = add <8 x i16 > %a , %abs
275+ ret <8 x i16 > %add
276+ }
277+
; Test: add(%a, abs(%b)) on <4 x i16> (64-bit vector).
; The CHECK lines expect the abs + add pair to be emitted as
; `saba v0.4h, v1.4h, v2.4h` against a movi-zeroed v2.
; The `i1 true` argument to llvm.abs makes an INT_MIN input poison (per LangRef).
278+ define <4 x i16 > @saba_abs_zeros_4h (<4 x i16 > %a , <4 x i16 > %b ) #0 {
279+ ; CHECK-LABEL: saba_abs_zeros_4h:
280+ ; CHECK: // %bb.0:
281+ ; CHECK-NEXT: movi v2.2d, #0000000000000000
282+ ; CHECK-NEXT: saba v0.4h, v1.4h, v2.4h
283+ ; CHECK-NEXT: ret
284+ %abs = call <4 x i16 > @llvm.abs.v4i16 (<4 x i16 > %b , i1 true )
285+ %add = add <4 x i16 > %a , %abs
286+ ret <4 x i16 > %add
287+ }
288+
; Test: add(%a, abs(%b)) on <16 x i8>.
; The CHECK lines expect the abs + add pair to be emitted as
; `saba v0.16b, v1.16b, v2.16b` against a movi-zeroed v2.
; The `i1 true` argument to llvm.abs makes an INT_MIN input poison (per LangRef).
289+ define <16 x i8 > @saba_abs_zeros_16b (<16 x i8 > %a , <16 x i8 > %b ) #0 {
290+ ; CHECK-LABEL: saba_abs_zeros_16b:
291+ ; CHECK: // %bb.0:
292+ ; CHECK-NEXT: movi v2.2d, #0000000000000000
293+ ; CHECK-NEXT: saba v0.16b, v1.16b, v2.16b
294+ ; CHECK-NEXT: ret
295+ %abs = call <16 x i8 > @llvm.abs.v16i8 (<16 x i8 > %b , i1 true )
296+ %add = add <16 x i8 > %a , %abs
297+ ret <16 x i8 > %add
298+ }
299+
; Test: add(%a, abs(%b)) on <8 x i8> (64-bit vector).
; The CHECK lines expect the abs + add pair to be emitted as
; `saba v0.8b, v1.8b, v2.8b` against a movi-zeroed v2.
; The `i1 true` argument to llvm.abs makes an INT_MIN input poison (per LangRef).
300+ define <8 x i8 > @saba_abs_zeros_8b (<8 x i8 > %a , <8 x i8 > %b ) #0 {
301+ ; CHECK-LABEL: saba_abs_zeros_8b:
302+ ; CHECK: // %bb.0:
303+ ; CHECK-NEXT: movi v2.2d, #0000000000000000
304+ ; CHECK-NEXT: saba v0.8b, v1.8b, v2.8b
305+ ; CHECK-NEXT: ret
306+ %abs = call <8 x i8 > @llvm.abs.v8i8 (<8 x i8 > %b , i1 true )
307+ %add = add <8 x i8 > %a , %abs
308+ ret <8 x i8 > %add
309+ }
310+
311+ ; SABAL from ADD(ZEXT(SABD(X, ZEROS))) and from ADD(ZEXT(ABS(X)))
312+
; Test: add(zext(sabd(%b, zeroinitializer)), %a), widening <2 x i32> to <2 x i64>.
; The CHECK lines expect a movi-zeroed v2 followed by one widening
; `sabal v0.2d, v1.2s, v2.2s` covering the sabd, zext and add.
313+ define <2 x i64 > @sabal_sabd_zeros_2s (<2 x i64 > %a , <2 x i32 > %b ) #0 {
314+ ; CHECK-LABEL: sabal_sabd_zeros_2s:
315+ ; CHECK: // %bb.0:
316+ ; CHECK-NEXT: movi v2.2d, #0000000000000000
317+ ; CHECK-NEXT: sabal v0.2d, v1.2s, v2.2s
318+ ; CHECK-NEXT: ret
319+ %sabd = call <2 x i32 > @llvm.aarch64.neon.sabd.v2i32 (<2 x i32 > %b , <2 x i32 > zeroinitializer )
320+ %sabd.zext = zext <2 x i32 > %sabd to <2 x i64 >
321+ %add = add <2 x i64 > %sabd.zext , %a
322+ ret <2 x i64 > %add
323+ }
324+
; Test: add(zext(sabd(%b, zeroinitializer)), %a), widening <4 x i16> to <4 x i32>.
; The CHECK lines expect a movi-zeroed v2 followed by one widening
; `sabal v0.4s, v1.4h, v2.4h` covering the sabd, zext and add.
325+ define <4 x i32 > @sabal_sabd_zeros_4h (<4 x i32 > %a , <4 x i16 > %b ) #0 {
326+ ; CHECK-LABEL: sabal_sabd_zeros_4h:
327+ ; CHECK: // %bb.0:
328+ ; CHECK-NEXT: movi v2.2d, #0000000000000000
329+ ; CHECK-NEXT: sabal v0.4s, v1.4h, v2.4h
330+ ; CHECK-NEXT: ret
331+ %sabd = call <4 x i16 > @llvm.aarch64.neon.sabd.v4i16 (<4 x i16 > %b , <4 x i16 > zeroinitializer )
332+ %sabd.zext = zext <4 x i16 > %sabd to <4 x i32 >
333+ %add = add <4 x i32 > %sabd.zext , %a
334+ ret <4 x i32 > %add
335+ }
336+
; Test: add(zext(sabd(%b, zeroinitializer)), %a), widening <8 x i8> to <8 x i16>.
; The CHECK lines expect a movi-zeroed v2 followed by one widening
; `sabal v0.8h, v1.8b, v2.8b` covering the sabd, zext and add.
337+ define <8 x i16 > @sabal_sabd_zeros_8b (<8 x i16 > %a , <8 x i8 > %b ) #0 {
338+ ; CHECK-LABEL: sabal_sabd_zeros_8b:
339+ ; CHECK: // %bb.0:
340+ ; CHECK-NEXT: movi v2.2d, #0000000000000000
341+ ; CHECK-NEXT: sabal v0.8h, v1.8b, v2.8b
342+ ; CHECK-NEXT: ret
343+ %sabd = call <8 x i8 > @llvm.aarch64.neon.sabd.v8i8 (<8 x i8 > %b , <8 x i8 > zeroinitializer )
344+ %sabd.zext = zext <8 x i8 > %sabd to <8 x i16 >
345+ %add = add <8 x i16 > %sabd.zext , %a
346+ ret <8 x i16 > %add
347+ }
348+
; Test: add(%a, zext(abs(%b))), widening <2 x i32> to <2 x i64>.
; The CHECK lines expect the abs, zext and add to be emitted as one
; `sabal v0.2d, v1.2s, v2.2s` against a movi-zeroed v2.
; The `i1 true` argument to llvm.abs makes an INT_MIN input poison (per LangRef).
349+ define <2 x i64 > @sabal_abs_zeros_2s (<2 x i64 > %a , <2 x i32 > %b ) #0 {
350+ ; CHECK-LABEL: sabal_abs_zeros_2s:
351+ ; CHECK: // %bb.0:
352+ ; CHECK-NEXT: movi v2.2d, #0000000000000000
353+ ; CHECK-NEXT: sabal v0.2d, v1.2s, v2.2s
354+ ; CHECK-NEXT: ret
355+ %abs = call <2 x i32 > @llvm.abs.v2i32 (<2 x i32 > %b , i1 true )
356+ %abs.zext = zext <2 x i32 > %abs to <2 x i64 >
357+ %add = add <2 x i64 > %a , %abs.zext
358+ ret <2 x i64 > %add
359+ }
360+
; Test: add(%a, zext(abs(%b))), widening <4 x i16> to <4 x i32>.
; The CHECK lines expect the abs, zext and add to be emitted as one
; `sabal v0.4s, v1.4h, v2.4h` against a movi-zeroed v2.
; The `i1 true` argument to llvm.abs makes an INT_MIN input poison (per LangRef).
361+ define <4 x i32 > @sabal_abs_zeros_4h (<4 x i32 > %a , <4 x i16 > %b ) #0 {
362+ ; CHECK-LABEL: sabal_abs_zeros_4h:
363+ ; CHECK: // %bb.0:
364+ ; CHECK-NEXT: movi v2.2d, #0000000000000000
365+ ; CHECK-NEXT: sabal v0.4s, v1.4h, v2.4h
366+ ; CHECK-NEXT: ret
367+ %abs = call <4 x i16 > @llvm.abs.v4i16 (<4 x i16 > %b , i1 true )
368+ %abs.zext = zext <4 x i16 > %abs to <4 x i32 >
369+ %add = add <4 x i32 > %a , %abs.zext
370+ ret <4 x i32 > %add
371+ }
372+
; Test: add(%a, zext(abs(%b))), widening <8 x i8> to <8 x i16>.
; The CHECK lines expect the abs, zext and add to be emitted as one
; `sabal v0.8h, v1.8b, v2.8b` against a movi-zeroed v2.
; The `i1 true` argument to llvm.abs makes an INT_MIN input poison (per LangRef).
373+ define <8 x i16 > @sabal_abs_zeros_8b (<8 x i16 > %a , <8 x i8 > %b ) #0 {
374+ ; CHECK-LABEL: sabal_abs_zeros_8b:
375+ ; CHECK: // %bb.0:
376+ ; CHECK-NEXT: movi v2.2d, #0000000000000000
377+ ; CHECK-NEXT: sabal v0.8h, v1.8b, v2.8b
378+ ; CHECK-NEXT: ret
379+ %abs = call <8 x i8 > @llvm.abs.v8i8 (<8 x i8 > %b , i1 true )
380+ %abs.zext = zext <8 x i8 > %abs to <8 x i16 >
381+ %add = add <8 x i16 > %a , %abs.zext
382+ ret <8 x i16 > %add
383+ }
384+
177385declare <4 x i32 > @llvm.abs.v4i32 (<4 x i32 >, i1 )
178386declare <2 x i32 > @llvm.abs.v2i32 (<2 x i32 >, i1 )
179387declare <8 x i16 > @llvm.abs.v8i16 (<8 x i16 >, i1 )
0 commit comments