@@ -174,6 +174,268 @@ define <8 x i8> @saba_sabd_8b(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
174174 ret <8 x i8 > %add
175175}
176176
177+ ; SABA from ADD(SABD(X, ZEROS))
178+
; Test: accumulate sabd(%b, zeroinitializer) into %a for <4 x i32>.
; The SD-prefixed checks expect the zero operand to disappear (abs + add);
; the GI-prefixed checks expect a movi-materialised zero vector feeding saba.
; NOTE(review): SD/GI presumably correspond to SelectionDAG and GlobalISel
; runs - the RUN lines are outside this view, confirm there.
179+ define <4 x i32 > @saba_sabd_zeros_4s (<4 x i32 > %a , <4 x i32 > %b ) #0 {
180+ ; CHECK-SD-LABEL: saba_sabd_zeros_4s:
181+ ; CHECK-SD: // %bb.0:
182+ ; CHECK-SD-NEXT: abs v1.4s, v1.4s
183+ ; CHECK-SD-NEXT: add v0.4s, v1.4s, v0.4s
184+ ; CHECK-SD-NEXT: ret
185+ ;
186+ ; CHECK-GI-LABEL: saba_sabd_zeros_4s:
187+ ; CHECK-GI: // %bb.0:
188+ ; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
189+ ; CHECK-GI-NEXT: saba v0.4s, v1.4s, v2.4s
190+ ; CHECK-GI-NEXT: ret
; Signed absolute difference of %b against an all-zero vector, then added to
; the accumulator %a. The intrinsic result is the FIRST add operand, which
; matches the v1-before-v0 source order in the expected SD add.
191+ %sabd = call <4 x i32 > @llvm.aarch64.neon.sabd.v4i32 (<4 x i32 > %b , <4 x i32 > zeroinitializer )
192+ %add = add <4 x i32 > %sabd , %a
193+ ret <4 x i32 > %add
194+ }
195+
; Test: accumulate sabd(%b, zeroinitializer) into %a for <2 x i32>.
; SD-prefixed checks expect abs + add on the .2s arrangement; GI-prefixed
; checks expect a zeroed v2 (movi) used as the second saba source.
196+ define <2 x i32 > @saba_sabd_zeros_2s (<2 x i32 > %a , <2 x i32 > %b ) #0 {
197+ ; CHECK-SD-LABEL: saba_sabd_zeros_2s:
198+ ; CHECK-SD: // %bb.0:
199+ ; CHECK-SD-NEXT: abs v1.2s, v1.2s
200+ ; CHECK-SD-NEXT: add v0.2s, v1.2s, v0.2s
201+ ; CHECK-SD-NEXT: ret
202+ ;
203+ ; CHECK-GI-LABEL: saba_sabd_zeros_2s:
204+ ; CHECK-GI: // %bb.0:
205+ ; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
206+ ; CHECK-GI-NEXT: saba v0.2s, v1.2s, v2.2s
207+ ; CHECK-GI-NEXT: ret
; sabd against zero, then add with the intrinsic result as the first operand
; and the accumulator %a as the second.
208+ %sabd = call <2 x i32 > @llvm.aarch64.neon.sabd.v2i32 (<2 x i32 > %b , <2 x i32 > zeroinitializer )
209+ %add = add <2 x i32 > %sabd , %a
210+ ret <2 x i32 > %add
211+ }
212+
; Test: accumulate sabd(%b, zeroinitializer) into %a for <8 x i16>.
; SD-prefixed checks expect abs + add on the .8h arrangement; GI-prefixed
; checks expect movi #0 into v2 followed by saba.
213+ define <8 x i16 > @saba_sabd_zeros_8h (<8 x i16 > %a , <8 x i16 > %b ) #0 {
214+ ; CHECK-SD-LABEL: saba_sabd_zeros_8h:
215+ ; CHECK-SD: // %bb.0:
216+ ; CHECK-SD-NEXT: abs v1.8h, v1.8h
217+ ; CHECK-SD-NEXT: add v0.8h, v1.8h, v0.8h
218+ ; CHECK-SD-NEXT: ret
219+ ;
220+ ; CHECK-GI-LABEL: saba_sabd_zeros_8h:
221+ ; CHECK-GI: // %bb.0:
222+ ; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
223+ ; CHECK-GI-NEXT: saba v0.8h, v1.8h, v2.8h
224+ ; CHECK-GI-NEXT: ret
; sabd against zero, then add with the intrinsic result as the first operand
; and the accumulator %a as the second.
225+ %sabd = call <8 x i16 > @llvm.aarch64.neon.sabd.v8i16 (<8 x i16 > %b , <8 x i16 > zeroinitializer )
226+ %add = add <8 x i16 > %sabd , %a
227+ ret <8 x i16 > %add
228+ }
229+
; Test: accumulate sabd(%b, zeroinitializer) into %a for <4 x i16>.
; SD-prefixed checks expect abs + add on the .4h arrangement; GI-prefixed
; checks expect movi #0 into v2 followed by saba.
230+ define <4 x i16 > @saba_sabd_zeros_4h (<4 x i16 > %a , <4 x i16 > %b ) #0 {
231+ ; CHECK-SD-LABEL: saba_sabd_zeros_4h:
232+ ; CHECK-SD: // %bb.0:
233+ ; CHECK-SD-NEXT: abs v1.4h, v1.4h
234+ ; CHECK-SD-NEXT: add v0.4h, v1.4h, v0.4h
235+ ; CHECK-SD-NEXT: ret
236+ ;
237+ ; CHECK-GI-LABEL: saba_sabd_zeros_4h:
238+ ; CHECK-GI: // %bb.0:
239+ ; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
240+ ; CHECK-GI-NEXT: saba v0.4h, v1.4h, v2.4h
241+ ; CHECK-GI-NEXT: ret
; sabd against zero, then add with the intrinsic result as the first operand
; and the accumulator %a as the second.
242+ %sabd = call <4 x i16 > @llvm.aarch64.neon.sabd.v4i16 (<4 x i16 > %b , <4 x i16 > zeroinitializer )
243+ %add = add <4 x i16 > %sabd , %a
244+ ret <4 x i16 > %add
245+ }
246+
; Test: accumulate sabd(%b, zeroinitializer) into %a for <16 x i8>.
; SD-prefixed checks expect abs + add on the .16b arrangement; GI-prefixed
; checks expect movi #0 into v2 followed by saba.
247+ define <16 x i8 > @saba_sabd_zeros_16b (<16 x i8 > %a , <16 x i8 > %b ) #0 {
248+ ; CHECK-SD-LABEL: saba_sabd_zeros_16b:
249+ ; CHECK-SD: // %bb.0:
250+ ; CHECK-SD-NEXT: abs v1.16b, v1.16b
251+ ; CHECK-SD-NEXT: add v0.16b, v1.16b, v0.16b
252+ ; CHECK-SD-NEXT: ret
253+ ;
254+ ; CHECK-GI-LABEL: saba_sabd_zeros_16b:
255+ ; CHECK-GI: // %bb.0:
256+ ; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
257+ ; CHECK-GI-NEXT: saba v0.16b, v1.16b, v2.16b
258+ ; CHECK-GI-NEXT: ret
; sabd against zero, then add with the intrinsic result as the first operand
; and the accumulator %a as the second.
259+ %sabd = call <16 x i8 > @llvm.aarch64.neon.sabd.v16i8 (<16 x i8 > %b , <16 x i8 > zeroinitializer )
260+ %add = add <16 x i8 > %sabd , %a
261+ ret <16 x i8 > %add
262+ }
263+
; Test: accumulate sabd(%b, zeroinitializer) into %a for <8 x i8>.
; SD-prefixed checks expect abs + add on the .8b arrangement; GI-prefixed
; checks expect movi #0 into v2 followed by saba.
264+ define <8 x i8 > @saba_sabd_zeros_8b (<8 x i8 > %a , <8 x i8 > %b ) #0 {
265+ ; CHECK-SD-LABEL: saba_sabd_zeros_8b:
266+ ; CHECK-SD: // %bb.0:
267+ ; CHECK-SD-NEXT: abs v1.8b, v1.8b
268+ ; CHECK-SD-NEXT: add v0.8b, v1.8b, v0.8b
269+ ; CHECK-SD-NEXT: ret
270+ ;
271+ ; CHECK-GI-LABEL: saba_sabd_zeros_8b:
272+ ; CHECK-GI: // %bb.0:
273+ ; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
274+ ; CHECK-GI-NEXT: saba v0.8b, v1.8b, v2.8b
275+ ; CHECK-GI-NEXT: ret
; sabd against zero, then add with the intrinsic result as the first operand
; and the accumulator %a as the second.
276+ %sabd = call <8 x i8 > @llvm.aarch64.neon.sabd.v8i8 (<8 x i8 > %b , <8 x i8 > zeroinitializer )
277+ %add = add <8 x i8 > %sabd , %a
278+ ret <8 x i8 > %add
279+ }
280+
; Test: add the generic llvm.abs of %b to the accumulator %a for <4 x i32>.
; Only the common check prefix is used, so every run configuration covered by
; it is expected to emit the same abs + add pair (no saba).
281+ define <4 x i32 > @saba_abs_zeros_4s (<4 x i32 > %a , <4 x i32 > %b ) #0 {
282+ ; CHECK-LABEL: saba_abs_zeros_4s:
283+ ; CHECK: // %bb.0:
284+ ; CHECK-NEXT: abs v1.4s, v1.4s
285+ ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
286+ ; CHECK-NEXT: ret
; The i1 true flag is llvm.abs's INT_MIN-is-poison argument (see LangRef).
; Here %a is the FIRST add operand, matching the v0-before-v1 source order
; in the expected add.
287+ %abs = call <4 x i32 > @llvm.abs.v4i32 (<4 x i32 > %b , i1 true )
288+ %add = add <4 x i32 > %a , %abs
289+ ret <4 x i32 > %add
290+ }
291+
; Test: add the generic llvm.abs of %b to the accumulator %a for <2 x i32>.
; A single common check prefix expects abs + add on the .2s arrangement.
292+ define <2 x i32 > @saba_abs_zeros_2s (<2 x i32 > %a , <2 x i32 > %b ) #0 {
293+ ; CHECK-LABEL: saba_abs_zeros_2s:
294+ ; CHECK: // %bb.0:
295+ ; CHECK-NEXT: abs v1.2s, v1.2s
296+ ; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
297+ ; CHECK-NEXT: ret
; i1 true is llvm.abs's INT_MIN-is-poison flag (see LangRef); the accumulator
; %a is the first add operand.
298+ %abs = call <2 x i32 > @llvm.abs.v2i32 (<2 x i32 > %b , i1 true )
299+ %add = add <2 x i32 > %a , %abs
300+ ret <2 x i32 > %add
301+ }
302+
; Test: add the generic llvm.abs of %b to the accumulator %a for <8 x i16>.
; A single common check prefix expects abs + add on the .8h arrangement.
303+ define <8 x i16 > @saba_abs_zeros_8h (<8 x i16 > %a , <8 x i16 > %b ) #0 {
304+ ; CHECK-LABEL: saba_abs_zeros_8h:
305+ ; CHECK: // %bb.0:
306+ ; CHECK-NEXT: abs v1.8h, v1.8h
307+ ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
308+ ; CHECK-NEXT: ret
; i1 true is llvm.abs's INT_MIN-is-poison flag (see LangRef); the accumulator
; %a is the first add operand.
309+ %abs = call <8 x i16 > @llvm.abs.v8i16 (<8 x i16 > %b , i1 true )
310+ %add = add <8 x i16 > %a , %abs
311+ ret <8 x i16 > %add
312+ }
313+
; Test: add the generic llvm.abs of %b to the accumulator %a for <4 x i16>.
; A single common check prefix expects abs + add on the .4h arrangement.
314+ define <4 x i16 > @saba_abs_zeros_4h (<4 x i16 > %a , <4 x i16 > %b ) #0 {
315+ ; CHECK-LABEL: saba_abs_zeros_4h:
316+ ; CHECK: // %bb.0:
317+ ; CHECK-NEXT: abs v1.4h, v1.4h
318+ ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
319+ ; CHECK-NEXT: ret
; i1 true is llvm.abs's INT_MIN-is-poison flag (see LangRef); the accumulator
; %a is the first add operand.
320+ %abs = call <4 x i16 > @llvm.abs.v4i16 (<4 x i16 > %b , i1 true )
321+ %add = add <4 x i16 > %a , %abs
322+ ret <4 x i16 > %add
323+ }
324+
; Test: add the generic llvm.abs of %b to the accumulator %a for <16 x i8>.
; A single common check prefix expects abs + add on the .16b arrangement.
325+ define <16 x i8 > @saba_abs_zeros_16b (<16 x i8 > %a , <16 x i8 > %b ) #0 {
326+ ; CHECK-LABEL: saba_abs_zeros_16b:
327+ ; CHECK: // %bb.0:
328+ ; CHECK-NEXT: abs v1.16b, v1.16b
329+ ; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
330+ ; CHECK-NEXT: ret
; i1 true is llvm.abs's INT_MIN-is-poison flag (see LangRef); the accumulator
; %a is the first add operand.
331+ %abs = call <16 x i8 > @llvm.abs.v16i8 (<16 x i8 > %b , i1 true )
332+ %add = add <16 x i8 > %a , %abs
333+ ret <16 x i8 > %add
334+ }
335+
; Test: add the generic llvm.abs of %b to the accumulator %a for <8 x i8>.
; A single common check prefix expects abs + add on the .8b arrangement.
336+ define <8 x i8 > @saba_abs_zeros_8b (<8 x i8 > %a , <8 x i8 > %b ) #0 {
337+ ; CHECK-LABEL: saba_abs_zeros_8b:
338+ ; CHECK: // %bb.0:
339+ ; CHECK-NEXT: abs v1.8b, v1.8b
340+ ; CHECK-NEXT: add v0.8b, v0.8b, v1.8b
341+ ; CHECK-NEXT: ret
; i1 true is llvm.abs's INT_MIN-is-poison flag (see LangRef); the accumulator
; %a is the first add operand.
342+ %abs = call <8 x i8 > @llvm.abs.v8i8 (<8 x i8 > %b , i1 true )
343+ %add = add <8 x i8 > %a , %abs
344+ ret <8 x i8 > %add
345+ }
346+
347+ ; SABAL from ADD(ZEXT(SABD(X, ZEROS)))
348+
; Test: widening accumulate - sabd(%b, zeroinitializer) on <2 x i32> is
; zero-extended to <2 x i64> and added to the <2 x i64> accumulator %a.
; SD-prefixed checks expect abs followed by a widening uaddw; GI-prefixed
; checks expect a movi-materialised zero vector feeding sabal.
; NOTE(review): SD/GI presumably correspond to SelectionDAG and GlobalISel
; runs - the RUN lines are outside this view, confirm there.
349+ define <2 x i64 > @sabal_sabd_zeros_2s (<2 x i64 > %a , <2 x i32 > %b ) #0 {
350+ ; CHECK-SD-LABEL: sabal_sabd_zeros_2s:
351+ ; CHECK-SD: // %bb.0:
352+ ; CHECK-SD-NEXT: abs v1.2s, v1.2s
353+ ; CHECK-SD-NEXT: uaddw v0.2d, v0.2d, v1.2s
354+ ; CHECK-SD-NEXT: ret
355+ ;
356+ ; CHECK-GI-LABEL: sabal_sabd_zeros_2s:
357+ ; CHECK-GI: // %bb.0:
358+ ; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
359+ ; CHECK-GI-NEXT: sabal v0.2d, v1.2s, v2.2s
360+ ; CHECK-GI-NEXT: ret
; Narrow sabd against zero, zext to the accumulator width, then add with the
; widened value as the first operand.
361+ %sabd = call <2 x i32 > @llvm.aarch64.neon.sabd.v2i32 (<2 x i32 > %b , <2 x i32 > zeroinitializer )
362+ %sabd.zext = zext <2 x i32 > %sabd to <2 x i64 >
363+ %add = add <2 x i64 > %sabd.zext , %a
364+ ret <2 x i64 > %add
365+ }
366+
; Test: widening accumulate - sabd(%b, zeroinitializer) on <4 x i16> is
; zero-extended to <4 x i32> and added to the <4 x i32> accumulator %a.
; SD-prefixed checks expect abs + uaddw; GI-prefixed checks expect movi #0
; into v2 followed by sabal.
367+ define <4 x i32 > @sabal_sabd_zeros_4h (<4 x i32 > %a , <4 x i16 > %b ) #0 {
368+ ; CHECK-SD-LABEL: sabal_sabd_zeros_4h:
369+ ; CHECK-SD: // %bb.0:
370+ ; CHECK-SD-NEXT: abs v1.4h, v1.4h
371+ ; CHECK-SD-NEXT: uaddw v0.4s, v0.4s, v1.4h
372+ ; CHECK-SD-NEXT: ret
373+ ;
374+ ; CHECK-GI-LABEL: sabal_sabd_zeros_4h:
375+ ; CHECK-GI: // %bb.0:
376+ ; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
377+ ; CHECK-GI-NEXT: sabal v0.4s, v1.4h, v2.4h
378+ ; CHECK-GI-NEXT: ret
; Narrow sabd against zero, zext to the accumulator width, then add with the
; widened value as the first operand.
379+ %sabd = call <4 x i16 > @llvm.aarch64.neon.sabd.v4i16 (<4 x i16 > %b , <4 x i16 > zeroinitializer )
380+ %sabd.zext = zext <4 x i16 > %sabd to <4 x i32 >
381+ %add = add <4 x i32 > %sabd.zext , %a
382+ ret <4 x i32 > %add
383+ }
384+
; Test: widening accumulate - sabd(%b, zeroinitializer) on <8 x i8> is
; zero-extended to <8 x i16> and added to the <8 x i16> accumulator %a.
; SD-prefixed checks expect abs + uaddw; GI-prefixed checks expect movi #0
; into v2 followed by sabal.
385+ define <8 x i16 > @sabal_sabd_zeros_8b (<8 x i16 > %a , <8 x i8 > %b ) #0 {
386+ ; CHECK-SD-LABEL: sabal_sabd_zeros_8b:
387+ ; CHECK-SD: // %bb.0:
388+ ; CHECK-SD-NEXT: abs v1.8b, v1.8b
389+ ; CHECK-SD-NEXT: uaddw v0.8h, v0.8h, v1.8b
390+ ; CHECK-SD-NEXT: ret
391+ ;
392+ ; CHECK-GI-LABEL: sabal_sabd_zeros_8b:
393+ ; CHECK-GI: // %bb.0:
394+ ; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
395+ ; CHECK-GI-NEXT: sabal v0.8h, v1.8b, v2.8b
396+ ; CHECK-GI-NEXT: ret
; Narrow sabd against zero, zext to the accumulator width, then add with the
; widened value as the first operand.
397+ %sabd = call <8 x i8 > @llvm.aarch64.neon.sabd.v8i8 (<8 x i8 > %b , <8 x i8 > zeroinitializer )
398+ %sabd.zext = zext <8 x i8 > %sabd to <8 x i16 >
399+ %add = add <8 x i16 > %sabd.zext , %a
400+ ret <8 x i16 > %add
401+ }
402+
; Test: widening accumulate built from the generic llvm.abs - abs(%b) on
; <2 x i32> is zero-extended to <2 x i64> and added to the accumulator %a.
; Only the common check prefix is used; it expects abs + uaddw (no sabal).
403+ define <2 x i64 > @sabal_abs_zeros_2s (<2 x i64 > %a , <2 x i32 > %b ) #0 {
404+ ; CHECK-LABEL: sabal_abs_zeros_2s:
405+ ; CHECK: // %bb.0:
406+ ; CHECK-NEXT: abs v1.2s, v1.2s
407+ ; CHECK-NEXT: uaddw v0.2d, v0.2d, v1.2s
408+ ; CHECK-NEXT: ret
; i1 true is llvm.abs's INT_MIN-is-poison flag (see LangRef). The result is
; zext'ed to the accumulator width and added with %a as the first operand.
409+ %abs = call <2 x i32 > @llvm.abs.v2i32 (<2 x i32 > %b , i1 true )
410+ %abs.zext = zext <2 x i32 > %abs to <2 x i64 >
411+ %add = add <2 x i64 > %a , %abs.zext
412+ ret <2 x i64 > %add
413+ }
414+
; Test: widening accumulate built from the generic llvm.abs - abs(%b) on
; <4 x i16> is zero-extended to <4 x i32> and added to the accumulator %a.
; Only the common check prefix is used; it expects abs + uaddw (no sabal).
415+ define <4 x i32 > @sabal_abs_zeros_4h (<4 x i32 > %a , <4 x i16 > %b ) #0 {
416+ ; CHECK-LABEL: sabal_abs_zeros_4h:
417+ ; CHECK: // %bb.0:
418+ ; CHECK-NEXT: abs v1.4h, v1.4h
419+ ; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h
420+ ; CHECK-NEXT: ret
; i1 true is llvm.abs's INT_MIN-is-poison flag (see LangRef). The result is
; zext'ed to the accumulator width and added with %a as the first operand.
421+ %abs = call <4 x i16 > @llvm.abs.v4i16 (<4 x i16 > %b , i1 true )
422+ %abs.zext = zext <4 x i16 > %abs to <4 x i32 >
423+ %add = add <4 x i32 > %a , %abs.zext
424+ ret <4 x i32 > %add
425+ }
426+
; Test: widening accumulate built from the generic llvm.abs - abs(%b) on
; <8 x i8> is zero-extended to <8 x i16> and added to the accumulator %a.
; Only the common check prefix is used; it expects abs + uaddw (no sabal).
427+ define <8 x i16 > @sabal_abs_zeros_8b (<8 x i16 > %a , <8 x i8 > %b ) #0 {
428+ ; CHECK-LABEL: sabal_abs_zeros_8b:
429+ ; CHECK: // %bb.0:
430+ ; CHECK-NEXT: abs v1.8b, v1.8b
431+ ; CHECK-NEXT: uaddw v0.8h, v0.8h, v1.8b
432+ ; CHECK-NEXT: ret
; i1 true is llvm.abs's INT_MIN-is-poison flag (see LangRef). The result is
; zext'ed to the accumulator width and added with %a as the first operand.
433+ %abs = call <8 x i8 > @llvm.abs.v8i8 (<8 x i8 > %b , i1 true )
434+ %abs.zext = zext <8 x i8 > %abs to <8 x i16 >
435+ %add = add <8 x i16 > %a , %abs.zext
436+ ret <8 x i16 > %add
437+ }
438+
177439declare <4 x i32 > @llvm.abs.v4i32 (<4 x i32 >, i1 )
178440declare <2 x i32 > @llvm.abs.v2i32 (<2 x i32 >, i1 )
179441declare <8 x i16 > @llvm.abs.v8i16 (<8 x i16 >, i1 )
0 commit comments