@@ -396,50 +396,36 @@ define i64 @freeze_array() {
396396 ret i64 %t1
397397}
398398
399- define i32 @freeze_abdu (i8 %x , i8 %y ) {
399+ define < 8 x i16 > @freeze_abdu (< 8 x i16 > %a , < 8 x i16 > %b ) {
400400; CHECK-SD-LABEL: freeze_abdu:
401401; CHECK-SD: // %bb.0:
402- ; CHECK-SD-NEXT: and w8, w0, #0xff
403- ; CHECK-SD-NEXT: sub w8, w8, w1, uxtb
404- ; CHECK-SD-NEXT: cmp w8, #0
405- ; CHECK-SD-NEXT: cneg w0, w8, mi
402+ ; CHECK-SD-NEXT: uaba v0.8h, v0.8h, v1.8h
406403; CHECK-SD-NEXT: ret
407404;
408405; CHECK-GI-LABEL: freeze_abdu:
409406; CHECK-GI: // %bb.0:
410- ; CHECK-GI-NEXT: and w8, w0, #0xff
411- ; CHECK-GI-NEXT: sub w8, w8, w1, uxtb
412- ; CHECK-GI-NEXT: cmp w8, #0
413- ; CHECK-GI-NEXT: cneg w0, w8, le
407+ ; CHECK-GI-NEXT: uabd v1.8h, v0.8h, v1.8h
408+ ; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
414409; CHECK-GI-NEXT: ret
415- %a = zext i8 %x to i32
416- %b = zext i8 %y to i32
417- %d = sub i32 %a , %b
418- %t = call i32 @llvm.abs.i32 (i32 %d , i1 false )
419- %f = freeze i32 %t
420- ret i32 %f
410+ %d = call <8 x i16 > @llvm.aarch64.neon.uabd.v8i16 (<8 x i16 > %a , <8 x i16 > %b )
411+ %f = freeze <8 x i16 > %d
412+ %r = add <8 x i16 > %a , %f
413+ ret <8 x i16 > %r
421414}
422415
423- define i32 @freeze_abds (i8 %x , i8 %y ) {
416+ define < 8 x i16 > @freeze_abds (< 8 x i16 > %a , < 8 x i16 > %b ) {
424417; CHECK-SD-LABEL: freeze_abds:
425418; CHECK-SD: // %bb.0:
426- ; CHECK-SD-NEXT: sxtb w8, w0
427- ; CHECK-SD-NEXT: sub w8, w8, w1, sxtb
428- ; CHECK-SD-NEXT: cmp w8, #0
429- ; CHECK-SD-NEXT: cneg w0, w8, mi
419+ ; CHECK-SD-NEXT: saba v0.8h, v0.8h, v1.8h
430420; CHECK-SD-NEXT: ret
431421;
432422; CHECK-GI-LABEL: freeze_abds:
433423; CHECK-GI: // %bb.0:
434- ; CHECK-GI-NEXT: sxtb w8, w0
435- ; CHECK-GI-NEXT: sub w8, w8, w1, sxtb
436- ; CHECK-GI-NEXT: cmp w8, #0
437- ; CHECK-GI-NEXT: cneg w0, w8, le
424+ ; CHECK-GI-NEXT: sabd v1.8h, v0.8h, v1.8h
425+ ; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
438426; CHECK-GI-NEXT: ret
439- %a = sext i8 %x to i32
440- %b = sext i8 %y to i32
441- %d = sub i32 %a , %b
442- %abs = call i32 @llvm.abs.i32 (i32 %d , i1 true )
443- %f = freeze i32 %abs
444- ret i32 %f
427+ %d = call <8 x i16 > @llvm.aarch64.neon.sabd.v8i16 (<8 x i16 > %a , <8 x i16 > %b )
428+ %f = freeze <8 x i16 > %d
429+ %r = add <8 x i16 > %a , %f
430+ ret <8 x i16 > %r
445431}
0 commit comments