diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 32baa2d111270..3037234ebf4f5 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -340,6 +340,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
           {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
           Expand);
       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+      setOperationAction(ISD::ABS, VT, Legal);
       setOperationAction(ISD::ABDS, VT, Legal);
       setOperationAction(ISD::ABDU, VT, Legal);
       setOperationAction(ISD::SADDSAT, VT, Legal);
@@ -419,6 +420,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
           {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
           Expand);
       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+      setOperationAction(ISD::ABS, VT, Legal);
       setOperationAction(ISD::ABDS, VT, Legal);
       setOperationAction(ISD::ABDU, VT, Legal);
       setOperationAction(ISD::SADDSAT, VT, Legal);
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index adfe990ba1234..bbc0489620193 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -2015,10 +2015,26 @@ def : Pat<(v4i32(fp_to_uint v4f64:$vj)),
                          (XVFTINTRZ_LU_D v4f64:$vj)),
           sub_128)>;
 
+// abs
+def : Pat<(abs v32i8:$xj), (XVMAX_B v32i8:$xj, (XVNEG_B v32i8:$xj))>;
+def : Pat<(abs v16i16:$xj), (XVMAX_H v16i16:$xj, (XVNEG_H v16i16:$xj))>;
+def : Pat<(abs v8i32:$xj), (XVMAX_W v8i32:$xj, (XVNEG_W v8i32:$xj))>;
+def : Pat<(abs v4i64:$xj), (XVMAX_D v4i64:$xj, (XVNEG_D v4i64:$xj))>;
+
 // XVABSD_{B/H/W/D}[U]
 defm : PatXrXr<abds, "XVABSD">;
 defm : PatXrXrU<abdu, "XVABSD">;
 
+// XVADDA_{B/H/W/D}
+def : Pat<(add (v32i8 (abs v32i8:$xj)), (v32i8 (abs v32i8:$xk))),
+          (XVADDA_B v32i8:$xj, v32i8:$xk)>;
+def : Pat<(add (v16i16 (abs v16i16:$xj)), (v16i16 (abs v16i16:$xk))),
+          (XVADDA_H v16i16:$xj, v16i16:$xk)>;
+def : Pat<(add (v8i32 (abs v8i32:$xj)), (v8i32 (abs v8i32:$xk))),
+          (XVADDA_W v8i32:$xj, v8i32:$xk)>;
+def : Pat<(add (v4i64 (abs v4i64:$xj)), (v4i64 (abs v4i64:$xk))),
+          (XVADDA_D v4i64:$xj, v4i64:$xk)>;
+
 // XVSADD_{B/H/W/D}[U], XVSSUB_{B/H/W/D}[U]
 defm : PatXrXr<saddsat, "XVSADD">;
 defm : PatXrXr<ssubsat, "XVSSUB">;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index d99a57e562528..72029710c57fc 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -2153,10 +2153,26 @@ def : Pat<(f32 f32imm_vldi:$in),
 def : Pat<(f64 f64imm_vldi:$in),
           (f64 (EXTRACT_SUBREG (VLDI (to_f64imm_vldi f64imm_vldi:$in)), sub_64))>;
 
+// abs
+def : Pat<(abs v16i8:$vj), (VMAX_B v16i8:$vj, (VNEG_B v16i8:$vj))>;
+def : Pat<(abs v8i16:$vj), (VMAX_H v8i16:$vj, (VNEG_H v8i16:$vj))>;
+def : Pat<(abs v4i32:$vj), (VMAX_W v4i32:$vj, (VNEG_W v4i32:$vj))>;
+def : Pat<(abs v2i64:$vj), (VMAX_D v2i64:$vj, (VNEG_D v2i64:$vj))>;
+
 // VABSD_{B/H/W/D}[U]
 defm : PatVrVr<abds, "VABSD">;
 defm : PatVrVrU<abdu, "VABSD">;
 
+// VADDA_{B/H/W/D}
+def : Pat<(add (v16i8 (abs v16i8:$vj)), (v16i8 (abs v16i8:$vk))),
+          (VADDA_B v16i8:$vj, v16i8:$vk)>;
+def : Pat<(add (v8i16 (abs v8i16:$vj)), (v8i16 (abs v8i16:$vk))),
+          (VADDA_H v8i16:$vj, v8i16:$vk)>;
+def : Pat<(add (v4i32 (abs v4i32:$vj)), (v4i32 (abs v4i32:$vk))),
+          (VADDA_W v4i32:$vj, v4i32:$vk)>;
+def : Pat<(add (v2i64 (abs v2i64:$vj)), (v2i64 (abs v2i64:$vk))),
+          (VADDA_D v2i64:$vj, v2i64:$vk)>;
+
 // VSADD_{B/H/W/D}[U], VSSUB_{B/H/W/D}[U]
 defm : PatVrVr<saddsat, "VSADD">;
 defm : PatVrVr<ssubsat, "VSSUB">;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/abs.ll b/llvm/test/CodeGen/LoongArch/lasx/abs.ll
new file mode 100644
index 0000000000000..e3b0d04d92d75
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/abs.ll
@@ -0,0 +1,128 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @vabs_b(ptr %dst, ptr %src) {
+; CHECK-LABEL: vabs_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvneg.b $xr1, $xr0
+; CHECK-NEXT:    xvmax.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <32 x i8>, ptr %src
+  %b = tail call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a, i1 true)
+  store <32 x i8> %b, ptr %dst
+  ret void
+}
+
+define void @vabs_b_1(ptr %dst, ptr %src) {
+; CHECK-LABEL: vabs_b_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvneg.b $xr1, $xr0
+; CHECK-NEXT:    xvmax.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <32 x i8>, ptr %src
+  %b = tail call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a, i1 false)
+  store <32 x i8> %b, ptr %dst
+  ret void
+}
+
+define void @vabs_h(ptr %dst, ptr %src) {
+; CHECK-LABEL: vabs_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvneg.h $xr1, $xr0
+; CHECK-NEXT:    xvmax.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <16 x i16>, ptr %src
+  %b = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a, i1 true)
+  store <16 x i16> %b, ptr %dst
+  ret void
+}
+
+define void @vabs_h_1(ptr %dst, ptr %src) {
+; CHECK-LABEL: vabs_h_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvneg.h $xr1, $xr0
+; CHECK-NEXT:    xvmax.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <16 x i16>, ptr %src
+  %b = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a, i1 false)
+  store <16 x i16> %b, ptr %dst
+  ret void
+}
+
+define void @vabs_w(ptr %dst, ptr %src) {
+; CHECK-LABEL: vabs_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvneg.w $xr1, $xr0
+; CHECK-NEXT:    xvmax.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <8 x i32>, ptr %src
+  %b = tail call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a, i1 true)
+  store <8 x i32> %b, ptr %dst
+  ret void
+}
+
+define void @vabs_w_1(ptr %dst, ptr %src) {
+; CHECK-LABEL: vabs_w_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvneg.w $xr1, $xr0
+; CHECK-NEXT:    xvmax.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <8 x i32>, ptr %src
+  %b = tail call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a, i1 false)
+  store <8 x i32> %b, ptr %dst
+  ret void
+}
+
+define void @vabs_d(ptr %dst, ptr %src) {
+; CHECK-LABEL: vabs_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvneg.d $xr1, $xr0
+; CHECK-NEXT:    xvmax.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <4 x i64>, ptr %src
+  %b = tail call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a, i1 true)
+  store <4 x i64> %b, ptr %dst
+  ret void
+}
+
+define void @vabs_d_1(ptr %dst, ptr %src) {
+; CHECK-LABEL: vabs_d_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvneg.d $xr1, $xr0
+; CHECK-NEXT:    xvmax.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <4 x i64>, ptr %src
+  %b = tail call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a, i1 false)
+  store <4 x i64> %b, ptr %dst
+  ret void
+}
+
+declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1)
+declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1)
+declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)
+declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/adda.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/adda.ll
index e66a15291fb18..98687755fcfb4 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/adda.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/adda.ll
@@ -7,11 +7,7 @@ define void @vadda_b(ptr %res, ptr %a, ptr %b) nounwind {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
 ; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvneg.b $xr2, $xr0
-; CHECK-NEXT:    xvmax.b $xr0, $xr0, $xr2
-; CHECK-NEXT:    xvneg.b $xr2, $xr1
-; CHECK-NEXT:    xvmax.b $xr1, $xr1, $xr2
-; CHECK-NEXT:    xvadd.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvadda.b $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -33,11 +29,7 @@ define void @vadda_h(ptr %res, ptr %a, ptr %b) nounwind {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
 ; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvneg.h $xr2, $xr0
-; CHECK-NEXT:    xvmax.h $xr0, $xr0, $xr2
-; CHECK-NEXT:    xvneg.h $xr2, $xr1
-; CHECK-NEXT:    xvmax.h $xr1, $xr1, $xr2
-; CHECK-NEXT:    xvadd.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvadda.h $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -59,11 +51,7 @@ define void @vadda_w(ptr %res, ptr %a, ptr %b) nounwind {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
 ; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvneg.w $xr2, $xr0
-; CHECK-NEXT:    xvmax.w $xr0, $xr0, $xr2
-; CHECK-NEXT:    xvneg.w $xr2, $xr1
-; CHECK-NEXT:    xvmax.w $xr1, $xr1, $xr2
-; CHECK-NEXT:    xvadd.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvadda.w $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -85,11 +73,7 @@ define void @vadda_d(ptr %res, ptr %a, ptr %b) nounwind {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
 ; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvneg.d $xr2, $xr0
-; CHECK-NEXT:    xvmax.d $xr0, $xr0, $xr2
-; CHECK-NEXT:    xvneg.d $xr2, $xr1
-; CHECK-NEXT:    xvmax.d $xr1, $xr1, $xr2
-; CHECK-NEXT:    xvadd.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvadda.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/abs.ll b/llvm/test/CodeGen/LoongArch/lsx/abs.ll
new file mode 100644
index 0000000000000..85fe1fe5c0da7
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/abs.ll
@@ -0,0 +1,128 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @vabs_b(ptr %dst, ptr %src) {
+; CHECK-LABEL: vabs_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vneg.b $vr1, $vr0
+; CHECK-NEXT:    vmax.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <16 x i8>, ptr %src
+  %b = tail call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a, i1 true)
+  store <16 x i8> %b, ptr %dst
+  ret void
+}
+
+define void @vabs_b_1(ptr %dst, ptr %src) {
+; CHECK-LABEL: vabs_b_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vneg.b $vr1, $vr0
+; CHECK-NEXT:    vmax.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <16 x i8>, ptr %src
+  %b = tail call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a, i1 false)
+  store <16 x i8> %b, ptr %dst
+  ret void
+}
+
+define void @vabs_h(ptr %dst, ptr %src) {
+; CHECK-LABEL: vabs_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vneg.h $vr1, $vr0
+; CHECK-NEXT:    vmax.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <8 x i16>, ptr %src
+  %b = tail call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a, i1 true)
+  store <8 x i16> %b, ptr %dst
+  ret void
+}
+
+define void @vabs_h_1(ptr %dst, ptr %src) {
+; CHECK-LABEL: vabs_h_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vneg.h $vr1, $vr0
+; CHECK-NEXT:    vmax.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <8 x i16>, ptr %src
+  %b = tail call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a, i1 false)
+  store <8 x i16> %b, ptr %dst
+  ret void
+}
+
+define void @vabs_w(ptr %dst, ptr %src) {
+; CHECK-LABEL: vabs_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vneg.w $vr1, $vr0
+; CHECK-NEXT:    vmax.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <4 x i32>, ptr %src
+  %b = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a, i1 true)
+  store <4 x i32> %b, ptr %dst
+  ret void
+}
+
+define void @vabs_w_1(ptr %dst, ptr %src) {
+; CHECK-LABEL: vabs_w_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vneg.w $vr1, $vr0
+; CHECK-NEXT:    vmax.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <4 x i32>, ptr %src
+  %b = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a, i1 false)
+  store <4 x i32> %b, ptr %dst
+  ret void
+}
+
+define void @vabs_d(ptr %dst, ptr %src) {
+; CHECK-LABEL: vabs_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vneg.d $vr1, $vr0
+; CHECK-NEXT:    vmax.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <2 x i64>, ptr %src
+  %b = tail call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a, i1 true)
+  store <2 x i64> %b, ptr %dst
+  ret void
+}
+
+define void @vabs_d_1(ptr %dst, ptr %src) {
+; CHECK-LABEL: vabs_d_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vneg.d $vr1, $vr0
+; CHECK-NEXT:    vmax.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <2 x i64>, ptr %src
+  %b = tail call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a, i1 false)
+  store <2 x i64> %b, ptr %dst
+  ret void
+}
+
+declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
+declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)
+declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
+declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/adda.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/adda.ll
index 2bd0b597d79ac..34f22e1f6bf45 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/adda.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/adda.ll
@@ -7,11 +7,7 @@ define void @vadda_b(ptr %res, ptr %a, ptr %b) nounwind {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vld $vr0, $a1, 0
 ; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vneg.b $vr2, $vr0
-; CHECK-NEXT:    vmax.b $vr0, $vr0, $vr2
-; CHECK-NEXT:    vneg.b $vr2, $vr1
-; CHECK-NEXT:    vmax.b $vr1, $vr1, $vr2
-; CHECK-NEXT:    vadd.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    vadda.b $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -33,11 +29,7 @@ define void @vadda_h(ptr %res, ptr %a, ptr %b) nounwind {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vld $vr0, $a1, 0
 ; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vneg.h $vr2, $vr0
-; CHECK-NEXT:    vmax.h $vr0, $vr0, $vr2
-; CHECK-NEXT:    vneg.h $vr2, $vr1
-; CHECK-NEXT:    vmax.h $vr1, $vr1, $vr2
-; CHECK-NEXT:    vadd.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    vadda.h $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -59,11 +51,7 @@ define void @vadda_w(ptr %res, ptr %a, ptr %b) nounwind {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vld $vr0, $a1, 0
 ; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vneg.w $vr2, $vr0
-; CHECK-NEXT:    vmax.w $vr0, $vr0, $vr2
-; CHECK-NEXT:    vneg.w $vr2, $vr1
-; CHECK-NEXT:    vmax.w $vr1, $vr1, $vr2
-; CHECK-NEXT:    vadd.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    vadda.w $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -85,11 +73,7 @@ define void @vadda_d(ptr %res, ptr %a, ptr %b) nounwind {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vld $vr0, $a1, 0
 ; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vneg.d $vr2, $vr0
-; CHECK-NEXT:    vmax.d $vr0, $vr0, $vr2
-; CHECK-NEXT:    vneg.d $vr2, $vr1
-; CHECK-NEXT:    vmax.d $vr1, $vr1, $vr2
-; CHECK-NEXT:    vadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vadda.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry: