diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 634914d3b3fd0..55238a2819363 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -311,6 +311,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
       setOperationAction(ISD::ABDS, VT, Legal);
       setOperationAction(ISD::ABDU, VT, Legal);
+      setOperationAction(ISD::SADDSAT, VT, Legal);
+      setOperationAction(ISD::SSUBSAT, VT, Legal);
+      setOperationAction(ISD::UADDSAT, VT, Legal);
+      setOperationAction(ISD::USUBSAT, VT, Legal);
     }
     for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
       setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -386,6 +390,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
       setOperationAction(ISD::ABDS, VT, Legal);
      setOperationAction(ISD::ABDU, VT, Legal);
+      setOperationAction(ISD::SADDSAT, VT, Legal);
+      setOperationAction(ISD::SSUBSAT, VT, Legal);
+      setOperationAction(ISD::UADDSAT, VT, Legal);
+      setOperationAction(ISD::USUBSAT, VT, Legal);
       setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
     }
     for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index c851b1b6f5eb7..2e8e11155c5fa 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1998,6 +1998,12 @@ def : Pat<(v4i32(fp_to_uint v4f64:$vj)),
 defm : PatXrXr<abds, "XVABSD">;
 defm : PatXrXrU<abdu, "XVABSD">;
 
+// XVSADD_{B/H/W/D}[U], XVSSUB_{B/H/W/D}[U]
+defm : PatXrXr<saddsat, "XVSADD">;
+defm : PatXrXr<ssubsat, "XVSSUB">;
+defm : PatXrXrU<uaddsat, "XVSADD">;
+defm : PatXrXrU<usubsat, "XVSSUB">;
+
 // Vector mask set by condition
 def : Pat<(loongarch_xvmskltz (v32i8 LASX256:$vj)), (PseudoXVMSKLTZ_B LASX256:$vj)>;
 def : Pat<(loongarch_xvmskltz (v16i16 LASX256:$vj)), (PseudoXVMSKLTZ_H LASX256:$vj)>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index fe7c47543424b..5421bba0424bf 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -2155,6 +2155,12 @@ def : Pat<(f64 f64imm_vldi:$in),
 defm : PatVrVr<abds, "VABSD">;
 defm : PatVrVrU<abdu, "VABSD">;
 
+// VSADD_{B/H/W/D}[U], VSSUB_{B/H/W/D}[U]
+defm : PatVrVr<saddsat, "VSADD">;
+defm : PatVrVr<ssubsat, "VSSUB">;
+defm : PatVrVrU<uaddsat, "VSADD">;
+defm : PatVrVrU<usubsat, "VSSUB">;
+
 // Vector mask set by condition
 def : Pat<(loongarch_vmskltz (v16i8 LSX128:$vj)), (PseudoVMSKLTZ_B LSX128:$vj)>;
 def : Pat<(loongarch_vmskltz (v8i16 LSX128:$vj)), (PseudoVMSKLTZ_H LSX128:$vj)>;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sadd-sat.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sadd-sat.ll
index ecd4ee7a62016..1802838305ed5 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sadd-sat.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sadd-sat.ll
@@ -1,17 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
-; RUN: llc -mtriple=loongarch64 -mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc -mtriple=loongarch64 -mattr=+lasx < %s | FileCheck %s
 
 define <32 x i8> @xvsadd_b(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK-LABEL: xvsadd_b:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvadd.b $xr2, $xr0, $xr1
-; CHECK-NEXT: xvslt.b $xr0, $xr2, $xr0
-; CHECK-NEXT: xvslti.b $xr1, $xr1, 0
-; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0
-; CHECK-NEXT: xvsrai.b $xr1, $xr2, 7
-; CHECK-NEXT: xvbitrevi.b $xr1, $xr1, 7
-; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr1, $xr0
+; CHECK-NEXT: xvsadd.b $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
 %ret = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
 ret <32 x i8> %ret
@@ -20,13 +14,7 @@ define <32 x i8> @xvsadd_b(<32 x i8> %a, <32 x i8> %b) {
 define <16 x i16> @xvsadd_h(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK-LABEL: xvsadd_h:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvadd.h $xr2, $xr0, $xr1
-; CHECK-NEXT: xvslt.h $xr0, $xr2, $xr0
-; CHECK-NEXT: xvslti.h $xr1, $xr1, 0
-; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0
-; CHECK-NEXT: xvsrai.h $xr1, $xr2, 15
-; CHECK-NEXT: xvbitrevi.h $xr1, $xr1, 15
-; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr1, $xr0
+; CHECK-NEXT: xvsadd.h $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
 %ret = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
 ret <16 x i16> %ret
@@ -35,42 +23,17 @@ define <16 x i16> @xvsadd_h(<16 x i16> %a, <16 x i16> %b) {
 define <8 x i32> @xvsadd_w(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK-LABEL: xvsadd_w:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvadd.w $xr2, $xr0, $xr1
-; CHECK-NEXT: xvslt.w $xr0, $xr2, $xr0
-; CHECK-NEXT: xvslti.w $xr1, $xr1, 0
-; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0
-; CHECK-NEXT: xvsrai.w $xr1, $xr2, 31
-; CHECK-NEXT: xvbitrevi.w $xr1, $xr1, 31
-; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr1, $xr0
+; CHECK-NEXT: xvsadd.w $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
 %ret = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> %a, <8 x i32> %b)
 ret <8 x i32> %ret
 }
 
 define <4 x i64> @xvsadd_d(<4 x i64> %a, <4 x i64> %b) {
-; LA32-LABEL: xvsadd_d:
-; LA32: # %bb.0:
-; LA32-NEXT: xvadd.d $xr2, $xr0, $xr1
-; LA32-NEXT: xvslt.d $xr0, $xr2, $xr0
-; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; LA32-NEXT: xvld $xr3, $a0, %pc_lo12(.LCPI3_0)
-; LA32-NEXT: xvslti.d $xr1, $xr1, 0
-; LA32-NEXT: xvxor.v $xr0, $xr1, $xr0
-; LA32-NEXT: xvsrai.d $xr1, $xr2, 63
-; LA32-NEXT: xvxor.v $xr1, $xr1, $xr3
-; LA32-NEXT: xvbitsel.v $xr0, $xr2, $xr1, $xr0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: xvsadd_d:
-; LA64: # %bb.0:
-; LA64-NEXT: xvadd.d $xr2, $xr0, $xr1
-; LA64-NEXT: xvslt.d $xr0, $xr2, $xr0
-; LA64-NEXT: xvslti.d $xr1, $xr1, 0
-; LA64-NEXT: xvxor.v $xr0, $xr1, $xr0
-; LA64-NEXT: xvsrai.d $xr1, $xr2, 63
-; LA64-NEXT: xvbitrevi.d $xr1, $xr1, 63
-; LA64-NEXT: xvbitsel.v $xr0, $xr2, $xr1, $xr0
-; LA64-NEXT: ret
+; CHECK-LABEL: xvsadd_d:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvsadd.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
 %ret = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> %a, <4 x i64> %b)
 ret <4 x i64> %ret
 }
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ssub-sat.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ssub-sat.ll
index 765fdb71f0642..5497c4cb913bc 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ssub-sat.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ssub-sat.ll
@@ -1,18 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
-; RUN: llc -mtriple=loongarch64 -mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc -mtriple=loongarch64 -mattr=+lasx < %s | FileCheck %s
 
 define <32 x i8> @xvssub_b(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK-LABEL: xvssub_b:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvrepli.b $xr2, 0
-; CHECK-NEXT: xvslt.b $xr2, $xr2, $xr1
-; CHECK-NEXT: xvsub.b $xr1, $xr0, $xr1
-; CHECK-NEXT: xvslt.b $xr0, $xr1, $xr0
-; CHECK-NEXT: xvxor.v $xr0, $xr2, $xr0
-; CHECK-NEXT: xvsrai.b $xr2, $xr1, 7
-; CHECK-NEXT: xvbitrevi.b $xr2, $xr2, 7
-; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
+; CHECK-NEXT: xvssub.b $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
 %ret = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
 ret <32 x i8> %ret
@@ -21,14 +14,7 @@ define <32 x i8> @xvssub_b(<32 x i8> %a, <32 x i8> %b) {
 define <16 x i16> @xvssub_h(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK-LABEL: xvssub_h:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvrepli.b $xr2, 0
-; CHECK-NEXT: xvslt.h $xr2, $xr2, $xr1
-; CHECK-NEXT: xvsub.h $xr1, $xr0, $xr1
-; CHECK-NEXT: xvslt.h $xr0, $xr1, $xr0
-; CHECK-NEXT: xvxor.v $xr0, $xr2, $xr0
-; CHECK-NEXT: xvsrai.h $xr2, $xr1, 15
-; CHECK-NEXT: xvbitrevi.h $xr2, $xr2, 15
-; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
+; CHECK-NEXT: xvssub.h $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
 %ret = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
 ret <16 x i16> %ret
@@ -37,45 +23,17 @@ define <16 x i16> @xvssub_h(<16 x i16> %a, <16 x i16> %b) {
 define <8 x i32> @xvssub_w(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK-LABEL: xvssub_w:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvrepli.b $xr2, 0
-; CHECK-NEXT: xvslt.w $xr2, $xr2, $xr1
-; CHECK-NEXT: xvsub.w $xr1, $xr0, $xr1
-; CHECK-NEXT: xvslt.w $xr0, $xr1, $xr0
-; CHECK-NEXT: xvxor.v $xr0, $xr2, $xr0
-; CHECK-NEXT: xvsrai.w $xr2, $xr1, 31
-; CHECK-NEXT: xvbitrevi.w $xr2, $xr2, 31
-; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
+; CHECK-NEXT: xvssub.w $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
 %ret = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %a, <8 x i32> %b)
 ret <8 x i32> %ret
 }
 
 define <4 x i64> @xvssub_d(<4 x i64> %a, <4 x i64> %b) {
-; LA32-LABEL: xvssub_d:
-; LA32: # %bb.0:
-; LA32-NEXT: xvrepli.b $xr2, 0
-; LA32-NEXT: xvslt.d $xr2, $xr2, $xr1
-; LA32-NEXT: xvsub.d $xr1, $xr0, $xr1
-; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; LA32-NEXT: xvld $xr3, $a0, %pc_lo12(.LCPI3_0)
-; LA32-NEXT: xvslt.d $xr0, $xr1, $xr0
-; LA32-NEXT: xvxor.v $xr0, $xr2, $xr0
-; LA32-NEXT: xvsrai.d $xr2, $xr1, 63
-; LA32-NEXT: xvxor.v $xr2, $xr2, $xr3
-; LA32-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: xvssub_d:
-; LA64: # %bb.0:
-; LA64-NEXT: xvrepli.b $xr2, 0
-; LA64-NEXT: xvslt.d $xr2, $xr2, $xr1
-; LA64-NEXT: xvsub.d $xr1, $xr0, $xr1
-; LA64-NEXT: xvslt.d $xr0, $xr1, $xr0
-; LA64-NEXT: xvxor.v $xr0, $xr2, $xr0
-; LA64-NEXT: xvsrai.d $xr2, $xr1, 63
-; LA64-NEXT: xvbitrevi.d $xr2, $xr2, 63
-; LA64-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
-; LA64-NEXT: ret
+; CHECK-LABEL: xvssub_d:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvssub.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
 %ret = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> %a, <4 x i64> %b)
 ret <4 x i64> %ret
 }
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uadd-sat.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uadd-sat.ll
index f0317218d3b1d..6943c9188ada9 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uadd-sat.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uadd-sat.ll
@@ -5,9 +5,7 @@
 define <32 x i8> @xvuadd_b(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK-LABEL: xvuadd_b:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvxori.b $xr2, $xr1, 255
-; CHECK-NEXT: xvmin.bu $xr0, $xr0, $xr2
-; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsadd.bu $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
 %ret = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
 ret <32 x i8> %ret
@@ -16,10 +14,7 @@ define <32 x i8> @xvuadd_b(<32 x i8> %a, <32 x i8> %b) {
 define <16 x i16> @xvuadd_h(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK-LABEL: xvuadd_h:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvrepli.b $xr2, -1
-; CHECK-NEXT: xvxor.v $xr2, $xr1, $xr2
-; CHECK-NEXT: xvmin.hu $xr0, $xr0, $xr2
-; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsadd.hu $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
 %ret = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
 ret <16 x i16> %ret
@@ -28,10 +23,7 @@ define <16 x i16> @xvuadd_h(<16 x i16> %a, <16 x i16> %b) {
 define <8 x i32> @xvuadd_w(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK-LABEL: xvuadd_w:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvrepli.b $xr2, -1
-; CHECK-NEXT: xvxor.v $xr2, $xr1, $xr2
-; CHECK-NEXT: xvmin.wu $xr0, $xr0, $xr2
-; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsadd.wu $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
 %ret = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> %a, <8 x i32> %b)
 ret <8 x i32> %ret
@@ -40,10 +32,7 @@ define <8 x i32> @xvuadd_w(<8 x i32> %a, <8 x i32> %b) {
 define <4 x i64> @xvuadd_d(<4 x i64> %a, <4 x i64> %b) {
 ; CHECK-LABEL: xvuadd_d:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvrepli.b $xr2, -1
-; CHECK-NEXT: xvxor.v $xr2, $xr1, $xr2
-; CHECK-NEXT: xvmin.du $xr0, $xr0, $xr2
-; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsadd.du $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
 %ret = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> %a, <4 x i64> %b)
 ret <4 x i64> %ret
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/usub-sat.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/usub-sat.ll
index fe7d477204be7..9c0ff46179e5a 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/usub-sat.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/usub-sat.ll
@@ -5,8 +5,7 @@
 define <32 x i8> @xvusub_b(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK-LABEL: xvusub_b:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvmax.bu $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsub.b $xr0, $xr0, $xr1
+; CHECK-NEXT: xvssub.bu $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
 %ret = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
 ret <32 x i8> %ret
@@ -15,8 +14,7 @@ define <32 x i8> @xvusub_b(<32 x i8> %a, <32 x i8> %b) {
 define <16 x i16> @xvusub_h(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK-LABEL: xvusub_h:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvmax.hu $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsub.h $xr0, $xr0, $xr1
+; CHECK-NEXT: xvssub.hu $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
 %ret = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
 ret <16 x i16> %ret
@@ -25,8 +23,7 @@ define <16 x i16> @xvusub_h(<16 x i16> %a, <16 x i16> %b) {
 define <8 x i32> @xvusub_w(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK-LABEL: xvusub_w:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvmax.wu $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsub.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvssub.wu $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
 %ret = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> %a, <8 x i32> %b)
 ret <8 x i32> %ret
@@ -35,8 +32,7 @@ define <8 x i32> @xvusub_w(<8 x i32> %a, <8 x i32> %b) {
 define <4 x i64> @xvusub_d(<4 x i64> %a, <4 x i64> %b) {
 ; CHECK-LABEL: xvusub_d:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xvmax.du $xr0, $xr0, $xr1
-; CHECK-NEXT: xvsub.d $xr0, $xr0, $xr1
+; CHECK-NEXT: xvssub.du $xr0, $xr0, $xr1
 ; CHECK-NEXT: ret
 %ret = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> %a, <4 x i64> %b)
 ret <4 x i64> %ret
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sadd-sat.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sadd-sat.ll
index dd5842fcbec65..5871b4c497e50 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sadd-sat.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sadd-sat.ll
@@ -1,17 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
-; RUN: llc -mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
+; RUN: llc -mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s
 
 define <16 x i8> @vsadd_b(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: vsadd_b:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vadd.b $vr2, $vr0, $vr1
-; CHECK-NEXT: vslt.b $vr0, $vr2, $vr0
-; CHECK-NEXT: vslti.b $vr1, $vr1, 0
-; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vsrai.b $vr1, $vr2, 7
-; CHECK-NEXT: vbitrevi.b $vr1, $vr1, 7
-; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr1, $vr0
+; CHECK-NEXT: vsadd.b $vr0, $vr0, $vr1
 ; CHECK-NEXT: ret
 %ret = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
 ret <16 x i8> %ret
@@ -20,13 +14,7 @@ define <16 x i8> @vsadd_b(<16 x i8> %a, <16 x i8> %b) {
 define <8 x i16> @vsadd_h(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: vsadd_h:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vadd.h $vr2, $vr0, $vr1
-; CHECK-NEXT: vslt.h $vr0, $vr2, $vr0
-; CHECK-NEXT: vslti.h $vr1, $vr1, 0
-; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vsrai.h $vr1, $vr2, 15
-; CHECK-NEXT: vbitrevi.h $vr1, $vr1, 15
-; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr1, $vr0
+; CHECK-NEXT: vsadd.h $vr0, $vr0, $vr1
 ; CHECK-NEXT: ret
 %ret = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
 ret <8 x i16> %ret
@@ -35,42 +23,17 @@ define <8 x i16> @vsadd_h(<8 x i16> %a, <8 x i16> %b) {
 define <4 x i32> @vsadd_w(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vsadd_w:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vadd.w $vr2, $vr0, $vr1
-; CHECK-NEXT: vslt.w $vr0, $vr2, $vr0
-; CHECK-NEXT: vslti.w $vr1, $vr1, 0
-; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vsrai.w $vr1, $vr2, 31
-; CHECK-NEXT: vbitrevi.w $vr1, $vr1, 31
-; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr1, $vr0
+; CHECK-NEXT: vsadd.w $vr0, $vr0, $vr1
 ; CHECK-NEXT: ret
 %ret = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
 ret <4 x i32> %ret
 }
 
 define <2 x i64> @vsadd_d(<2 x i64> %a, <2 x i64> %b) {
-; LA32-LABEL: vsadd_d:
-; LA32: # %bb.0:
-; LA32-NEXT: vadd.d $vr2, $vr0, $vr1
-; LA32-NEXT: vslt.d $vr0, $vr2, $vr0
-; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; LA32-NEXT: vld $vr3, $a0, %pc_lo12(.LCPI3_0)
-; LA32-NEXT: vslti.d $vr1, $vr1, 0
-; LA32-NEXT: vxor.v $vr0, $vr1, $vr0
-; LA32-NEXT: vsrai.d $vr1, $vr2, 63
-; LA32-NEXT: vxor.v $vr1, $vr1, $vr3
-; LA32-NEXT: vbitsel.v $vr0, $vr2, $vr1, $vr0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vsadd_d:
-; LA64: # %bb.0:
-; LA64-NEXT: vadd.d $vr2, $vr0, $vr1
-; LA64-NEXT: vslt.d $vr0, $vr2, $vr0
-; LA64-NEXT: vslti.d $vr1, $vr1, 0
-; LA64-NEXT: vxor.v $vr0, $vr1, $vr0
-; LA64-NEXT: vsrai.d $vr1, $vr2, 63
-; LA64-NEXT: vbitrevi.d $vr1, $vr1, 63
-; LA64-NEXT: vbitsel.v $vr0, $vr2, $vr1, $vr0
-; LA64-NEXT: ret
+; CHECK-LABEL: vsadd_d:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
 %ret = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
 ret <2 x i64> %ret
 }
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ssub-sat.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ssub-sat.ll
index e330b7e43b42e..4ae52f30bb7cd 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ssub-sat.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ssub-sat.ll
@@ -1,18 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
-; RUN: llc -mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
+; RUN: llc -mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s
 
 define <16 x i8> @vssub_b(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: vssub_b:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vslt.b $vr2, $vr2, $vr1
-; CHECK-NEXT: vsub.b $vr1, $vr0, $vr1
-; CHECK-NEXT: vslt.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vxor.v $vr0, $vr2, $vr0
-; CHECK-NEXT: vsrai.b $vr2, $vr1, 7
-; CHECK-NEXT: vbitrevi.b $vr2, $vr2, 7
-; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
+; CHECK-NEXT: vssub.b $vr0, $vr0, $vr1
 ; CHECK-NEXT: ret
 %ret = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
 ret <16 x i8> %ret
@@ -21,14 +14,7 @@ define <16 x i8> @vssub_b(<16 x i8> %a, <16 x i8> %b) {
 define <8 x i16> @vssub_h(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: vssub_h:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vslt.h $vr2, $vr2, $vr1
-; CHECK-NEXT: vsub.h $vr1, $vr0, $vr1
-; CHECK-NEXT: vslt.h $vr0, $vr1, $vr0
-; CHECK-NEXT: vxor.v $vr0, $vr2, $vr0
-; CHECK-NEXT: vsrai.h $vr2, $vr1, 15
-; CHECK-NEXT: vbitrevi.h $vr2, $vr2, 15
-; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
+; CHECK-NEXT: vssub.h $vr0, $vr0, $vr1
 ; CHECK-NEXT: ret
 %ret = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
 ret <8 x i16> %ret
@@ -37,45 +23,17 @@ define <8 x i16> @vssub_h(<8 x i16> %a, <8 x i16> %b) {
 define <4 x i32> @vssub_w(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vssub_w:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vslt.w $vr2, $vr2, $vr1
-; CHECK-NEXT: vsub.w $vr1, $vr0, $vr1
-; CHECK-NEXT: vslt.w $vr0, $vr1, $vr0
-; CHECK-NEXT: vxor.v $vr0, $vr2, $vr0
-; CHECK-NEXT: vsrai.w $vr2, $vr1, 31
-; CHECK-NEXT: vbitrevi.w $vr2, $vr2, 31
-; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
+; CHECK-NEXT: vssub.w $vr0, $vr0, $vr1
 ; CHECK-NEXT: ret
 %ret = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
 ret <4 x i32> %ret
 }
 
 define <2 x i64> @vssub_d(<2 x i64> %a, <2 x i64> %b) {
-; LA32-LABEL: vssub_d:
-; LA32: # %bb.0:
-; LA32-NEXT: vrepli.b $vr2, 0
-; LA32-NEXT: vslt.d $vr2, $vr2, $vr1
-; LA32-NEXT: vsub.d $vr1, $vr0, $vr1
-; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; LA32-NEXT: vld $vr3, $a0, %pc_lo12(.LCPI3_0)
-; LA32-NEXT: vslt.d $vr0, $vr1, $vr0
-; LA32-NEXT: vxor.v $vr0, $vr2, $vr0
-; LA32-NEXT: vsrai.d $vr2, $vr1, 63
-; LA32-NEXT: vxor.v $vr2, $vr2, $vr3
-; LA32-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vssub_d:
-; LA64: # %bb.0:
-; LA64-NEXT: vrepli.b $vr2, 0
-; LA64-NEXT: vslt.d $vr2, $vr2, $vr1
-; LA64-NEXT: vsub.d $vr1, $vr0, $vr1
-; LA64-NEXT: vslt.d $vr0, $vr1, $vr0
-; LA64-NEXT: vxor.v $vr0, $vr2, $vr0
-; LA64-NEXT: vsrai.d $vr2, $vr1, 63
-; LA64-NEXT: vbitrevi.d $vr2, $vr2, 63
-; LA64-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
-; LA64-NEXT: ret
+; CHECK-LABEL: vssub_d:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vssub.d $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
 %ret = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
 ret <2 x i64> %ret
 }
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uadd-sat.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uadd-sat.ll
index 1e8aa52451d47..faf1383257804 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uadd-sat.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uadd-sat.ll
@@ -5,9 +5,7 @@
 define <16 x i8> @vuadd_b(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: vuadd_b:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vxori.b $vr2, $vr1, 255
-; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr2
-; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1
+; CHECK-NEXT: vsadd.bu $vr0, $vr0, $vr1
 ; CHECK-NEXT: ret
 %ret = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
 ret <16 x i8> %ret
@@ -16,10 +14,7 @@ define <16 x i8> @vuadd_b(<16 x i8> %a, <16 x i8> %b) {
 define <8 x i16> @vuadd_h(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: vuadd_h:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vrepli.b $vr2, -1
-; CHECK-NEXT: vxor.v $vr2, $vr1, $vr2
-; CHECK-NEXT: vmin.hu $vr0, $vr0, $vr2
-; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vsadd.hu $vr0, $vr0, $vr1
 ; CHECK-NEXT: ret
 %ret = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
 ret <8 x i16> %ret
@@ -28,10 +23,7 @@ define <8 x i16> @vuadd_h(<8 x i16> %a, <8 x i16> %b) {
 define <4 x i32> @vuadd_w(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vuadd_w:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vrepli.b $vr2, -1
-; CHECK-NEXT: vxor.v $vr2, $vr1, $vr2
-; CHECK-NEXT: vmin.wu $vr0, $vr0, $vr2
-; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1
+; CHECK-NEXT: vsadd.wu $vr0, $vr0, $vr1
 ; CHECK-NEXT: ret
 %ret = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
 ret <4 x i32> %ret
@@ -40,10 +32,7 @@ define <4 x i32> @vuadd_w(<4 x i32> %a, <4 x i32> %b) {
 define <2 x i64> @vuadd_d(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: vuadd_d:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vrepli.b $vr2, -1
-; CHECK-NEXT: vxor.v $vr2, $vr1, $vr2
-; CHECK-NEXT: vmin.du $vr0, $vr0, $vr2
-; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vsadd.du $vr0, $vr0, $vr1
 ; CHECK-NEXT: ret
 %ret = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
 ret <2 x i64> %ret
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/usub-sat.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/usub-sat.ll
index 3ee1b2b8996d7..59de967fd288a 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/usub-sat.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/usub-sat.ll
@@ -5,8 +5,7 @@
 define <16 x i8> @vusub_b(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: vusub_b:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1
-; CHECK-NEXT: vsub.b $vr0, $vr0, $vr1
+; CHECK-NEXT: vssub.bu $vr0, $vr0, $vr1
 ; CHECK-NEXT: ret
 %ret = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
 ret <16 x i8> %ret
@@ -15,8 +14,7 @@ define <16 x i8> @vusub_b(<16 x i8> %a, <16 x i8> %b) {
 define <8 x i16> @vusub_h(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: vusub_h:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1
-; CHECK-NEXT: vsub.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vssub.hu $vr0, $vr0, $vr1
 ; CHECK-NEXT: ret
 %ret = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
 ret <8 x i16> %ret
@@ -25,8 +23,7 @@ define <8 x i16> @vusub_h(<8 x i16> %a, <8 x i16> %b) {
 define <4 x i32> @vusub_w(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vusub_w:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmax.wu $vr0, $vr0, $vr1
-; CHECK-NEXT: vsub.w $vr0, $vr0, $vr1
+; CHECK-NEXT: vssub.wu $vr0, $vr0, $vr1
 ; CHECK-NEXT: ret
 %ret = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
 ret <4 x i32> %ret
@@ -35,8 +32,7 @@ define <4 x i32> @vusub_w(<4 x i32> %a, <4 x i32> %b) {
 define <2 x i64> @vusub_d(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: vusub_d:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmax.du $vr0, $vr0, $vr1
-; CHECK-NEXT: vsub.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vssub.du $vr0, $vr0, $vr1
 ; CHECK-NEXT: ret
 %ret = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
 ret <2 x i64> %ret