diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f7efd5f437fbb..5fdff55570031 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1535,7 +1535,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                          ISD::UDIV, ISD::SREM, ISD::UREM,
                          ISD::INSERT_VECTOR_ELT, ISD::ABS, ISD::CTPOP,
-                         ISD::VECTOR_SHUFFLE});
+                         ISD::VECTOR_SHUFFLE, ISD::VSELECT});
+
   if (Subtarget.hasVendorXTHeadMemPair())
     setTargetDAGCombine({ISD::LOAD, ISD::STORE});
   if (Subtarget.useRVVForFixedLengthVectors())
@@ -16874,6 +16875,53 @@ static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC) {
+  if (!TrueVal.hasOneUse() || !FalseVal.hasOneUse())
+    return false;
+
+  SwapCC = false;
+  if (TrueVal.getOpcode() == ISD::SUB && FalseVal.getOpcode() == ISD::ADD) {
+    std::swap(TrueVal, FalseVal);
+    SwapCC = true;
+  }
+
+  if (TrueVal.getOpcode() != ISD::ADD || FalseVal.getOpcode() != ISD::SUB)
+    return false;
+
+  SDValue A = FalseVal.getOperand(0);
+  SDValue B = FalseVal.getOperand(1);
+  // Add is commutative, so check both orders.
+  return ((TrueVal.getOperand(0) == A && TrueVal.getOperand(1) == B) ||
+          (TrueVal.getOperand(1) == A && TrueVal.getOperand(0) == B));
+}
+
+/// Convert vselect CC, (add a, b), (sub a, b) to add a, (vselect CC, -b, b).
+/// This allows us to match a vadd.vv fed by a masked vrsub, which reduces
+/// register pressure over the add followed by masked vsub sequence.
+static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  SDValue CC = N->getOperand(0);
+  SDValue TrueVal = N->getOperand(1);
+  SDValue FalseVal = N->getOperand(2);
+
+  bool SwapCC;
+  if (!matchSelectAddSub(TrueVal, FalseVal, SwapCC))
+    return SDValue();
+
+  SDValue Sub = SwapCC ? TrueVal : FalseVal;
+  SDValue A = Sub.getOperand(0);
+  SDValue B = Sub.getOperand(1);
+
+  // Arrange the select such that we can match a masked
+  // vrsub.vi to perform the conditional negate.
+  SDValue NegB = DAG.getNegative(B, DL, VT);
+  if (!SwapCC)
+    CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
+  SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
+  return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
+}
+
 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                     const RISCVSubtarget &Subtarget) {
   if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
@@ -17153,20 +17201,48 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
   return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
 }
 
-/// Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs
-/// during the combine phase before type legalization, and relies on
-/// DAGCombine not undoing the transform if isShuffleMaskLegal returns false
-/// for the source mask.
 static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG,
                                             const RISCVSubtarget &Subtarget,
                                             const RISCVTargetLowering &TLI) {
   SDLoc DL(N);
   EVT VT = N->getValueType(0);
   const unsigned ElementSize = VT.getScalarSizeInBits();
+  const unsigned NumElts = VT.getVectorNumElements();
   SDValue V1 = N->getOperand(0);
   SDValue V2 = N->getOperand(1);
   ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
 
+  MVT XLenVT = Subtarget.getXLenVT();
+
+  // Recognize a disguised select of add/sub.
+  bool SwapCC;
+  if (ShuffleVectorInst::isSelectMask(Mask, NumElts) &&
+      matchSelectAddSub(V1, V2, SwapCC)) {
+    SDValue Sub = SwapCC ? V1 : V2;
+    SDValue A = Sub.getOperand(0);
+    SDValue B = Sub.getOperand(1);
+
+    SmallVector<SDValue> MaskVals;
+    for (int MaskIndex : Mask) {
+      bool SelectMaskVal = (MaskIndex < (int)NumElts);
+      MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
+    }
+    assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
+    EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
+    SDValue CC = DAG.getBuildVector(MaskVT, DL, MaskVals);
+    // Arrange the select such that we can match a masked
+    // vrsub.vi to perform the conditional negate.
+    SDValue NegB = DAG.getNegative(B, DL, VT);
+    if (!SwapCC)
+      CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
+    SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
+    return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
+  }
+
+  // Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs
+  // during the combine phase before type legalization, and relies on
+  // DAGCombine not undoing the transform if isShuffleMaskLegal returns false
+  // for the source mask.
   if (TLI.isTypeLegal(VT) || ElementSize <= Subtarget.getELen() ||
       !isPowerOf2_64(ElementSize) || VT.getVectorNumElements() % 2 != 0 ||
       VT.isFloatingPoint() || TLI.isShuffleMaskLegal(Mask, VT))
@@ -17183,7 +17259,6 @@ static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG,
   return DAG.getBitcast(VT, Res);
 }
 
-
 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget) {
@@ -17857,6 +17932,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
     return performTRUNCATECombine(N, DAG, Subtarget);
   case ISD::SELECT:
     return performSELECTCombine(N, DAG, Subtarget);
+  case ISD::VSELECT:
+    return performVSELECTCombine(N, DAG);
   case RISCVISD::CZERO_EQZ:
   case RISCVISD::CZERO_NEZ: {
     SDValue Val = N->getOperand(0);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll
index ee9609992c049..318f38839851c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll
@@ -9,9 +9,8 @@ define <1 x i32> @select_addsub_v1i32(<1 x i1> %cc, <1 x i32> %a, <1 x i32> %b)
 ; CHECK-LABEL: select_addsub_v1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
-; CHECK-NEXT:    vadd.vv v10, v8, v9
-; CHECK-NEXT:    vsub.vv v10, v8, v9, v0.t
-; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    vrsub.vi v9, v9, 0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %sub = sub <1 x i32> %a, %b
   %add = add <1 x i32> %a, %b
@@ -23,9 +22,8 @@ define <2 x i32> @select_addsub_v2i32(<2 x i1> %cc, <2 x i32> %a, <2 x i32> %b)
 ; CHECK-LABEL: select_addsub_v2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; CHECK-NEXT:    vadd.vv v10, v8, v9
-; CHECK-NEXT:    vsub.vv v10, v8, v9, v0.t
-; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    vrsub.vi v9, v9, 0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %sub = sub <2 x i32> %a, %b
   %add = add <2 x i32> %a, %b
@@ -37,9 +35,8 @@ define <4 x i32> @select_addsub_v4i32(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b)
 ; CHECK-LABEL: select_addsub_v4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vadd.vv v10, v8, v9
-; CHECK-NEXT:    vsub.vv v10, v8, v9, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vrsub.vi v9, v9, 0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %sub = sub <4 x i32> %a, %b
   %add = add <4 x i32> %a, %b
@@ -51,9 +48,9 @@ define <4 x i32> @select_addsub_v4i32_select_swapped(<4 x i1> %cc, <4 x i32> %a,
 ; CHECK-LABEL: select_addsub_v4i32_select_swapped:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vsub.vv v10, v8, v9
-; CHECK-NEXT:    vadd.vv v10, v8, v9, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vmnot.m v0, v0
+; CHECK-NEXT:    vrsub.vi v9, v9, 0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %sub = sub <4 x i32> %a, %b
   %add = add <4 x i32> %a, %b
@@ -65,9 +62,8 @@ define <4 x i32> @select_addsub_v4i32_add_swapped(<4 x i1> %cc, <4 x i32> %a, <4
 ; CHECK-LABEL: select_addsub_v4i32_add_swapped:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vadd.vv v10, v9, v8
-; CHECK-NEXT:    vsub.vv v10, v8, v9, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vrsub.vi v9, v9, 0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %sub = sub <4 x i32> %a, %b
   %add = add <4 x i32> %b, %a
@@ -79,9 +75,9 @@ define <4 x i32> @select_addsub_v4i32_both_swapped(<4 x i1> %cc, <4 x i32> %a, <
 ; CHECK-LABEL: select_addsub_v4i32_both_swapped:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vsub.vv v10, v8, v9
-; CHECK-NEXT:    vadd.vv v10, v9, v8, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vmnot.m v0, v0
+; CHECK-NEXT:    vrsub.vi v9, v9, 0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %sub = sub <4 x i32> %a, %b
   %add = add <4 x i32> %b, %a
@@ -93,12 +89,11 @@ define <4 x i32> @select_addsub_v4i32_sub_swapped(<4 x i1> %cc, <4 x i32> %a, <4
 ; CHECK-LABEL: select_addsub_v4i32_sub_swapped:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vadd.vv v10, v9, v8
-; CHECK-NEXT:    vsub.vv v10, v8, v9, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vrsub.vi v8, v8, 0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8
 ; CHECK-NEXT:    ret
-  %sub = sub <4 x i32> %a, %b
-  %add = add <4 x i32> %b, %a
+  %sub = sub <4 x i32> %b, %a
+  %add = add <4 x i32> %a, %b
   %res = select <4 x i1> %cc, <4 x i32> %sub, <4 x i32> %add
   ret <4 x i32> %res
 }
@@ -107,9 +102,8 @@ define <8 x i32> @select_addsub_v8i32(<8 x i1> %cc, <8 x i32> %a, <8 x i32> %b)
 ; CHECK-LABEL: select_addsub_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT:    vadd.vv v12, v8, v10
-; CHECK-NEXT:    vsub.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vrsub.vi v10, v10, 0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %sub = sub <8 x i32> %a, %b
   %add = add <8 x i32> %a, %b
@@ -121,9 +115,8 @@ define <16 x i32> @select_addsub_v16i32(<16 x i1> %cc, <16 x i32> %a, <16 x i32>
 ; CHECK-LABEL: select_addsub_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; CHECK-NEXT:    vadd.vv v16, v8, v12
-; CHECK-NEXT:    vsub.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vrsub.vi v12, v12, 0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %sub = sub <16 x i32> %a, %b
   %add = add <16 x i32> %a, %b
@@ -136,9 +129,8 @@ define <32 x i32> @select_addsub_v32i32(<32 x i1> %cc, <32 x i32> %a, <32 x i32>
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vadd.vv v24, v8, v16
-; CHECK-NEXT:    vsub.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v24
+; CHECK-NEXT:    vrsub.vi v16, v16, 0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %sub = sub <32 x i32> %a, %b
   %add = add <32 x i32> %a, %b
@@ -153,62 +145,28 @@ define <64 x i32> @select_addsub_v64i32(<64 x i1> %cc, <64 x i32> %a, <64 x i32>
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    mv a2, a1
-; CHECK-NEXT:    slli a1, a1, 1
-; CHECK-NEXT:    add a1, a1, a2
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
-; CHECK-NEXT:    add a1, sp, a1
-; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT:    addi a1, sp, 16
 ; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv8r.v v16, v8
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vle32.v v16, (a0)
+; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    addi a0, a0, 128
 ; CHECK-NEXT:    vle32.v v24, (a0)
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vadd.vv v24, v8, v16
-; CHECK-NEXT:    vsub.vv v24, v8, v16, v0.t
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vrsub.vi v8, v8, 0, v0.t
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v0, v0, 4
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vadd.vv v16, v16, v8
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vsub.vv v16, v24, v8, v0.t
+; CHECK-NEXT:    vadd.vv v8, v16, v8
+; CHECK-NEXT:    vrsub.vi v24, v24, 0, v0.t
 ; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vadd.vv v16, v16, v24
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    mv a1, a0
-; CHECK-NEXT:    slli a0, a0, 1
-; CHECK-NEXT:    add a0, a0, a1
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    .cfi_def_cfa sp, 16
 ; CHECK-NEXT:    addi sp, sp, 16
@@ -224,9 +182,8 @@ define <8 x i64> @select_addsub_v8i64(<8 x i1> %cc, <8 x i64> %a, <8 x i64> %b)
 ; CHECK-LABEL: select_addsub_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; CHECK-NEXT:    vadd.vv v16, v8, v12
-; CHECK-NEXT:    vsub.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    vrsub.vi v12, v12, 0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %sub = sub <8 x i64> %a, %b
   %add = add <8 x i64> %a, %b
@@ -238,9 +195,8 @@ define <8 x i16> @select_addsub_v8i16(<8 x i1> %cc, <8 x i16> %a, <8 x i16> %b)
 ; CHECK-LABEL: select_addsub_v8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT:    vadd.vv v10, v8, v9
-; CHECK-NEXT:    vsub.vv v10, v8, v9, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vrsub.vi v9, v9, 0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %sub = sub <8 x i16> %a, %b
   %add = add <8 x i16> %a, %b
@@ -252,9 +208,8 @@ define <8 x i8> @select_addsub_v8i8(<8 x i1> %cc, <8 x i8> %a, <8 x i8> %b) {
 ; CHECK-LABEL: select_addsub_v8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vadd.vv v10, v8, v9
-; CHECK-NEXT:    vsub.vv v10, v8, v9, v0.t
-; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    vrsub.vi v9, v9, 0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %sub = sub <8 x i8> %a, %b
   %add = add <8 x i8> %a, %b
@@ -278,9 +233,8 @@ define <8 x i2> @select_addsub_v8i2(<8 x i1> %cc, <8 x i2> %a, <8 x i2> %b) {
 ; CHECK-LABEL: select_addsub_v8i2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vadd.vv v10, v8, v9
-; CHECK-NEXT:    vsub.vv v10, v8, v9, v0.t
-; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    vrsub.vi v9, v9, 0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %sub = sub <8 x i2> %a, %b
   %add = add <8 x i2> %a, %b
@@ -293,9 +247,8 @@ define <4 x i32> @select_addsub_v4i32_constmask(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vmv.v.i v0, 5
-; CHECK-NEXT:    vadd.vv v10, v8, v9
-; CHECK-NEXT:    vsub.vv v10, v8, v9, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vrsub.vi v9, v9, 0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %sub = sub <4 x i32> %a, %b
   %add = add <4 x i32> %a, %b
@@ -307,14 +260,13 @@ define <4 x i32> @select_addsub_v4i32_constmask2(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: select_addsub_v4i32_constmask2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vmv.v.i v0, 5
-; CHECK-NEXT:    vadd.vv v10, v9, v8
-; CHECK-NEXT:    vsub.vv v10, v8, v9, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vmv.v.i v0, 10
+; CHECK-NEXT:    vrsub.vi v8, v8, 0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8
 ; CHECK-NEXT:    ret
-  %sub = sub <4 x i32> %a, %b
-  %add = add <4 x i32> %b, %a
-  %res = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %sub, <4 x i32> %add
+  %sub = sub <4 x i32> %b, %a
+  %add = add <4 x i32> %a, %b
+  %res = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %add, <4 x i32> %sub
   ret <4 x i32> %res
 }
 
@@ -324,9 +276,8 @@ define <4 x i32> @select_addsub_v4i32_as_shuffle(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vmv.v.i v0, 5
-; CHECK-NEXT:    vadd.vv v10, v8, v9
-; CHECK-NEXT:    vsub.vv v10, v8, v9, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vrsub.vi v9, v9, 0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %sub = sub <4 x i32> %a, %b
   %add = add <4 x i32> %a, %b
@@ -339,13 +290,12 @@ define <4 x i32> @select_addsub_v4i32_as_shuffle2(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: select_addsub_v4i32_as_shuffle2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vmv.v.i v0, 5
-; CHECK-NEXT:    vadd.vv v10, v8, v9
-; CHECK-NEXT:    vsub.vv v10, v9, v8, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vmv.v.i v0, 10
+; CHECK-NEXT:    vrsub.vi v8, v8, 0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v9, v8
 ; CHECK-NEXT:    ret
   %sub = sub <4 x i32> %b, %a
   %add = add <4 x i32> %a, %b
-  %res = shufflevector <4 x i32> %sub, <4 x i32> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  %res = shufflevector <4 x i32> %add, <4 x i32> %sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x i32> %res
 }
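
For reference, a minimal sketch of the pattern this combine targets, written in the same IR form as the tests above. The function name is illustrative and not part of the patch; the expected instructions are taken from the CHECK lines in the updated test.

; A select between (sub a, b) and (add a, b) over the same operands ...
define <4 x i32> @select_addsub_sketch(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b) {
  %sub = sub <4 x i32> %a, %b
  %add = add <4 x i32> %a, %b
  %res = select <4 x i1> %cc, <4 x i32> %sub, <4 x i32> %add
  ret <4 x i32> %res
}
; ... is rewritten as add %a, (vselect %cc, -%b, %b), so the conditional negate
; becomes a masked vrsub.vi feeding an unmasked vadd.vv rather than an add
; followed by a masked vsub:
;   vrsub.vi v9, v9, 0, v0.t
;   vadd.vv v8, v8, v9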