diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 04dd23d9cdaa2..f2f169d47c0e0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -17341,6 +17341,83 @@ static SDValue combineScalarCTPOPToVCPOP(SDNode *N, SelectionDAG &DAG,
   return DAG.getZExtOrTrunc(Pop, DL, VT);
 }
 
+static SDValue performSHLCombine(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const RISCVSubtarget &Subtarget) {
+  // (shl (zext x), y) -> (vwsll x, y)
+  if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
+    return V;
+
+  // (shl (sext x), C) -> (vwmulsu x, 1u << C)
+  // (shl (zext x), C) -> (vwmulu x, 1u << C)
+
+  if (!DCI.isAfterLegalizeDAG())
+    return SDValue();
+
+  SDValue LHS = N->getOperand(0);
+  if (!LHS.hasOneUse())
+    return SDValue();
+  unsigned Opcode;
+  switch (LHS.getOpcode()) {
+  case ISD::SIGN_EXTEND:
+  case RISCVISD::VSEXT_VL:
+    Opcode = RISCVISD::VWMULSU_VL;
+    break;
+  case ISD::ZERO_EXTEND:
+  case RISCVISD::VZEXT_VL:
+    Opcode = RISCVISD::VWMULU_VL;
+    break;
+  default:
+    return SDValue();
+  }
+
+  SDValue RHS = N->getOperand(1);
+  APInt ShAmt;
+  uint64_t ShAmtInt;
+  if (ISD::isConstantSplatVector(RHS.getNode(), ShAmt))
+    ShAmtInt = ShAmt.getZExtValue();
+  else if (RHS.getOpcode() == RISCVISD::VMV_V_X_VL &&
+           RHS.getOperand(1).getOpcode() == ISD::Constant)
+    ShAmtInt = RHS.getConstantOperandVal(1);
+  else
+    return SDValue();
+
+  // Better foldings:
+  // (shl (sext x), 1) -> (vwadd x, x)
+  // (shl (zext x), 1) -> (vwaddu x, x)
+  if (ShAmtInt <= 1)
+    return SDValue();
+
+  SDValue NarrowOp = LHS.getOperand(0);
+  MVT NarrowVT = NarrowOp.getSimpleValueType();
+  uint64_t NarrowBits = NarrowVT.getScalarSizeInBits();
+  if (ShAmtInt >= NarrowBits)
+    return SDValue();
+  MVT VT = N->getSimpleValueType(0);
+  if (NarrowBits * 2 != VT.getScalarSizeInBits())
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc DL(N);
+  SDValue Passthru, Mask, VL;
+  switch (N->getOpcode()) {
+  case ISD::SHL:
+    Passthru = DAG.getUNDEF(VT);
+    std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
+    break;
+  case RISCVISD::SHL_VL:
+    Passthru = N->getOperand(2);
+    Mask = N->getOperand(3);
+    VL = N->getOperand(4);
+    break;
+  default:
+    llvm_unreachable("Expected SHL");
+  }
+  return DAG.getNode(Opcode, DL, VT, NarrowOp,
+                     DAG.getConstant(1ULL << ShAmtInt, SDLoc(RHS), NarrowVT),
+                     Passthru, Mask, VL);
+}
+
 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -17970,7 +18047,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
     break;
   }
   case RISCVISD::SHL_VL:
-    if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
+    if (SDValue V = performSHLCombine(N, DCI, Subtarget))
       return V;
     [[fallthrough]];
   case RISCVISD::SRA_VL:
@@ -17995,7 +18072,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::SRL:
   case ISD::SHL: {
     if (N->getOpcode() == ISD::SHL) {
-      if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
+      if (SDValue V = performSHLCombine(N, DCI, Subtarget))
        return V;
     }
     SDValue ShAmt = N->getOperand(1);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index c6e12c52122d2..232a364e87f0e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -2730,21 +2730,21 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) { ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vzext.vf2 v9, v8 -; RV32-NEXT: vsll.vi v8, v9, 2 +; RV32-NEXT: li a1, 4 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwmulu.vx v9, v8, a1 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32-NEXT: vluxei16.v v10, (a0), v8, v0.t +; RV32-NEXT: vluxei16.v v10, (a0), v9, v0.t ; RV32-NEXT: vmv.v.v v8, v10 ; RV32-NEXT: ret ; ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i32: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64V-NEXT: vzext.vf2 v9, v8 -; RV64V-NEXT: vsll.vi v8, v9, 2 +; RV64V-NEXT: li a1, 4 +; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64V-NEXT: vwmulu.vx v9, v8, a1 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64V-NEXT: vluxei16.v v10, (a0), v8, v0.t +; RV64V-NEXT: vluxei16.v v10, (a0), v9, v0.t ; RV64V-NEXT: vmv.v.v v8, v10 ; RV64V-NEXT: ret ; @@ -2888,10 +2888,11 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i32> %passthru) { ; RV32-LABEL: mgather_baseidx_v8i16_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: vsext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t +; RV32-NEXT: li a1, 4 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulsu.vx v12, v8, a1 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t ; RV32-NEXT: vmv.v.v v8, v10 ; RV32-NEXT: ret ; @@ -3037,10 +3038,11 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i32> %passthru) { ; RV32-LABEL: mgather_baseidx_sext_v8i16_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: vsext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t +; RV32-NEXT: li a1, 4 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulsu.vx v12, v8, a1 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t ; RV32-NEXT: vmv.v.v v8, v10 ; RV32-NEXT: ret ; @@ -3187,19 +3189,21 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i32> %passthru) { ; RV32-LABEL: mgather_baseidx_zext_v8i16_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: vzext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t +; RV32-NEXT: li a1, 4 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulu.vx v12, v8, a1 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t ; RV32-NEXT: vmv.v.v v8, v10 ; RV32-NEXT: ret ; ; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8i32: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV64V-NEXT: vzext.vf2 v12, v8 -; RV64V-NEXT: vsll.vi v8, v12, 2 -; RV64V-NEXT: vluxei32.v v10, (a0), v8, v0.t +; RV64V-NEXT: li a1, 4 +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vwmulu.vx v12, v8, a1 +; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64V-NEXT: vluxei32.v 
v10, (a0), v12, v0.t ; RV64V-NEXT: vmv.v.v v8, v10 ; RV64V-NEXT: ret ; @@ -3352,10 +3356,9 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, ; ; RV64V-LABEL: mgather_baseidx_v8i32: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vsext.vf2 v12, v8 -; RV64V-NEXT: vsll.vi v12, v12, 2 -; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64V-NEXT: li a1, 4 +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV64V-NEXT: vwmulsu.vx v12, v8, a1 ; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t ; RV64V-NEXT: vmv.v.v v8, v10 ; RV64V-NEXT: ret @@ -4636,21 +4639,21 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) { ; RV32V-LABEL: mgather_baseidx_zext_v8i8_v8i64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32V-NEXT: vzext.vf2 v9, v8 -; RV32V-NEXT: vsll.vi v8, v9, 3 +; RV32V-NEXT: li a1, 8 +; RV32V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32V-NEXT: vwmulu.vx v9, v8, a1 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vluxei16.v v12, (a0), v8, v0.t +; RV32V-NEXT: vluxei16.v v12, (a0), v9, v0.t ; RV32V-NEXT: vmv.v.v v8, v12 ; RV32V-NEXT: ret ; ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64V-NEXT: vzext.vf2 v9, v8 -; RV64V-NEXT: vsll.vi v8, v9, 3 +; RV64V-NEXT: li a1, 8 +; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64V-NEXT: vwmulu.vx v9, v8, a1 ; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV64V-NEXT: vluxei16.v v12, (a0), v8, v0.t +; RV64V-NEXT: vluxei16.v v12, (a0), v9, v0.t ; RV64V-NEXT: vmv.v.v v8, v12 ; RV64V-NEXT: ret ; @@ -4923,11 +4926,11 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) { ; RV32V-LABEL: mgather_baseidx_v8i16_v8i64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32V-NEXT: vsext.vf2 v10, v8 -; RV32V-NEXT: vsll.vi v8, v10, 3 +; RV32V-NEXT: li a1, 8 +; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32V-NEXT: vwmulsu.vx v10, v8, a1 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t +; RV32V-NEXT: vluxei32.v v12, (a0), v10, v0.t ; RV32V-NEXT: vmv.v.v v8, v12 ; RV32V-NEXT: ret ; @@ -4943,16 +4946,17 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i ; RV32ZVE32F-LABEL: mgather_baseidx_v8i16_v8i64: ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 +; RV32ZVE32F-NEXT: vmv.v.x v10, a1 +; RV32ZVE32F-NEXT: li a1, 8 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 ; RV32ZVE32F-NEXT: andi a3, t0, 1 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32ZVE32F-NEXT: vwmaccus.vx v10, a1, v8 ; RV32ZVE32F-NEXT: beqz a3, .LBB51_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load -; RV32ZVE32F-NEXT: vmv.x.s a3, v8 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s a3, v10 ; RV32ZVE32F-NEXT: lw a1, 0(a3) ; RV32ZVE32F-NEXT: lw a3, 4(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 @@ -4988,40 +4992,40 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, 
<8 x i16> %idxs, <8 x i ; RV32ZVE32F-NEXT: beqz a4, .LBB51_2 ; RV32ZVE32F-NEXT: .LBB51_8: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a5, v10 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1 +; RV32ZVE32F-NEXT: vmv.x.s a5, v8 ; RV32ZVE32F-NEXT: lw a4, 0(a5) ; RV32ZVE32F-NEXT: lw a5, 4(a5) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: beqz a6, .LBB51_3 ; RV32ZVE32F-NEXT: .LBB51_9: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a7, v10 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV32ZVE32F-NEXT: vmv.x.s a7, v8 ; RV32ZVE32F-NEXT: lw a6, 0(a7) ; RV32ZVE32F-NEXT: lw a7, 4(a7) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: beqz t1, .LBB51_4 ; RV32ZVE32F-NEXT: .LBB51_10: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s t2, v10 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3 +; RV32ZVE32F-NEXT: vmv.x.s t2, v8 ; RV32ZVE32F-NEXT: lw t1, 0(t2) ; RV32ZVE32F-NEXT: lw t2, 4(t2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: beqz t3, .LBB51_5 ; RV32ZVE32F-NEXT: .LBB51_11: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s t4, v10 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4 +; RV32ZVE32F-NEXT: vmv.x.s t4, v8 ; RV32ZVE32F-NEXT: lw t3, 0(t4) ; RV32ZVE32F-NEXT: lw t4, 4(t4) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: beqz t5, .LBB51_6 ; RV32ZVE32F-NEXT: .LBB51_12: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s t6, v10 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5 +; RV32ZVE32F-NEXT: vmv.x.s t6, v8 ; RV32ZVE32F-NEXT: lw t5, 0(t6) ; RV32ZVE32F-NEXT: lw t6, 4(t6) ; RV32ZVE32F-NEXT: .LBB51_13: # %else14 @@ -5035,8 +5039,8 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i ; RV32ZVE32F-NEXT: beqz s0, .LBB51_16 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s s1, v10 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6 +; RV32ZVE32F-NEXT: vmv.x.s s1, v8 ; RV32ZVE32F-NEXT: lw s0, 0(s1) ; RV32ZVE32F-NEXT: lw s1, 4(s1) ; RV32ZVE32F-NEXT: andi t0, t0, -128 @@ -5052,7 +5056,7 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i ; RV32ZVE32F-NEXT: beqz t0, .LBB51_15 ; RV32ZVE32F-NEXT: .LBB51_17: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 ; RV32ZVE32F-NEXT: lw t0, 0(a2) ; RV32ZVE32F-NEXT: lw a2, 4(a2) @@ -5201,11 +5205,11 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) { ; RV32V-LABEL: mgather_baseidx_sext_v8i16_v8i64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32V-NEXT: vsext.vf2 v10, v8 -; RV32V-NEXT: vsll.vi v8, v10, 3 +; RV32V-NEXT: li a1, 8 +; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32V-NEXT: vwmulsu.vx v10, v8, a1 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t +; RV32V-NEXT: 
vluxei32.v v12, (a0), v10, v0.t ; RV32V-NEXT: vmv.v.v v8, v12 ; RV32V-NEXT: ret ; @@ -5221,16 +5225,17 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8i64: ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 +; RV32ZVE32F-NEXT: vmv.v.x v10, a1 +; RV32ZVE32F-NEXT: li a1, 8 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 ; RV32ZVE32F-NEXT: andi a3, t0, 1 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32ZVE32F-NEXT: vwmaccus.vx v10, a1, v8 ; RV32ZVE32F-NEXT: beqz a3, .LBB52_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load -; RV32ZVE32F-NEXT: vmv.x.s a3, v8 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s a3, v10 ; RV32ZVE32F-NEXT: lw a1, 0(a3) ; RV32ZVE32F-NEXT: lw a3, 4(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 @@ -5266,40 +5271,40 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV32ZVE32F-NEXT: beqz a4, .LBB52_2 ; RV32ZVE32F-NEXT: .LBB52_8: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a5, v10 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1 +; RV32ZVE32F-NEXT: vmv.x.s a5, v8 ; RV32ZVE32F-NEXT: lw a4, 0(a5) ; RV32ZVE32F-NEXT: lw a5, 4(a5) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: beqz a6, .LBB52_3 ; RV32ZVE32F-NEXT: .LBB52_9: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a7, v10 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV32ZVE32F-NEXT: vmv.x.s a7, v8 ; RV32ZVE32F-NEXT: lw a6, 0(a7) ; RV32ZVE32F-NEXT: lw a7, 4(a7) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: beqz t1, .LBB52_4 ; RV32ZVE32F-NEXT: .LBB52_10: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s t2, v10 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3 +; RV32ZVE32F-NEXT: vmv.x.s t2, v8 ; RV32ZVE32F-NEXT: lw t1, 0(t2) ; RV32ZVE32F-NEXT: lw t2, 4(t2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: beqz t3, .LBB52_5 ; RV32ZVE32F-NEXT: .LBB52_11: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s t4, v10 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4 +; RV32ZVE32F-NEXT: vmv.x.s t4, v8 ; RV32ZVE32F-NEXT: lw t3, 0(t4) ; RV32ZVE32F-NEXT: lw t4, 4(t4) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: beqz t5, .LBB52_6 ; RV32ZVE32F-NEXT: .LBB52_12: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s t6, v10 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5 +; RV32ZVE32F-NEXT: vmv.x.s t6, v8 ; RV32ZVE32F-NEXT: lw t5, 0(t6) ; RV32ZVE32F-NEXT: lw t6, 4(t6) ; RV32ZVE32F-NEXT: .LBB52_13: # %else14 @@ -5313,8 +5318,8 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV32ZVE32F-NEXT: beqz s0, .LBB52_16 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s s1, v10 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6 +; RV32ZVE32F-NEXT: vmv.x.s s1, 
v8 ; RV32ZVE32F-NEXT: lw s0, 0(s1) ; RV32ZVE32F-NEXT: lw s1, 4(s1) ; RV32ZVE32F-NEXT: andi t0, t0, -128 @@ -5330,7 +5335,7 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV32ZVE32F-NEXT: beqz t0, .LBB52_15 ; RV32ZVE32F-NEXT: .LBB52_17: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 ; RV32ZVE32F-NEXT: lw t0, 0(a2) ; RV32ZVE32F-NEXT: lw a2, 4(a2) @@ -5480,37 +5485,38 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) { ; RV32V-LABEL: mgather_baseidx_zext_v8i16_v8i64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32V-NEXT: vzext.vf2 v10, v8 -; RV32V-NEXT: vsll.vi v8, v10, 3 +; RV32V-NEXT: li a1, 8 +; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32V-NEXT: vwmulu.vx v10, v8, a1 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t +; RV32V-NEXT: vluxei32.v v12, (a0), v10, v0.t ; RV32V-NEXT: vmv.v.v v8, v12 ; RV32V-NEXT: ret ; ; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8i64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64V-NEXT: vzext.vf2 v10, v8 -; RV64V-NEXT: vsll.vi v8, v10, 3 +; RV64V-NEXT: li a1, 8 +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vwmulu.vx v10, v8, a1 ; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV64V-NEXT: vluxei32.v v12, (a0), v8, v0.t +; RV64V-NEXT: vluxei32.v v12, (a0), v10, v0.t ; RV64V-NEXT: vmv.v.v v8, v12 ; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i64: ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vzext.vf2 v10, v8 +; RV32ZVE32F-NEXT: vmv.v.x v10, a1 +; RV32ZVE32F-NEXT: li a1, 8 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 ; RV32ZVE32F-NEXT: andi a3, t0, 1 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32ZVE32F-NEXT: vwmaccu.vx v10, a1, v8 ; RV32ZVE32F-NEXT: beqz a3, .LBB53_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load -; RV32ZVE32F-NEXT: vmv.x.s a3, v8 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s a3, v10 ; RV32ZVE32F-NEXT: lw a1, 0(a3) ; RV32ZVE32F-NEXT: lw a3, 4(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 @@ -5546,40 +5552,40 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV32ZVE32F-NEXT: beqz a4, .LBB53_2 ; RV32ZVE32F-NEXT: .LBB53_8: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a5, v10 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1 +; RV32ZVE32F-NEXT: vmv.x.s a5, v8 ; RV32ZVE32F-NEXT: lw a4, 0(a5) ; RV32ZVE32F-NEXT: lw a5, 4(a5) ; RV32ZVE32F-NEXT: andi a6, t0, 4 ; RV32ZVE32F-NEXT: beqz a6, .LBB53_3 ; RV32ZVE32F-NEXT: .LBB53_9: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a7, v10 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV32ZVE32F-NEXT: vmv.x.s a7, v8 ; RV32ZVE32F-NEXT: lw a6, 0(a7) ; RV32ZVE32F-NEXT: lw a7, 4(a7) ; RV32ZVE32F-NEXT: andi t1, t0, 8 ; RV32ZVE32F-NEXT: beqz t1, .LBB53_4 ; 
RV32ZVE32F-NEXT: .LBB53_10: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s t2, v10 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3 +; RV32ZVE32F-NEXT: vmv.x.s t2, v8 ; RV32ZVE32F-NEXT: lw t1, 0(t2) ; RV32ZVE32F-NEXT: lw t2, 4(t2) ; RV32ZVE32F-NEXT: andi t3, t0, 16 ; RV32ZVE32F-NEXT: beqz t3, .LBB53_5 ; RV32ZVE32F-NEXT: .LBB53_11: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s t4, v10 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4 +; RV32ZVE32F-NEXT: vmv.x.s t4, v8 ; RV32ZVE32F-NEXT: lw t3, 0(t4) ; RV32ZVE32F-NEXT: lw t4, 4(t4) ; RV32ZVE32F-NEXT: andi t5, t0, 32 ; RV32ZVE32F-NEXT: beqz t5, .LBB53_6 ; RV32ZVE32F-NEXT: .LBB53_12: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s t6, v10 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5 +; RV32ZVE32F-NEXT: vmv.x.s t6, v8 ; RV32ZVE32F-NEXT: lw t5, 0(t6) ; RV32ZVE32F-NEXT: lw t6, 4(t6) ; RV32ZVE32F-NEXT: .LBB53_13: # %else14 @@ -5593,8 +5599,8 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV32ZVE32F-NEXT: beqz s0, .LBB53_16 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s s1, v10 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6 +; RV32ZVE32F-NEXT: vmv.x.s s1, v8 ; RV32ZVE32F-NEXT: lw s0, 0(s1) ; RV32ZVE32F-NEXT: lw s1, 4(s1) ; RV32ZVE32F-NEXT: andi t0, t0, -128 @@ -5610,7 +5616,7 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV32ZVE32F-NEXT: beqz t0, .LBB53_15 ; RV32ZVE32F-NEXT: .LBB53_17: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 ; RV32ZVE32F-NEXT: lw t0, 0(a2) ; RV32ZVE32F-NEXT: lw a2, 4(a2) @@ -5777,10 +5783,11 @@ define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i ; ; RV64V-LABEL: mgather_baseidx_v8i32_v8i64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64V-NEXT: vsext.vf2 v16, v8 -; RV64V-NEXT: vsll.vi v8, v16, 3 -; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t +; RV64V-NEXT: li a1, 8 +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-NEXT: vwmulsu.vx v16, v8, a1 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t ; RV64V-NEXT: vmv.v.v v8, v12 ; RV64V-NEXT: ret ; @@ -6053,10 +6060,11 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; ; RV64V-LABEL: mgather_baseidx_sext_v8i32_v8i64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64V-NEXT: vsext.vf2 v16, v8 -; RV64V-NEXT: vsll.vi v8, v16, 3 -; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t +; RV64V-NEXT: li a1, 8 +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-NEXT: vwmulsu.vx v16, v8, a1 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t ; RV64V-NEXT: vmv.v.v v8, v12 ; RV64V-NEXT: ret ; @@ -6330,10 +6338,11 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; ; RV64V-LABEL: mgather_baseidx_zext_v8i32_v8i64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64V-NEXT: vzext.vf2 v16, v8 -; RV64V-NEXT: vsll.vi v8, v16, 3 -; 
RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t +; RV64V-NEXT: li a1, 8 +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-NEXT: vwmulu.vx v16, v8, a1 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t ; RV64V-NEXT: vmv.v.v v8, v12 ; RV64V-NEXT: ret ; @@ -10032,21 +10041,21 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x float> %passthru) { ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vzext.vf2 v9, v8 -; RV32-NEXT: vsll.vi v8, v9, 2 +; RV32-NEXT: li a1, 4 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwmulu.vx v9, v8, a1 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32-NEXT: vluxei16.v v10, (a0), v8, v0.t +; RV32-NEXT: vluxei16.v v10, (a0), v9, v0.t ; RV32-NEXT: vmv.v.v v8, v10 ; RV32-NEXT: ret ; ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f32: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64V-NEXT: vzext.vf2 v9, v8 -; RV64V-NEXT: vsll.vi v8, v9, 2 +; RV64V-NEXT: li a1, 4 +; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64V-NEXT: vwmulu.vx v9, v8, a1 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64V-NEXT: vluxei16.v v10, (a0), v8, v0.t +; RV64V-NEXT: vluxei16.v v10, (a0), v9, v0.t ; RV64V-NEXT: vmv.v.v v8, v10 ; RV64V-NEXT: ret ; @@ -10190,10 +10199,11 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x float> %passthru) { ; RV32-LABEL: mgather_baseidx_v8i16_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: vsext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t +; RV32-NEXT: li a1, 4 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulsu.vx v12, v8, a1 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t ; RV32-NEXT: vmv.v.v v8, v10 ; RV32-NEXT: ret ; @@ -10339,10 +10349,11 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x float> %passthru) { ; RV32-LABEL: mgather_baseidx_sext_v8i16_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: vsext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t +; RV32-NEXT: li a1, 4 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulsu.vx v12, v8, a1 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t ; RV32-NEXT: vmv.v.v v8, v10 ; RV32-NEXT: ret ; @@ -10489,19 +10500,21 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x float> %passthru) { ; RV32-LABEL: mgather_baseidx_zext_v8i16_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: vzext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t +; RV32-NEXT: li a1, 4 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulu.vx v12, v8, a1 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t ; RV32-NEXT: vmv.v.v v8, v10 ; 
RV32-NEXT: ret ; ; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8f32: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV64V-NEXT: vzext.vf2 v12, v8 -; RV64V-NEXT: vsll.vi v8, v12, 2 -; RV64V-NEXT: vluxei32.v v10, (a0), v8, v0.t +; RV64V-NEXT: li a1, 4 +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vwmulu.vx v12, v8, a1 +; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64V-NEXT: vluxei32.v v10, (a0), v12, v0.t ; RV64V-NEXT: vmv.v.v v8, v10 ; RV64V-NEXT: ret ; @@ -10654,10 +10667,9 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> % ; ; RV64V-LABEL: mgather_baseidx_v8f32: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vsext.vf2 v12, v8 -; RV64V-NEXT: vsll.vi v12, v12, 2 -; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64V-NEXT: li a1, 4 +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV64V-NEXT: vwmulsu.vx v12, v8, a1 ; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t ; RV64V-NEXT: vmv.v.v v8, v10 ; RV64V-NEXT: ret @@ -11702,21 +11714,21 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) { ; RV32V-LABEL: mgather_baseidx_zext_v8i8_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32V-NEXT: vzext.vf2 v9, v8 -; RV32V-NEXT: vsll.vi v8, v9, 3 +; RV32V-NEXT: li a1, 8 +; RV32V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32V-NEXT: vwmulu.vx v9, v8, a1 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vluxei16.v v12, (a0), v8, v0.t +; RV32V-NEXT: vluxei16.v v12, (a0), v9, v0.t ; RV32V-NEXT: vmv.v.v v8, v12 ; RV32V-NEXT: ret ; ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64V-NEXT: vzext.vf2 v9, v8 -; RV64V-NEXT: vsll.vi v8, v9, 3 +; RV64V-NEXT: li a1, 8 +; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64V-NEXT: vwmulu.vx v9, v8, a1 ; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV64V-NEXT: vluxei16.v v12, (a0), v8, v0.t +; RV64V-NEXT: vluxei16.v v12, (a0), v9, v0.t ; RV64V-NEXT: vmv.v.v v8, v12 ; RV64V-NEXT: ret ; @@ -11927,11 +11939,11 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) { ; RV32V-LABEL: mgather_baseidx_v8i16_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32V-NEXT: vsext.vf2 v10, v8 -; RV32V-NEXT: vsll.vi v8, v10, 3 +; RV32V-NEXT: li a1, 8 +; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32V-NEXT: vwmulsu.vx v10, v8, a1 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t +; RV32V-NEXT: vluxei32.v v12, (a0), v10, v0.t ; RV32V-NEXT: vmv.v.v v8, v12 ; RV32V-NEXT: ret ; @@ -11947,38 +11959,38 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 ; RV32ZVE32F-LABEL: mgather_baseidx_v8i16_v8f64: ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 +; RV32ZVE32F-NEXT: vmv.v.x v10, a1 +; RV32ZVE32F-NEXT: li a2, 8 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v0 -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: andi a3, a2, 1 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: vmv.x.s 
a1, v0 +; RV32ZVE32F-NEXT: andi a3, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32ZVE32F-NEXT: vwmaccus.vx v10, a2, v8 ; RV32ZVE32F-NEXT: bnez a3, .LBB100_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB100_11 +; RV32ZVE32F-NEXT: andi a2, a1, 2 +; RV32ZVE32F-NEXT: bnez a2, .LBB100_11 ; RV32ZVE32F-NEXT: .LBB100_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB100_12 +; RV32ZVE32F-NEXT: andi a2, a1, 4 +; RV32ZVE32F-NEXT: bnez a2, .LBB100_12 ; RV32ZVE32F-NEXT: .LBB100_3: # %else5 -; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB100_13 +; RV32ZVE32F-NEXT: andi a2, a1, 8 +; RV32ZVE32F-NEXT: bnez a2, .LBB100_13 ; RV32ZVE32F-NEXT: .LBB100_4: # %else8 -; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB100_14 +; RV32ZVE32F-NEXT: andi a2, a1, 16 +; RV32ZVE32F-NEXT: bnez a2, .LBB100_14 ; RV32ZVE32F-NEXT: .LBB100_5: # %else11 -; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB100_15 +; RV32ZVE32F-NEXT: andi a2, a1, 32 +; RV32ZVE32F-NEXT: bnez a2, .LBB100_15 ; RV32ZVE32F-NEXT: .LBB100_6: # %else14 -; RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB100_16 +; RV32ZVE32F-NEXT: andi a2, a1, 64 +; RV32ZVE32F-NEXT: bnez a2, .LBB100_16 ; RV32ZVE32F-NEXT: .LBB100_7: # %else17 -; RV32ZVE32F-NEXT: andi a1, a2, -128 +; RV32ZVE32F-NEXT: andi a1, a1, -128 ; RV32ZVE32F-NEXT: beqz a1, .LBB100_9 ; RV32ZVE32F-NEXT: .LBB100_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa7, 0(a1) ; RV32ZVE32F-NEXT: .LBB100_9: # %else20 @@ -11992,51 +12004,52 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB100_10: # %cond.load -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fld fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB100_2 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s a2, v10 +; RV32ZVE32F-NEXT: fld fa0, 0(a2) +; RV32ZVE32F-NEXT: andi a2, a1, 2 +; RV32ZVE32F-NEXT: beqz a2, .LBB100_2 ; RV32ZVE32F-NEXT: .LBB100_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fld fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB100_3 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1 +; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: fld fa1, 0(a2) +; RV32ZVE32F-NEXT: andi a2, a1, 4 +; RV32ZVE32F-NEXT: beqz a2, .LBB100_3 ; RV32ZVE32F-NEXT: .LBB100_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fld fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB100_4 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: fld fa2, 0(a2) +; RV32ZVE32F-NEXT: andi a2, a1, 8 +; RV32ZVE32F-NEXT: beqz a2, .LBB100_4 ; RV32ZVE32F-NEXT: .LBB100_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fld fa3, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: beqz a1, 
.LBB100_5 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3 +; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: fld fa3, 0(a2) +; RV32ZVE32F-NEXT: andi a2, a1, 16 +; RV32ZVE32F-NEXT: beqz a2, .LBB100_5 ; RV32ZVE32F-NEXT: .LBB100_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fld fa4, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB100_6 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4 +; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: fld fa4, 0(a2) +; RV32ZVE32F-NEXT: andi a2, a1, 32 +; RV32ZVE32F-NEXT: beqz a2, .LBB100_6 ; RV32ZVE32F-NEXT: .LBB100_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fld fa5, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB100_7 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5 +; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: fld fa5, 0(a2) +; RV32ZVE32F-NEXT: andi a2, a1, 64 +; RV32ZVE32F-NEXT: beqz a2, .LBB100_7 ; RV32ZVE32F-NEXT: .LBB100_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fld fa6, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a2, -128 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6 +; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: fld fa6, 0(a2) +; RV32ZVE32F-NEXT: andi a1, a1, -128 ; RV32ZVE32F-NEXT: bnez a1, .LBB100_8 ; RV32ZVE32F-NEXT: j .LBB100_9 ; @@ -12143,11 +12156,11 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) { ; RV32V-LABEL: mgather_baseidx_sext_v8i16_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32V-NEXT: vsext.vf2 v10, v8 -; RV32V-NEXT: vsll.vi v8, v10, 3 +; RV32V-NEXT: li a1, 8 +; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32V-NEXT: vwmulsu.vx v10, v8, a1 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t +; RV32V-NEXT: vluxei32.v v12, (a0), v10, v0.t ; RV32V-NEXT: vmv.v.v v8, v12 ; RV32V-NEXT: ret ; @@ -12163,38 +12176,38 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8f64: ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 +; RV32ZVE32F-NEXT: vmv.v.x v10, a1 +; RV32ZVE32F-NEXT: li a2, 8 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v0 -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: andi a3, a2, 1 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi a3, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32ZVE32F-NEXT: vwmaccus.vx v10, a2, v8 ; RV32ZVE32F-NEXT: bnez a3, .LBB101_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB101_11 +; RV32ZVE32F-NEXT: andi a2, a1, 2 +; RV32ZVE32F-NEXT: bnez a2, .LBB101_11 ; RV32ZVE32F-NEXT: .LBB101_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB101_12 +; RV32ZVE32F-NEXT: andi a2, a1, 4 +; RV32ZVE32F-NEXT: bnez a2, .LBB101_12 ; RV32ZVE32F-NEXT: 
.LBB101_3: # %else5 -; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB101_13 +; RV32ZVE32F-NEXT: andi a2, a1, 8 +; RV32ZVE32F-NEXT: bnez a2, .LBB101_13 ; RV32ZVE32F-NEXT: .LBB101_4: # %else8 -; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB101_14 +; RV32ZVE32F-NEXT: andi a2, a1, 16 +; RV32ZVE32F-NEXT: bnez a2, .LBB101_14 ; RV32ZVE32F-NEXT: .LBB101_5: # %else11 -; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB101_15 +; RV32ZVE32F-NEXT: andi a2, a1, 32 +; RV32ZVE32F-NEXT: bnez a2, .LBB101_15 ; RV32ZVE32F-NEXT: .LBB101_6: # %else14 -; RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB101_16 +; RV32ZVE32F-NEXT: andi a2, a1, 64 +; RV32ZVE32F-NEXT: bnez a2, .LBB101_16 ; RV32ZVE32F-NEXT: .LBB101_7: # %else17 -; RV32ZVE32F-NEXT: andi a1, a2, -128 +; RV32ZVE32F-NEXT: andi a1, a1, -128 ; RV32ZVE32F-NEXT: beqz a1, .LBB101_9 ; RV32ZVE32F-NEXT: .LBB101_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa7, 0(a1) ; RV32ZVE32F-NEXT: .LBB101_9: # %else20 @@ -12208,51 +12221,52 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB101_10: # %cond.load -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fld fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB101_2 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s a2, v10 +; RV32ZVE32F-NEXT: fld fa0, 0(a2) +; RV32ZVE32F-NEXT: andi a2, a1, 2 +; RV32ZVE32F-NEXT: beqz a2, .LBB101_2 ; RV32ZVE32F-NEXT: .LBB101_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fld fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB101_3 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1 +; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: fld fa1, 0(a2) +; RV32ZVE32F-NEXT: andi a2, a1, 4 +; RV32ZVE32F-NEXT: beqz a2, .LBB101_3 ; RV32ZVE32F-NEXT: .LBB101_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fld fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB101_4 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: fld fa2, 0(a2) +; RV32ZVE32F-NEXT: andi a2, a1, 8 +; RV32ZVE32F-NEXT: beqz a2, .LBB101_4 ; RV32ZVE32F-NEXT: .LBB101_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fld fa3, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB101_5 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3 +; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: fld fa3, 0(a2) +; RV32ZVE32F-NEXT: andi a2, a1, 16 +; RV32ZVE32F-NEXT: beqz a2, .LBB101_5 ; RV32ZVE32F-NEXT: .LBB101_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fld fa4, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB101_6 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4 +; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; 
RV32ZVE32F-NEXT: fld fa4, 0(a2) +; RV32ZVE32F-NEXT: andi a2, a1, 32 +; RV32ZVE32F-NEXT: beqz a2, .LBB101_6 ; RV32ZVE32F-NEXT: .LBB101_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fld fa5, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB101_7 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5 +; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: fld fa5, 0(a2) +; RV32ZVE32F-NEXT: andi a2, a1, 64 +; RV32ZVE32F-NEXT: beqz a2, .LBB101_7 ; RV32ZVE32F-NEXT: .LBB101_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fld fa6, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a2, -128 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6 +; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: fld fa6, 0(a2) +; RV32ZVE32F-NEXT: andi a1, a1, -128 ; RV32ZVE32F-NEXT: bnez a1, .LBB101_8 ; RV32ZVE32F-NEXT: j .LBB101_9 ; @@ -12360,59 +12374,59 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) { ; RV32V-LABEL: mgather_baseidx_zext_v8i16_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32V-NEXT: vzext.vf2 v10, v8 -; RV32V-NEXT: vsll.vi v8, v10, 3 +; RV32V-NEXT: li a1, 8 +; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32V-NEXT: vwmulu.vx v10, v8, a1 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t +; RV32V-NEXT: vluxei32.v v12, (a0), v10, v0.t ; RV32V-NEXT: vmv.v.v v8, v12 ; RV32V-NEXT: ret ; ; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8f64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64V-NEXT: vzext.vf2 v10, v8 -; RV64V-NEXT: vsll.vi v8, v10, 3 +; RV64V-NEXT: li a1, 8 +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vwmulu.vx v10, v8, a1 ; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV64V-NEXT: vluxei32.v v12, (a0), v8, v0.t +; RV64V-NEXT: vluxei32.v v12, (a0), v10, v0.t ; RV64V-NEXT: vmv.v.v v8, v12 ; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f64: ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vzext.vf2 v10, v8 +; RV32ZVE32F-NEXT: vmv.v.x v10, a1 +; RV32ZVE32F-NEXT: li a2, 8 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v0 -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: andi a3, a2, 1 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi a3, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32ZVE32F-NEXT: vwmaccu.vx v10, a2, v8 ; RV32ZVE32F-NEXT: bnez a3, .LBB102_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB102_11 +; RV32ZVE32F-NEXT: andi a2, a1, 2 +; RV32ZVE32F-NEXT: bnez a2, .LBB102_11 ; RV32ZVE32F-NEXT: .LBB102_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB102_12 +; RV32ZVE32F-NEXT: andi a2, a1, 4 +; RV32ZVE32F-NEXT: bnez a2, .LBB102_12 ; RV32ZVE32F-NEXT: .LBB102_3: # %else5 -; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB102_13 +; RV32ZVE32F-NEXT: andi a2, a1, 8 +; RV32ZVE32F-NEXT: bnez a2, .LBB102_13 ; RV32ZVE32F-NEXT: 
.LBB102_4: # %else8 -; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB102_14 +; RV32ZVE32F-NEXT: andi a2, a1, 16 +; RV32ZVE32F-NEXT: bnez a2, .LBB102_14 ; RV32ZVE32F-NEXT: .LBB102_5: # %else11 -; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB102_15 +; RV32ZVE32F-NEXT: andi a2, a1, 32 +; RV32ZVE32F-NEXT: bnez a2, .LBB102_15 ; RV32ZVE32F-NEXT: .LBB102_6: # %else14 -; RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB102_16 +; RV32ZVE32F-NEXT: andi a2, a1, 64 +; RV32ZVE32F-NEXT: bnez a2, .LBB102_16 ; RV32ZVE32F-NEXT: .LBB102_7: # %else17 -; RV32ZVE32F-NEXT: andi a1, a2, -128 +; RV32ZVE32F-NEXT: andi a1, a1, -128 ; RV32ZVE32F-NEXT: beqz a1, .LBB102_9 ; RV32ZVE32F-NEXT: .LBB102_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fld fa7, 0(a1) ; RV32ZVE32F-NEXT: .LBB102_9: # %else20 @@ -12426,51 +12440,52 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB102_10: # %cond.load -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fld fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a2, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB102_2 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s a2, v10 +; RV32ZVE32F-NEXT: fld fa0, 0(a2) +; RV32ZVE32F-NEXT: andi a2, a1, 2 +; RV32ZVE32F-NEXT: beqz a2, .LBB102_2 ; RV32ZVE32F-NEXT: .LBB102_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fld fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a2, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB102_3 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1 +; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: fld fa1, 0(a2) +; RV32ZVE32F-NEXT: andi a2, a1, 4 +; RV32ZVE32F-NEXT: beqz a2, .LBB102_3 ; RV32ZVE32F-NEXT: .LBB102_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fld fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a2, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB102_4 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: fld fa2, 0(a2) +; RV32ZVE32F-NEXT: andi a2, a1, 8 +; RV32ZVE32F-NEXT: beqz a2, .LBB102_4 ; RV32ZVE32F-NEXT: .LBB102_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fld fa3, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a2, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB102_5 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3 +; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: fld fa3, 0(a2) +; RV32ZVE32F-NEXT: andi a2, a1, 16 +; RV32ZVE32F-NEXT: beqz a2, .LBB102_5 ; RV32ZVE32F-NEXT: .LBB102_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fld fa4, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a2, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB102_6 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4 +; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: fld fa4, 0(a2) +; RV32ZVE32F-NEXT: andi a2, a1, 32 +; RV32ZVE32F-NEXT: beqz a2, .LBB102_6 ; RV32ZVE32F-NEXT: .LBB102_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, 
e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fld fa5, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a2, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB102_7 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5 +; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: fld fa5, 0(a2) +; RV32ZVE32F-NEXT: andi a2, a1, 64 +; RV32ZVE32F-NEXT: beqz a2, .LBB102_7 ; RV32ZVE32F-NEXT: .LBB102_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fld fa6, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a2, -128 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6 +; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: fld fa6, 0(a2) +; RV32ZVE32F-NEXT: andi a1, a1, -128 ; RV32ZVE32F-NEXT: bnez a1, .LBB102_8 ; RV32ZVE32F-NEXT: j .LBB102_9 ; @@ -12595,10 +12610,11 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 ; ; RV64V-LABEL: mgather_baseidx_v8i32_v8f64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64V-NEXT: vsext.vf2 v16, v8 -; RV64V-NEXT: vsll.vi v8, v16, 3 -; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t +; RV64V-NEXT: li a1, 8 +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-NEXT: vwmulsu.vx v16, v8, a1 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t ; RV64V-NEXT: vmv.v.v v8, v12 ; RV64V-NEXT: ret ; @@ -12809,10 +12825,11 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; ; RV64V-LABEL: mgather_baseidx_sext_v8i32_v8f64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64V-NEXT: vsext.vf2 v16, v8 -; RV64V-NEXT: vsll.vi v8, v16, 3 -; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t +; RV64V-NEXT: li a1, 8 +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-NEXT: vwmulsu.vx v16, v8, a1 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t ; RV64V-NEXT: vmv.v.v v8, v12 ; RV64V-NEXT: ret ; @@ -13024,10 +13041,11 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; ; RV64V-LABEL: mgather_baseidx_zext_v8i32_v8f64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64V-NEXT: vzext.vf2 v16, v8 -; RV64V-NEXT: vsll.vi v8, v16, 3 -; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t +; RV64V-NEXT: li a1, 8 +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-NEXT: vwmulu.vx v16, v8, a1 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t ; RV64V-NEXT: vmv.v.v v8, v12 ; RV64V-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll index 7ec4726925704..7fe14064a5bf1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -2148,20 +2148,20 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8 define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vzext.vf2 v11, v10 -; RV32-NEXT: vsll.vi v10, v11, 2 +; RV32-NEXT: li a1, 4 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwmulu.vx v11, v10, a1 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32-NEXT: vsoxei16.v v8, (a0), v10, 
v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v11, v0.t ; RV32-NEXT: ret ; ; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8i32: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64V-NEXT: vzext.vf2 v11, v10 -; RV64V-NEXT: vsll.vi v10, v11, 2 +; RV64V-NEXT: li a1, 4 +; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64V-NEXT: vwmulu.vx v11, v10, a1 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64V-NEXT: vsoxei16.v v8, (a0), v10, v0.t +; RV64V-NEXT: vsoxei16.v v8, (a0), v11, v0.t ; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i32: @@ -2295,10 +2295,11 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8 define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_v8i16_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vsext.vf2 v12, v10 -; RV32-NEXT: vsll.vi v10, v12, 2 -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: li a1, 4 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulsu.vx v12, v10, a1 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64V-LABEL: mscatter_baseidx_v8i16_v8i32: @@ -2433,10 +2434,11 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> % define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_sext_v8i16_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vsext.vf2 v12, v10 -; RV32-NEXT: vsll.vi v10, v12, 2 -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: li a1, 4 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulsu.vx v12, v10, a1 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64V-LABEL: mscatter_baseidx_sext_v8i16_v8i32: @@ -2572,18 +2574,20 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_zext_v8i16_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf2 v12, v10 -; RV32-NEXT: vsll.vi v10, v12, 2 -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: li a1, 4 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulu.vx v12, v10, a1 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64V-LABEL: mscatter_baseidx_zext_v8i16_v8i32: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64V-NEXT: vzext.vf2 v12, v10 -; RV64V-NEXT: vsll.vi v10, v12, 2 -; RV64V-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV64V-NEXT: li a1, 4 +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vwmulu.vx v12, v10, a1 +; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i32: @@ -2725,10 +2729,9 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs, ; ; RV64V-LABEL: mscatter_baseidx_v8i32: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vsext.vf2 v12, v10 -; RV64V-NEXT: vsll.vi v12, v12, 2 -; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64V-NEXT: li a1, 4 +; RV64V-NEXT: vsetivli zero, 8, e32, 
m2, ta, ma +; RV64V-NEXT: vwmulsu.vx v12, v10, a1 ; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t ; RV64V-NEXT: ret ; @@ -3881,20 +3884,20 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_zext_v8i8_v8i64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32V-NEXT: vzext.vf2 v13, v12 -; RV32V-NEXT: vsll.vi v12, v13, 3 +; RV32V-NEXT: li a1, 8 +; RV32V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32V-NEXT: vwmulu.vx v13, v12, a1 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV32V-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; RV32V-NEXT: vsoxei16.v v8, (a0), v13, v0.t ; RV32V-NEXT: ret ; ; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8i64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64V-NEXT: vzext.vf2 v13, v12 -; RV64V-NEXT: vsll.vi v12, v13, 3 +; RV64V-NEXT: li a1, 8 +; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64V-NEXT: vwmulu.vx v13, v12, a1 ; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV64V-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; RV64V-NEXT: vsoxei16.v v8, (a0), v13, v0.t ; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i64: @@ -4141,11 +4144,11 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_v8i16_v8i64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32V-NEXT: vsext.vf2 v14, v12 -; RV32V-NEXT: vsll.vi v12, v14, 3 +; RV32V-NEXT: li a1, 8 +; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32V-NEXT: vwmulsu.vx v14, v12, a1 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32V-NEXT: vsoxei32.v v8, (a0), v14, v0.t ; RV32V-NEXT: ret ; ; RV64V-LABEL: mscatter_baseidx_v8i16_v8i64: @@ -4173,47 +4176,47 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV32ZVE32F-NEXT: lw a7, 44(a0) ; RV32ZVE32F-NEXT: lw a4, 48(a0) ; RV32ZVE32F-NEXT: lw a5, 52(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; RV32ZVE32F-NEXT: lw t4, 28(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) -; RV32ZVE32F-NEXT: lw t2, 36(a0) -; RV32ZVE32F-NEXT: lw s0, 8(a0) -; RV32ZVE32F-NEXT: lw s1, 12(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw t6, 20(a0) +; RV32ZVE32F-NEXT: lw t2, 24(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t0, 32(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) +; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw s0, 12(a0) +; RV32ZVE32F-NEXT: lw t4, 16(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 -; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 +; RV32ZVE32F-NEXT: vmv.v.x v10, a1 +; RV32ZVE32F-NEXT: li s1, 8 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi s2, t0, 1 -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi s2, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32ZVE32F-NEXT: vwmaccus.vx v10, s1, v8 ; RV32ZVE32F-NEXT: bnez s2, .LBB45_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, t0, 2 +; RV32ZVE32F-NEXT: andi a0, a1, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_11 ; RV32ZVE32F-NEXT: .LBB45_2: # %else2 -; 
RV32ZVE32F-NEXT: andi a0, t0, 4 +; RV32ZVE32F-NEXT: andi a0, a1, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_12 ; RV32ZVE32F-NEXT: .LBB45_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, t0, 8 +; RV32ZVE32F-NEXT: andi a0, a1, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_13 ; RV32ZVE32F-NEXT: .LBB45_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, t0, 16 +; RV32ZVE32F-NEXT: andi a0, a1, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_14 ; RV32ZVE32F-NEXT: .LBB45_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, t0, 32 +; RV32ZVE32F-NEXT: andi a0, a1, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_15 ; RV32ZVE32F-NEXT: .LBB45_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, t0, 64 +; RV32ZVE32F-NEXT: andi a0, a1, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_16 ; RV32ZVE32F-NEXT: .LBB45_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, t0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_9 ; RV32ZVE32F-NEXT: .LBB45_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: sw a2, 0(a0) ; RV32ZVE32F-NEXT: sw a3, 4(a0) @@ -4229,60 +4232,61 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB45_10: # %cond.store ; RV32ZVE32F-NEXT: .cfi_restore_state -; RV32ZVE32F-NEXT: lw a1, 0(a0) +; RV32ZVE32F-NEXT: lw s1, 0(a0) ; RV32ZVE32F-NEXT: lw a0, 4(a0) -; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw a1, 0(s2) +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s s2, v10 +; RV32ZVE32F-NEXT: sw s1, 0(s2) ; RV32ZVE32F-NEXT: sw a0, 4(s2) -; RV32ZVE32F-NEXT: andi a0, t0, 2 +; RV32ZVE32F-NEXT: andi a0, a1, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_2 ; RV32ZVE32F-NEXT: .LBB45_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 0(a0) -; RV32ZVE32F-NEXT: sw s1, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 4 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw s0, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_3 ; RV32ZVE32F-NEXT: .LBB45_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 8 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_4 ; RV32ZVE32F-NEXT: .LBB45_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 16 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_5 ; RV32ZVE32F-NEXT: .LBB45_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 32 +; 
RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: sw t0, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_6 ; RV32ZVE32F-NEXT: .LBB45_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: sw a6, 0(a0) ; RV32ZVE32F-NEXT: sw a7, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 64 +; RV32ZVE32F-NEXT: andi a0, a1, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_7 ; RV32ZVE32F-NEXT: .LBB45_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: sw a4, 0(a0) ; RV32ZVE32F-NEXT: sw a5, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_8 ; RV32ZVE32F-NEXT: j .LBB45_9 ; @@ -4392,11 +4396,11 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_sext_v8i16_v8i64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32V-NEXT: vsext.vf2 v14, v12 -; RV32V-NEXT: vsll.vi v12, v14, 3 +; RV32V-NEXT: li a1, 8 +; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32V-NEXT: vwmulsu.vx v14, v12, a1 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32V-NEXT: vsoxei32.v v8, (a0), v14, v0.t ; RV32V-NEXT: ret ; ; RV64V-LABEL: mscatter_baseidx_sext_v8i16_v8i64: @@ -4424,47 +4428,47 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: lw a7, 44(a0) ; RV32ZVE32F-NEXT: lw a4, 48(a0) ; RV32ZVE32F-NEXT: lw a5, 52(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; RV32ZVE32F-NEXT: lw t4, 28(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) -; RV32ZVE32F-NEXT: lw t2, 36(a0) -; RV32ZVE32F-NEXT: lw s0, 8(a0) -; RV32ZVE32F-NEXT: lw s1, 12(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw t6, 20(a0) +; RV32ZVE32F-NEXT: lw t2, 24(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t0, 32(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) +; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw s0, 12(a0) +; RV32ZVE32F-NEXT: lw t4, 16(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 -; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 +; RV32ZVE32F-NEXT: vmv.v.x v10, a1 +; RV32ZVE32F-NEXT: li s1, 8 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi s2, t0, 1 -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi s2, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32ZVE32F-NEXT: vwmaccus.vx v10, s1, v8 ; RV32ZVE32F-NEXT: bnez s2, .LBB46_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, t0, 2 +; RV32ZVE32F-NEXT: andi a0, a1, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_11 ; RV32ZVE32F-NEXT: .LBB46_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, t0, 4 +; RV32ZVE32F-NEXT: andi a0, a1, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_12 ; RV32ZVE32F-NEXT: .LBB46_3: # %else4 -; RV32ZVE32F-NEXT: 
andi a0, t0, 8 +; RV32ZVE32F-NEXT: andi a0, a1, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_13 ; RV32ZVE32F-NEXT: .LBB46_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, t0, 16 +; RV32ZVE32F-NEXT: andi a0, a1, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_14 ; RV32ZVE32F-NEXT: .LBB46_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, t0, 32 +; RV32ZVE32F-NEXT: andi a0, a1, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_15 ; RV32ZVE32F-NEXT: .LBB46_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, t0, 64 +; RV32ZVE32F-NEXT: andi a0, a1, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_16 ; RV32ZVE32F-NEXT: .LBB46_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, t0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_9 ; RV32ZVE32F-NEXT: .LBB46_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: sw a2, 0(a0) ; RV32ZVE32F-NEXT: sw a3, 4(a0) @@ -4480,60 +4484,61 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB46_10: # %cond.store ; RV32ZVE32F-NEXT: .cfi_restore_state -; RV32ZVE32F-NEXT: lw a1, 0(a0) +; RV32ZVE32F-NEXT: lw s1, 0(a0) ; RV32ZVE32F-NEXT: lw a0, 4(a0) -; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw a1, 0(s2) +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s s2, v10 +; RV32ZVE32F-NEXT: sw s1, 0(s2) ; RV32ZVE32F-NEXT: sw a0, 4(s2) -; RV32ZVE32F-NEXT: andi a0, t0, 2 +; RV32ZVE32F-NEXT: andi a0, a1, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_2 ; RV32ZVE32F-NEXT: .LBB46_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 0(a0) -; RV32ZVE32F-NEXT: sw s1, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 4 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw s0, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_3 ; RV32ZVE32F-NEXT: .LBB46_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 8 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_4 ; RV32ZVE32F-NEXT: .LBB46_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 16 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_5 ; RV32ZVE32F-NEXT: .LBB46_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 32 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: sw t0, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: 
andi a0, a1, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_6 ; RV32ZVE32F-NEXT: .LBB46_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: sw a6, 0(a0) ; RV32ZVE32F-NEXT: sw a7, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 64 +; RV32ZVE32F-NEXT: andi a0, a1, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_7 ; RV32ZVE32F-NEXT: .LBB46_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: sw a4, 0(a0) ; RV32ZVE32F-NEXT: sw a5, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_8 ; RV32ZVE32F-NEXT: j .LBB46_9 ; @@ -4644,20 +4649,20 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_zext_v8i16_v8i64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32V-NEXT: vzext.vf2 v14, v12 -; RV32V-NEXT: vsll.vi v12, v14, 3 +; RV32V-NEXT: li a1, 8 +; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32V-NEXT: vwmulu.vx v14, v12, a1 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32V-NEXT: vsoxei32.v v8, (a0), v14, v0.t ; RV32V-NEXT: ret ; ; RV64V-LABEL: mscatter_baseidx_zext_v8i16_v8i64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64V-NEXT: vzext.vf2 v14, v12 -; RV64V-NEXT: vsll.vi v12, v14, 3 +; RV64V-NEXT: li a1, 8 +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vwmulu.vx v14, v12, a1 ; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV64V-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV64V-NEXT: vsoxei32.v v8, (a0), v14, v0.t ; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i64: @@ -4677,47 +4682,47 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: lw a7, 44(a0) ; RV32ZVE32F-NEXT: lw a4, 48(a0) ; RV32ZVE32F-NEXT: lw a5, 52(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; RV32ZVE32F-NEXT: lw t4, 28(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) -; RV32ZVE32F-NEXT: lw t2, 36(a0) -; RV32ZVE32F-NEXT: lw s0, 8(a0) -; RV32ZVE32F-NEXT: lw s1, 12(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw t6, 20(a0) +; RV32ZVE32F-NEXT: lw t2, 24(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t0, 32(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) +; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw s0, 12(a0) +; RV32ZVE32F-NEXT: lw t4, 16(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vzext.vf2 v10, v8 -; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 +; RV32ZVE32F-NEXT: vmv.v.x v10, a1 +; RV32ZVE32F-NEXT: li s1, 8 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi s2, t0, 1 -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi s2, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32ZVE32F-NEXT: vwmaccu.vx v10, s1, v8 ; RV32ZVE32F-NEXT: bnez s2, .LBB47_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; 
RV32ZVE32F-NEXT: andi a0, t0, 2 +; RV32ZVE32F-NEXT: andi a0, a1, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_11 ; RV32ZVE32F-NEXT: .LBB47_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, t0, 4 +; RV32ZVE32F-NEXT: andi a0, a1, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_12 ; RV32ZVE32F-NEXT: .LBB47_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, t0, 8 +; RV32ZVE32F-NEXT: andi a0, a1, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_13 ; RV32ZVE32F-NEXT: .LBB47_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, t0, 16 +; RV32ZVE32F-NEXT: andi a0, a1, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_14 ; RV32ZVE32F-NEXT: .LBB47_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, t0, 32 +; RV32ZVE32F-NEXT: andi a0, a1, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_15 ; RV32ZVE32F-NEXT: .LBB47_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, t0, 64 +; RV32ZVE32F-NEXT: andi a0, a1, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_16 ; RV32ZVE32F-NEXT: .LBB47_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, t0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_9 ; RV32ZVE32F-NEXT: .LBB47_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: sw a2, 0(a0) ; RV32ZVE32F-NEXT: sw a3, 4(a0) @@ -4733,60 +4738,61 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB47_10: # %cond.store ; RV32ZVE32F-NEXT: .cfi_restore_state -; RV32ZVE32F-NEXT: lw a1, 0(a0) +; RV32ZVE32F-NEXT: lw s1, 0(a0) ; RV32ZVE32F-NEXT: lw a0, 4(a0) -; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw a1, 0(s2) +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s s2, v10 +; RV32ZVE32F-NEXT: sw s1, 0(s2) ; RV32ZVE32F-NEXT: sw a0, 4(s2) -; RV32ZVE32F-NEXT: andi a0, t0, 2 +; RV32ZVE32F-NEXT: andi a0, a1, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_2 ; RV32ZVE32F-NEXT: .LBB47_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 0(a0) -; RV32ZVE32F-NEXT: sw s1, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 4 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw s0, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_3 ; RV32ZVE32F-NEXT: .LBB47_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 8 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_4 ; RV32ZVE32F-NEXT: .LBB47_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 16 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_5 ; RV32ZVE32F-NEXT: .LBB47_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, 
v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 32 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: sw t0, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_6 ; RV32ZVE32F-NEXT: .LBB47_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: sw a6, 0(a0) ; RV32ZVE32F-NEXT: sw a7, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 64 +; RV32ZVE32F-NEXT: andi a0, a1, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_7 ; RV32ZVE32F-NEXT: .LBB47_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: sw a4, 0(a0) ; RV32ZVE32F-NEXT: sw a5, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_8 ; RV32ZVE32F-NEXT: j .LBB47_9 ; @@ -4913,10 +4919,11 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; ; RV64V-LABEL: mscatter_baseidx_v8i32_v8i64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vsext.vf2 v16, v12 -; RV64V-NEXT: vsll.vi v12, v16, 3 -; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: li a1, 8 +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-NEXT: vwmulsu.vx v16, v12, a1 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i32_v8i64: @@ -5162,10 +5169,11 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; ; RV64V-LABEL: mscatter_baseidx_sext_v8i32_v8i64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vsext.vf2 v16, v12 -; RV64V-NEXT: vsll.vi v12, v16, 3 -; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: li a1, 8 +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-NEXT: vwmulsu.vx v16, v12, a1 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8i64: @@ -5412,10 +5420,11 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; ; RV64V-LABEL: mscatter_baseidx_zext_v8i32_v8i64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vzext.vf2 v16, v12 -; RV64V-NEXT: vsll.vi v12, v16, 3 -; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: li a1, 8 +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-NEXT: vwmulu.vx v16, v12, a1 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8i64: @@ -9019,20 +9028,20 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vzext.vf2 v11, v10 -; RV32-NEXT: vsll.vi v10, v11, 2 +; RV32-NEXT: li a1, 4 +; RV32-NEXT: 
vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwmulu.vx v11, v10, a1 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v11, v0.t ; RV32-NEXT: ret ; ; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8f32: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64V-NEXT: vzext.vf2 v11, v10 -; RV64V-NEXT: vsll.vi v10, v11, 2 +; RV64V-NEXT: li a1, 4 +; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64V-NEXT: vwmulu.vx v11, v10, a1 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64V-NEXT: vsoxei16.v v8, (a0), v10, v0.t +; RV64V-NEXT: vsoxei16.v v8, (a0), v11, v0.t ; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f32: @@ -9166,10 +9175,11 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_v8i16_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vsext.vf2 v12, v10 -; RV32-NEXT: vsll.vi v10, v12, 2 -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: li a1, 4 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulsu.vx v12, v10, a1 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64V-LABEL: mscatter_baseidx_v8i16_v8f32: @@ -9304,10 +9314,11 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_sext_v8i16_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vsext.vf2 v12, v10 -; RV32-NEXT: vsll.vi v10, v12, 2 -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: li a1, 4 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulsu.vx v12, v10, a1 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64V-LABEL: mscatter_baseidx_sext_v8i16_v8f32: @@ -9443,18 +9454,20 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_zext_v8i16_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf2 v12, v10 -; RV32-NEXT: vsll.vi v10, v12, 2 -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: li a1, 4 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulu.vx v12, v10, a1 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64V-LABEL: mscatter_baseidx_zext_v8i16_v8f32: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64V-NEXT: vzext.vf2 v12, v10 -; RV64V-NEXT: vsll.vi v10, v12, 2 -; RV64V-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV64V-NEXT: li a1, 4 +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vwmulu.vx v12, v10, a1 +; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64V-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f32: @@ -9596,10 +9609,9 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs ; ; RV64V-LABEL: mscatter_baseidx_v8f32: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: 
vsext.vf2 v12, v10 -; RV64V-NEXT: vsll.vi v12, v12, 2 -; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64V-NEXT: li a1, 4 +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-NEXT: vwmulsu.vx v12, v10, a1 ; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t ; RV64V-NEXT: ret ; @@ -10545,20 +10557,20 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_zext_v8i8_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32V-NEXT: vzext.vf2 v13, v12 -; RV32V-NEXT: vsll.vi v12, v13, 3 +; RV32V-NEXT: li a1, 8 +; RV32V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32V-NEXT: vwmulu.vx v13, v12, a1 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV32V-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; RV32V-NEXT: vsoxei16.v v8, (a0), v13, v0.t ; RV32V-NEXT: ret ; ; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8f64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64V-NEXT: vzext.vf2 v13, v12 -; RV64V-NEXT: vsll.vi v12, v13, 3 +; RV64V-NEXT: li a1, 8 +; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64V-NEXT: vwmulu.vx v13, v12, a1 ; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV64V-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; RV64V-NEXT: vsoxei16.v v8, (a0), v13, v0.t ; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f64: @@ -10755,11 +10767,11 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_v8i16_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32V-NEXT: vsext.vf2 v14, v12 -; RV32V-NEXT: vsll.vi v12, v14, 3 +; RV32V-NEXT: li a1, 8 +; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32V-NEXT: vwmulsu.vx v14, v12, a1 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32V-NEXT: vsoxei32.v v8, (a0), v14, v0.t ; RV32V-NEXT: ret ; ; RV64V-LABEL: mscatter_baseidx_v8i16_v8f64: @@ -10773,87 +10785,88 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i16_v8f64: ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 +; RV32ZVE32F-NEXT: vmv.v.x v10, a0 +; RV32ZVE32F-NEXT: li a1, 8 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 +; RV32ZVE32F-NEXT: vmv.x.s a0, v0 +; RV32ZVE32F-NEXT: andi a2, a0, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32ZVE32F-NEXT: vwmaccus.vx v10, a1, v8 ; RV32ZVE32F-NEXT: bnez a2, .LBB94_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: bnez a0, .LBB94_10 +; RV32ZVE32F-NEXT: andi a1, a0, 2 +; RV32ZVE32F-NEXT: bnez a1, .LBB94_10 ; RV32ZVE32F-NEXT: .LBB94_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: bnez a0, .LBB94_11 +; RV32ZVE32F-NEXT: andi a1, a0, 4 +; RV32ZVE32F-NEXT: bnez a1, .LBB94_11 ; RV32ZVE32F-NEXT: .LBB94_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: bnez a0, .LBB94_12 +; RV32ZVE32F-NEXT: andi a1, a0, 8 +; RV32ZVE32F-NEXT: bnez a1, .LBB94_12 ; RV32ZVE32F-NEXT: 
.LBB94_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: bnez a0, .LBB94_13 +; RV32ZVE32F-NEXT: andi a1, a0, 16 +; RV32ZVE32F-NEXT: bnez a1, .LBB94_13 ; RV32ZVE32F-NEXT: .LBB94_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: bnez a0, .LBB94_14 +; RV32ZVE32F-NEXT: andi a1, a0, 32 +; RV32ZVE32F-NEXT: bnez a1, .LBB94_14 ; RV32ZVE32F-NEXT: .LBB94_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: bnez a0, .LBB94_15 +; RV32ZVE32F-NEXT: andi a1, a0, 64 +; RV32ZVE32F-NEXT: bnez a1, .LBB94_15 ; RV32ZVE32F-NEXT: .LBB94_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, a0, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB94_16 ; RV32ZVE32F-NEXT: .LBB94_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB94_9: # %cond.store -; RV32ZVE32F-NEXT: vmv.x.s a0, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: beqz a0, .LBB94_2 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fsd fa0, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a0, 2 +; RV32ZVE32F-NEXT: beqz a1, .LBB94_2 ; RV32ZVE32F-NEXT: .LBB94_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: fsd fa1, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: beqz a0, .LBB94_3 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fsd fa1, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a0, 4 +; RV32ZVE32F-NEXT: beqz a1, .LBB94_3 ; RV32ZVE32F-NEXT: .LBB94_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: fsd fa2, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: beqz a0, .LBB94_4 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fsd fa2, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a0, 8 +; RV32ZVE32F-NEXT: beqz a1, .LBB94_4 ; RV32ZVE32F-NEXT: .LBB94_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: fsd fa3, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: beqz a0, .LBB94_5 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fsd fa3, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a0, 16 +; RV32ZVE32F-NEXT: beqz a1, .LBB94_5 ; RV32ZVE32F-NEXT: .LBB94_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: fsd fa4, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: beqz a0, .LBB94_6 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fsd fa4, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a0, 32 +; RV32ZVE32F-NEXT: beqz a1, .LBB94_6 ; RV32ZVE32F-NEXT: .LBB94_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: fsd fa5, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: beqz a0, .LBB94_7 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fsd fa5, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a0, 64 +; RV32ZVE32F-NEXT: beqz a1, .LBB94_7 ; RV32ZVE32F-NEXT: 
.LBB94_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: fsd fa6, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fsd fa6, 0(a1) +; RV32ZVE32F-NEXT: andi a0, a0, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB94_8 ; RV32ZVE32F-NEXT: .LBB94_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0) ; RV32ZVE32F-NEXT: ret @@ -10956,11 +10969,11 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16 define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_sext_v8i16_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32V-NEXT: vsext.vf2 v14, v12 -; RV32V-NEXT: vsll.vi v12, v14, 3 +; RV32V-NEXT: li a1, 8 +; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32V-NEXT: vwmulsu.vx v14, v12, a1 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32V-NEXT: vsoxei32.v v8, (a0), v14, v0.t ; RV32V-NEXT: ret ; ; RV64V-LABEL: mscatter_baseidx_sext_v8i16_v8f64: @@ -10974,87 +10987,88 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8f64: ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 +; RV32ZVE32F-NEXT: vmv.v.x v10, a0 +; RV32ZVE32F-NEXT: li a1, 8 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 +; RV32ZVE32F-NEXT: vmv.x.s a0, v0 +; RV32ZVE32F-NEXT: andi a2, a0, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32ZVE32F-NEXT: vwmaccus.vx v10, a1, v8 ; RV32ZVE32F-NEXT: bnez a2, .LBB95_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: bnez a0, .LBB95_10 +; RV32ZVE32F-NEXT: andi a1, a0, 2 +; RV32ZVE32F-NEXT: bnez a1, .LBB95_10 ; RV32ZVE32F-NEXT: .LBB95_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: bnez a0, .LBB95_11 +; RV32ZVE32F-NEXT: andi a1, a0, 4 +; RV32ZVE32F-NEXT: bnez a1, .LBB95_11 ; RV32ZVE32F-NEXT: .LBB95_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: bnez a0, .LBB95_12 +; RV32ZVE32F-NEXT: andi a1, a0, 8 +; RV32ZVE32F-NEXT: bnez a1, .LBB95_12 ; RV32ZVE32F-NEXT: .LBB95_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: bnez a0, .LBB95_13 +; RV32ZVE32F-NEXT: andi a1, a0, 16 +; RV32ZVE32F-NEXT: bnez a1, .LBB95_13 ; RV32ZVE32F-NEXT: .LBB95_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: bnez a0, .LBB95_14 +; RV32ZVE32F-NEXT: andi a1, a0, 32 +; RV32ZVE32F-NEXT: bnez a1, .LBB95_14 ; RV32ZVE32F-NEXT: .LBB95_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: bnez a0, .LBB95_15 +; RV32ZVE32F-NEXT: andi a1, a0, 64 +; RV32ZVE32F-NEXT: bnez a1, .LBB95_15 ; RV32ZVE32F-NEXT: .LBB95_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, a0, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB95_16 ; RV32ZVE32F-NEXT: .LBB95_8: # %else14 ; 
RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB95_9: # %cond.store -; RV32ZVE32F-NEXT: vmv.x.s a0, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: beqz a0, .LBB95_2 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fsd fa0, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a0, 2 +; RV32ZVE32F-NEXT: beqz a1, .LBB95_2 ; RV32ZVE32F-NEXT: .LBB95_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: fsd fa1, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: beqz a0, .LBB95_3 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fsd fa1, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a0, 4 +; RV32ZVE32F-NEXT: beqz a1, .LBB95_3 ; RV32ZVE32F-NEXT: .LBB95_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: fsd fa2, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: beqz a0, .LBB95_4 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fsd fa2, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a0, 8 +; RV32ZVE32F-NEXT: beqz a1, .LBB95_4 ; RV32ZVE32F-NEXT: .LBB95_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: fsd fa3, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: beqz a0, .LBB95_5 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fsd fa3, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a0, 16 +; RV32ZVE32F-NEXT: beqz a1, .LBB95_5 ; RV32ZVE32F-NEXT: .LBB95_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: fsd fa4, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: beqz a0, .LBB95_6 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fsd fa4, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a0, 32 +; RV32ZVE32F-NEXT: beqz a1, .LBB95_6 ; RV32ZVE32F-NEXT: .LBB95_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: fsd fa5, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: beqz a0, .LBB95_7 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fsd fa5, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a0, 64 +; RV32ZVE32F-NEXT: beqz a1, .LBB95_7 ; RV32ZVE32F-NEXT: .LBB95_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: fsd fa6, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fsd fa6, 0(a1) +; RV32ZVE32F-NEXT: andi a0, a0, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB95_8 ; RV32ZVE32F-NEXT: .LBB95_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0) ; RV32ZVE32F-NEXT: ret @@ -11158,106 +11172,107 @@ define void 
@mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_zext_v8i16_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32V-NEXT: vzext.vf2 v14, v12 -; RV32V-NEXT: vsll.vi v12, v14, 3 +; RV32V-NEXT: li a1, 8 +; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32V-NEXT: vwmulu.vx v14, v12, a1 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32V-NEXT: vsoxei32.v v8, (a0), v14, v0.t ; RV32V-NEXT: ret ; ; RV64V-LABEL: mscatter_baseidx_zext_v8i16_v8f64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64V-NEXT: vzext.vf2 v14, v12 -; RV64V-NEXT: vsll.vi v12, v14, 3 +; RV64V-NEXT: li a1, 8 +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vwmulu.vx v14, v12, a1 ; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV64V-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV64V-NEXT: vsoxei32.v v8, (a0), v14, v0.t ; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f64: ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vzext.vf2 v10, v8 +; RV32ZVE32F-NEXT: vmv.v.x v10, a0 +; RV32ZVE32F-NEXT: li a1, 8 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 +; RV32ZVE32F-NEXT: vmv.x.s a0, v0 +; RV32ZVE32F-NEXT: andi a2, a0, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32ZVE32F-NEXT: vwmaccu.vx v10, a1, v8 ; RV32ZVE32F-NEXT: bnez a2, .LBB96_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: bnez a0, .LBB96_10 +; RV32ZVE32F-NEXT: andi a1, a0, 2 +; RV32ZVE32F-NEXT: bnez a1, .LBB96_10 ; RV32ZVE32F-NEXT: .LBB96_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: bnez a0, .LBB96_11 +; RV32ZVE32F-NEXT: andi a1, a0, 4 +; RV32ZVE32F-NEXT: bnez a1, .LBB96_11 ; RV32ZVE32F-NEXT: .LBB96_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: bnez a0, .LBB96_12 +; RV32ZVE32F-NEXT: andi a1, a0, 8 +; RV32ZVE32F-NEXT: bnez a1, .LBB96_12 ; RV32ZVE32F-NEXT: .LBB96_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: bnez a0, .LBB96_13 +; RV32ZVE32F-NEXT: andi a1, a0, 16 +; RV32ZVE32F-NEXT: bnez a1, .LBB96_13 ; RV32ZVE32F-NEXT: .LBB96_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: bnez a0, .LBB96_14 +; RV32ZVE32F-NEXT: andi a1, a0, 32 +; RV32ZVE32F-NEXT: bnez a1, .LBB96_14 ; RV32ZVE32F-NEXT: .LBB96_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: bnez a0, .LBB96_15 +; RV32ZVE32F-NEXT: andi a1, a0, 64 +; RV32ZVE32F-NEXT: bnez a1, .LBB96_15 ; RV32ZVE32F-NEXT: .LBB96_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, a0, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB96_16 ; RV32ZVE32F-NEXT: .LBB96_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB96_9: # %cond.store -; RV32ZVE32F-NEXT: vmv.x.s a0, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 2 -; RV32ZVE32F-NEXT: beqz a0, .LBB96_2 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fsd fa0, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a0, 2 +; RV32ZVE32F-NEXT: beqz a1, .LBB96_2 ; RV32ZVE32F-NEXT: .LBB96_10: # 
%cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: fsd fa1, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 4 -; RV32ZVE32F-NEXT: beqz a0, .LBB96_3 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 1 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fsd fa1, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a0, 4 +; RV32ZVE32F-NEXT: beqz a1, .LBB96_3 ; RV32ZVE32F-NEXT: .LBB96_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: fsd fa2, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 8 -; RV32ZVE32F-NEXT: beqz a0, .LBB96_4 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fsd fa2, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a0, 8 +; RV32ZVE32F-NEXT: beqz a1, .LBB96_4 ; RV32ZVE32F-NEXT: .LBB96_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: fsd fa3, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: beqz a0, .LBB96_5 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fsd fa3, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a0, 16 +; RV32ZVE32F-NEXT: beqz a1, .LBB96_5 ; RV32ZVE32F-NEXT: .LBB96_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: fsd fa4, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 32 -; RV32ZVE32F-NEXT: beqz a0, .LBB96_6 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 4 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fsd fa4, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a0, 32 +; RV32ZVE32F-NEXT: beqz a1, .LBB96_6 ; RV32ZVE32F-NEXT: .LBB96_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: fsd fa5, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 64 -; RV32ZVE32F-NEXT: beqz a0, .LBB96_7 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 5 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fsd fa5, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a0, 64 +; RV32ZVE32F-NEXT: beqz a1, .LBB96_7 ; RV32ZVE32F-NEXT: .LBB96_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: fsd fa6, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fsd fa6, 0(a1) +; RV32ZVE32F-NEXT: andi a0, a0, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB96_8 ; RV32ZVE32F-NEXT: .LBB96_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 +; RV32ZVE32F-NEXT: vslidedown.vi v8, v10, 7 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0) ; RV32ZVE32F-NEXT: ret @@ -11377,10 +11392,11 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32 ; ; RV64V-LABEL: mscatter_baseidx_v8i32_v8f64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vsext.vf2 v16, v12 -; RV64V-NEXT: vsll.vi v12, v16, 3 -; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: li a1, 8 +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-NEXT: vwmulsu.vx v16, v12, a1 +; RV64V-NEXT: vsetvli zero, zero, 
e64, m4, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i32_v8f64: @@ -11576,10 +11592,11 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; ; RV64V-LABEL: mscatter_baseidx_sext_v8i32_v8f64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vsext.vf2 v16, v12 -; RV64V-NEXT: vsll.vi v12, v16, 3 -; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: li a1, 8 +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-NEXT: vwmulsu.vx v16, v12, a1 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8f64: @@ -11776,10 +11793,11 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; ; RV64V-LABEL: mscatter_baseidx_zext_v8i32_v8f64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vzext.vf2 v16, v12 -; RV64V-NEXT: vsll.vi v12, v16, 3 -; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64V-NEXT: li a1, 8 +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-NEXT: vwmulu.vx v16, v12, a1 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64V-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8f64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll index df9ff0fc39a7e..1f6513ae09d60 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll @@ -736,18 +736,18 @@ define <8 x i32> @vpgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 define <8 x i32> @vpgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vzext.vf2 v9, v8 -; RV32-NEXT: vsll.vi v10, v9, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwmulu.vx v10, v8, a2 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; RV32-NEXT: vluxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vzext.vf2 v9, v8 -; RV64-NEXT: vsll.vi v10, v9, 2 +; RV64-NEXT: li a2, 4 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vwmulu.vx v10, v8, a2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; RV64-NEXT: vluxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret @@ -760,11 +760,11 @@ define <8 x i32> @vpgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 define <8 x i32> @vpgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_v8i16_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vsext.vf2 v10, v8 -; RV32-NEXT: vsll.vi v8, v10, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulsu.vx v10, v8, a2 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t +; RV32-NEXT: vluxei32.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_v8i16_v8i32: @@ -783,11 +783,11 @@ define <8 x i32> @vpgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x define <8 x i32> @vpgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 
zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_v8i16_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vsext.vf2 v10, v8 -; RV32-NEXT: vsll.vi v8, v10, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulsu.vx v10, v8, a2 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t +; RV32-NEXT: vluxei32.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_v8i16_v8i32: @@ -807,20 +807,20 @@ define <8 x i32> @vpgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, define <8 x i32> @vpgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i16_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf2 v10, v8 -; RV32-NEXT: vsll.vi v8, v10, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulu.vx v10, v8, a2 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t +; RV32-NEXT: vluxei32.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vzext.vf2 v10, v8 -; RV64-NEXT: vsll.vi v8, v10, 2 +; RV64-NEXT: li a2, 4 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vwmulu.vx v10, v8, a2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV64-NEXT: vluxei32.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei32.v v8, (a0), v10, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i32> %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs @@ -839,9 +839,9 @@ define <8 x i32> @vpgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m ; ; RV64-LABEL: vpgather_baseidx_v8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf2 v12, v8 -; RV64-NEXT: vsll.vi v12, v12, 2 +; RV64-NEXT: li a2, 4 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vwmulsu.vx v12, v8, a2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t ; RV64-NEXT: ret @@ -974,18 +974,18 @@ define <8 x i64> @vpgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 define <8 x i64> @vpgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vzext.vf2 v9, v8 -; RV32-NEXT: vsll.vi v12, v9, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwmulu.vx v12, v8, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; RV32-NEXT: vluxei16.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vzext.vf2 v9, v8 -; RV64-NEXT: vsll.vi v12, v9, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vwmulu.vx v12, v8, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; RV64-NEXT: vluxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret @@ -998,9 +998,9 @@ define <8 x i64> @vpgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 define <8 x i64> @vpgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_v8i16_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vsext.vf2 v10, v8 -; 
RV32-NEXT: vsll.vi v12, v10, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulsu.vx v12, v8, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret @@ -1021,9 +1021,9 @@ define <8 x i64> @vpgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x define <8 x i64> @vpgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_v8i16_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vsext.vf2 v10, v8 -; RV32-NEXT: vsll.vi v12, v10, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulsu.vx v12, v8, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret @@ -1045,18 +1045,18 @@ define <8 x i64> @vpgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, define <8 x i64> @vpgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i16_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf2 v10, v8 -; RV32-NEXT: vsll.vi v12, v10, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulu.vx v12, v8, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vzext.vf2 v10, v8 -; RV64-NEXT: vsll.vi v12, v10, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vwmulu.vx v12, v8, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; RV64-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV64-NEXT: ret @@ -1077,11 +1077,11 @@ define <8 x i64> @vpgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x ; ; RV64-LABEL: vpgather_baseidx_v8i32_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf2 v12, v8 -; RV64-NEXT: vsll.vi v8, v12, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vwmulsu.vx v12, v8, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i64, ptr %base, <8 x i32> %idxs %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl) @@ -1099,11 +1099,11 @@ define <8 x i64> @vpgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, ; ; RV64-LABEL: vpgather_baseidx_sext_v8i32_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf2 v12, v8 -; RV64-NEXT: vsll.vi v8, v12, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vwmulsu.vx v12, v8, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = sext <8 x i32> %idxs to <8 x i64> %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs @@ -1122,11 +1122,11 @@ define <8 x i64> @vpgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, ; ; RV64-LABEL: vpgather_baseidx_zext_v8i32_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf2 v12, v8 -; RV64-NEXT: vsll.vi v8, v12, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetivli zero, 8, 
e32, m2, ta, ma +; RV64-NEXT: vwmulu.vx v12, v8, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i32> %idxs to <8 x i64> %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs @@ -1618,18 +1618,18 @@ define <8 x float> @vpgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, define <8 x float> @vpgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vzext.vf2 v9, v8 -; RV32-NEXT: vsll.vi v10, v9, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwmulu.vx v10, v8, a2 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; RV32-NEXT: vluxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vzext.vf2 v9, v8 -; RV64-NEXT: vsll.vi v10, v9, 2 +; RV64-NEXT: li a2, 4 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vwmulu.vx v10, v8, a2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; RV64-NEXT: vluxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret @@ -1642,11 +1642,11 @@ define <8 x float> @vpgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, define <8 x float> @vpgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_v8i16_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vsext.vf2 v10, v8 -; RV32-NEXT: vsll.vi v8, v10, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulsu.vx v10, v8, a2 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t +; RV32-NEXT: vluxei32.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_v8i16_v8f32: @@ -1665,11 +1665,11 @@ define <8 x float> @vpgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 define <8 x float> @vpgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_v8i16_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vsext.vf2 v10, v8 -; RV32-NEXT: vsll.vi v8, v10, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulsu.vx v10, v8, a2 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t +; RV32-NEXT: vluxei32.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_v8i16_v8f32: @@ -1689,20 +1689,20 @@ define <8 x float> @vpgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs define <8 x float> @vpgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i16_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf2 v10, v8 -; RV32-NEXT: vsll.vi v8, v10, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulu.vx v10, v8, a2 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t +; RV32-NEXT: vluxei32.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vzext.vf2 v10, v8 -; RV64-NEXT: vsll.vi v8, 
v10, 2 +; RV64-NEXT: li a2, 4 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vwmulu.vx v10, v8, a2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV64-NEXT: vluxei32.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei32.v v8, (a0), v10, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i32> %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs @@ -1721,9 +1721,9 @@ define <8 x float> @vpgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> ; ; RV64-LABEL: vpgather_baseidx_v8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf2 v12, v8 -; RV64-NEXT: vsll.vi v12, v12, 2 +; RV64-NEXT: li a2, 4 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vwmulsu.vx v12, v8, a2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t ; RV64-NEXT: ret @@ -1856,18 +1856,18 @@ define <8 x double> @vpgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, define <8 x double> @vpgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vzext.vf2 v9, v8 -; RV32-NEXT: vsll.vi v12, v9, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwmulu.vx v12, v8, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; RV32-NEXT: vluxei16.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vzext.vf2 v9, v8 -; RV64-NEXT: vsll.vi v12, v9, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vwmulu.vx v12, v8, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; RV64-NEXT: vluxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret @@ -1880,9 +1880,9 @@ define <8 x double> @vpgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, define <8 x double> @vpgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_v8i16_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vsext.vf2 v10, v8 -; RV32-NEXT: vsll.vi v12, v10, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulsu.vx v12, v8, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret @@ -1903,9 +1903,9 @@ define <8 x double> @vpgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 define <8 x double> @vpgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_v8i16_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vsext.vf2 v10, v8 -; RV32-NEXT: vsll.vi v12, v10, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulsu.vx v12, v8, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret @@ -1927,18 +1927,18 @@ define <8 x double> @vpgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idx define <8 x double> @vpgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i16_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf2 v10, v8 -; RV32-NEXT: vsll.vi v12, v10, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetivli zero, 8, e16, m1, 
ta, ma +; RV32-NEXT: vwmulu.vx v12, v8, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vzext.vf2 v10, v8 -; RV64-NEXT: vsll.vi v12, v10, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vwmulu.vx v12, v8, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; RV64-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV64-NEXT: ret @@ -1959,11 +1959,11 @@ define <8 x double> @vpgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 ; ; RV64-LABEL: vpgather_baseidx_v8i32_v8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf2 v12, v8 -; RV64-NEXT: vsll.vi v8, v12, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vwmulsu.vx v12, v8, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl) @@ -1981,11 +1981,11 @@ define <8 x double> @vpgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idx ; ; RV64-LABEL: vpgather_baseidx_sext_v8i32_v8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf2 v12, v8 -; RV64-NEXT: vsll.vi v8, v12, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vwmulsu.vx v12, v8, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = sext <8 x i32> %idxs to <8 x i64> %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs @@ -2004,11 +2004,11 @@ define <8 x double> @vpgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idx ; ; RV64-LABEL: vpgather_baseidx_zext_v8i32_v8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf2 v12, v8 -; RV64-NEXT: vsll.vi v8, v12, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vwmulu.vx v12, v8, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i32> %idxs to <8 x i64> %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs @@ -2209,25 +2209,25 @@ define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV32-LABEL: vpgather_baseidx_zext_v32i8_v32f64: ; RV32: # %bb.0: ; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; RV32-NEXT: vzext.vf2 v12, v8 -; RV32-NEXT: li a3, 16 -; RV32-NEXT: vsll.vi v16, v12, 3 +; RV32-NEXT: li a3, 8 +; RV32-NEXT: li a4, 16 +; RV32-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; RV32-NEXT: vwmulu.vx v16, v8, a3 ; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a3, .LBB97_2 +; RV32-NEXT: bltu a1, a4, .LBB97_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB97_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei16.v v8, (a0), v16, v0.t +; RV32-NEXT: vsetivli zero, 16, e16, m4, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: 
vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e16, m4, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei16.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2235,25 +2235,25 @@ define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV64-LABEL: vpgather_baseidx_zext_v32i8_v32f64: ; RV64: # %bb.0: ; RV64-NEXT: li a2, 32 -; RV64-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; RV64-NEXT: vzext.vf2 v12, v8 -; RV64-NEXT: li a3, 16 -; RV64-NEXT: vsll.vi v16, v12, 3 +; RV64-NEXT: li a3, 8 +; RV64-NEXT: li a4, 16 +; RV64-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; RV64-NEXT: vwmulu.vx v16, v8, a3 ; RV64-NEXT: mv a2, a1 -; RV64-NEXT: bltu a1, a3, .LBB97_2 +; RV64-NEXT: bltu a1, a4, .LBB97_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a2, 16 ; RV64-NEXT: .LBB97_2: ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vluxei16.v v8, (a0), v16, v0.t +; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma +; RV64-NEXT: vslidedown.vi v24, v16, 16 ; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: and a1, a1, a2 -; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma -; RV64-NEXT: vslidedown.vi v24, v16, 16 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei16.v v16, (a0), v24, v0.t ; RV64-NEXT: ret @@ -2267,25 +2267,25 @@ define <32 x double> @vpgather_baseidx_v32i16_v32f64(ptr %base, <32 x i16> %idxs ; RV32-LABEL: vpgather_baseidx_v32i16_v32f64: ; RV32: # %bb.0: ; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vsext.vf2 v16, v8 -; RV32-NEXT: li a3, 16 -; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: li a3, 8 +; RV32-NEXT: li a4, 16 +; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; RV32-NEXT: vwmulsu.vx v16, v8, a3 ; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a3, .LBB98_2 +; RV32-NEXT: bltu a1, a4, .LBB98_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB98_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2325,25 +2325,25 @@ define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(ptr %base, <32 x i16> ; RV32-LABEL: vpgather_baseidx_sext_v32i16_v32f64: ; RV32: # %bb.0: ; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vsext.vf2 v16, v8 -; RV32-NEXT: li a3, 16 -; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: li a3, 8 +; RV32-NEXT: li a4, 16 +; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; RV32-NEXT: vwmulsu.vx v16, v8, a3 ; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a3, .LBB99_2 +; RV32-NEXT: bltu a1, a4, .LBB99_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB99_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, 
ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2385,25 +2385,25 @@ define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(ptr %base, <32 x i16> ; RV32-LABEL: vpgather_baseidx_zext_v32i16_v32f64: ; RV32: # %bb.0: ; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vzext.vf2 v16, v8 -; RV32-NEXT: li a3, 16 -; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: li a3, 8 +; RV32-NEXT: li a4, 16 +; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; RV32-NEXT: vwmulu.vx v16, v8, a3 ; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a3, .LBB100_2 +; RV32-NEXT: bltu a1, a4, .LBB100_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB100_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2411,25 +2411,25 @@ define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(ptr %base, <32 x i16> ; RV64-LABEL: vpgather_baseidx_zext_v32i16_v32f64: ; RV64: # %bb.0: ; RV64-NEXT: li a2, 32 -; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV64-NEXT: vzext.vf2 v16, v8 -; RV64-NEXT: li a3, 16 -; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: li a3, 8 +; RV64-NEXT: li a4, 16 +; RV64-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; RV64-NEXT: vwmulu.vx v16, v8, a3 ; RV64-NEXT: mv a2, a1 -; RV64-NEXT: bltu a1, a3, .LBB100_2 +; RV64-NEXT: bltu a1, a4, .LBB100_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a2, 16 ; RV64-NEXT: .LBB100_2: ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV64-NEXT: vslidedown.vi v24, v16, 16 ; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: and a1, a1, a2 -; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v24, v16, 16 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV64-NEXT: ret @@ -2468,20 +2468,19 @@ define <32 x double> @vpgather_baseidx_v32i32_v32f64(ptr %base, <32 x i32> %idxs ; RV64-LABEL: vpgather_baseidx_v32i32_v32f64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v16, v8, 16 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 
v24, v8 +; RV64-NEXT: vslidedown.vi v24, v8, 16 +; RV64-NEXT: li a2, 8 ; RV64-NEXT: li a3, 16 -; RV64-NEXT: vsext.vf2 v8, v16 -; RV64-NEXT: vsll.vi v16, v8, 3 -; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV64-NEXT: vwmulsu.vx v16, v24, a2 +; RV64-NEXT: vwmulsu.vx v24, v8, a2 ; RV64-NEXT: mv a2, a1 ; RV64-NEXT: bltu a1, a3, .LBB101_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a2, 16 ; RV64-NEXT: .LBB101_2: ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 @@ -2524,22 +2523,20 @@ define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(ptr %base, <32 x i32> ; ; RV64-LABEL: vpgather_baseidx_sext_v32i32_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v24, v8 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 16 +; RV64-NEXT: vslidedown.vi v24, v8, 16 +; RV64-NEXT: li a2, 8 ; RV64-NEXT: li a3, 16 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v16, v8 -; RV64-NEXT: vsll.vi v16, v16, 3 -; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV64-NEXT: vwmulsu.vx v16, v24, a2 +; RV64-NEXT: vwmulsu.vx v24, v8, a2 ; RV64-NEXT: mv a2, a1 ; RV64-NEXT: bltu a1, a3, .LBB102_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a2, 16 ; RV64-NEXT: .LBB102_2: ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 @@ -2583,22 +2580,20 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32> ; ; RV64-LABEL: vpgather_baseidx_zext_v32i32_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf2 v24, v8 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 16 +; RV64-NEXT: vslidedown.vi v24, v8, 16 +; RV64-NEXT: li a2, 8 ; RV64-NEXT: li a3, 16 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf2 v16, v8 -; RV64-NEXT: vsll.vi v16, v16, 3 -; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV64-NEXT: vwmulu.vx v16, v24, a2 +; RV64-NEXT: vwmulu.vx v24, v8, a2 ; RV64-NEXT: mv a2, a1 ; RV64-NEXT: bltu a1, a3, .LBB103_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a2, 16 ; RV64-NEXT: .LBB103_2: ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll index d691dcd5c54b6..f7e4716d2c847 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll @@ -556,20 +556,20 @@ define void @vpscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i define void @vpscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vzext.vf2 v11, v10 -; RV32-NEXT: vsll.vi v10, v11, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: 
vwmulu.vx v11, v10, a2 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v11, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vzext.vf2 v11, v10 -; RV64-NEXT: vsll.vi v10, v11, 2 +; RV64-NEXT: li a2, 4 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vwmulu.vx v11, v10, a2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v11, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i32> %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs @@ -580,11 +580,11 @@ define void @vpscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i define void @vpscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_v8i16_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vsext.vf2 v12, v10 -; RV32-NEXT: vsll.vi v10, v12, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulsu.vx v12, v10, a2 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_v8i16_v8i32: @@ -603,11 +603,11 @@ define void @vpscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> define void @vpscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_v8i16_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vsext.vf2 v12, v10 -; RV32-NEXT: vsll.vi v10, v12, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulsu.vx v12, v10, a2 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_v8i16_v8i32: @@ -627,20 +627,20 @@ define void @vpscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x define void @vpscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i16_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf2 v12, v10 -; RV32-NEXT: vsll.vi v10, v12, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulu.vx v12, v10, a2 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vzext.vf2 v12, v10 -; RV64-NEXT: vsll.vi v10, v12, 2 +; RV64-NEXT: li a2, 4 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vwmulu.vx v12, v10, a2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV64-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i32> %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs @@ -659,9 +659,9 @@ define void @vpscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs, ; ; RV64-LABEL: vpscatter_baseidx_v8i32: ; RV64: # %bb.0: -; 
RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf2 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 +; RV64-NEXT: li a2, 4 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vwmulsu.vx v12, v10, a2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t ; RV64-NEXT: ret @@ -790,20 +790,20 @@ define void @vpscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i define void @vpscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vzext.vf2 v13, v12 -; RV32-NEXT: vsll.vi v12, v13, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwmulu.vx v13, v12, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v13, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vzext.vf2 v13, v12 -; RV64-NEXT: vsll.vi v12, v13, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vwmulu.vx v13, v12, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v13, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i64> %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs @@ -814,11 +814,11 @@ define void @vpscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i define void @vpscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_v8i16_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vsext.vf2 v14, v12 -; RV32-NEXT: vsll.vi v12, v14, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulsu.vx v14, v12, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v14, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_v8i16_v8i64: @@ -837,11 +837,11 @@ define void @vpscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> define void @vpscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_v8i16_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vsext.vf2 v14, v12 -; RV32-NEXT: vsll.vi v12, v14, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulsu.vx v14, v12, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v14, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_v8i16_v8i64: @@ -861,20 +861,20 @@ define void @vpscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x define void @vpscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i16_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf2 v14, v12 -; RV32-NEXT: vsll.vi v12, v14, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulu.vx v14, v12, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; 
RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v14, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vzext.vf2 v14, v12 -; RV64-NEXT: vsll.vi v12, v14, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vwmulu.vx v14, v12, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei32.v v8, (a0), v14, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i64> %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs @@ -893,11 +893,11 @@ define void @vpscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> ; ; RV64-LABEL: vpscatter_baseidx_v8i32_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf2 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vwmulsu.vx v16, v12, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i64, ptr %base, <8 x i32> %idxs call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl) @@ -915,11 +915,11 @@ define void @vpscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x ; ; RV64-LABEL: vpscatter_baseidx_sext_v8i32_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf2 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vwmulsu.vx v16, v12, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext <8 x i32> %idxs to <8 x i64> %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs @@ -938,11 +938,11 @@ define void @vpscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i32_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf2 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vwmulu.vx v16, v12, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i32> %idxs to <8 x i64> %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs @@ -1323,20 +1323,20 @@ define void @vpscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x define void @vpscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vzext.vf2 v11, v10 -; RV32-NEXT: vsll.vi v10, v11, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwmulu.vx v11, v10, a2 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v11, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vzext.vf2 v11, v10 -; RV64-NEXT: vsll.vi v10, v11, 2 +; RV64-NEXT: li a2, 4 +; 
RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vwmulu.vx v11, v10, a2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v11, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i32> %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs @@ -1347,11 +1347,11 @@ define void @vpscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x define void @vpscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_v8i16_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vsext.vf2 v12, v10 -; RV32-NEXT: vsll.vi v10, v12, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulsu.vx v12, v10, a2 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_v8i16_v8f32: @@ -1370,11 +1370,11 @@ define void @vpscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16 define void @vpscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_v8i16_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vsext.vf2 v12, v10 -; RV32-NEXT: vsll.vi v10, v12, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulsu.vx v12, v10, a2 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_v8i16_v8f32: @@ -1394,20 +1394,20 @@ define void @vpscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 define void @vpscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i16_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf2 v12, v10 -; RV32-NEXT: vsll.vi v10, v12, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulu.vx v12, v10, a2 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vzext.vf2 v12, v10 -; RV64-NEXT: vsll.vi v10, v12, 2 +; RV64-NEXT: li a2, 4 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vwmulu.vx v12, v10, a2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV64-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i32> %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs @@ -1426,9 +1426,9 @@ define void @vpscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idx ; ; RV64-LABEL: vpscatter_baseidx_v8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf2 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 +; RV64-NEXT: li a2, 4 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vwmulsu.vx v12, v10, a2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t ; RV64-NEXT: ret @@ -1557,20 +1557,20 @@ define void 
@vpscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 define void @vpscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vzext.vf2 v13, v12 -; RV32-NEXT: vsll.vi v12, v13, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwmulu.vx v13, v12, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v13, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vzext.vf2 v13, v12 -; RV64-NEXT: vsll.vi v12, v13, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vwmulu.vx v13, v12, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v13, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i64> %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs @@ -1581,11 +1581,11 @@ define void @vpscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 define void @vpscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_v8i16_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vsext.vf2 v14, v12 -; RV32-NEXT: vsll.vi v12, v14, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulsu.vx v14, v12, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v14, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_v8i16_v8f64: @@ -1604,11 +1604,11 @@ define void @vpscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i1 define void @vpscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_v8i16_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vsext.vf2 v14, v12 -; RV32-NEXT: vsll.vi v12, v14, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulsu.vx v14, v12, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v14, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_v8i16_v8f64: @@ -1628,20 +1628,20 @@ define void @vpscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 define void @vpscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i16_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf2 v14, v12 -; RV32-NEXT: vsll.vi v12, v14, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vwmulu.vx v14, v12, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v14, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vzext.vf2 v14, v12 -; RV64-NEXT: vsll.vi v12, v14, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetivli zero, 8, 
e16, m1, ta, ma +; RV64-NEXT: vwmulu.vx v14, v12, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei32.v v8, (a0), v14, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i64> %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs @@ -1660,11 +1660,11 @@ define void @vpscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i3 ; ; RV64-LABEL: vpscatter_baseidx_v8i32_v8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf2 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vwmulsu.vx v16, v12, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl) @@ -1682,11 +1682,11 @@ define void @vpscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; ; RV64-LABEL: vpscatter_baseidx_sext_v8i32_v8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsext.vf2 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vwmulsu.vx v16, v12, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext <8 x i32> %idxs to <8 x i64> %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs @@ -1705,11 +1705,11 @@ define void @vpscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i32_v8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf2 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vwmulu.vx v16, v12, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i32> %idxs to <8 x i64> %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs @@ -1842,36 +1842,35 @@ define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, ptr %base, <32 ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a4, a3, 3 -; RV64-NEXT: add a3, a4, a3 +; RV64-NEXT: slli a3, a3, 4 ; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x09, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 9 * vlenb -; RV64-NEXT: addi a3, sp, 16 -; RV64-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 3 ; RV64-NEXT: add a3, sp, a3 ; RV64-NEXT: addi a3, a3, 16 ; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV64-NEXT: addi a3, sp, 16 +; RV64-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV64-NEXT: vle32.v v24, (a1) +; RV64-NEXT: li a1, 8 ; RV64-NEXT: li a3, 16 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV64-NEXT: vslidedown.vi v16, v24, 16 -; RV64-NEXT: vsetivli zero, 16, 
e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v0, v24 -; RV64-NEXT: vsext.vf2 v24, v16 -; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsll.vi v24, v0, 3 +; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV64-NEXT: vwmulsu.vx v8, v16, a1 +; RV64-NEXT: vwmulsu.vx v16, v24, a1 ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: bltu a2, a3, .LBB84_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB84_2: ; RV64-NEXT: addi a3, sp, 16 -; RV64-NEXT: vl1r.v v0, (a3) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: vsoxei64.v v24, (a0), v16, v0.t ; RV64-NEXT: addi a1, a2, -16 ; RV64-NEXT: sltu a2, a2, a1 ; RV64-NEXT: addi a2, a2, -1 @@ -1879,14 +1878,14 @@ define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, ptr %base, <32 ; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: and a1, a2, a1 ; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 3 -; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: .cfi_def_cfa sp, 16 ; RV64-NEXT: addi sp, sp, 16 @@ -1942,24 +1941,22 @@ define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV64-NEXT: vle32.v v24, (a1) +; RV64-NEXT: li a1, 8 ; RV64-NEXT: li a3, 16 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v16, v24 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v8, v24, 16 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v24, v8 -; RV64-NEXT: vsll.vi v8, v24, 3 -; RV64-NEXT: vsll.vi v24, v16, 3 +; RV64-NEXT: vslidedown.vi v16, v24, 16 +; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV64-NEXT: vwmulsu.vx v8, v16, a1 +; RV64-NEXT: vwmulsu.vx v16, v24, a1 ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: bltu a2, a3, .LBB85_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB85_2: ; RV64-NEXT: addi a3, sp, 16 -; RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v16, (a0), v24, v0.t +; RV64-NEXT: vsoxei64.v v24, (a0), v16, v0.t ; RV64-NEXT: addi a1, a2, -16 ; RV64-NEXT: sltu a2, a2, a1 ; RV64-NEXT: addi a2, a2, -1 @@ -2031,24 +2028,22 @@ define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV64-NEXT: vle32.v v24, (a1) +; RV64-NEXT: li a1, 8 ; RV64-NEXT: li a3, 16 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf2 v16, v24 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v8, v24, 16 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf2 v24, v8 -; RV64-NEXT: vsll.vi v8, v24, 3 -; RV64-NEXT: vsll.vi v24, v16, 3 +; RV64-NEXT: vslidedown.vi v16, v24, 16 +; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV64-NEXT: vwmulu.vx v8, v16, a1 +; RV64-NEXT: vwmulu.vx v16, v24, a1 ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: bltu a2, a3, .LBB86_2 ; 
RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB86_2: ; RV64-NEXT: addi a3, sp, 16 -; RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v16, (a0), v24, v0.t +; RV64-NEXT: vsoxei64.v v24, (a0), v16, v0.t ; RV64-NEXT: addi a1, a2, -16 ; RV64-NEXT: sltu a2, a2, a1 ; RV64-NEXT: addi a2, a2, -1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll index fce22849a58af..7b12a56b78661 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll @@ -225,9 +225,10 @@ define <4 x i64> @vwsll_vx_i8_v4i64_zext(<4 x i32> %a, i8 %b) { define <4 x i64> @vwsll_vi_v4i64(<4 x i32> %a) { ; CHECK-LABEL: vwsll_vi_v4i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vzext.vf2 v10, v8 -; CHECK-NEXT: vsll.vi v8, v10, 2 +; CHECK-NEXT: li a0, 4 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vwmulu.vx v10, v8, a0 +; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vwsll_vi_v4i64: @@ -434,9 +435,10 @@ define <8 x i32> @vwsll_vx_i8_v8i32_zext(<8 x i16> %a, i8 %b) { define <8 x i32> @vwsll_vi_v8i32(<8 x i16> %a) { ; CHECK-LABEL: vwsll_vi_v8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vzext.vf2 v10, v8 -; CHECK-NEXT: vsll.vi v8, v10, 2 +; CHECK-NEXT: li a0, 4 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vwmulu.vx v10, v8, a0 +; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vwsll_vi_v8i32: @@ -654,9 +656,10 @@ define <16 x i16> @vwsll_vx_i8_v16i16_zext(<16 x i8> %a, i8 %b) { define <16 x i16> @vwsll_vi_v16i16(<16 x i8> %a) { ; CHECK-LABEL: vwsll_vi_v16i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vzext.vf2 v10, v8 -; CHECK-NEXT: vsll.vi v8, v10, 2 +; CHECK-NEXT: li a0, 4 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vwmulu.vx v10, v8, a0 +; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vwsll_vi_v16i16: diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll index 9ee2324f615dd..0fad09f27007c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll @@ -775,11 +775,11 @@ define @mgather_baseidx_sext_nxv8i8_nxv8i32(ptr %base, @mgather_baseidx_zext_nxv8i8_nxv8i32(ptr %base, %idxs, %m, %passthru) { ; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vzext.vf2 v10, v8 -; CHECK-NEXT: vsll.vi v8, v10, 2 +; CHECK-NEXT: li a1, 4 +; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vwmulu.vx v10, v8, a1 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vluxei16.v v12, (a0), v8, v0.t +; CHECK-NEXT: vluxei16.v v12, (a0), v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret %eidxs = zext %idxs to @@ -791,10 +791,11 @@ define @mgather_baseidx_zext_nxv8i8_nxv8i32(ptr %base, @mgather_baseidx_nxv8i16_nxv8i32(ptr %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu -; RV32-NEXT: vsext.vf2 v16, v8 -; RV32-NEXT: vsll.vi v8, v16, 2 -; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t +; RV32-NEXT: li a1, 4 +; RV32-NEXT: vsetvli a2, zero, e16, m2, 
ta, ma +; RV32-NEXT: vwmulsu.vx v16, v8, a1 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV32-NEXT: vluxei32.v v12, (a0), v16, v0.t ; RV32-NEXT: vmv.v.v v8, v12 ; RV32-NEXT: ret ; @@ -815,10 +816,11 @@ define @mgather_baseidx_nxv8i16_nxv8i32(ptr %base, @mgather_baseidx_sext_nxv8i16_nxv8i32(ptr %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu -; RV32-NEXT: vsext.vf2 v16, v8 -; RV32-NEXT: vsll.vi v8, v16, 2 -; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t +; RV32-NEXT: li a1, 4 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v16, v8, a1 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV32-NEXT: vluxei32.v v12, (a0), v16, v0.t ; RV32-NEXT: vmv.v.v v8, v12 ; RV32-NEXT: ret ; @@ -840,10 +842,11 @@ define @mgather_baseidx_sext_nxv8i16_nxv8i32(ptr %base, @mgather_baseidx_zext_nxv8i16_nxv8i32(ptr %base, %idxs, %m, %passthru) { ; CHECK-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu -; CHECK-NEXT: vzext.vf2 v16, v8 -; CHECK-NEXT: vsll.vi v8, v16, 2 -; CHECK-NEXT: vluxei32.v v12, (a0), v8, v0.t +; CHECK-NEXT: li a1, 4 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vwmulu.vx v16, v8, a1 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vluxei32.v v12, (a0), v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret %eidxs = zext %idxs to @@ -863,10 +866,9 @@ define @mgather_baseidx_nxv8i32(ptr %base, ; ; RV64-LABEL: mgather_baseidx_nxv8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v16, v8 -; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV64-NEXT: li a1, 4 +; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; RV64-NEXT: vwmulsu.vx v16, v8, a1 ; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t ; RV64-NEXT: vmv.v.v v8, v12 ; RV64-NEXT: ret @@ -1034,11 +1036,11 @@ define @mgather_baseidx_sext_nxv8i8_nxv8i64(ptr %base, @mgather_baseidx_zext_nxv8i8_nxv8i64(ptr %base, %idxs, %m, %passthru) { ; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vzext.vf2 v10, v8 -; CHECK-NEXT: vsll.vi v8, v10, 3 +; CHECK-NEXT: li a1, 8 +; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vwmulu.vx v10, v8, a1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vluxei16.v v16, (a0), v8, v0.t +; CHECK-NEXT: vluxei16.v v16, (a0), v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %eidxs = zext %idxs to @@ -1050,11 +1052,11 @@ define @mgather_baseidx_zext_nxv8i8_nxv8i64(ptr %base, @mgather_baseidx_nxv8i16_nxv8i64(ptr %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 3 +; RV32-NEXT: li a1, 8 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v12, v8, a1 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v12, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; @@ -1074,11 +1076,11 @@ define @mgather_baseidx_nxv8i16_nxv8i64(ptr %base, @mgather_baseidx_sext_nxv8i16_nxv8i64(ptr %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v12, v8 -; RV32-NEXT: 
vsll.vi v8, v12, 3 +; RV32-NEXT: li a1, 8 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v12, v8, a1 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v12, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; @@ -1099,11 +1101,11 @@ define @mgather_baseidx_sext_nxv8i16_nxv8i64(ptr %base, @mgather_baseidx_zext_nxv8i16_nxv8i64(ptr %base, %idxs, %m, %passthru) { ; CHECK-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-NEXT: vzext.vf2 v12, v8 -; CHECK-NEXT: vsll.vi v8, v12, 3 +; CHECK-NEXT: li a1, 8 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vwmulu.vx v12, v8, a1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vluxei32.v v16, (a0), v8, v0.t +; CHECK-NEXT: vluxei32.v v16, (a0), v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %eidxs = zext %idxs to @@ -1124,10 +1126,11 @@ define @mgather_baseidx_nxv8i32_nxv8i64(ptr %base, %idxs @@ -1147,10 +1150,11 @@ define @mgather_baseidx_sext_nxv8i32_nxv8i64(ptr %base, %idxs to @@ -1171,10 +1175,11 @@ define @mgather_baseidx_zext_nxv8i32_nxv8i64(ptr %base, %idxs to @@ -1845,11 +1850,11 @@ define @mgather_baseidx_sext_nxv8i8_nxv8f32(ptr %base, @mgather_baseidx_zext_nxv8i8_nxv8f32(ptr %base, %idxs, %m, %passthru) { ; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vzext.vf2 v10, v8 -; CHECK-NEXT: vsll.vi v8, v10, 2 +; CHECK-NEXT: li a1, 4 +; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vwmulu.vx v10, v8, a1 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vluxei16.v v12, (a0), v8, v0.t +; CHECK-NEXT: vluxei16.v v12, (a0), v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret %eidxs = zext %idxs to @@ -1861,10 +1866,11 @@ define @mgather_baseidx_zext_nxv8i8_nxv8f32(ptr %base, @mgather_baseidx_nxv8i16_nxv8f32(ptr %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu -; RV32-NEXT: vsext.vf2 v16, v8 -; RV32-NEXT: vsll.vi v8, v16, 2 -; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t +; RV32-NEXT: li a1, 4 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v16, v8, a1 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV32-NEXT: vluxei32.v v12, (a0), v16, v0.t ; RV32-NEXT: vmv.v.v v8, v12 ; RV32-NEXT: ret ; @@ -1885,10 +1891,11 @@ define @mgather_baseidx_nxv8i16_nxv8f32(ptr %base, @mgather_baseidx_sext_nxv8i16_nxv8f32(ptr %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu -; RV32-NEXT: vsext.vf2 v16, v8 -; RV32-NEXT: vsll.vi v8, v16, 2 -; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t +; RV32-NEXT: li a1, 4 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v16, v8, a1 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV32-NEXT: vluxei32.v v12, (a0), v16, v0.t ; RV32-NEXT: vmv.v.v v8, v12 ; RV32-NEXT: ret ; @@ -1910,10 +1917,11 @@ define @mgather_baseidx_sext_nxv8i16_nxv8f32(ptr %base, @mgather_baseidx_zext_nxv8i16_nxv8f32(ptr %base, %idxs, %m, %passthru) { ; CHECK-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu -; CHECK-NEXT: vzext.vf2 v16, v8 -; CHECK-NEXT: vsll.vi v8, v16, 2 -; CHECK-NEXT: vluxei32.v v12, (a0), v8, v0.t +; CHECK-NEXT: li a1, 
4 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vwmulu.vx v16, v8, a1 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vluxei32.v v12, (a0), v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret %eidxs = zext %idxs to @@ -1933,10 +1941,9 @@ define @mgather_baseidx_nxv8f32(ptr %base, @mgather_baseidx_sext_nxv8i8_nxv8f64(ptr %base, @mgather_baseidx_zext_nxv8i8_nxv8f64(ptr %base, %idxs, %m, %passthru) { ; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vzext.vf2 v10, v8 -; CHECK-NEXT: vsll.vi v8, v10, 3 +; CHECK-NEXT: li a1, 8 +; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vwmulu.vx v10, v8, a1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vluxei16.v v16, (a0), v8, v0.t +; CHECK-NEXT: vluxei16.v v16, (a0), v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %eidxs = zext %idxs to @@ -2120,11 +2127,11 @@ define @mgather_baseidx_zext_nxv8i8_nxv8f64(ptr %base, @mgather_baseidx_nxv8i16_nxv8f64(ptr %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 3 +; RV32-NEXT: li a1, 8 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v12, v8, a1 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v12, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; @@ -2144,11 +2151,11 @@ define @mgather_baseidx_nxv8i16_nxv8f64(ptr %base, @mgather_baseidx_sext_nxv8i16_nxv8f64(ptr %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 3 +; RV32-NEXT: li a1, 8 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v12, v8, a1 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v12, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; @@ -2169,11 +2176,11 @@ define @mgather_baseidx_sext_nxv8i16_nxv8f64(ptr %base, @mgather_baseidx_zext_nxv8i16_nxv8f64(ptr %base, %idxs, %m, %passthru) { ; CHECK-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-NEXT: vzext.vf2 v12, v8 -; CHECK-NEXT: vsll.vi v8, v12, 3 +; CHECK-NEXT: li a1, 8 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vwmulu.vx v12, v8, a1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vluxei32.v v16, (a0), v8, v0.t +; CHECK-NEXT: vluxei32.v v16, (a0), v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %eidxs = zext %idxs to @@ -2194,10 +2201,11 @@ define @mgather_baseidx_nxv8i32_nxv8f64(ptr %base, %idxs @@ -2217,10 +2225,11 @@ define @mgather_baseidx_sext_nxv8i32_nxv8f64(ptr %base, %idxs to @@ -2241,10 +2250,11 @@ define @mgather_baseidx_zext_nxv8i32_nxv8f64(ptr %base, %idxs to diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll index 77a1f508d2218..3cf7cc9cb5152 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll @@ -581,11 +581,11 @@ define void @mscatter_baseidx_sext_nxv8i8_nxv8i32( %val, ptr % define void @mscatter_baseidx_zext_nxv8i8_nxv8i32( %val, ptr %base, %idxs, %m) { ; 
CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vzext.vf2 v14, v12 -; CHECK-NEXT: vsll.vi v12, v14, 2 +; CHECK-NEXT: li a1, 4 +; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vwmulu.vx v14, v12, a1 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT: vsoxei16.v v8, (a0), v14, v0.t ; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i32, ptr %base, %eidxs @@ -596,10 +596,11 @@ define void @mscatter_baseidx_zext_nxv8i8_nxv8i32( %val, ptr % define void @mscatter_baseidx_nxv8i16_nxv8i32( %val, ptr %base, %idxs, %m) { ; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 2 -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: li a1, 4 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v16, v12, a1 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i32: @@ -618,10 +619,11 @@ define void @mscatter_baseidx_nxv8i16_nxv8i32( %val, ptr %base define void @mscatter_baseidx_sext_nxv8i16_nxv8i32( %val, ptr %base, %idxs, %m) { ; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 2 -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: li a1, 4 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v16, v12, a1 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32: @@ -641,10 +643,11 @@ define void @mscatter_baseidx_sext_nxv8i16_nxv8i32( %val, ptr define void @mscatter_baseidx_zext_nxv8i16_nxv8i32( %val, ptr %base, %idxs, %m) { ; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-NEXT: vzext.vf2 v16, v12 -; CHECK-NEXT: vsll.vi v12, v16, 2 -; CHECK-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT: li a1, 4 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vwmulu.vx v16, v12, a1 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i32, ptr %base, %eidxs @@ -662,10 +665,9 @@ define void @mscatter_baseidx_nxv8i32( %val, ptr %base, %idxs @@ -817,11 +819,11 @@ define void @mscatter_baseidx_sext_nxv8i8_nxv8i64( %val, ptr % define void @mscatter_baseidx_zext_nxv8i8_nxv8i64( %val, ptr %base, %idxs, %m) { ; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vzext.vf2 v18, v16 -; CHECK-NEXT: vsll.vi v16, v18, 3 +; CHECK-NEXT: li a1, 8 +; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vwmulu.vx v18, v16, a1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vsoxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT: vsoxei16.v v8, (a0), v18, v0.t ; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i64, ptr %base, %eidxs @@ -832,11 +834,11 @@ define void @mscatter_baseidx_zext_nxv8i8_nxv8i64( %val, ptr % define void @mscatter_baseidx_nxv8i16_nxv8i64( %val, ptr %base, %idxs, %m) { ; RV32-LABEL: 
mscatter_baseidx_nxv8i16_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v20, v16 -; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: li a1, 8 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v20, v16, a1 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v20, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i64: @@ -854,11 +856,11 @@ define void @mscatter_baseidx_nxv8i16_nxv8i64( %val, ptr %base define void @mscatter_baseidx_sext_nxv8i16_nxv8i64( %val, ptr %base, %idxs, %m) { ; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v20, v16 -; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: li a1, 8 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v20, v16, a1 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v20, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64: @@ -877,11 +879,11 @@ define void @mscatter_baseidx_sext_nxv8i16_nxv8i64( %val, ptr define void @mscatter_baseidx_zext_nxv8i16_nxv8i64( %val, ptr %base, %idxs, %m) { ; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-NEXT: vzext.vf2 v20, v16 -; CHECK-NEXT: vsll.vi v16, v20, 3 +; CHECK-NEXT: li a1, 8 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vwmulu.vx v20, v16, a1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT: vsoxei32.v v8, (a0), v20, v0.t ; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i64, ptr %base, %eidxs @@ -900,10 +902,11 @@ define void @mscatter_baseidx_nxv8i32_nxv8i64( %val, ptr %base ; ; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v24, v16 -; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: li a1, 8 +; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vwmulsu.vx v24, v16, a1 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i64, ptr %base, %idxs call void @llvm.masked.scatter.nxv8i64.nxv8p0( %val, %ptrs, i32 8, %m) @@ -921,10 +924,11 @@ define void @mscatter_baseidx_sext_nxv8i32_nxv8i64( %val, ptr ; ; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v24, v16 -; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: li a1, 8 +; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vwmulsu.vx v24, v16, a1 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to %ptrs = getelementptr inbounds i64, ptr %base, %eidxs @@ -943,10 +947,11 @@ define void @mscatter_baseidx_zext_nxv8i32_nxv8i64( %val, ptr ; ; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf2 v24, v16 -; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: li a1, 8 +; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vwmulu.vx v24, v16, a1 +; 
RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i64, ptr %base, %eidxs @@ -1475,11 +1480,11 @@ define void @mscatter_baseidx_sext_nxv8i8_nxv8f32( %val, ptr define void @mscatter_baseidx_zext_nxv8i8_nxv8f32( %val, ptr %base, %idxs, %m) { ; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vzext.vf2 v14, v12 -; CHECK-NEXT: vsll.vi v12, v14, 2 +; CHECK-NEXT: li a1, 4 +; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vwmulu.vx v14, v12, a1 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT: vsoxei16.v v8, (a0), v14, v0.t ; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds float, ptr %base, %eidxs @@ -1490,10 +1495,11 @@ define void @mscatter_baseidx_zext_nxv8i8_nxv8f32( %val, ptr define void @mscatter_baseidx_nxv8i16_nxv8f32( %val, ptr %base, %idxs, %m) { ; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 2 -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: li a1, 4 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v16, v12, a1 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f32: @@ -1512,10 +1518,11 @@ define void @mscatter_baseidx_nxv8i16_nxv8f32( %val, ptr %ba define void @mscatter_baseidx_sext_nxv8i16_nxv8f32( %val, ptr %base, %idxs, %m) { ; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 2 -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: li a1, 4 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v16, v12, a1 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32: @@ -1535,10 +1542,11 @@ define void @mscatter_baseidx_sext_nxv8i16_nxv8f32( %val, pt define void @mscatter_baseidx_zext_nxv8i16_nxv8f32( %val, ptr %base, %idxs, %m) { ; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-NEXT: vzext.vf2 v16, v12 -; CHECK-NEXT: vsll.vi v12, v16, 2 -; CHECK-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT: li a1, 4 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vwmulu.vx v16, v12, a1 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds float, ptr %base, %eidxs @@ -1556,10 +1564,9 @@ define void @mscatter_baseidx_nxv8f32( %val, ptr %base, %idxs @@ -1711,11 +1718,11 @@ define void @mscatter_baseidx_sext_nxv8i8_nxv8f64( %val, pt define void @mscatter_baseidx_zext_nxv8i8_nxv8f64( %val, ptr %base, %idxs, %m) { ; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vzext.vf2 v18, v16 -; CHECK-NEXT: vsll.vi v16, v18, 3 +; CHECK-NEXT: li a1, 8 +; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vwmulu.vx v18, v16, a1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vsoxei16.v 
v8, (a0), v16, v0.t +; CHECK-NEXT: vsoxei16.v v8, (a0), v18, v0.t ; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -1726,11 +1733,11 @@ define void @mscatter_baseidx_zext_nxv8i8_nxv8f64( %val, pt define void @mscatter_baseidx_nxv8i16_nxv8f64( %val, ptr %base, %idxs, %m) { ; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v20, v16 -; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: li a1, 8 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v20, v16, a1 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v20, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f64: @@ -1748,11 +1755,11 @@ define void @mscatter_baseidx_nxv8i16_nxv8f64( %val, ptr %b define void @mscatter_baseidx_sext_nxv8i16_nxv8f64( %val, ptr %base, %idxs, %m) { ; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v20, v16 -; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: li a1, 8 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v20, v16, a1 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v20, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64: @@ -1771,11 +1778,11 @@ define void @mscatter_baseidx_sext_nxv8i16_nxv8f64( %val, p define void @mscatter_baseidx_zext_nxv8i16_nxv8f64( %val, ptr %base, %idxs, %m) { ; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-NEXT: vzext.vf2 v20, v16 -; CHECK-NEXT: vsll.vi v16, v20, 3 +; CHECK-NEXT: li a1, 8 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vwmulu.vx v20, v16, a1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT: vsoxei32.v v8, (a0), v20, v0.t ; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -1794,10 +1801,11 @@ define void @mscatter_baseidx_nxv8i32_nxv8f64( %val, ptr %b ; ; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v24, v16 -; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: li a1, 8 +; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vwmulsu.vx v24, v16, a1 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs call void @llvm.masked.scatter.nxv8f64.nxv8p0( %val, %ptrs, i32 8, %m) @@ -1815,10 +1823,11 @@ define void @mscatter_baseidx_sext_nxv8i32_nxv8f64( %val, p ; ; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v24, v16 -; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: li a1, 8 +; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vwmulsu.vx v24, v16, a1 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -1837,10 +1846,11 @@ define void @mscatter_baseidx_zext_nxv8i32_nxv8f64( 
%val, p ; ; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf2 v24, v16 -; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: li a1, 8 +; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vwmulu.vx v24, v16, a1 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -2015,15 +2025,15 @@ define void @mscatter_baseidx_nxv16i16_nxv16f64( %val0, @vpgather_baseidx_sext_nxv8i8_nxv8i32(ptr %base, @vpgather_baseidx_zext_nxv8i8_nxv8i32(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV32-NEXT: vzext.vf2 v10, v8 -; RV32-NEXT: vsll.vi v12, v10, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV32-NEXT: vwmulu.vx v12, v8, a2 ; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32-NEXT: vluxei16.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV64-NEXT: vzext.vf2 v10, v8 -; RV64-NEXT: vsll.vi v12, v10, 2 +; RV64-NEXT: li a2, 4 +; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV64-NEXT: vwmulu.vx v12, v8, a2 ; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vluxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret @@ -823,10 +823,11 @@ define @vpgather_baseidx_zext_nxv8i8_nxv8i32(ptr %base, @vpgather_baseidx_nxv8i16_nxv8i32(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v12, v8, a2 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8i32: @@ -845,10 +846,11 @@ define @vpgather_baseidx_nxv8i16_nxv8i32(ptr %base, @vpgather_baseidx_sext_nxv8i16_nxv8i32(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v12, v8, a2 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i32: @@ -868,18 +870,20 @@ define @vpgather_baseidx_sext_nxv8i16_nxv8i32(ptr %base, @vpgather_baseidx_zext_nxv8i16_nxv8i32(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vzext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulu.vx v12, v8, a2 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli 
zero, a1, e32, m4, ta, ma -; RV64-NEXT: vzext.vf2 v12, v8 -; RV64-NEXT: vsll.vi v8, v12, 2 -; RV64-NEXT: vluxei32.v v8, (a0), v8, v0.t +; RV64-NEXT: li a2, 4 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vwmulu.vx v12, v8, a2 +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV64-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i32, ptr %base, %eidxs @@ -897,10 +901,9 @@ define @vpgather_baseidx_nxv8i32(ptr %base, %idxs @@ -1049,18 +1052,18 @@ define @vpgather_baseidx_sext_nxv8i8_nxv8i64(ptr %base, @vpgather_baseidx_zext_nxv8i8_nxv8i64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV32-NEXT: vzext.vf2 v10, v8 -; RV32-NEXT: vsll.vi v16, v10, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV32-NEXT: vwmulu.vx v16, v8, a2 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei16.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV64-NEXT: vzext.vf2 v10, v8 -; RV64-NEXT: vsll.vi v16, v10, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV64-NEXT: vwmulu.vx v16, v8, a2 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vluxei16.v v8, (a0), v16, v0.t ; RV64-NEXT: ret @@ -1073,9 +1076,9 @@ define @vpgather_baseidx_zext_nxv8i8_nxv8i64(ptr %base, @vpgather_baseidx_nxv8i16_nxv8i64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v16, v12, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v16, v8, a2 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret @@ -1095,9 +1098,9 @@ define @vpgather_baseidx_nxv8i16_nxv8i64(ptr %base, @vpgather_baseidx_sext_nxv8i16_nxv8i64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v16, v12, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v16, v8, a2 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret @@ -1118,18 +1121,18 @@ define @vpgather_baseidx_sext_nxv8i16_nxv8i64(ptr %base, @vpgather_baseidx_zext_nxv8i16_nxv8i64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vzext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v16, v12, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulu.vx v16, v8, a2 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV64-NEXT: vzext.vf2 v12, v8 -; RV64-NEXT: vsll.vi v16, v12, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vwmulu.vx v16, v8, a2 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV64-NEXT: ret @@ -1150,10 
+1153,11 @@ define @vpgather_baseidx_nxv8i32_nxv8i64(ptr %base, %idxs %v = call @llvm.vp.gather.nxv8i64.nxv8p0( %ptrs, %m, i32 %evl) @@ -1171,10 +1175,11 @@ define @vpgather_baseidx_sext_nxv8i32_nxv8i64(ptr %base, %idxs to %ptrs = getelementptr inbounds i64, ptr %base, %eidxs @@ -1193,10 +1198,11 @@ define @vpgather_baseidx_zext_nxv8i32_nxv8i64(ptr %base, %idxs to %ptrs = getelementptr inbounds i64, ptr %base, %eidxs @@ -1742,18 +1748,18 @@ define @vpgather_baseidx_sext_nxv8i8_nxv8f32(ptr %base, @vpgather_baseidx_zext_nxv8i8_nxv8f32(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV32-NEXT: vzext.vf2 v10, v8 -; RV32-NEXT: vsll.vi v12, v10, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV32-NEXT: vwmulu.vx v12, v8, a2 ; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32-NEXT: vluxei16.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV64-NEXT: vzext.vf2 v10, v8 -; RV64-NEXT: vsll.vi v12, v10, 2 +; RV64-NEXT: li a2, 4 +; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV64-NEXT: vwmulu.vx v12, v8, a2 ; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vluxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret @@ -1766,10 +1772,11 @@ define @vpgather_baseidx_zext_nxv8i8_nxv8f32(ptr %base, @vpgather_baseidx_nxv8i16_nxv8f32(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v12, v8, a2 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8f32: @@ -1788,10 +1795,11 @@ define @vpgather_baseidx_nxv8i16_nxv8f32(ptr %base, @vpgather_baseidx_sext_nxv8i16_nxv8f32(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v12, v8, a2 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f32: @@ -1811,18 +1819,20 @@ define @vpgather_baseidx_sext_nxv8i16_nxv8f32(ptr %base, @vpgather_baseidx_zext_nxv8i16_nxv8f32(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vzext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulu.vx v12, v8, a2 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV64-NEXT: vzext.vf2 v12, v8 -; RV64-NEXT: vsll.vi v8, v12, 2 -; RV64-NEXT: vluxei32.v v8, (a0), v8, v0.t +; RV64-NEXT: li a2, 4 +; 
RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vwmulu.vx v12, v8, a2 +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV64-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds float, ptr %base, %eidxs @@ -1840,10 +1850,9 @@ define @vpgather_baseidx_nxv8f32(ptr %base, %idxs @@ -1992,18 +2001,18 @@ define @vpgather_baseidx_sext_nxv6i8_nxv6f64(ptr %base, @vpgather_baseidx_zext_nxv6i8_nxv6f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv6i8_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV32-NEXT: vzext.vf2 v10, v8 -; RV32-NEXT: vsll.vi v16, v10, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV32-NEXT: vwmulu.vx v16, v8, a2 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei16.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv6i8_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV64-NEXT: vzext.vf2 v10, v8 -; RV64-NEXT: vsll.vi v16, v10, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV64-NEXT: vwmulu.vx v16, v8, a2 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vluxei16.v v8, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2016,9 +2025,9 @@ define @vpgather_baseidx_zext_nxv6i8_nxv6f64(ptr %base, @vpgather_baseidx_nxv6i16_nxv6f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv6i16_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v16, v12, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v16, v8, a2 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret @@ -2038,9 +2047,9 @@ define @vpgather_baseidx_nxv6i16_nxv6f64(ptr %base, @vpgather_baseidx_sext_nxv6i16_nxv6f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv6i16_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v16, v12, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v16, v8, a2 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret @@ -2061,18 +2070,18 @@ define @vpgather_baseidx_sext_nxv6i16_nxv6f64(ptr %base, < define @vpgather_baseidx_zext_nxv6i16_nxv6f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv6i16_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vzext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v16, v12, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulu.vx v16, v8, a2 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv6i16_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV64-NEXT: vzext.vf2 v12, v8 -; RV64-NEXT: vsll.vi v16, v12, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vwmulu.vx v16, v8, a2 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2093,10 +2102,11 @@ define @vpgather_baseidx_nxv6i32_nxv6f64(ptr %base, %idxs %v = call @llvm.vp.gather.nxv6f64.nxv6p0( %ptrs, %m, i32 %evl) @@ -2114,10 +2124,11 
@@ define @vpgather_baseidx_sext_nxv6i32_nxv6f64(ptr %base, < ; ; RV64-LABEL: vpgather_baseidx_sext_nxv6i32_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v16, v8 -; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vwmulsu.vx v16, v8, a2 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -2136,10 +2147,11 @@ define @vpgather_baseidx_zext_nxv6i32_nxv6f64(ptr %base, < ; ; RV64-LABEL: vpgather_baseidx_zext_nxv6i32_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vzext.vf2 v16, v8 -; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vwmulu.vx v16, v8, a2 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -2235,18 +2247,18 @@ define @vpgather_baseidx_sext_nxv8i8_nxv8f64(ptr %base, @vpgather_baseidx_zext_nxv8i8_nxv8f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV32-NEXT: vzext.vf2 v10, v8 -; RV32-NEXT: vsll.vi v16, v10, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV32-NEXT: vwmulu.vx v16, v8, a2 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei16.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV64-NEXT: vzext.vf2 v10, v8 -; RV64-NEXT: vsll.vi v16, v10, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV64-NEXT: vwmulu.vx v16, v8, a2 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vluxei16.v v8, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2259,9 +2271,9 @@ define @vpgather_baseidx_zext_nxv8i8_nxv8f64(ptr %base, @vpgather_baseidx_nxv8i16_nxv8f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v16, v12, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v16, v8, a2 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret @@ -2281,9 +2293,9 @@ define @vpgather_baseidx_nxv8i16_nxv8f64(ptr %base, @vpgather_baseidx_sext_nxv8i16_nxv8f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v16, v12, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v16, v8, a2 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret @@ -2304,18 +2316,18 @@ define @vpgather_baseidx_sext_nxv8i16_nxv8f64(ptr %base, < define @vpgather_baseidx_zext_nxv8i16_nxv8f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, 
ta, ma -; RV32-NEXT: vzext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v16, v12, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulu.vx v16, v8, a2 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV64-NEXT: vzext.vf2 v12, v8 -; RV64-NEXT: vsll.vi v16, v12, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vwmulu.vx v16, v8, a2 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2336,10 +2348,11 @@ define @vpgather_baseidx_nxv8i32_nxv8f64(ptr %base, %idxs %v = call @llvm.vp.gather.nxv8f64.nxv8p0( %ptrs, %m, i32 %evl) @@ -2357,10 +2370,11 @@ define @vpgather_baseidx_sext_nxv8i32_nxv8f64(ptr %base, < ; ; RV64-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v16, v8 -; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vwmulsu.vx v16, v8, a2 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -2379,10 +2393,11 @@ define @vpgather_baseidx_zext_nxv8i32_nxv8f64(ptr %base, < ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vzext.vf2 v16, v8 -; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vwmulu.vx v16, v8, a2 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -2465,27 +2480,26 @@ define @vpgather_nxv16f64( %ptrs, @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv16i16_nxv16f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; RV32-NEXT: vmv1r.v v12, v0 -; RV32-NEXT: vsext.vf2 v16, v8 +; RV32-NEXT: li a3, 8 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: vsll.vi v24, v16, 3 -; RV32-NEXT: sub a3, a1, a2 -; RV32-NEXT: srli a4, a2, 3 -; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vx v0, v0, a4 -; RV32-NEXT: sltu a4, a1, a3 -; RV32-NEXT: addi a4, a4, -1 -; RV32-NEXT: and a3, a4, a3 -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t +; RV32-NEXT: vsetvli a4, zero, e16, m4, ta, ma +; RV32-NEXT: vwmulsu.vx v24, v8, a3 +; RV32-NEXT: mv a3, a1 ; RV32-NEXT: bltu a1, a2, .LBB112_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a3, a2 ; RV32-NEXT: .LBB112_2: -; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: sub a3, a1, a2 +; RV32-NEXT: srli a2, a2, 3 +; RV32-NEXT: sltu a1, a1, a3 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a2 +; RV32-NEXT: and a1, a1, a3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: ret ; ; 
RV64-LABEL: vpgather_baseidx_nxv16i16_nxv16f64: @@ -2523,27 +2537,26 @@ define @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; RV32-NEXT: vmv1r.v v12, v0 -; RV32-NEXT: vsext.vf2 v16, v8 +; RV32-NEXT: li a3, 8 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: vsll.vi v24, v16, 3 -; RV32-NEXT: sub a3, a1, a2 -; RV32-NEXT: srli a4, a2, 3 -; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vx v0, v0, a4 -; RV32-NEXT: sltu a4, a1, a3 -; RV32-NEXT: addi a4, a4, -1 -; RV32-NEXT: and a3, a4, a3 -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t +; RV32-NEXT: vsetvli a4, zero, e16, m4, ta, ma +; RV32-NEXT: vwmulsu.vx v24, v8, a3 +; RV32-NEXT: mv a3, a1 ; RV32-NEXT: bltu a1, a2, .LBB113_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a3, a2 ; RV32-NEXT: .LBB113_2: -; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: sub a3, a1, a2 +; RV32-NEXT: srli a2, a2, 3 +; RV32-NEXT: sltu a1, a1, a3 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a2 +; RV32-NEXT: and a1, a1, a3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64: @@ -2582,52 +2595,50 @@ define @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; RV32-NEXT: vmv1r.v v12, v0 -; RV32-NEXT: vzext.vf2 v16, v8 +; RV32-NEXT: li a3, 8 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: vsll.vi v24, v16, 3 -; RV32-NEXT: sub a3, a1, a2 -; RV32-NEXT: srli a4, a2, 3 -; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vx v0, v0, a4 -; RV32-NEXT: sltu a4, a1, a3 -; RV32-NEXT: addi a4, a4, -1 -; RV32-NEXT: and a3, a4, a3 -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t +; RV32-NEXT: vsetvli a4, zero, e16, m4, ta, ma +; RV32-NEXT: vwmulu.vx v24, v8, a3 +; RV32-NEXT: mv a3, a1 ; RV32-NEXT: bltu a1, a2, .LBB114_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a1, a2 +; RV32-NEXT: mv a3, a2 ; RV32-NEXT: .LBB114_2: -; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: sub a3, a1, a2 +; RV32-NEXT: srli a2, a2, 3 +; RV32-NEXT: sltu a1, a1, a3 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a2 +; RV32-NEXT: and a1, a1, a3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; RV64-NEXT: vmv1r.v v12, v0 -; RV64-NEXT: vzext.vf2 v16, v8 +; RV64-NEXT: li a3, 8 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: vsll.vi v24, v16, 3 -; RV64-NEXT: sub a3, a1, a2 -; RV64-NEXT: srli a4, a2, 3 -; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v0, a4 -; 
RV64-NEXT: sltu a4, a1, a3 -; RV64-NEXT: addi a4, a4, -1 -; RV64-NEXT: and a3, a4, a3 -; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV64-NEXT: vluxei32.v v16, (a0), v28, v0.t +; RV64-NEXT: vsetvli a4, zero, e16, m4, ta, ma +; RV64-NEXT: vwmulu.vx v24, v8, a3 +; RV64-NEXT: mv a3, a1 ; RV64-NEXT: bltu a1, a2, .LBB114_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a1, a2 +; RV64-NEXT: mv a3, a2 ; RV64-NEXT: .LBB114_2: -; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v8, (a0), v24, v0.t +; RV64-NEXT: sub a3, a1, a2 +; RV64-NEXT: srli a2, a2, 3 +; RV64-NEXT: sltu a1, a1, a3 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v0, a2 +; RV64-NEXT: and a1, a1, a3 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vluxei32.v v16, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll index 647e3965b7ec2..2cf6248c17598 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll @@ -659,20 +659,20 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8i32( %val, ptr define void @vpscatter_baseidx_zext_nxv8i8_nxv8i32( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV32-NEXT: vzext.vf2 v14, v12 -; RV32-NEXT: vsll.vi v12, v14, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV32-NEXT: vwmulu.vx v14, v12, a2 ; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v14, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV64-NEXT: vzext.vf2 v14, v12 -; RV64-NEXT: vsll.vi v12, v14, 2 +; RV64-NEXT: li a2, 4 +; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV64-NEXT: vwmulu.vx v14, v12, a2 ; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v14, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i32, ptr %base, %eidxs @@ -683,10 +683,11 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8i32( %val, ptr define void @vpscatter_baseidx_nxv8i16_nxv8i32( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 2 -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v16, v12, a2 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8i32: @@ -705,10 +706,11 @@ define void @vpscatter_baseidx_nxv8i16_nxv8i32( %val, ptr %bas define void @vpscatter_baseidx_sext_nxv8i16_nxv8i32( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 2 -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: li a2, 4 +; 
RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v16, v12, a2 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i32: @@ -728,18 +730,20 @@ define void @vpscatter_baseidx_sext_nxv8i16_nxv8i32( %val, ptr define void @vpscatter_baseidx_zext_nxv8i16_nxv8i32( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vzext.vf2 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 2 -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulu.vx v16, v12, a2 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV64-NEXT: vzext.vf2 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 2 -; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV64-NEXT: li a2, 4 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vwmulu.vx v16, v12, a2 +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV64-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i32, ptr %base, %eidxs @@ -757,10 +761,9 @@ define void @vpscatter_baseidx_nxv8i32( %val, ptr %base, %idxs @@ -904,20 +907,20 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8i64( %val, ptr define void @vpscatter_baseidx_zext_nxv8i8_nxv8i64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV32-NEXT: vzext.vf2 v18, v16 -; RV32-NEXT: vsll.vi v16, v18, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV32-NEXT: vwmulu.vx v18, v16, a2 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vsoxei16.v v8, (a0), v16, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v18, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV64-NEXT: vzext.vf2 v18, v16 -; RV64-NEXT: vsll.vi v16, v18, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV64-NEXT: vwmulu.vx v18, v16, a2 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64-NEXT: vsoxei16.v v8, (a0), v16, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v18, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i64, ptr %base, %eidxs @@ -928,11 +931,11 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8i64( %val, ptr define void @vpscatter_baseidx_nxv8i16_nxv8i64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v20, v16 -; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v20, v16, a2 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v20, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8i64: @@ -950,11 +953,11 @@ define void @vpscatter_baseidx_nxv8i16_nxv8i64( %val, ptr %bas define void @vpscatter_baseidx_sext_nxv8i16_nxv8i64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: 
vpscatter_baseidx_sext_nxv8i16_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v20, v16 -; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v20, v16, a2 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v20, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i64: @@ -973,20 +976,20 @@ define void @vpscatter_baseidx_sext_nxv8i16_nxv8i64( %val, ptr define void @vpscatter_baseidx_zext_nxv8i16_nxv8i64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vzext.vf2 v20, v16 -; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulu.vx v20, v16, a2 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v20, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV64-NEXT: vzext.vf2 v20, v16 -; RV64-NEXT: vsll.vi v16, v20, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vwmulu.vx v20, v16, a2 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV64-NEXT: vsoxei32.v v8, (a0), v20, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i64, ptr %base, %eidxs @@ -1005,10 +1008,11 @@ define void @vpscatter_baseidx_nxv8i32_nxv8i64( %val, ptr %bas ; ; RV64-LABEL: vpscatter_baseidx_nxv8i32_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v24, v16 -; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vwmulsu.vx v24, v16, a2 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i64, ptr %base, %idxs call void @llvm.vp.scatter.nxv8i64.nxv8p0( %val, %ptrs, %m, i32 %evl) @@ -1026,10 +1030,11 @@ define void @vpscatter_baseidx_sext_nxv8i32_nxv8i64( %val, ptr ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v24, v16 -; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vwmulsu.vx v24, v16, a2 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to %ptrs = getelementptr inbounds i64, ptr %base, %eidxs @@ -1048,10 +1053,11 @@ define void @vpscatter_baseidx_zext_nxv8i32_nxv8i64( %val, ptr ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vzext.vf2 v24, v16 -; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vwmulu.vx v24, v16, a2 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i64, ptr %base, %eidxs @@ 
-1572,20 +1578,20 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8f32( %val, pt define void @vpscatter_baseidx_zext_nxv8i8_nxv8f32( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV32-NEXT: vzext.vf2 v14, v12 -; RV32-NEXT: vsll.vi v12, v14, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV32-NEXT: vwmulu.vx v14, v12, a2 ; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v14, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV64-NEXT: vzext.vf2 v14, v12 -; RV64-NEXT: vsll.vi v12, v14, 2 +; RV64-NEXT: li a2, 4 +; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV64-NEXT: vwmulu.vx v14, v12, a2 ; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v14, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds float, ptr %base, %eidxs @@ -1596,10 +1602,11 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8f32( %val, pt define void @vpscatter_baseidx_nxv8i16_nxv8f32( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 2 -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v16, v12, a2 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8f32: @@ -1618,10 +1625,11 @@ define void @vpscatter_baseidx_nxv8i16_nxv8f32( %val, ptr %b define void @vpscatter_baseidx_sext_nxv8i16_nxv8f32( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 2 -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v16, v12, a2 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f32: @@ -1641,18 +1649,20 @@ define void @vpscatter_baseidx_sext_nxv8i16_nxv8f32( %val, p define void @vpscatter_baseidx_zext_nxv8i16_nxv8f32( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vzext.vf2 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 2 -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: li a2, 4 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulu.vx v16, v12, a2 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV64-NEXT: vzext.vf2 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 2 -; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV64-NEXT: li a2, 4 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vwmulu.vx v16, v12, a2 +; RV64-NEXT: vsetvli zero, zero, e32, m4, 
ta, ma +; RV64-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds float, ptr %base, %eidxs @@ -1670,10 +1680,9 @@ define void @vpscatter_baseidx_nxv8f32( %val, ptr %base, %idxs @@ -1817,20 +1826,20 @@ define void @vpscatter_baseidx_sext_nxv6i8_nxv6f64( %val, p define void @vpscatter_baseidx_zext_nxv6i8_nxv6f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV32-NEXT: vzext.vf2 v18, v16 -; RV32-NEXT: vsll.vi v16, v18, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV32-NEXT: vwmulu.vx v18, v16, a2 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vsoxei16.v v8, (a0), v16, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v18, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV64-NEXT: vzext.vf2 v18, v16 -; RV64-NEXT: vsll.vi v16, v18, 3 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV64-NEXT: vwmulu.vx v18, v16, a2 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64-NEXT: vsoxei16.v v8, (a0), v16, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v18, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -1841,11 +1850,11 @@ define void @vpscatter_baseidx_zext_nxv6i8_nxv6f64( %val, p define void @vpscatter_baseidx_nxv6i16_nxv6f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v20, v16 -; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v20, v16, a2 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v20, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64: @@ -1863,11 +1872,11 @@ define void @vpscatter_baseidx_nxv6i16_nxv6f64( %val, ptr % define void @vpscatter_baseidx_sext_nxv6i16_nxv6f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vsext.vf2 v20, v16 -; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulsu.vx v20, v16, a2 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v20, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64: @@ -1886,20 +1895,20 @@ define void @vpscatter_baseidx_sext_nxv6i16_nxv6f64( %val, define void @vpscatter_baseidx_zext_nxv6i16_nxv6f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vzext.vf2 v20, v16 -; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vwmulu.vx v20, v16, a2 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsoxei32.v v8, (a0), v20, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV64-NEXT: vzext.vf2 v20, v16 
-; RV64-NEXT: vsll.vi v16, v20, 3
+; RV64-NEXT: li a2, 8
+; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT: vwmulu.vx v20, v16, a2
 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64-NEXT: vsoxei32.v v8, (a0), v16, v0.t
+; RV64-NEXT: vsoxei32.v v8, (a0), v20, v0.t
 ; RV64-NEXT: ret
 %eidxs = zext %idxs to
 %ptrs = getelementptr inbounds double, ptr %base, %eidxs
@@ -1918,10 +1927,11 @@ define void @vpscatter_baseidx_nxv6i32_nxv6f64( %val, ptr %
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv6i32_nxv6f64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf2 v24, v16
-; RV64-NEXT: vsll.vi v16, v24, 3
-; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: li a2, 8
+; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT: vwmulsu.vx v24, v16, a2
+; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
 ; RV64-NEXT: ret
 %ptrs = getelementptr inbounds double, ptr %base, %idxs
 call void @llvm.vp.scatter.nxv6f64.nxv6p0( %val, %ptrs, %m, i32 %evl)
@@ -1939,10 +1949,11 @@ define void @vpscatter_baseidx_sext_nxv6i32_nxv6f64( %val,
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf2 v24, v16
-; RV64-NEXT: vsll.vi v16, v24, 3
-; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: li a2, 8
+; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT: vwmulsu.vx v24, v16, a2
+; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
 ; RV64-NEXT: ret
 %eidxs = sext %idxs to
 %ptrs = getelementptr inbounds double, ptr %base, %eidxs
@@ -1961,10 +1972,11 @@ define void @vpscatter_baseidx_zext_nxv6i32_nxv6f64( %val,
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT: vzext.vf2 v24, v16
-; RV64-NEXT: vsll.vi v16, v24, 3
-; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: li a2, 8
+; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT: vwmulu.vx v24, v16, a2
+; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
 ; RV64-NEXT: ret
 %eidxs = zext %idxs to
 %ptrs = getelementptr inbounds double, ptr %base, %eidxs
@@ -2059,20 +2071,20 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8f64( %val, p
 define void @vpscatter_baseidx_zext_nxv8i8_nxv8f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64:
 ; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; RV32-NEXT: vzext.vf2 v18, v16
-; RV32-NEXT: vsll.vi v16, v18, 3
+; RV32-NEXT: li a2, 8
+; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; RV32-NEXT: vwmulu.vx v18, v16, a2
 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT: vsoxei16.v v8, (a0), v16, v0.t
+; RV32-NEXT: vsoxei16.v v8, (a0), v18, v0.t
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; RV64-NEXT: vzext.vf2 v18, v16
-; RV64-NEXT: vsll.vi v16, v18, 3
+; RV64-NEXT: li a2, 8
+; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; RV64-NEXT: vwmulu.vx v18, v16, a2
 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64-NEXT: vsoxei16.v v8, (a0), v16, v0.t
+; RV64-NEXT: vsoxei16.v v8, (a0), v18, v0.t
 ; RV64-NEXT: ret
 %eidxs = zext %idxs to
 %ptrs = getelementptr inbounds double, ptr %base, %eidxs
@@ -2083,11 +2095,11 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8f64( %val, p
 define void @vpscatter_baseidx_nxv8i16_nxv8f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8f64:
 ; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; RV32-NEXT: vsext.vf2 v20, v16
-; RV32-NEXT: vsll.vi v16, v20, 3
+; RV32-NEXT: li a2, 8
+; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT: vwmulsu.vx v20, v16, a2
 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT: vsoxei32.v v8, (a0), v20, v0.t
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8f64:
@@ -2105,11 +2117,11 @@ define void @vpscatter_baseidx_nxv8i16_nxv8f64( %val, ptr %
 define void @vpscatter_baseidx_sext_nxv8i16_nxv8f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64:
 ; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; RV32-NEXT: vsext.vf2 v20, v16
-; RV32-NEXT: vsll.vi v16, v20, 3
+; RV32-NEXT: li a2, 8
+; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT: vwmulsu.vx v20, v16, a2
 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT: vsoxei32.v v8, (a0), v20, v0.t
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64:
@@ -2128,20 +2140,20 @@ define void @vpscatter_baseidx_sext_nxv8i16_nxv8f64( %val,
 define void @vpscatter_baseidx_zext_nxv8i16_nxv8f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64:
 ; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; RV32-NEXT: vzext.vf2 v20, v16
-; RV32-NEXT: vsll.vi v16, v20, 3
+; RV32-NEXT: li a2, 8
+; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT: vwmulu.vx v20, v16, a2
 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT: vsoxei32.v v8, (a0), v20, v0.t
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; RV64-NEXT: vzext.vf2 v20, v16
-; RV64-NEXT: vsll.vi v16, v20, 3
+; RV64-NEXT: li a2, 8
+; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT: vwmulu.vx v20, v16, a2
 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64-NEXT: vsoxei32.v v8, (a0), v16, v0.t
+; RV64-NEXT: vsoxei32.v v8, (a0), v20, v0.t
 ; RV64-NEXT: ret
 %eidxs = zext %idxs to
 %ptrs = getelementptr inbounds double, ptr %base, %eidxs
@@ -2160,10 +2172,11 @@ define void @vpscatter_baseidx_nxv8i32_nxv8f64( %val, ptr %
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i32_nxv8f64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf2 v24, v16
-; RV64-NEXT: vsll.vi v16, v24, 3
-; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: li a2, 8
+; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT: vwmulsu.vx v24, v16, a2
+; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
 ; RV64-NEXT: ret
 %ptrs = getelementptr inbounds double, ptr %base, %idxs
 call void @llvm.vp.scatter.nxv8f64.nxv8p0( %val, %ptrs, %m, i32 %evl)
@@ -2181,10 +2194,11 @@ define void @vpscatter_baseidx_sext_nxv8i32_nxv8f64( %val,
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8f64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf2 v24, v16
-; RV64-NEXT: vsll.vi v16, v24, 3
-; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: li a2, 8
+; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT: vwmulsu.vx v24, v16, a2
+; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
 ; RV64-NEXT: ret
 %eidxs = sext %idxs to
 %ptrs = getelementptr inbounds double, ptr %base, %eidxs
@@ -2203,10 +2217,11 @@ define void @vpscatter_baseidx_zext_nxv8i32_nxv8f64( %val,
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8f64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT: vzext.vf2 v24, v16
-; RV64-NEXT: vsll.vi v16, v24, 3
-; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: li a2, 8
+; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT: vwmulu.vx v24, v16, a2
+; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
 ; RV64-NEXT: ret
 %eidxs = zext %idxs to
 %ptrs = getelementptr inbounds double, ptr %base, %eidxs
@@ -2307,25 +2322,16 @@ define void @vpscatter_nxv16f64( %val,
 define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: sub sp, sp, a3
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; RV32-NEXT: addi a3, sp, 16
-; RV32-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vl4re16.v v24, (a1)
-; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT: vsext.vf2 v0, v24
+; RV32-NEXT: vl4re16.v v4, (a1)
+; RV32-NEXT: li a3, 8
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: vsll.vi v24, v0, 3
+; RV32-NEXT: vsetvli a4, zero, e16, m4, ta, ma
+; RV32-NEXT: vwmulsu.vx v24, v4, a3
 ; RV32-NEXT: mv a3, a2
 ; RV32-NEXT: bltu a2, a1, .LBB109_2
 ; RV32-NEXT: # %bb.1:
 ; RV32-NEXT: mv a3, a1
 ; RV32-NEXT: .LBB109_2:
-; RV32-NEXT: addi a4, sp, 16
-; RV32-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload
 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
 ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT: sub a3, a2, a1
@@ -2337,11 +2343,6 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, pt
 ; RV32-NEXT: and a2, a2, a3
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: .cfi_def_cfa sp, 16
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: .cfi_def_cfa_offset 0
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64:
@@ -2404,25 +2405,16 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, pt
 define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: sub sp, sp, a3
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; RV32-NEXT: addi a3, sp, 16
-; RV32-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vl4re16.v v24, (a1)
-; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT: vsext.vf2 v0, v24
+; RV32-NEXT: vl4re16.v v4, (a1)
+; RV32-NEXT: li a3, 8
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: vsll.vi v24, v0, 3
+; RV32-NEXT: vsetvli a4, zero, e16, m4, ta, ma
+; RV32-NEXT: vwmulsu.vx v24, v4, a3
 ; RV32-NEXT: mv a3, a2
 ; RV32-NEXT: bltu a2, a1, .LBB110_2
 ; RV32-NEXT: # %bb.1:
 ; RV32-NEXT: mv a3, a1
 ; RV32-NEXT: .LBB110_2:
-; RV32-NEXT: addi a4, sp, 16
-; RV32-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload
 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
 ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT: sub a3, a2, a1
@@ -2434,11 +2426,6 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va
 ; RV32-NEXT: and a2, a2, a3
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: .cfi_def_cfa sp, 16
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: .cfi_def_cfa_offset 0
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64:
@@ -2502,25 +2489,16 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va
 define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: sub sp, sp, a3
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; RV32-NEXT: addi a3, sp, 16
-; RV32-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vl4re16.v v24, (a1)
-; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT: vzext.vf2 v0, v24
+; RV32-NEXT: vl4re16.v v4, (a1)
+; RV32-NEXT: li a3, 8
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: vsll.vi v24, v0, 3
+; RV32-NEXT: vsetvli a4, zero, e16, m4, ta, ma
+; RV32-NEXT: vwmulu.vx v24, v4, a3
 ; RV32-NEXT: mv a3, a2
 ; RV32-NEXT: bltu a2, a1, .LBB111_2
 ; RV32-NEXT: # %bb.1:
 ; RV32-NEXT: mv a3, a1
 ; RV32-NEXT: .LBB111_2:
-; RV32-NEXT: addi a4, sp, 16
-; RV32-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload
 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
 ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT: sub a3, a2, a1
@@ -2532,34 +2510,20 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %va
 ; RV32-NEXT: and a2, a2, a3
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: .cfi_def_cfa sp, 16
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: .cfi_def_cfa_offset 0
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
-; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: sub sp, sp, a3
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; RV64-NEXT: addi a3, sp, 16
-; RV64-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
-; RV64-NEXT: vl4re16.v v24, (a1)
-; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV64-NEXT: vzext.vf2 v0, v24
+; RV64-NEXT: vl4re16.v v4, (a1)
+; RV64-NEXT: li a3, 8
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: vsll.vi v24, v0, 3
+; RV64-NEXT: vsetvli a4, zero, e16, m4, ta, ma
+; RV64-NEXT: vwmulu.vx v24, v4, a3
 ; RV64-NEXT: mv a3, a2
 ; RV64-NEXT: bltu a2, a1, .LBB111_2
 ; RV64-NEXT: # %bb.1:
 ; RV64-NEXT: mv a3, a1
 ; RV64-NEXT: .LBB111_2:
-; RV64-NEXT: addi a4, sp, 16
-; RV64-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload
 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
 ; RV64-NEXT: vsoxei32.v v8, (a0), v24, v0.t
 ; RV64-NEXT: sub a3, a2, a1
@@ -2571,11 +2535,6 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %va
 ; RV64-NEXT: and a2, a2, a3
 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
 ; RV64-NEXT: vsoxei32.v v16, (a0), v28, v0.t
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: add sp, sp, a0
-; RV64-NEXT: .cfi_def_cfa sp, 16
-; RV64-NEXT: addi sp, sp, 16
-; RV64-NEXT: .cfi_def_cfa_offset 0
 ; RV64-NEXT: ret
 %eidxs = zext %idxs to
 %ptrs = getelementptr inbounds double, ptr %base, %eidxs
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
index ff807adf0e59f..fd09fe791b4fd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
@@ -224,9 +224,10 @@ define @vwsll_vx_i8_nxv2i64_zext( %a, i8 %b
 define @vwsll_vi_nxv2i64( %a) {
 ; CHECK-LABEL: vwsll_vi_nxv2i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsll.vi v8, v10, 2
+; CHECK-NEXT: li a0, 4
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vwmulu.vx v10, v8, a0
+; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vi_nxv2i64:
@@ -432,9 +433,10 @@ define @vwsll_vx_i8_nxv4i32_zext( %a, i8 %b
 define @vwsll_vi_nxv4i32( %a) {
 ; CHECK-LABEL: vwsll_vi_nxv4i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsll.vi v8, v10, 2
+; CHECK-NEXT: li a0, 4
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vwmulu.vx v10, v8, a0
+; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vi_nxv4i32:
@@ -612,9 +614,10 @@ define @vwsll_vx_i8_nxv8i16_zext( %a, i8 %b)
 define @vwsll_vi_nxv8i16( %a) {
 ; CHECK-LABEL: vwsll_vi_nxv8i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsll.vi v8, v10, 2
+; CHECK-NEXT: li a0, 4
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vwmulu.vx v10, v8, a0
+; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vi_nxv8i16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll
index c30c4763dd46d..1358a7c69cb8a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll
@@ -240,9 +240,10 @@ define @vwsll_vx_i8_nxv2i64_zext( %a, i8 %b
 define @vwsll_vi_nxv2i64( %a, %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vi_nxv2i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsll.vi v8, v10, 2, v0.t
+; CHECK-NEXT: li a1, 4
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vwmulu.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vi_nxv2i64:
@@ -464,9 +465,10 @@ define @vwsll_vx_i8_nxv4i32_zext( %a, i8 %b
 define @vwsll_vi_nxv4i32( %a, %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vi_nxv4i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsll.vi v8, v10, 2, v0.t
+; CHECK-NEXT: li a1, 4
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vwmulu.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vi_nxv4i32:
@@ -661,9 +663,10 @@ define @vwsll_vx_i8_nxv8i16_zext( %a, i8 %b,
 define @vwsll_vi_nxv8i16( %a, %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vwsll_vi_nxv8i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsll.vi v8, v10, 2, v0.t
+; CHECK-NEXT: li a1, 4
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vwmulu.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vi_nxv8i16: