@@ -11136,8 +11136,9 @@ SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
1113611136 if (!VL)
1113711137 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
1113811138
11139- SDValue Index;
11140- if (!IsUnmasked && IsExpandingLoad) {
11139+ SDValue Result;
11140+ if (!IsUnmasked && IsExpandingLoad &&
11141+ Subtarget.hasOptimizedIndexedLoadStore()) {
1114111142 MVT IndexVT = ContainerVT;
1114211143 if (ContainerVT.isFloatingPoint())
1114311144 IndexVT = IndexVT.changeVectorElementTypeToInteger();
@@ -11147,47 +11148,98 @@ SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
1114711148 IndexVT = IndexVT.changeVectorElementType(XLenVT);
1114811149
1114911150 // If index vector is an i8 vector and the element count exceeds 256, we
11150- // should change the element type of index vector to i16 to avoid overflow.
11151+ // should change the element type of index vector to i16 to avoid
11152+ // overflow.
1115111153 if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
1115211154 // FIXME: We need to do vector splitting manually for LMUL=8 cases.
1115311155 if (getLMUL(IndexVT) == RISCVII::LMUL_8)
1115411156 return SDValue();
1115511157 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
1115611158 }
1115711159
11158- Index =
11160+ SDValue Index =
1115911161 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
1116011162 DAG.getTargetConstant(Intrinsic::riscv_viota, DL, XLenVT),
1116111163 DAG.getUNDEF(IndexVT), Mask, VL);
1116211164 if (uint64_t EltSize = ContainerVT.getScalarSizeInBits(); EltSize > 8)
1116311165 Index = DAG.getNode(RISCVISD::SHL_VL, DL, IndexVT, Index,
1116411166 DAG.getConstant(Log2_64(EltSize / 8), DL, IndexVT),
1116511167 DAG.getUNDEF(IndexVT), Mask, VL);
11166- }
11167-
11168- unsigned IntID = IsUnmasked ? Intrinsic::riscv_vle
11169- : IsExpandingLoad ? Intrinsic::riscv_vluxei_mask
11170- : Intrinsic::riscv_vle_mask;
11171- SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11172- if (IsUnmasked)
11173- Ops.push_back(DAG.getUNDEF(ContainerVT));
11174- else
11168+ unsigned IntID = Intrinsic::riscv_vluxei_mask;
11169+ SmallVector<SDValue, 8> Ops{Chain,
11170+ DAG.getTargetConstant(IntID, DL, XLenVT)};
1117511171 Ops.push_back(PassThru);
11176- Ops.push_back(BasePtr);
11177- if (!IsUnmasked) {
11178- if (IsExpandingLoad)
11179- Ops.push_back(Index);
11172+ Ops.push_back(BasePtr);
11173+ Ops.push_back(Index);
1118011174 Ops.push_back(Mask);
11181- }
11182- Ops.push_back(VL);
11183- if (!IsUnmasked)
11175+ Ops.push_back(VL);
1118411176 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
1118511177
11186- SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11178+ SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
1118711179
11188- SDValue Result =
11189- DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11190- Chain = Result.getValue(1);
11180+ Result = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11181+ MemVT, MMO);
11182+ Chain = Result.getValue(1);
11183+ } else {
11184+ SDValue ExpandingVL;
11185+ if (!IsUnmasked && IsExpandingLoad &&
11186+ !Subtarget.hasOptimizedIndexedLoadStore()) {
11187+ ExpandingVL = VL;
11188+ VL = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11189+ getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG),
11190+ VL);
11191+ }
11192+
11193+ unsigned IntID = IsUnmasked || (IsExpandingLoad &&
11194+ !Subtarget.hasOptimizedIndexedLoadStore())
11195+ ? Intrinsic::riscv_vle
11196+ : Intrinsic::riscv_vle_mask;
11197+ SmallVector<SDValue, 8> Ops{Chain,
11198+ DAG.getTargetConstant(IntID, DL, XLenVT)};
11199+ if (IntID == Intrinsic::riscv_vle)
11200+ Ops.push_back(DAG.getUNDEF(ContainerVT));
11201+ else
11202+ Ops.push_back(PassThru);
11203+ Ops.push_back(BasePtr);
11204+ if (IntID == Intrinsic::riscv_vle_mask)
11205+ Ops.push_back(Mask);
11206+ Ops.push_back(VL);
11207+ if (IntID == Intrinsic::riscv_vle_mask)
11208+ Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11209+
11210+ SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11211+
11212+ Result = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11213+ MemVT, MMO);
11214+ Chain = Result.getValue(1);
11215+ if (ExpandingVL) {
11216+ MVT IndexVT = ContainerVT;
11217+ if (ContainerVT.isFloatingPoint())
11218+ IndexVT = ContainerVT.changeVectorElementTypeToInteger();
11219+
11220+ MVT IndexEltVT = IndexVT.getVectorElementType();
11221+ bool UseVRGATHEREI16 = false;
11222+ // If index vector is an i8 vector and the element count exceeds 256, we
11223+ // should change the element type of index vector to i16 to avoid
11224+ // overflow.
11225+ if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
11226+ // FIXME: We need to do vector splitting manually for LMUL=8 cases.
11227+ if (getLMUL(IndexVT) == RISCVII::LMUL_8)
11228+ return SDValue();
11229+ IndexVT = IndexVT.changeVectorElementType(MVT::i16);
11230+ UseVRGATHEREI16 = true;
11231+ }
11232+
11233+ SDValue Iota =
11234+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
11235+ DAG.getConstant(Intrinsic::riscv_viota, DL, XLenVT),
11236+ DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
11237+ Result = DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
11238+ : RISCVISD::VRGATHER_VV_VL,
11239+ DL, ContainerVT, Result, Iota, PassThru, Mask,
11240+ ExpandingVL);
11241+ }
11242+ }
1119111243
1119211244 if (VT.isFixedLengthVector())
1119311245 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
0 commit comments