@@ -1289,6 +1289,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
12891289 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
12901290 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
12911291 setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
1292+
1293+ // Lower BUILD_VECTOR with i64 type to VID on RV32 if possible.
1294+ setOperationAction(ISD::BUILD_VECTOR, MVT::i64, Custom);
12921295 }
12931296
12941297 setOperationAction(
@@ -3622,6 +3625,78 @@ static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
36223625 return Gather;
36233626}
36243627
3628+ static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG,
3629+ const RISCVSubtarget &Subtarget) {
3630+ MVT VT = Op.getSimpleValueType();
3631+ assert(VT.isFixedLengthVector() && "Unexpected vector!");
3632+
3633+ MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3634+
3635+ SDLoc DL(Op);
3636+ auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3637+
3638+ if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3639+ int64_t StepNumerator = SimpleVID->StepNumerator;
3640+ unsigned StepDenominator = SimpleVID->StepDenominator;
3641+ int64_t Addend = SimpleVID->Addend;
3642+
3643+ assert(StepNumerator != 0 && "Invalid step");
3644+ bool Negate = false;
3645+ int64_t SplatStepVal = StepNumerator;
3646+ unsigned StepOpcode = ISD::MUL;
3647+ // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3648+ // anyway as the shift of 63 won't fit in uimm5.
3649+ if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3650+ isPowerOf2_64(std::abs(StepNumerator))) {
3651+ Negate = StepNumerator < 0;
3652+ StepOpcode = ISD::SHL;
3653+ SplatStepVal = Log2_64(std::abs(StepNumerator));
3654+ }
3655+
3656+ // Only emit VIDs with suitably-small steps/addends. We use imm5 as the
3657+ // threshold since it's the immediate value many RVV instructions accept.
3658+ // There is no vmul.vi instruction so ensure multiply constant can fit in
3659+ // a single addi instruction.
3660+ if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3661+ (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3662+ isPowerOf2_32(StepDenominator) &&
3663+ (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3664+ MVT VIDVT =
3665+ VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3666+ MVT VIDContainerVT =
3667+ getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3668+ SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3669+ // Convert right out of the scalable type so we can use standard ISD
3670+ // nodes for the rest of the computation. If we used scalable types with
3671+ // these, we'd lose the fixed-length vector info and generate worse
3672+ // vsetvli code.
3673+ VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3674+ if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3675+ (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3676+ SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3677+ VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3678+ }
3679+ if (StepDenominator != 1) {
3680+ SDValue SplatStep =
3681+ DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3682+ VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3683+ }
3684+ if (Addend != 0 || Negate) {
3685+ SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3686+ VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3687+ VID);
3688+ }
3689+ if (VT.isFloatingPoint()) {
3690+ // TODO: Use vfwcvt to reduce register pressure.
3691+ VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3692+ }
3693+ return VID;
3694+ }
3695+ }
3696+
3697+ return SDValue();
3698+ }
3699+
36253700/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
36263701/// which constitute a large proportion of the elements. In such cases we can
36273702/// splat a vector with the dominant element and make up the shortfall with
@@ -3839,64 +3914,8 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
38393914 // Try and match index sequences, which we can lower to the vid instruction
38403915 // with optional modifications. An all-undef vector is matched by
38413916 // getSplatValue, above.
3842- if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3843- int64_t StepNumerator = SimpleVID->StepNumerator;
3844- unsigned StepDenominator = SimpleVID->StepDenominator;
3845- int64_t Addend = SimpleVID->Addend;
3846-
3847- assert(StepNumerator != 0 && "Invalid step");
3848- bool Negate = false;
3849- int64_t SplatStepVal = StepNumerator;
3850- unsigned StepOpcode = ISD::MUL;
3851- // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3852- // anyway as the shift of 63 won't fit in uimm5.
3853- if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3854- isPowerOf2_64(std::abs(StepNumerator))) {
3855- Negate = StepNumerator < 0;
3856- StepOpcode = ISD::SHL;
3857- SplatStepVal = Log2_64(std::abs(StepNumerator));
3858- }
3859-
3860- // Only emit VIDs with suitably-small steps/addends. We use imm5 is a
3861- // threshold since it's the immediate value many RVV instructions accept.
3862- // There is no vmul.vi instruction so ensure multiply constant can fit in
3863- // a single addi instruction.
3864- if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3865- (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3866- isPowerOf2_32(StepDenominator) &&
3867- (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3868- MVT VIDVT =
3869- VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3870- MVT VIDContainerVT =
3871- getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3872- SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3873- // Convert right out of the scalable type so we can use standard ISD
3874- // nodes for the rest of the computation. If we used scalable types with
3875- // these, we'd lose the fixed-length vector info and generate worse
3876- // vsetvli code.
3877- VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3878- if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3879- (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3880- SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3881- VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3882- }
3883- if (StepDenominator != 1) {
3884- SDValue SplatStep =
3885- DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3886- VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3887- }
3888- if (Addend != 0 || Negate) {
3889- SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3890- VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3891- VID);
3892- }
3893- if (VT.isFloatingPoint()) {
3894- // TODO: Use vfwcvt to reduce register pressure.
3895- VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3896- }
3897- return VID;
3898- }
3899- }
3917+ if (SDValue Res = lowerBuildVectorViaVID(Op, DAG, Subtarget))
3918+ return Res;
39003919
39013920 // For very small build_vectors, use a single scalar insert of a constant.
39023921 // TODO: Base this on constant rematerialization cost, not size.
@@ -7586,8 +7605,13 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
75867605 return lowerVECTOR_REVERSE(Op, DAG);
75877606 case ISD::VECTOR_SPLICE:
75887607 return lowerVECTOR_SPLICE(Op, DAG);
7589- case ISD::BUILD_VECTOR:
7608+ case ISD::BUILD_VECTOR: {
7609+ MVT VT = Op.getSimpleValueType();
7610+ MVT EltVT = VT.getVectorElementType();
7611+ if (!Subtarget.is64Bit() && EltVT == MVT::i64)
7612+ return lowerBuildVectorViaVID(Op, DAG, Subtarget);
75907613 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
7614+ }
75917615 case ISD::SPLAT_VECTOR: {
75927616 MVT VT = Op.getSimpleValueType();
75937617 MVT EltVT = VT.getVectorElementType();
0 commit comments