@@ -2073,8 +2073,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
20732073
20742074 if (Subtarget.hasVBMI2()) {
20752075 for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
2076- setOperationAction(ISD::FSHL, VT, Custom );
2077- setOperationAction(ISD::FSHR, VT, Custom );
2076+ setOperationAction(ISD::FSHL, VT, Legal );
2077+ setOperationAction(ISD::FSHR, VT, Legal );
20782078 }
20792079
20802080 setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
@@ -2089,8 +2089,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
20892089 if (!Subtarget.useSoftFloat() && Subtarget.hasVBMI2()) {
20902090 for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v16i16, MVT::v8i32,
20912091 MVT::v4i64}) {
2092- setOperationAction(ISD::FSHL, VT, Custom);
2093- setOperationAction(ISD::FSHR, VT, Custom);
2092+ setOperationAction(ISD::FSHL, VT, Subtarget.hasVLX() ? Legal : Custom);
2093+ setOperationAction(ISD::FSHR, VT, Subtarget.hasVLX() ? Legal : Custom);
20942094 }
20952095 }
20962096
@@ -2703,6 +2703,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
27032703 ISD::STRICT_FP_EXTEND,
27042704 ISD::FP_ROUND,
27052705 ISD::STRICT_FP_ROUND,
2706+ ISD::FSHL,
2707+ ISD::FSHR,
27062708 ISD::INTRINSIC_VOID,
27072709 ISD::INTRINSIC_WO_CHAIN,
27082710 ISD::INTRINSIC_W_CHAIN});
@@ -31314,19 +31316,15 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
3131431316 bool IsCstSplat = X86::isConstantSplat(Amt, APIntShiftAmt);
3131531317 unsigned NumElts = VT.getVectorNumElements();
3131631318
31317- if (Subtarget.hasVBMI2() && EltSizeInBits > 8) {
31318-
31319- if (IsCstSplat) {
31320- if (IsFSHR)
31321- std::swap(Op0, Op1);
31322- uint64_t ShiftAmt = APIntShiftAmt.urem(EltSizeInBits);
31323- SDValue Imm = DAG.getTargetConstant(ShiftAmt, DL, MVT::i8);
31324- return getAVX512Node(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
31325- {Op0, Op1, Imm}, DAG, Subtarget);
31326- }
31319+ // For non-VLX VBMI2 targets, widen 128/256-bit to 512-bit so
31320+ // the rest of the lowering/isel can select the VBMI2 forms.
31321+ // Only Custom types (v8i16, v4i32, v2i64, v16i16, v8i32, v4i64) can
31322+ // reach LowerFunnelShift with VBMI2 but no VLX, so no type check needed.
31323+ if (Subtarget.hasVBMI2() && !Subtarget.hasVLX() && EltSizeInBits > 8) {
3132731324 return getAVX512Node(IsFSHR ? ISD::FSHR : ISD::FSHL, DL, VT,
3132831325 {Op0, Op1, Amt}, DAG, Subtarget);
3132931326 }
31327+
3133031328 assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 ||
3133131329 VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16 ||
3133231330 VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) &&
@@ -57624,6 +57622,40 @@ static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG,
5762457622 return SDValue();
5762557623}
5762657624
57625+ // Fold a Legal vector FSHL/FSHR with a constant-splat amount into X86ISD::VSHLD/VSHRD (imm8 = amount mod element size).
57626+ static SDValue combineFunnelShift(SDNode *N, SelectionDAG &DAG,
57627+ TargetLowering::DAGCombinerInfo &DCI,
57628+ const X86Subtarget &Subtarget) { // NOTE: DCI and Subtarget are not referenced in this body.
57629+ SDLoc DL(N);
57630+ SDValue Op0 = N->getOperand(0);
57631+ SDValue Op1 = N->getOperand(1);
57632+ SDValue Amt = N->getOperand(2); // shift amount operand
57633+ EVT VT = Op0.getValueType();
57634+
57635+ if (!VT.isVector()) // scalar funnel shifts are not handled by this combine
57636+ return SDValue();
57637+
57638+ // Bail out unless this FSHL/FSHR opcode is marked Legal for VT. Types
57639+ // whose funnel shifts are not Legal (e.g. ones that still need to be
57640+ // widened/promoted) are left untouched: an empty SDValue means no combine.
57641+ if (!DAG.getTargetLoweringInfo().isOperationLegal(N->getOpcode(), VT))
57642+ return SDValue();
57643+
57644+ unsigned EltSize = VT.getScalarSizeInBits();
57645+ APInt ShiftVal;
57646+ if (!X86::isConstantSplat(Amt, ShiftVal)) // only constant-splat amounts are folded
57647+ return SDValue();
57648+
57649+ uint64_t ModAmt = ShiftVal.urem(EltSize); // reduce the amount modulo the element width
57650+ SDValue Imm = DAG.getTargetConstant(ModAmt, DL, MVT::i8);
57651+ bool IsFSHR = N->getOpcode() == ISD::FSHR;
57652+
57653+ if (IsFSHR)
57654+ std::swap(Op0, Op1); // the VSHRD node is built with the two data operands swapped
57655+ unsigned Opcode = IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD;
57656+ return DAG.getNode(Opcode, DL, VT, {Op0, Op1, Imm});
57657+ }
57658+
5762757659static bool needCarryOrOverflowFlag(SDValue Flags) {
5762857660 assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");
5762957661
@@ -61228,6 +61260,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
6122861260 case ISD::INTRINSIC_VOID: return combineINTRINSIC_VOID(N, DAG, DCI);
6122961261 case ISD::FP_TO_SINT_SAT:
6123061262 case ISD::FP_TO_UINT_SAT: return combineFP_TO_xINT_SAT(N, DAG, Subtarget);
61263+ case ISD::FSHL:
61264+ case ISD::FSHR: return combineFunnelShift(N, DAG, DCI, Subtarget);
6123161265 // clang-format on
6123261266 }
6123361267
0 commit comments