Skip to content

Commit d78dccc

Browse files
committed
X86: make VBMI2 funnel shifts use VSHLD/VSHRD for const splats
Move constant splat handling for vector funnel shifts into a DAG combiner so that VBMI2 legal widths emit VSHLD/VSHRD directly (fixes #166949). Signed-off-by: Arnav Mehta <[email protected]>
1 parent 79c56e8 commit d78dccc

File tree

1 file changed

+48
-14
lines changed

1 file changed

+48
-14
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 48 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2073,8 +2073,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
20732073

20742074
if (Subtarget.hasVBMI2()) {
20752075
for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
2076-
setOperationAction(ISD::FSHL, VT, Custom);
2077-
setOperationAction(ISD::FSHR, VT, Custom);
2076+
setOperationAction(ISD::FSHL, VT, Legal);
2077+
setOperationAction(ISD::FSHR, VT, Legal);
20782078
}
20792079

20802080
setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
@@ -2089,8 +2089,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
20892089
if (!Subtarget.useSoftFloat() && Subtarget.hasVBMI2()) {
20902090
for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v16i16, MVT::v8i32,
20912091
MVT::v4i64}) {
2092-
setOperationAction(ISD::FSHL, VT, Custom);
2093-
setOperationAction(ISD::FSHR, VT, Custom);
2092+
setOperationAction(ISD::FSHL, VT, Subtarget.hasVLX() ? Legal : Custom);
2093+
setOperationAction(ISD::FSHR, VT, Subtarget.hasVLX() ? Legal : Custom);
20942094
}
20952095
}
20962096

@@ -2703,6 +2703,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
27032703
ISD::STRICT_FP_EXTEND,
27042704
ISD::FP_ROUND,
27052705
ISD::STRICT_FP_ROUND,
2706+
ISD::FSHL,
2707+
ISD::FSHR,
27062708
ISD::INTRINSIC_VOID,
27072709
ISD::INTRINSIC_WO_CHAIN,
27082710
ISD::INTRINSIC_W_CHAIN});
@@ -31314,19 +31316,15 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
3131431316
bool IsCstSplat = X86::isConstantSplat(Amt, APIntShiftAmt);
3131531317
unsigned NumElts = VT.getVectorNumElements();
3131631318

31317-
if (Subtarget.hasVBMI2() && EltSizeInBits > 8) {
31318-
31319-
if (IsCstSplat) {
31320-
if (IsFSHR)
31321-
std::swap(Op0, Op1);
31322-
uint64_t ShiftAmt = APIntShiftAmt.urem(EltSizeInBits);
31323-
SDValue Imm = DAG.getTargetConstant(ShiftAmt, DL, MVT::i8);
31324-
return getAVX512Node(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
31325-
{Op0, Op1, Imm}, DAG, Subtarget);
31326-
}
31319+
// For non-VLX VBMI2 targets, widen 128/256-bit to 512-bit so
31320+
// the rest of the lowering/isel can select the VBMI2 forms.
31321+
// Of the types with elements wider than 8 bits, only the 128/256-bit ones
31322+
// (v8i16, v4i32, v2i64, v16i16, v8i32, v4i64) stay Custom with VBMI2 but no VLX; 512-bit is Legal, so no width check is needed.
31323+
if (Subtarget.hasVBMI2() && !Subtarget.hasVLX() && EltSizeInBits > 8) {
3132731324
return getAVX512Node(IsFSHR ? ISD::FSHR : ISD::FSHL, DL, VT,
3132831325
{Op0, Op1, Amt}, DAG, Subtarget);
3132931326
}
31327+
3133031328
assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 ||
3133131329
VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16 ||
3133231330
VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) &&
@@ -57624,6 +57622,40 @@ static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG,
5762457622
return SDValue();
5762557623
}
5762657624

57625+
// Combiner: turn uniform-constant splat funnel shifts into VSHLD/VSHRD
57626+
static SDValue combineFunnelShift(SDNode *N, SelectionDAG &DAG,
57627+
TargetLowering::DAGCombinerInfo &DCI,
57628+
const X86Subtarget &Subtarget) {
57629+
SDLoc DL(N);
57630+
SDValue Op0 = N->getOperand(0);
57631+
SDValue Op1 = N->getOperand(1);
57632+
SDValue Amt = N->getOperand(2);
57633+
EVT VT = Op0.getValueType();
57634+
57635+
if (!VT.isVector())
57636+
return SDValue();
57637+
57638+
// Only combine if the operation is legal for this type.
57639+
// This ensures we don't try to convert types that need to be
57640+
// widened/promoted.
57641+
if (!DAG.getTargetLoweringInfo().isOperationLegal(N->getOpcode(), VT))
57642+
return SDValue();
57643+
57644+
unsigned EltSize = VT.getScalarSizeInBits();
57645+
APInt ShiftVal;
57646+
if (!X86::isConstantSplat(Amt, ShiftVal))
57647+
return SDValue();
57648+
57649+
uint64_t ModAmt = ShiftVal.urem(EltSize);
57650+
SDValue Imm = DAG.getTargetConstant(ModAmt, DL, MVT::i8);
57651+
bool IsFSHR = N->getOpcode() == ISD::FSHR;
57652+
57653+
if (IsFSHR)
57654+
std::swap(Op0, Op1);
57655+
unsigned Opcode = IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD;
57656+
return DAG.getNode(Opcode, DL, VT, {Op0, Op1, Imm});
57657+
}
57658+
5762757659
static bool needCarryOrOverflowFlag(SDValue Flags) {
5762857660
assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");
5762957661

@@ -61228,6 +61260,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
6122861260
case ISD::INTRINSIC_VOID: return combineINTRINSIC_VOID(N, DAG, DCI);
6122961261
case ISD::FP_TO_SINT_SAT:
6123061262
case ISD::FP_TO_UINT_SAT: return combineFP_TO_xINT_SAT(N, DAG, Subtarget);
61263+
case ISD::FSHL:
61264+
case ISD::FSHR: return combineFunnelShift(N, DAG, DCI, Subtarget);
6123161265
// clang-format on
6123261266
}
6123361267

0 commit comments

Comments
 (0)