Skip to content

Commit 6cd8b5e

Browse files
committed
[X86] Lower CTTZ/CTLZ vXi8 vectors using GF2P8AFFINEQB
CTTZ can be lowered using GF2P8AFFINEQB if we isolate the lowest set bit (or the input is zero) and then use GF2P8AFFINEQB to perform a lookup. With CTTZ available, CTLZ can be lowered as CTTZ(BITREVERSE()). As discussed on #110308.
1 parent 6cbc373 commit 6cd8b5e

File tree

3 files changed

+441
-845
lines changed

3 files changed

+441
-845
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 65 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1330,6 +1330,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
13301330
setOperationAction(ISD::BITREVERSE, MVT::i16, Custom);
13311331
setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
13321332
setOperationAction(ISD::BITREVERSE, MVT::i64, Custom);
1333+
setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1334+
setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
13331335
}
13341336

13351337
if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
@@ -1695,6 +1697,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
16951697
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
16961698
setOperationAction(ISD::MGATHER, VT, Custom);
16971699
}
1700+
1701+
if (Subtarget.hasGFNI()) {
1702+
setOperationAction(ISD::CTLZ, MVT::v32i8, Custom);
1703+
setOperationAction(ISD::CTTZ, MVT::v32i8, Custom);
1704+
}
16981705
}
16991706

17001707
if (!Subtarget.useSoftFloat() && !Subtarget.hasFP16() &&
@@ -2079,6 +2086,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
20792086
setOperationAction(ISD::FNEG, MVT::v32f16, Custom);
20802087
setOperationAction(ISD::FABS, MVT::v32f16, Custom);
20812088
setOperationAction(ISD::FCOPYSIGN, MVT::v32f16, Custom);
2089+
2090+
if (Subtarget.hasGFNI()) {
2091+
setOperationAction(ISD::CTLZ, MVT::v64i8, Custom);
2092+
setOperationAction(ISD::CTTZ, MVT::v64i8, Custom);
2093+
}
20822094
}// useAVX512Regs
20832095

20842096
if (!Subtarget.useSoftFloat() && Subtarget.hasVBMI2()) {
@@ -28412,6 +28424,46 @@ SDValue X86TargetLowering::LowerRESET_FPENV(SDValue Op,
2841228424
return createSetFPEnvNodes(Env, Chain, DL, MVT::i32, MMO, DAG, Subtarget);
2841328425
}
2841428426

28427+
// Generate a GFNI gf2p8affine 64-bit control immediate for vXi8 bitreverse/cttz/shift/rotate.
28428+
uint64_t getGFNICtrlImm(unsigned Opcode, unsigned Amt = 0) {
28429+
assert((Amt < 8) && "Shift/Rotation amount out of range");
28430+
switch (Opcode) {
28431+
case ISD::BITREVERSE:
28432+
return 0x8040201008040201ULL;
28433+
case ISD::CTTZ:
28434+
// Special case - only works for zero/single bit input.
28435+
return 0xAACCF0FF00000000ULL;
28436+
case ISD::SHL:
28437+
return ((0x0102040810204080ULL >> (Amt)) &
28438+
(0x0101010101010101ULL * (0xFF >> (Amt))));
28439+
case ISD::SRL:
28440+
return ((0x0102040810204080ULL << (Amt)) &
28441+
(0x0101010101010101ULL * ((0xFF << (Amt)) & 0xFF)));
28442+
case ISD::SRA:
28443+
return (getGFNICtrlImm(ISD::SRL, Amt) |
28444+
(0x8080808080808080ULL >> (64 - (8 * Amt))));
28445+
case ISD::ROTL:
28446+
return getGFNICtrlImm(ISD::SRL, 8 - Amt) | getGFNICtrlImm(ISD::SHL, Amt);
28447+
case ISD::ROTR:
28448+
return getGFNICtrlImm(ISD::SHL, 8 - Amt) | getGFNICtrlImm(ISD::SRL, Amt);
28449+
}
28450+
llvm_unreachable("Unsupported GFNI opcode");
28451+
}
28452+
28453+
// Build a GFNI gf2p8affine control mask vector of type VT by repeating the bytes of the 64-bit getGFNICtrlImm immediate across every 64 bits.
28454+
SDValue getGFNICtrlMask(unsigned Opcode, SelectionDAG &DAG, const SDLoc &DL,
28455+
MVT VT, unsigned Amt = 0) {
28456+
assert(VT.getVectorElementType() == MVT::i8 &&
28457+
(VT.getSizeInBits() % 64) == 0 && "Illegal GFNI control type");
28458+
uint64_t Imm = getGFNICtrlImm(Opcode, Amt);
28459+
SmallVector<SDValue> MaskBits;
28460+
for (unsigned I = 0, E = VT.getSizeInBits(); I != E; I += 8) {
28461+
uint64_t Bits = (Imm >> (I % 64)) & 255;
28462+
MaskBits.push_back(DAG.getConstant(Bits, DL, MVT::i8));
28463+
}
28464+
return DAG.getBuildVector(VT, DL, MaskBits);
28465+
}
28466+
2841528467
/// Lower a vector CTLZ using native supported vector CTLZ instruction.
2841628468
//
2841728469
// i8/i16 vector implemented using dword LZCNT vector instruction
@@ -28535,6 +28587,11 @@ static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL,
2853528587
SelectionDAG &DAG) {
2853628588
MVT VT = Op.getSimpleValueType();
2853728589

28590+
// GFNI targets - fold as cttz(bitreverse())
28591+
if (Subtarget.hasGFNI() && VT.getVectorElementType() == MVT::i8)
28592+
return DAG.getNode(ISD::CTTZ, DL, VT,
28593+
DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(0)));
28594+
2853828595
if (Subtarget.hasCDI() &&
2853928596
// vXi8 vectors need to be promoted to 512-bits for vXi32.
2854028597
(Subtarget.canExtendTo512DQ() || VT.getVectorElementType() != MVT::i8))
@@ -28598,6 +28655,14 @@ static SDValue LowerCTTZ(SDValue Op, const X86Subtarget &Subtarget,
2859828655
SDValue N0 = Op.getOperand(0);
2859928656
SDLoc dl(Op);
2860028657

28658+
// GFNI - isolate LSB and perform GF2P8AFFINEQB lookup.
28659+
if (Subtarget.hasGFNI() && VT.isVector()) {
28660+
SDValue B = DAG.getNode(ISD::AND, dl, VT, N0, DAG.getNegative(N0, dl, VT));
28661+
SDValue M = getGFNICtrlMask(ISD::CTTZ, DAG, dl, VT);
28662+
return DAG.getNode(X86ISD::GF2P8AFFINEQB, dl, VT, B, M,
28663+
DAG.getTargetConstant(0x8, dl, MVT::i8));
28664+
}
28665+
2860128666
assert(!VT.isVector() && Op.getOpcode() == ISD::CTTZ &&
2860228667
"Only scalar CTTZ requires custom lowering");
2860328668

@@ -29597,43 +29662,6 @@ SDValue X86TargetLowering::LowerWin64_INT128_TO_FP(SDValue Op,
2959729662
return IsStrict ? DAG.getMergeValues({Result, Chain}, dl) : Result;
2959829663
}
2959929664

29600-
// Generate a GFNI gf2p8affine bitmask for vXi8 bitreverse/shift/rotate.
29601-
uint64_t getGFNICtrlImm(unsigned Opcode, unsigned Amt = 0) {
29602-
assert((Amt < 8) && "Shift/Rotation amount out of range");
29603-
switch (Opcode) {
29604-
case ISD::BITREVERSE:
29605-
return 0x8040201008040201ULL;
29606-
case ISD::SHL:
29607-
return ((0x0102040810204080ULL >> (Amt)) &
29608-
(0x0101010101010101ULL * (0xFF >> (Amt))));
29609-
case ISD::SRL:
29610-
return ((0x0102040810204080ULL << (Amt)) &
29611-
(0x0101010101010101ULL * ((0xFF << (Amt)) & 0xFF)));
29612-
case ISD::SRA:
29613-
return (getGFNICtrlImm(ISD::SRL, Amt) |
29614-
(0x8080808080808080ULL >> (64 - (8 * Amt))));
29615-
case ISD::ROTL:
29616-
return getGFNICtrlImm(ISD::SRL, 8 - Amt) | getGFNICtrlImm(ISD::SHL, Amt);
29617-
case ISD::ROTR:
29618-
return getGFNICtrlImm(ISD::SHL, 8 - Amt) | getGFNICtrlImm(ISD::SRL, Amt);
29619-
}
29620-
llvm_unreachable("Unsupported GFNI opcode");
29621-
}
29622-
29623-
// Generate a GFNI gf2p8affine bitmask for vXi8 bitreverse/shift/rotate.
29624-
SDValue getGFNICtrlMask(unsigned Opcode, SelectionDAG &DAG, const SDLoc &DL, MVT VT,
29625-
unsigned Amt = 0) {
29626-
assert(VT.getVectorElementType() == MVT::i8 &&
29627-
(VT.getSizeInBits() % 64) == 0 && "Illegal GFNI control type");
29628-
uint64_t Imm = getGFNICtrlImm(Opcode, Amt);
29629-
SmallVector<SDValue> MaskBits;
29630-
for (unsigned I = 0, E = VT.getSizeInBits(); I != E; I += 8) {
29631-
uint64_t Bits = (Imm >> (I % 64)) & 255;
29632-
MaskBits.push_back(DAG.getConstant(Bits, DL, MVT::i8));
29633-
}
29634-
return DAG.getBuildVector(VT, DL, MaskBits);
29635-
}
29636-
2963729665
// Return true if the required (according to Opcode) shift-imm form is natively
2963829666
// supported by the Subtarget
2963929667
static bool supportedVectorShiftWithImm(EVT VT, const X86Subtarget &Subtarget,

0 commit comments

Comments
 (0)