Skip to content

Commit 157a532

Browse files
[X86][GFNI] Add lowering support for CTLZ vXi8 using GF2P8AFFINEQB
1 parent 6d3b72a commit 157a532

File tree

2 files changed

+291
-376
lines changed

2 files changed

+291
-376
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28998,6 +28998,35 @@ static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL,
2899828998
assert(Subtarget.hasSSSE3() && "Expected SSSE3 support for PSHUFB");
2899928999
return LowerVectorCTLZInRegLUT(Op, DL, Subtarget, DAG);
2900029000
}
29001+
/// Lower a vXi8 CTLZ with two GF2P8AFFINEQB transforms.
///
/// GF2P8AFFINEQB treats every 64-bit lane of its second operand as an 8x8 bit
/// matrix and computes, for each source byte:
///   result.bit[i] = parity(matrix.byte[7 - i] & src) ^ imm.bit[i]
///
/// The lowering proceeds in three steps:
///   1. Bit-reverse each byte, so that counting leading zeros of the input
///      becomes counting trailing zeros of the reversed value.
///   2. Isolate the lowest set bit of each reversed byte (x & -x), leaving
///      either zero or a single bit 1 << k where k is the wanted count.
///   3. Convert the one-hot byte 1 << k into the number k with a second affine
///      transform. Its immediate (8) is XORed into the result, so an all-zero
///      byte yields 8, while for any non-zero byte the 0xFF row cancels it.
///
/// \param Op        The CTLZ node being lowered; operand 0 is the input vector.
/// \param DAG       The SelectionDAG used to create the replacement nodes.
/// \param Subtarget Unused here; the caller is expected to have checked GFNI.
/// \returns The lowered value, or an empty SDValue if the result type is not
///          a vector of i8.
static SDValue LowerVectorCTLZ_GFNI(SDValue Op, SelectionDAG &DAG,
                                    const X86Subtarget &Subtarget) {
  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue Input = Op.getOperand(0);

  if (!VT.isVector() || VT.getVectorElementType() != MVT::i8)
    return SDValue();

  // One-hot -> index matrix, listed from byte 0 (least significant) to byte 7
  // of each 64-bit lane, i.e. the qword 0xAACCF0FF00000000:
  //   byte 7 (0xAA) drives result bit 0: set when k is odd
  //   byte 6 (0xCC) drives result bit 1: set when k is in {2,3,6,7}
  //   byte 5 (0xF0) drives result bit 2: set when k >= 4
  //   byte 4 (0xFF) drives result bit 3: set for any non-zero byte
  //   bytes 3..0 are zero, so result bits 4..7 are always clear.
  static const uint8_t CTTZMatrixBytes[8] = {0x00, 0x00, 0x00, 0x00,
                                             0xFF, 0xF0, 0xCC, 0xAA};

  // Both matrices repeat every 8 bytes so that each 64-bit lane of the vector
  // holds one complete 8x8 matrix.
  unsigned NumElts = VT.getVectorNumElements();
  SmallVector<SDValue, 64> ReverseVals;
  SmallVector<SDValue, 64> CTTZVals;
  for (unsigned i = 0; i != NumElts; ++i) {
    unsigned Row = i % 8;
    // Bit-reverse matrix: byte j holds 1 << j (qword 0x8040201008040201), so
    // result bit i selects source bit 7 - i. Using 1 << (7 - j) instead would
    // produce the identity matrix and leave the input unchanged.
    ReverseVals.push_back(DAG.getConstant(1u << Row, dl, MVT::i8));
    CTTZVals.push_back(DAG.getConstant(CTTZMatrixBytes[Row], dl, MVT::i8));
  }

  // Step 1: reverse the bits of every byte.
  SDValue ReverseMatrix = DAG.getBuildVector(VT, dl, ReverseVals);
  SDValue Reversed =
      DAG.getNode(X86ISD::GF2P8AFFINEQB, dl, VT, Input, ReverseMatrix,
                  DAG.getTargetConstant(0, dl, MVT::i8));

  // Step 2: keep only the lowest set bit of each byte: x & (0 - x).
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue Negated = DAG.getNode(ISD::SUB, dl, VT, Zero, Reversed);
  SDValue Filtered = DAG.getNode(ISD::AND, dl, VT, Reversed, Negated);

  // Step 3: translate the one-hot byte into its bit index; the immediate 8
  // makes a zero input byte produce 8.
  SDValue CTTZMatrix = DAG.getBuildVector(VT, dl, CTTZVals);
  SDValue LZCNT =
      DAG.getNode(X86ISD::GF2P8AFFINEQB, dl, VT, Filtered, CTTZMatrix,
                  DAG.getTargetConstant(8, dl, MVT::i8));
  return LZCNT;
}
2900129030

2900229031
static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget,
2900329032
SelectionDAG &DAG) {
@@ -29007,6 +29036,9 @@ static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget,
2900729036
SDLoc dl(Op);
2900829037
unsigned Opc = Op.getOpcode();
2900929038

29039+
if (VT.isVector() && VT.getScalarType() == MVT::i8 && Subtarget.hasGFNI())
29040+
return LowerVectorCTLZ_GFNI(Op, DAG, Subtarget);
29041+
2901029042
if (VT.isVector())
2901129043
return LowerVectorCTLZ(Op, dl, Subtarget, DAG);
2901229044

0 commit comments

Comments
 (0)