@@ -28988,6 +28988,30 @@ static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL,
2898828988 return LowerVectorCTLZInRegLUT(Op, DL, Subtarget, DAG);
2898928989}
2899028990
28991+ static SDValue LowerVectorCTLZ_GFNI(SDValue Op, const SDLoc &DL,
28992+ SelectionDAG &DAG,
28993+ const X86Subtarget &Subtarget) {
28994+ MVT VT = Op.getSimpleValueType();
28995+ SDValue Input = Op.getOperand(0);
28996+
28997+ assert(VT.isVector() && VT.getVectorElementType() == MVT::i8 &&
28998+ "Expected vXi8 input for GFNI-based CTLZ lowering");
28999+
29000+ SDValue Reversed = DAG.getNode(ISD::BITREVERSE, DL, VT, Input);
29001+
29002+ SDValue Neg = DAG.getNegative(Reversed, DL, VT);
29003+ SDValue Filtered = DAG.getNode(ISD::AND, DL, VT, Reversed, Neg);
29004+
29005+ MVT VT64 = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
29006+ SDValue CTTZConst = DAG.getConstant(0xAACCF0FF00000000ULL, DL, VT64);
29007+ SDValue CTTZMatrix = DAG.getBitcast(VT, CTTZConst);
29008+
29009+ SDValue LZCNT =
29010+ DAG.getNode(X86ISD::GF2P8AFFINEQB, DL, VT, Filtered, CTTZMatrix,
29011+ DAG.getTargetConstant(8, DL, MVT::i8));
29012+ return LZCNT;
29013+ }
29014+
2899129015static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget,
2899229016 SelectionDAG &DAG) {
2899329017 MVT VT = Op.getSimpleValueType();
@@ -28996,6 +29020,9 @@ static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget,
2899629020 SDLoc dl(Op);
2899729021 unsigned Opc = Op.getOpcode();
2899829022
29023+ if (VT.isVector() && VT.getScalarType() == MVT::i8 && Subtarget.hasGFNI())
29024+ return LowerVectorCTLZ_GFNI(Op, dl, DAG, Subtarget);
29025+
2899929026 if (VT.isVector())
2900029027 return LowerVectorCTLZ(Op, dl, Subtarget, DAG);
2900129028
0 commit comments