@@ -1330,6 +1330,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
13301330 setOperationAction(ISD::BITREVERSE, MVT::i16, Custom);
13311331 setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
13321332 setOperationAction(ISD::BITREVERSE, MVT::i64, Custom);
1333+ setOperationAction(ISD::CTLZ, MVT::v16i8, Custom); // v16i8 ctlz: see the GFNI vXi8 path in LowerVectorCTLZ. NOTE(review): enclosing feature guard is outside this hunk - confirm it implies GFNI.
1334+ setOperationAction(ISD::CTTZ, MVT::v16i8, Custom); // v16i8 cttz: see the GFNI vector path in LowerCTTZ.
13331335 }
13341336
13351337 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
@@ -1695,6 +1697,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
16951697 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
16961698 setOperationAction(ISD::MGATHER, VT, Custom);
16971699 }
1700+ // With GFNI, mark CTLZ/CTTZ on 32 x i8 vectors as Custom so they reach the GFNI paths in LowerVectorCTLZ / LowerCTTZ.
1701+ if (Subtarget.hasGFNI()) {
1702+ setOperationAction(ISD::CTLZ, MVT::v32i8, Custom);
1703+ setOperationAction(ISD::CTTZ, MVT::v32i8, Custom);
1704+ }
16981705 }
16991706
17001707 if (!Subtarget.useSoftFloat() && !Subtarget.hasFP16() &&
@@ -2079,6 +2086,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
20792086 setOperationAction(ISD::FNEG, MVT::v32f16, Custom);
20802087 setOperationAction(ISD::FABS, MVT::v32f16, Custom);
20812088 setOperationAction(ISD::FCOPYSIGN, MVT::v32f16, Custom);
2089+ // With GFNI, mark CTLZ/CTTZ on 64 x i8 vectors as Custom so they reach the GFNI paths in LowerVectorCTLZ / LowerCTTZ.
2090+ if (Subtarget.hasGFNI()) {
2091+ setOperationAction(ISD::CTLZ, MVT::v64i8, Custom);
2092+ setOperationAction(ISD::CTTZ, MVT::v64i8, Custom);
2093+ }
20822094 }// useAVX512Regs
20832095
20842096 if (!Subtarget.useSoftFloat() && Subtarget.hasVBMI2()) {
@@ -28418,6 +28430,9 @@ uint64_t getGFNICtrlImm(unsigned Opcode, unsigned Amt = 0) {
2841828430 switch (Opcode) {
2841928431 case ISD::BITREVERSE:
2842028432 return 0x8040201008040201ULL;
28433+ case ISD::CTTZ:
28434+ // Special case - this control value is only valid when every input byte is zero or has exactly one bit set; callers must isolate a single bit first.
28435+ return 0xAACCF0FF00000000ULL;
2842128436 case ISD::SHL:
2842228437 return ((0x0102040810204080ULL >> (Amt)) &
2842328438 (0x0101010101010101ULL * (0xFF >> (Amt))));
@@ -28572,6 +28587,11 @@ static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL,
2857228587 SelectionDAG &DAG) {
2857328588 MVT VT = Op.getSimpleValueType();
2857428589
28590+ // GFNI targets with i8 elements - fold as cttz(bitreverse(x)): leading zeros of x are the trailing zeros of its bit-reversal, so emit generic BITREVERSE + CTTZ nodes.
28591+ if (Subtarget.hasGFNI() && VT.getVectorElementType() == MVT::i8)
28592+ return DAG.getNode(ISD::CTTZ, DL, VT,
28593+ DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(0)));
28594+
28594+
2857528595 if (Subtarget.hasCDI() &&
2857628596 // vXi8 vectors need to be promoted to 512-bits for vXi32.
2857728597 (Subtarget.canExtendTo512DQ() || VT.getVectorElementType() != MVT::i8))
@@ -28635,6 +28655,14 @@ static SDValue LowerCTTZ(SDValue Op, const X86Subtarget &Subtarget,
2863528655 SDValue N0 = Op.getOperand(0);
2863628656 SDLoc dl(Op);
2863728657
28658+ // GFNI - isolate LSB (N0 & -N0) and perform GF2P8AFFINEQB lookup with the CTTZ control mask.
28659+ if (Subtarget.hasGFNI() && VT.isVector()) { // NOTE(review): no element-type check here - assumes only vXi8 vector CTTZ is marked Custom; confirm.
28660+ SDValue B = DAG.getNode(ISD::AND, dl, VT, N0, DAG.getNegative(N0, dl, VT)); // keeps only the lowest set bit; a zero element stays zero
28661+ SDValue M = getGFNICtrlMask(ISD::CTTZ, DAG, dl, VT); // CTTZ mask is documented as valid only for zero/single-bit input, hence the isolation above
28662+ return DAG.getNode(X86ISD::GF2P8AFFINEQB, dl, VT, B, M,
28663+ DAG.getTargetConstant(0x8, dl, MVT::i8)); // NOTE(review): imm 0x8 presumably makes a zero byte produce 8 (the i8 bit width) - confirm against GF2P8AFFINEQB semantics.
28664+ }
28665+
28665+
2863828666 assert(!VT.isVector() && Op.getOpcode() == ISD::CTTZ &&
2863928667 "Only scalar CTTZ requires custom lowering");
2864028668
0 commit comments