@@ -460,7 +460,7 @@ static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI,
460460// the numbers from 0 to \p InputBits that should represent cttz results.
461461static bool isCTTZTable (Constant *Table, uint64_t Mul, uint64_t Shift,
462462 uint64_t AndMask, Type *AccessTy, unsigned InputBits,
463- unsigned GEPIdxFactor, const DataLayout &DL) {
463+ APInt GEPIdxFactor, const DataLayout &DL) {
464464 for (unsigned Idx = 0 ; Idx < InputBits; Idx++) {
465465 APInt Index = (APInt (InputBits, 1ull << Idx) * Mul).lshr (Shift) & AndMask;
466466 ConstantInt *C = dyn_cast_or_null<ConstantInt>(
@@ -484,6 +484,11 @@ static bool isCTTZTable(Constant *Table, uint64_t Mul, uint64_t Shift,
484484// this can be lowered to `cttz` instruction.
485485// There is also a special case when the element is 0.
486486//
487+ // The (x & -x) isolates the lowest set bit of x. The multiply constant is a
488+ // de Bruijn sequence that contains each pattern of bits in it. The shift
489+ // extracts the top bits after the multiply, and the table entry at that index
490+ // should be the number of trailing zeros in the original number.
491+ //
487492// Here are some examples of LLVM IR for a 64-bit target:
488493//
489494// CASE 1:
@@ -525,7 +530,7 @@ static bool isCTTZTable(Constant *Table, uint64_t Mul, uint64_t Shift,
525530// i64 %shr
526531// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
527532//
528- // All this can be lowered to @llvm.cttz.i32/64 intrinsic .
533+ // All these can be lowered to @llvm.cttz.i32/64 intrinsics.
529534static bool tryToRecognizeTableBasedCttz (Instruction &I, const DataLayout &DL) {
530535 LoadInst *LI = dyn_cast<LoadInst>(&I);
531536 if (!LI)
@@ -539,45 +544,40 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I, const DataLayout &DL) {
539544 if (!GEP || !GEP->hasNoUnsignedSignedWrap ())
540545 return false ;
541546
542- Type *GEPSrcEltTy = GEP->getSourceElementType ();
543- Value *GepIdx;
544- if (GEP->getNumIndices () == 2 ) {
545- if (!GEPSrcEltTy->isArrayTy () ||
546- !match (GEP->idx_begin ()->get (), m_ZeroInt ()))
547- return false ;
548- GEPSrcEltTy = GEPSrcEltTy->getArrayElementType ();
549- GepIdx = std::next (GEP->idx_begin ())->get ();
550- } else if (GEP->getNumIndices () == 1 )
551- GepIdx = GEP->idx_begin ()->get ();
552- else
553- return false ;
554-
555547 GlobalVariable *GVTable = dyn_cast<GlobalVariable>(GEP->getPointerOperand ());
556548 if (!GVTable || !GVTable->hasInitializer () || !GVTable->isConstant ())
557549 return false ;
558550
551+ unsigned BW = DL.getIndexTypeSizeInBits (GEP->getType ());
552+ APInt ModOffset (BW, 0 );
553+ SmallMapVector<Value *, APInt, 4 > VarOffsets;
554+ if (!GEP->collectOffset (DL, BW, VarOffsets, ModOffset) ||
555+ VarOffsets.size () != 1 || ModOffset != 0 )
556+ return false ;
557+ auto [GepIdx, GEPScale] = VarOffsets.front ();
558+
559559 Value *X1;
560560 uint64_t MulConst, ShiftConst, AndCst = ~0ull ;
561+ // Check that the gep variable index is ((x & -x) * MulConst) >> ShiftConst.
562+ // This might be extended to the pointer index type, and if the gep index type
563+ // has been replaced with an i8 then a new And (and different ShiftConst) will
564+ // be present.
561565 // FIXME: 64-bit targets have `i64` type for the GEP index, so this match will
562566 // probably fail for other (e.g. 32-bit) targets.
563- if (!match (GepIdx, m_ZExtOrSelf (m_LShr (
564- m_Mul (m_c_And (m_Neg (m_Value (X1)), m_Deferred (X1)),
565- m_ConstantInt (MulConst)),
566- m_ConstantInt (ShiftConst)))) &&
567- !match (GepIdx, m_ZExtOrSelf (m_And (m_LShr (m_Mul (m_c_And (m_Neg (m_Value (X1)),
568- m_Deferred (X1)),
569- m_ConstantInt (MulConst)),
570- m_ConstantInt (ShiftConst)),
571- m_ConstantInt (AndCst)))))
567+ auto MatchInner = m_LShr (m_Mul (m_c_And (m_Neg (m_Value (X1)), m_Deferred (X1)),
568+ m_ConstantInt (MulConst)),
569+ m_ConstantInt (ShiftConst));
570+ if (!match (GepIdx, m_ZExtOrSelf (MatchInner)) &&
571+ !match (GepIdx, m_ZExtOrSelf (m_And (MatchInner, m_ConstantInt (AndCst)))))
572572 return false ;
573573
574574 unsigned InputBits = X1->getType ()->getScalarSizeInBits ();
575575 if (InputBits != 16 && InputBits != 32 && InputBits != 64 )
576576 return false ;
577577
578- if (!isCTTZTable (GVTable-> getInitializer (), MulConst, ShiftConst, AndCst,
579- AccessType, InputBits ,
580- GEPSrcEltTy-> getScalarSizeInBits () / 8 , DL))
578+ if (!GEPScale. isIntN (InputBits) ||
579+ ! isCTTZTable (GVTable-> getInitializer (), MulConst, ShiftConst, AndCst ,
580+ AccessType, InputBits, GEPScale. trunc (InputBits) , DL))
581581 return false ;
582582
583583 ConstantInt *ZeroTableElem = cast<ConstantInt>(
0 commit comments