@@ -458,14 +458,15 @@ static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI,
458458// Check if this array of constants represents a cttz table.
459459// Iterate over the elements from \p Table by trying to find/match all
460460// the numbers from 0 to \p InputBits - 1 that should represent cttz results.
461- static bool isCTTZTable (Constant *Table, uint64_t Mul, uint64_t Shift,
462- uint64_t AndMask, Type *AccessTy, unsigned InputBits,
463- APInt GEPIdxFactor, const DataLayout &DL) {
461+ static bool isCTTZTable (Constant *Table, const APInt &Mul, const APInt &Shift,
462+ const APInt &AndMask, Type *AccessTy,
463+ unsigned InputBits, const APInt &GEPIdxFactor,
464+ const DataLayout &DL) {
464465 for (unsigned Idx = 0 ; Idx < InputBits; Idx++) {
465- APInt Index = (APInt (InputBits, 1ull << Idx) * Mul).lshr (Shift) & AndMask;
466+ APInt Index = (APInt (InputBits, 1 ). shl ( Idx) * Mul).lshr (Shift) & AndMask;
466467 ConstantInt *C = dyn_cast_or_null<ConstantInt>(
467468 ConstantFoldLoadFromConst (Table, AccessTy, Index * GEPIdxFactor, DL));
468- if (!C || C->getZExtValue () != Idx)
469+ if (!C || C->getValue () != Idx)
469470 return false ;
470471 }
471472
@@ -485,7 +486,7 @@ static bool isCTTZTable(Constant *Table, uint64_t Mul, uint64_t Shift,
485486// There is also a special case when the element is 0.
486487//
487488// The (x & -x) sets the lowest non-zero bit to 1. The multiply is a de-bruijn
488- // sequence that contains each patterns of bits in it. The shift extracts
489+ // sequence that contains each pattern of bits in it. The shift extracts
489490// the top bits after the multiply, and that index into the table should
490491// represent the number of trailing zeros in the original number.
491492//
@@ -557,27 +558,26 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I, const DataLayout &DL) {
557558 auto [GepIdx, GEPScale] = VarOffsets.front ();
558559
559560 Value *X1;
560- uint64_t MulConst, ShiftConst, AndCst = ~ 0ull ;
561+ const APInt * MulConst, * ShiftConst, * AndCst = nullptr ;
561562 // Check that the gep variable index is ((x & -x) * MulConst) >> ShiftConst.
562563 // This might be cast (e.g. extended or truncated) to the pointer index type,
563564 // and if the gep index type has been replaced with an i8 then a new And (and
564565 // different ShiftConst) will be present.
565- // FIXME: 64-bit targets have `i64` type for the GEP index, so this match will
566- // probably fail for other (e.g. 32-bit) targets.
567- auto MatchInner = m_LShr (m_Mul (m_c_And (m_Neg (m_Value (X1)), m_Deferred (X1)),
568- m_ConstantInt (MulConst)),
569- m_ConstantInt (ShiftConst));
570- if (!match (GepIdx, m_ZExtOrSelf (MatchInner)) &&
571- !match (GepIdx, m_ZExtOrSelf (m_And (MatchInner, m_ConstantInt (AndCst)))))
566+ auto MatchInner = m_LShr (
567+ m_Mul (m_c_And (m_Neg (m_Value (X1)), m_Deferred (X1)), m_APInt (MulConst)),
568+ m_APInt (ShiftConst));
569+ if (!match (GepIdx, m_CastOrSelf (MatchInner)) &&
570+ !match (GepIdx, m_CastOrSelf (m_And (MatchInner, m_APInt (AndCst)))))
572571 return false ;
573572
574573 unsigned InputBits = X1->getType ()->getScalarSizeInBits ();
575- if (InputBits != 16 && InputBits != 32 && InputBits != 64 )
574+ if (InputBits != 16 && InputBits != 32 && InputBits != 64 && InputBits != 128 )
576575 return false ;
577576
578577 if (!GEPScale.isIntN (InputBits) ||
579- !isCTTZTable (GVTable->getInitializer (), MulConst, ShiftConst, AndCst,
580- AccessType, InputBits, GEPScale.trunc (InputBits), DL))
578+ !isCTTZTable (GVTable->getInitializer (), *MulConst, *ShiftConst,
579+ AndCst ? *AndCst : APInt::getAllOnes (InputBits), AccessType,
580+ InputBits, GEPScale.zextOrTrunc (InputBits), DL))
581581 return false ;
582582
583583 ConstantInt *ZeroTableElem = cast<ConstantInt>(
0 commit comments