@@ -457,30 +457,20 @@ static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI,
457457
458458// Check if this array of constants represents a cttz table.
459459// Iterate over the elements from \p Table by trying to find/match all
460- // the numbers from 0 to \p InputBits that should represent cttz results.
461- static bool isCTTZTable (const ConstantDataArray &Table, uint64_t Mul,
462- uint64_t Shift, uint64_t InputBits) {
463- unsigned Length = Table.getNumElements ();
464- if (Length < InputBits || Length > InputBits * 2 )
465- return false ;
466-
467- APInt Mask = APInt::getBitsSetFrom (InputBits, Shift);
468- unsigned Matched = 0 ;
469-
470- for (unsigned i = 0 ; i < Length; i++) {
471- uint64_t Element = Table.getElementAsInteger (i);
472- if (Element >= InputBits)
473- continue ;
474-
475- // Check if \p Element matches a concrete answer. It could fail for some
476- // elements that are never accessed, so we keep iterating over each element
477- // from the table. The number of matched elements should be equal to the
478- // number of potential right answers which is \p InputBits actually.
479- if ((((Mul << Element) & Mask.getZExtValue ()) >> Shift) == i)
480- Matched++;
460+ // the numbers from 0 to \p InputBits - 1 that should represent cttz
461+ // results.
462+ static bool isCTTZTable (Constant *Table, uint64_t Mul, uint64_t Shift,
463+ Type *AccessTy, unsigned InputBits,
464+ unsigned GEPIdxFactor, const DataLayout &DL) {
465+ for (unsigned Idx = 0 ; Idx < InputBits; Idx++) {
466+ APInt Index = (APInt (InputBits, 1ull << Idx) * Mul).lshr (Shift);
467+ ConstantInt *C = dyn_cast_or_null<ConstantInt>(
468+ ConstantFoldLoadFromConst (Table, AccessTy, Index * GEPIdxFactor, DL));
469+ if (!C || C->getZExtValue () != Idx)
470+ return false ;
481471 }
482472
483- return Matched == InputBits ;
473+ return true ;
484474}
485475
486476// Try to recognize table-based ctz implementation.
@@ -537,7 +527,7 @@ static bool isCTTZTable(const ConstantDataArray &Table, uint64_t Mul,
537527// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
538528//
539529// All this can be lowered to @llvm.cttz.i32/64 intrinsic.
540- static bool tryToRecognizeTableBasedCttz (Instruction &I) {
530+ static bool tryToRecognizeTableBasedCttz (Instruction &I, const DataLayout &DL ) {
541531 LoadInst *LI = dyn_cast<LoadInst>(&I);
542532 if (!LI)
543533 return false ;
@@ -567,11 +557,6 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I) {
567557 if (!GVTable || !GVTable->hasInitializer () || !GVTable->isConstant ())
568558 return false ;
569559
570- ConstantDataArray *ConstData =
571- dyn_cast<ConstantDataArray>(GVTable->getInitializer ());
572- if (!ConstData || ConstData->getElementType () != GEPSrcEltTy)
573- return false ;
574-
575560 Value *X1;
576561 uint64_t MulConst, ShiftConst;
577562 // FIXME: 64-bit targets have `i64` type for the GEP index, so this match will
@@ -583,19 +568,21 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I) {
583568 return false ;
584569
585570 unsigned InputBits = X1->getType ()->getScalarSizeInBits ();
586- if (InputBits != 32 && InputBits != 64 )
571+ if (InputBits != 16 && InputBits != 32 && InputBits != 64 )
587572 return false ;
588573
589- // Shift should extract top 5 ..7 bits.
574+ // Shift should extract the top 4..7 bits.
590575 if (InputBits - Log2_32 (InputBits) != ShiftConst &&
591576 InputBits - Log2_32 (InputBits) - 1 != ShiftConst)
592577 return false ;
593578
594- if (!isCTTZTable (*ConstData, MulConst, ShiftConst, InputBits))
579+ if (!isCTTZTable (GVTable->getInitializer (), MulConst, ShiftConst, AccessType,
580+ InputBits, GEPSrcEltTy->getScalarSizeInBits () / 8 , DL))
595581 return false ;
596582
597- auto ZeroTableElem = ConstData->getElementAsInteger (0 );
598- bool DefinedForZero = ZeroTableElem == InputBits;
583+ ConstantInt *ZeroTableElem = cast<ConstantInt>(
584+ ConstantFoldLoadFromConst (GVTable->getInitializer (), AccessType, DL));
585+ bool DefinedForZero = ZeroTableElem->getZExtValue () == InputBits;
599586
600587 IRBuilder<> B (LI);
601588 ConstantInt *BoolConst = B.getInt1 (!DefinedForZero);
@@ -609,8 +596,7 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I) {
609596 // If the value in elem 0 isn't the same as InputBits, we still want to
610597 // produce the value from the table.
611598 auto Cmp = B.CreateICmpEQ (X1, ConstantInt::get (XType, 0 ));
612- auto Select =
613- B.CreateSelect (Cmp, ConstantInt::get (XType, ZeroTableElem), Cttz);
599+ auto Select = B.CreateSelect (Cmp, B.CreateZExt (ZeroTableElem, XType), Cttz);
614600
615601 // NOTE: If the table[0] is 0, but the cttz(0) is defined by the Target
616602 // it should be handled as: `cttz(x) & (typeSize - 1)`.
@@ -1479,7 +1465,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
14791465 MadeChange |= foldGuardedFunnelShift (I, DT);
14801466 MadeChange |= tryToRecognizePopCount (I);
14811467 MadeChange |= tryToFPToSat (I, TTI);
1482- MadeChange |= tryToRecognizeTableBasedCttz (I);
1468+ MadeChange |= tryToRecognizeTableBasedCttz (I, DL );
14831469 MadeChange |= foldConsecutiveLoads (I, DL, TTI, AA, DT);
14841470 MadeChange |= foldPatternedLoads (I, DL);
14851471 MadeChange |= foldICmpOrChain (I, DL, TTI, AA, DT);
0 commit comments