Skip to content
Merged
6 changes: 6 additions & 0 deletions llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ struct DisableLIRP {

/// When true, Memcpy is disabled.
static bool Memcpy;

/// When true, Strlen is disabled.
static bool Strlen;

/// When true, Wcslen is disabled.
static bool Wcslen;
};

/// Performs Loop Idiom Recognize Pass.
Expand Down
6 changes: 6 additions & 0 deletions llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,12 @@ namespace llvm {
Value *emitStrLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL,
const TargetLibraryInfo *TLI);

/// Emit a call to the wcslen function to the builder, for the specified
/// pointer. Ptr is required to be some pointer type, and the return value has
/// 'size_t' type.
Value *emitWcsLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL,
const TargetLibraryInfo *TLI);

/// Emit a call to the strdup function to the builder, for the specified
/// pointer. Ptr is required to be some pointer type, and the return value has
/// 'i8*' type.
Expand Down
311 changes: 306 additions & 5 deletions llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@
//
// TODO List:
//
// Future loop memory idioms to recognize:
// memcmp, strlen, etc.
// Future loop memory idioms to recognize: memcmp, etc.
//
// This could recognize common matrix multiplies and dot product idioms and
// replace them with calls to BLAS (if linked in??).
Expand All @@ -33,6 +32,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
Expand Down Expand Up @@ -97,6 +97,7 @@ using namespace llvm;
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores");
STATISTIC(NumStrLen, "Number of strlen's and wcslen's formed from loop loads");
STATISTIC(
NumShiftUntilBitTest,
"Number of uncountable loops recognized as 'shift until bitttest' idiom");
Expand Down Expand Up @@ -126,6 +127,26 @@ static cl::opt<bool, true>
cl::location(DisableLIRP::Memcpy), cl::init(false),
cl::ReallyHidden);

bool DisableLIRP::Strlen;
static cl::opt<bool, true>
DisableLIRPStrlen("disable-loop-idiom-strlen",
cl::desc("Proceed with loop idiom recognize pass, but do "
"not convert loop(s) to strlen."),
cl::location(DisableLIRP::Strlen), cl::init(false),
cl::ReallyHidden);

/// Some target libraries have a significant call overhead for `wcslen`,
/// which can degrade performance when the input string is not long enough
/// to justify the cost. To avoid unnecessary performance penalties,
/// we disable it by default.
bool DisableLIRP::Wcslen;
static cl::opt<bool, true>
EnableLIRPWcslen("enable-loop-idiom-wcslen",
cl::desc("Proceed with loop idiom recognize pass, "
"enable conversion of loop(s) to wcslen."),
cl::location(DisableLIRP::Wcslen), cl::init(true),
cl::ReallyHidden);

static cl::opt<bool> UseLIRCodeSizeHeurs(
"use-lir-code-size-heurs",
cl::desc("Use loop idiom recognition code size heuristics when compiling "
Expand Down Expand Up @@ -246,6 +267,7 @@ class LoopIdiomRecognize {

bool recognizeShiftUntilBitTest();
bool recognizeShiftUntilZero();
bool recognizeAndInsertStrLen();

/// @}
};
Expand Down Expand Up @@ -1494,7 +1516,17 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {

return recognizePopcount() || recognizeAndInsertFFS() ||
recognizeShiftUntilBitTest() || recognizeShiftUntilZero() ||
recognizeShiftUntilLessThan();
recognizeShiftUntilLessThan() || recognizeAndInsertStrLen();
}

/// Check if a Value is either a nullptr or a constant int zero
static bool isZeroConstant(const Value *Val) {
if (isa<ConstantPointerNull>(Val))
return true;
const ConstantInt *CmpZero = dyn_cast<ConstantInt>(Val);
if (!CmpZero || !CmpZero->isZero())
return false;
return true;
}

/// Check if the given conditional branch is based on the comparison between
Expand All @@ -1512,8 +1544,7 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
if (!Cond)
return nullptr;

ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
if (!CmpZero || !CmpZero->isZero())
if (!isZeroConstant(Cond->getOperand(1)))
return nullptr;

BasicBlock *TrueSucc = BI->getSuccessor(0);
Expand All @@ -1529,6 +1560,276 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
return nullptr;
}

namespace {

class StrlenVerifier {
public:
explicit StrlenVerifier(const Loop *CurLoop, ScalarEvolution *SE,
const TargetLibraryInfo *TLI)
: CurLoop(CurLoop), SE(SE), TLI(TLI) {}

bool isValidStrlenIdiom() {
// Give up if the loop has multiple blocks, multiple backedges, or
// multiple exit blocks
if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1 ||
!CurLoop->getUniqueExitBlock())
return false;

// It should have a preheader and a branch instruction.
BasicBlock *Preheader = CurLoop->getLoopPreheader();
if (!Preheader)
return false;

BranchInst *EntryBI = dyn_cast<BranchInst>(Preheader->getTerminator());
if (!EntryBI)
return false;

// The loop exit must be conditioned on an icmp with 0 the null terminator.
// The icmp operand has to be a load on some SSA reg that increments
// by 1 in the loop.
BasicBlock *LoopBody = *CurLoop->block_begin();

// Skip if the body is too big as it most likely is not a strlen idiom.
if (!LoopBody || LoopBody->size() >= 15)
return false;

BranchInst *LoopTerm = dyn_cast<BranchInst>(LoopBody->getTerminator());
Value *LoopCond = matchCondition(LoopTerm, LoopBody);
if (!LoopCond)
return false;

LoadInst *LoopLoad = dyn_cast<LoadInst>(LoopCond);
if (!LoopLoad || LoopLoad->getPointerAddressSpace() != 0)
return false;

OperandType = LoopLoad->getType();
if (!OperandType || !OperandType->isIntegerTy())
return false;

// See if the pointer expression is an AddRec with constant step a of form
// ({n,+,a}) where a is the width of the char type.
Value *IncPtr = LoopLoad->getPointerOperand();
const SCEVAddRecExpr *LoadEv =
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IncPtr));
if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
return false;
LoadBaseEv = LoadEv->getStart();

LLVM_DEBUG({
dbgs() << "pointer load scev: ";
LoadEv->print(outs());
dbgs() << "\n";
});

const SCEVConstant *Step =
dyn_cast<SCEVConstant>(LoadEv->getStepRecurrence(*SE));
if (!Step)
return false;

unsigned StepSize = 0;
StepSizeCI = dyn_cast<ConstantInt>(Step->getValue());
if (!StepSizeCI)
return false;
StepSize = StepSizeCI->getZExtValue();

// Verify that StepSize is consistent with platform char width.
OpWidth = OperandType->getIntegerBitWidth();
unsigned WcharSize = TLI->getWCharSize(*LoopLoad->getModule());
if (OpWidth != StepSize * 8)
return false;
if (OpWidth != 8 && OpWidth != 16 && OpWidth != 32)
return false;
if (OpWidth >= 16)
if (OpWidth != WcharSize * 8)
return false;

// Scan every instruction in the loop to ensure there are no side effects.
for (Instruction &I : *LoopBody)
if (I.mayHaveSideEffects())
return false;

BasicBlock *LoopExitBB = CurLoop->getExitBlock();
if (!LoopExitBB)
return false;

for (PHINode &PN : LoopExitBB->phis()) {
if (!SE->isSCEVable(PN.getType()))
return false;

const SCEV *Ev = SE->getSCEV(&PN);
if (!Ev)
return false;

LLVM_DEBUG({
dbgs() << "loop exit phi scev: ";
Ev->print(dbgs());
dbgs() << "\n";
});

// Since we verified that the loop trip count will be a valid strlen
// idiom, we can expand all lcssa phi with {n,+,1} as (n + strlen) and use
// SCEVExpander materialize the loop output.
const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev);
if (!AddRecEv || !AddRecEv->isAffine())
return false;

// We only want RecAddExpr with recurrence step that is constant. This
// is good enough for all the idioms we want to recognize. Later we expand
// and materialize the recurrence as {base,+,a} -> (base + a * strlen)
if (!dyn_cast<SCEVConstant>(AddRecEv->getStepRecurrence(*SE)))
return false;
}

return true;
}

public:
const Loop *CurLoop;
ScalarEvolution *SE;
const TargetLibraryInfo *TLI;

unsigned OpWidth;
ConstantInt *StepSizeCI;
const SCEV *LoadBaseEv;
Type *OperandType;
};

} // namespace

/// The Strlen Idiom we are trying to detect has the following structure
///
/// preheader:
/// ...
/// br label %body, ...
///
/// body:
/// ... ; %0 is incremented by a gep
/// %1 = load i8, ptr %0, align 1
/// %2 = icmp eq i8 %1, 0
/// br i1 %2, label %exit, label %body
///
/// exit:
/// %lcssa = phi [%0, %body], ...
///
/// We expect the strlen idiom to have a load of a character type that
/// is compared against '\0', and such load pointer operand must have scev
/// expression of the form {%str,+,c} where c is a ConstantInt of the
/// appropiate character width for the idiom, and %str is the base of the string
/// And, that all lcssa phis have the form {...,+,n} where n is a constant,
///
/// When transforming the output of the strlen idiom, the lccsa phi are
/// expanded using SCEVExpander as {base scev,+,a} -> (base scev + a * strlen)
/// and all subsequent uses are replaced. For example,
///
/// \code{.c}
/// const char* base = str;
/// while (*str != '\0')
/// ++str;
/// size_t result = str - base;
/// \endcode
///
/// will be transformed as follows: The idiom will be replaced by a strlen
/// computation to compute the address of the null terminator of the string.
///
/// \code{.c}
/// const char* base = str;
/// const char* end = base + strlen(str);
/// size_t result = end - base;
/// \endcode
///
/// In the case we index by an induction variable, as long as the induction
/// variable has a constant int increment, we can replace all such indvars
/// with the closed form computation of strlen
///
/// \code{.c}
/// size_t i = 0;
/// while (str[i] != '\0')
/// ++i;
/// size_t result = i;
/// \endcode
///
/// Will be replaced by
///
/// \code{.c}
/// size_t i = 0 + strlen(str);
/// size_t result = i;
/// \endcode
///
bool LoopIdiomRecognize::recognizeAndInsertStrLen() {
if (DisableLIRP::All)
return false;

StrlenVerifier Verifier(CurLoop, SE, TLI);

if (!Verifier.isValidStrlenIdiom())
return false;

BasicBlock *Preheader = CurLoop->getLoopPreheader();
BasicBlock *LoopExitBB = CurLoop->getExitBlock();

IRBuilder<> Builder(Preheader->getTerminator());
SCEVExpander Expander(*SE, Preheader->getModule()->getDataLayout(),
"strlen_idiom");
Value *MaterialzedBase = Expander.expandCodeFor(
Verifier.LoadBaseEv, Verifier.LoadBaseEv->getType(),
Builder.GetInsertPoint());

Value *StrLenFunc = nullptr;
if (Verifier.OpWidth == 8) {
if (!isLibFuncEmittable(Preheader->getModule(), TLI, LibFunc_strlen))
return false;
StrLenFunc = emitStrLen(MaterialzedBase, Builder, *DL, TLI);
} else {
if (!isLibFuncEmittable(Preheader->getModule(), TLI, LibFunc_wcslen) &&
!DisableLIRP::Wcslen)
return false;
StrLenFunc = emitWcsLen(MaterialzedBase, Builder, *DL, TLI);
}
assert(StrLenFunc && "Failed to emit strlen function.");

const SCEV *StrlenEv = SE->getSCEV(StrLenFunc);
SmallVector<PHINode *, 4> Cleanup;
for (PHINode &PN : LoopExitBB->phis()) {
// We can now materialize the loop output as all phi have scev {base,+,a}.
// We expand the phi as:
// %strlen = call i64 @strlen(%str)
// %phi.new = base expression + step * %strlen
const SCEV *Ev = SE->getSCEV(&PN);
const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev);
const SCEVConstant *Step =
dyn_cast<SCEVConstant>(AddRecEv->getStepRecurrence(*SE));
const SCEV *Base = AddRecEv->getStart();

// It is safe to truncate to base since if base is narrower than size_t
// the equivalent user code will have to truncate anyways.
const SCEV *NewEv = SE->getAddExpr(
Base, SE->getMulExpr(Step, SE->getTruncateOrSignExtend(
StrlenEv, Base->getType())));

Value *MaterializedPHI = Expander.expandCodeFor(NewEv, NewEv->getType(),
Builder.GetInsertPoint());
Expander.clear();
PN.replaceAllUsesWith(MaterializedPHI);
Cleanup.push_back(&PN);
}

// All LCSSA Loop Phi are dead, the left over dead loop body can be cleaned
// up by later passes
for (PHINode *PN : Cleanup)
RecursivelyDeleteDeadPHINode(PN);
SE->forgetLoop(CurLoop);

++NumStrLen;
LLVM_DEBUG(dbgs() << " Formed strlen idiom: " << *StrLenFunc << "\n");
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "recognizeAndInsertStrLen",
CurLoop->getStartLoc(), Preheader)
<< "Transformed " << StrLenFunc->getName() << " loop idiom";
});

return true;
}

/// Check if the given conditional branch is based on an unsigned less-than
/// comparison between a variable and a constant, and if the comparison is false
/// the control yields to the loop entry. If the branch matches the behaviour,
Expand Down
Loading