From afdca599a2c084812f3402e37514d64ada25c65d Mon Sep 17 00:00:00 2001 From: Henry Jiang Date: Sat, 7 Sep 2024 22:27:50 -0400 Subject: [PATCH 01/15] Initial upstreaming of strlen8 LIR 1 out of 3 --- .../Transforms/Scalar/LoopIdiomRecognize.h | 3 + .../Transforms/Scalar/LoopIdiomRecognize.cpp | 296 +++++++++++++++++- llvm/test/Transforms/LoopIdiom/strlen.ll | 149 +++++++++ 3 files changed, 445 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Transforms/LoopIdiom/strlen.ll diff --git a/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h b/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h index 0c6406d861851..3a9f016ce9bd6 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h @@ -34,6 +34,9 @@ struct DisableLIRP { /// When true, Memcpy is disabled. static bool Memcpy; + + /// When true, Strlen is disabled. + static bool Strlen; }; /// Performs Loop Idiom Recognize Pass. diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 2462ec33e0c20..2a37d8757a0bd 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -97,6 +97,7 @@ using namespace llvm; STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores"); +STATISTIC(NumStrLen, "Number of strlen's formed from loop loads"); STATISTIC( NumShiftUntilBitTest, "Number of uncountable loops recognized as 'shift until bitttest' idiom"); @@ -126,6 +127,14 @@ static cl::opt cl::location(DisableLIRP::Memcpy), cl::init(false), cl::ReallyHidden); +bool DisableLIRP::Strlen; +static cl::opt + DisableLIRPStrlen("disable-" DEBUG_TYPE "-strlen", + cl::desc("Proceed with loop idiom recognize pass, but do " + "not convert loop(s) to strlen."), + 
cl::location(DisableLIRP::Strlen), cl::init(false), + cl::ReallyHidden); + static cl::opt UseLIRCodeSizeHeurs( "use-lir-code-size-heurs", cl::desc("Use loop idiom recognition code size heuristics when compiling " @@ -246,6 +255,7 @@ class LoopIdiomRecognize { bool recognizeShiftUntilBitTest(); bool recognizeShiftUntilZero(); + bool recognizeAndInsertStrLen(); /// @} }; @@ -1512,9 +1522,11 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry, if (!Cond) return nullptr; - ConstantInt *CmpZero = dyn_cast(Cond->getOperand(1)); - if (!CmpZero || !CmpZero->isZero()) - return nullptr; + if (!isa(Cond->getOperand(1))) { + ConstantInt *CmpZero = dyn_cast(Cond->getOperand(1)); + if (!CmpZero || !CmpZero->isZero()) + return nullptr; + } BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); @@ -1529,6 +1541,284 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry, return nullptr; } +/// getCandidateResInstr - If there is strlen calculated, return the Result +/// instruction based on the \p OpWidth passed, else return nullptr +static Instruction *getCandidateResInstr(Instruction *EndAddress, + Value *StartAddress, + unsigned OpWidth) { + using namespace llvm::PatternMatch; + + assert(StartAddress && "Valid start address required."); + + // lambda expression to check that the instruction has a single user + auto GetSingleUser = [](Instruction *I) -> User * { + if (I->hasOneUse()) + return *I->user_begin(); + return nullptr; + }; + + // The pointer to the end address should only have one use which is a pointer + // to int instruction. + auto *TmpUser = GetSingleUser(EndAddress); + if (!TmpUser) + return nullptr; + + if (PtrToIntInst *PToI = dyn_cast(TmpUser)) { + // The only user of the PtrToIntInst should be the sub instruction that + // calculates the difference b/w the two pointer operands. 
+ TmpUser = GetSingleUser(PToI); + if (!TmpUser) + return nullptr; + Instruction *Inst = dyn_cast(TmpUser); + + if (!Inst || Inst->getOpcode() != Instruction::Sub || + Inst->getOperand(0) != PToI) + return nullptr; + Value *MatchAddr; + if (match(Inst->getOperand(1), m_PtrToInt(m_Value(MatchAddr)))) { + if (MatchAddr != StartAddress) + return nullptr; + + // We found the candidate sub instruction + switch (OpWidth) { + case 8: + return Inst; + default: + return nullptr; + } + } + } + + return nullptr; +} + +/// Recognizes a strlen idiom by checking for loops that increment +/// a char pointer and then subtract with the base pointer. +/// +/// If detected, transforms the relevant code to a strlen function +/// call, and returns true; otherwise, returns false. +/// +/// The core idiom we are trying to detect is: +/// \code +/// if (str == NULL) +/// goto loop-exit // the precondition of the loop +/// start = str; +/// do { +/// str++; +/// } while(*str!='\0'); +/// return (str - start); +/// loop-exit: +/// \endcode +/// +/// The transformed output is similar to below c-code: +/// \code +/// if (str == NULL) +/// goto loop-exit // the precondition of the loop +/// return strlen(str); +/// \endcode +bool LoopIdiomRecognize::recognizeAndInsertStrLen() { + if (DisableLIRPStrlen) + return false; + + // Give up if the loop has multiple blocks or multiple backedges. + if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1) + return false; + + // It should have a preheader containing nothing but an unconditional branch. 
+ auto *Pre = CurLoop->getLoopPreheader(); + if (!Pre || &Pre->front() != Pre->getTerminator()) + return false; + + auto *EntryBI = dyn_cast(Pre->getTerminator()); + if (!EntryBI || EntryBI->isConditional()) + return false; + + // It should have a precondition block + auto *PreCondBB = Pre->getSinglePredecessor(); + if (!PreCondBB) + return false; + + // The precondition terminator instruction should skip the loop body based on + // an icmp with zero/null. + if (!matchCondition(dyn_cast(PreCondBB->getTerminator()), Pre)) + return false; + + // The loop exit must be conditioned on an icmp with 0. + // The icmp operand has to be a load on some SSA reg that increments + // by 1 in the loop. + auto *LoopBody = *(CurLoop->block_begin()); + auto *LoopTerm = dyn_cast(LoopBody->getTerminator()); + auto *LoopCond = matchCondition(LoopTerm, LoopBody); + + if (!LoopCond) + return false; + + auto *LoopLoad = dyn_cast(LoopCond); + if (!LoopLoad || LoopLoad->getPointerAddressSpace() != 0) + return false; + + Type *OperandType = LoopLoad->getType(); + if (!OperandType || !OperandType->isIntegerTy()) + return false; + + // See if the pointer expression is an AddRec with step 1 ({n,+,1}) on + // the loop, indicating strlen calculation. + auto *IncPtr = LoopLoad->getPointerOperand(); + const SCEVAddRecExpr *LoadEv = dyn_cast(SE->getSCEV(IncPtr)); + if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) + return false; + + const SCEVConstant *Step = + dyn_cast(LoadEv->getStepRecurrence(*SE)); + if (!Step) + return false; + + unsigned int ConstIntValue = 0; + if (ConstantInt *CI = dyn_cast(Step->getValue())) + ConstIntValue = CI->getZExtValue(); + + unsigned OpWidth = OperandType->getIntegerBitWidth(); + if (OpWidth != ConstIntValue * 8) + return false; + if (OpWidth != 8) + return false; + + // Scan every instruction in the loop to ensure there are no side effects. 
+ for (auto &I : *LoopBody) + if (I.mayHaveSideEffects()) + return false; + + auto *LoopExitBB = CurLoop->getExitBlock(); + if (!LoopExitBB) + return false; + + // Check that the loop exit block is valid: + // It needs to have exactly one LCSSA Phi which is an AddRec. + PHINode *LCSSAPhi = nullptr; + for (PHINode &PN : LoopExitBB->phis()) { + if (!LCSSAPhi && PN.getNumIncomingValues() == 1) + LCSSAPhi = &PN; + else + return false; + } + + if (!LCSSAPhi || !SE->isSCEVable(LCSSAPhi->getType())) + return false; + + if (LCSSAPhi->getIncomingValueForBlock(LoopBody) != + LoopLoad->getPointerOperand()) + return false; + + const SCEVAddRecExpr *LCSSAEv = + dyn_cast(SE->getSCEV(LCSSAPhi->getIncomingValue(0))); + + if (!LCSSAEv || !dyn_cast(SE->getPointerBase(LCSSAEv)) || + !LCSSAEv->isAffine()) + return false; + + // We can now expand the base of the str + IRBuilder<> Builder(Pre->getTerminator()); + + PHINode *LoopPhi = &*LoopBody->phis().begin(); + if (!LoopPhi || ++LoopBody->phis().begin() != LoopBody->phis().end()) + return false; + Value *PreVal = LoopBody->phis().begin()->getIncomingValueForBlock(Pre); + if (!PreVal) + return false; + + Value *Expanded = nullptr; + if (auto *GEP = dyn_cast(LoopLoad->getPointerOperand())) { + if (GEP->getPointerOperand() != LoopPhi) + return false; + GetElementPtrInst *NewGEP = + GetElementPtrInst::Create(GEP->getSourceElementType(), PreVal, + SmallVector(GEP->indices()), + "newgep", Pre->getTerminator()); + Expanded = NewGEP; + } else if (LoopLoad->getPointerOperand() == LoopPhi) + Expanded = PreVal; + if (!Expanded) + return false; + + // Check that the LoopExitBB is calculating the string length and identify + // the instruction that has the string length calculation + Instruction *ResInst = getCandidateResInstr(LCSSAPhi, PreVal, OpWidth); + if (!ResInst) + return false; + + // Ensure that the GEP has the correct index if the pointer was modified. 
+ // This can happen when the pointer in the user code, outside the loop, + // walks past a certain pre-checked index of the string. + if (auto *GEP = dyn_cast(Expanded)) { + if (GEP->getNumOperands() != 2) + return false; + + ConstantInt *I0 = dyn_cast(GEP->getOperand(1)); + if (!I0) + return false; + + int64_t Index = I0->getSExtValue(); // GEP index + auto *SAdd = dyn_cast(LoadEv->getStart()); + if (!SAdd || SAdd->getNumOperands() != 2) + return false; + + auto *SAdd0 = dyn_cast(SAdd->getOperand(0)); + if (!SAdd0) + return false; + + ConstantInt *CInt = SAdd0->getValue(); // SCEV index + assert(CInt && "Expecting CInt to be valid."); + int64_t Offset = CInt->getSExtValue(); + + // Update the index based on the Offset + assert((Offset * 8) % GEP->getSourceElementType()->getIntegerBitWidth() == + 0 && + "Invalid offset"); + int64_t NewIndex = + (Offset * 8) / GEP->getSourceElementType()->getIntegerBitWidth() - + Index; + Value *NewIndexVal = + ConstantInt::get(GEP->getOperand(1)->getType(), NewIndex); + GEP->setOperand(1, NewIndexVal); + } + + Value *StrLenFunc = nullptr; + switch (OpWidth) { + case 8: + StrLenFunc = emitStrLen(Expanded, Builder, *DL, TLI); + break; + } + + assert(StrLenFunc && "Failed to emit strlen function."); + + // Replace the subtraction instruction by the result of strlen + ResInst->replaceAllUsesWith(StrLenFunc); + + // Remove the loop-exit branch and delete dead instructions + RecursivelyDeleteTriviallyDeadInstructions(ResInst, TLI); + + ConstantInt *NewLoopCond = LoopTerm->getSuccessor(0) == LoopBody + ? 
Builder.getFalse() + : Builder.getTrue(); + LoopTerm->setCondition(NewLoopCond); + + deleteDeadInstruction(cast(LoopCond)); + deleteDeadInstruction(cast(IncPtr)); + SE->forgetLoop(CurLoop); + + LLVM_DEBUG(dbgs() << " Formed strlen: " << *StrLenFunc << "\n"); + + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "recognizeAndInsertStrLen", + CurLoop->getStartLoc(), Pre) + << "Transformed pointer difference into a call to strlen() function"; + }); + + ++NumStrLen; + + return true; +} + /// Check if the given conditional branch is based on an unsigned less-than /// comparison between a variable and a constant, and if the comparison is false /// the control yields to the loop entry. If the branch matches the behaviour, diff --git a/llvm/test/Transforms/LoopIdiom/strlen.ll b/llvm/test/Transforms/LoopIdiom/strlen.ll new file mode 100644 index 0000000000000..641fce0da8b78 --- /dev/null +++ b/llvm/test/Transforms/LoopIdiom/strlen.ll @@ -0,0 +1,149 @@ +; RUN: opt -passes='loop-idiom' < %s -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +define i64 @valid_strlen_i8_test1(ptr %Str) { +; CHECK-LABEL: @valid_strlen_i8_test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq ptr [[STR:%.*]], null +; CHECK-NEXT: br i1 [[TOBOOL]], label [[CLEANUP:%.*]], label [[LOR_LHS_FALSE:%.*]] +; CHECK: lor.lhs.false: +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[STR]], align 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[CLEANUP]], label [[FOR_INC_PREHEADER:%.*]] +; CHECK: for.inc.preheader: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[STR]], i64 0 +; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[SCEVGEP]]) +; CHECK-NEXT: br label [[FOR_INC:%.*]] +; CHECK: for.inc: +; CHECK-NEXT: [[SRC_09:%.*]] = phi ptr [ poison, [[FOR_INC]] ], [ [[STR]], [[FOR_INC_PREHEADER]] ] +; CHECK-NEXT: [[TOBOOL2:%.*]] = icmp eq i8 poison, 0 +; CHECK-NEXT: br i1 true, label 
[[FOR_END:%.*]], label [[FOR_INC]] +; CHECK: for.end: +; CHECK-NEXT: br label [[CLEANUP]] +; CHECK: cleanup: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i64 [ [[STRLEN]], [[FOR_END]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[LOR_LHS_FALSE]] ] +; CHECK-NEXT: ret i64 [[RETVAL_0]] +; +entry: + %tobool = icmp eq ptr %Str, null + br i1 %tobool, label %cleanup, label %lor.lhs.false + +lor.lhs.false: ; preds = %entry + %0 = load i8, ptr %Str, align 1 + %cmp = icmp eq i8 %0, 0 + br i1 %cmp, label %cleanup, label %for.inc + +for.inc: ; preds = %lor.lhs.false, %for.inc + %Src.09 = phi ptr [ %incdec.ptr, %for.inc ], [ %Str, %lor.lhs.false ] + %incdec.ptr = getelementptr inbounds i8, ptr %Src.09, i64 1 + %.pr = load i8, ptr %incdec.ptr, align 1 + %tobool2 = icmp eq i8 %.pr, 0 + br i1 %tobool2, label %for.end, label %for.inc + +for.end: ; preds = %for.inc + %sub.ptr.lhs.cast = ptrtoint ptr %incdec.ptr to i64 + %sub.ptr.rhs.cast = ptrtoint ptr %Str to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + br label %cleanup + +cleanup: ; preds = %lor.lhs.false, %entry, %for.end + %retval.0 = phi i64 [ %sub.ptr.sub, %for.end ], [ 0, %entry ], [ 0, %lor.lhs.false ] + ret i64 %retval.0 +} + +define i64 @valid_strlen_i8_test2(ptr %Str) { +; CHECK-LABEL: @valid_strlen_i8_test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq ptr [[STR:%.*]], null +; CHECK-NEXT: br i1 [[TOBOOL]], label [[CLEANUP:%.*]], label [[FOR_COND_PREHEADER:%.*]] +; CHECK: for.cond.preheader: +; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[STR]]) +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[TOBOOL1:%.*]] = icmp eq i8 poison, 0 +; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, ptr poison, i64 1 +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_COND]] +; CHECK: for.end: +; CHECK-NEXT: br label [[CLEANUP]] +; CHECK: cleanup: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i64 [ [[STRLEN]], [[FOR_END]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i64 
[[RETVAL_0]] +; +entry: + %tobool = icmp eq ptr %Str, null + br i1 %tobool, label %cleanup, label %for.cond + +for.cond: ; preds = %entry, %for.cond + %Src.0 = phi ptr [ %incdec.ptr, %for.cond ], [ %Str, %entry ] + %0 = load i8, ptr %Src.0, align 1 + %tobool1 = icmp eq i8 %0, 0 + %incdec.ptr = getelementptr inbounds i8, ptr %Src.0, i64 1 + br i1 %tobool1, label %for.end, label %for.cond + +for.end: ; preds = %for.cond + %sub.ptr.lhs.cast = ptrtoint ptr %Src.0 to i64 + %sub.ptr.rhs.cast = ptrtoint ptr %Str to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + br label %cleanup + + cleanup: ; preds = %entry, %for.end + %retval.0 = phi i64 [ %sub.ptr.sub, %for.end ], [ 0, %entry ] + ret i64 %retval.0 +} + +define void @invalid_strlen_i8_test3(ptr %s, i32 zeroext %i) { +; CHECK-LABEL: @invalid_strlen_i8_test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_COND:%.*]] +; CHECK: while.cond: +; CHECK-NEXT: [[S_ADDR_0:%.*]] = phi ptr [ [[S:%.*]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR1:%.*]], [[WHILE_COND]] ] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[S_ADDR_0]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i8, ptr [[S_ADDR_0]], i64 1 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END:%.*]], label [[WHILE_COND]] +; CHECK: while.end: +; CHECK-NEXT: [[S_ADDR_0_LCSSA:%.*]] = phi ptr [ [[S_ADDR_0]], [[WHILE_COND]] ] +; CHECK-NEXT: [[INCDEC_PTR1_LCSSA:%.*]] = phi ptr [ [[INCDEC_PTR1]], [[WHILE_COND]] ] +; CHECK-NEXT: store i8 45, ptr [[S_ADDR_0_LCSSA]], align 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I:%.*]], 10 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: store i8 65, ptr [[INCDEC_PTR1_LCSSA]], align 1 +; CHECK-NEXT: br label [[IF_END9:%.*]] +; CHECK: if.end: +; CHECK-NEXT: store i8 66, ptr [[INCDEC_PTR1_LCSSA]], align 1 +; CHECK-NEXT: br label [[IF_END9]] +; CHECK: if.end9: +; CHECK-NEXT: ret void +; +entry: + 
br label %while.cond + +while.cond: ; preds = %while.cond, %entry + %s.addr.0 = phi ptr [ %s, %entry ], [ %incdec.ptr1, %while.cond ] + %0 = load i8, ptr %s.addr.0, align 1 + %tobool.not = icmp eq i8 %0, 0 + %incdec.ptr1 = getelementptr inbounds i8, ptr %s.addr.0, i64 1 + br i1 %tobool.not, label %while.end, label %while.cond + +while.end: ; preds = %while.cond + %s.addr.0.lcssa = phi ptr [ %s.addr.0, %while.cond ] + %incdec.ptr1.lcssa = phi ptr [ %incdec.ptr1, %while.cond ] + store i8 45, ptr %s.addr.0.lcssa, align 1 + %cmp = icmp ult i32 %i, 10 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %while.end + store i8 65, ptr %incdec.ptr1.lcssa, align 1 + br label %if.end9 + +if.end: ; preds = %while.end + store i8 66, ptr %incdec.ptr1.lcssa, align 1 + br label %if.end9 + +if.end9: ; preds = %if.end, %if.then + ret void +} + From 7ae690a45f17dfc521dc71fff131305154b642b9 Mon Sep 17 00:00:00 2001 From: Henry Jiang Date: Sun, 8 Sep 2024 13:17:03 -0400 Subject: [PATCH 02/15] enable strlen insert --- llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 2a37d8757a0bd..fd90935f19d59 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1504,7 +1504,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() { return recognizePopcount() || recognizeAndInsertFFS() || recognizeShiftUntilBitTest() || recognizeShiftUntilZero() || - recognizeShiftUntilLessThan(); + recognizeShiftUntilLessThan() || recognizeAndInsertStrLen(); } /// Check if the given conditional branch is based on the comparison between From f752baa0270941af05137740d6069a8ca0a45235 Mon Sep 17 00:00:00 2001 From: Henry Jiang Date: Thu, 12 Sep 2024 16:39:07 -0400 Subject: [PATCH 03/15] replace LCSSA with null term ptr --- .../Transforms/Scalar/LoopIdiomRecognize.cpp | 
125 +++++------------- 1 file changed, 32 insertions(+), 93 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index fd90935f19d59..211a7a68ed639 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -33,6 +33,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -1541,57 +1542,6 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry, return nullptr; } -/// getCandidateResInstr - If there is strlen calculated, return the Result -/// instruction based on the \p OpWidth passed, else return nullptr -static Instruction *getCandidateResInstr(Instruction *EndAddress, - Value *StartAddress, - unsigned OpWidth) { - using namespace llvm::PatternMatch; - - assert(StartAddress && "Valid start address required."); - - // lambda expression to check that the instruction has a single user - auto GetSingleUser = [](Instruction *I) -> User * { - if (I->hasOneUse()) - return *I->user_begin(); - return nullptr; - }; - - // The pointer to the end address should only have one use which is a pointer - // to int instruction. - auto *TmpUser = GetSingleUser(EndAddress); - if (!TmpUser) - return nullptr; - - if (PtrToIntInst *PToI = dyn_cast(TmpUser)) { - // The only user of the PtrToIntInst should be the sub instruction that - // calculates the difference b/w the two pointer operands. 
- TmpUser = GetSingleUser(PToI); - if (!TmpUser) - return nullptr; - Instruction *Inst = dyn_cast(TmpUser); - - if (!Inst || Inst->getOpcode() != Instruction::Sub || - Inst->getOperand(0) != PToI) - return nullptr; - Value *MatchAddr; - if (match(Inst->getOperand(1), m_PtrToInt(m_Value(MatchAddr)))) { - if (MatchAddr != StartAddress) - return nullptr; - - // We found the candidate sub instruction - switch (OpWidth) { - case 8: - return Inst; - default: - return nullptr; - } - } - } - - return nullptr; -} - /// Recognizes a strlen idiom by checking for loops that increment /// a char pointer and then subtract with the base pointer. /// @@ -1600,22 +1550,19 @@ static Instruction *getCandidateResInstr(Instruction *EndAddress, /// /// The core idiom we are trying to detect is: /// \code -/// if (str == NULL) -/// goto loop-exit // the precondition of the loop /// start = str; /// do { /// str++; -/// } while(*str!='\0'); -/// return (str - start); -/// loop-exit: +/// } while(*str != '\0'); /// \endcode /// /// The transformed output is similar to below c-code: /// \code -/// if (str == NULL) -/// goto loop-exit // the precondition of the loop -/// return strlen(str); +/// str = start + strlen(start) +/// len = str - start /// \endcode +/// +/// Later the pointer subtraction will be folded by InstCombine bool LoopIdiomRecognize::recognizeAndInsertStrLen() { if (DisableLIRPStrlen) return false; @@ -1625,30 +1572,20 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() { return false; // It should have a preheader containing nothing but an unconditional branch. 
- auto *Pre = CurLoop->getLoopPreheader(); - if (!Pre || &Pre->front() != Pre->getTerminator()) + auto *Preheader = CurLoop->getLoopPreheader(); + if (!Preheader || &Preheader->front() != Preheader->getTerminator()) return false; - auto *EntryBI = dyn_cast(Pre->getTerminator()); + auto *EntryBI = dyn_cast(Preheader->getTerminator()); if (!EntryBI || EntryBI->isConditional()) return false; - // It should have a precondition block - auto *PreCondBB = Pre->getSinglePredecessor(); - if (!PreCondBB) - return false; - - // The precondition terminator instruction should skip the loop body based on - // an icmp with zero/null. - if (!matchCondition(dyn_cast(PreCondBB->getTerminator()), Pre)) - return false; - // The loop exit must be conditioned on an icmp with 0. // The icmp operand has to be a load on some SSA reg that increments // by 1 in the loop. - auto *LoopBody = *(CurLoop->block_begin()); - auto *LoopTerm = dyn_cast(LoopBody->getTerminator()); - auto *LoopCond = matchCondition(LoopTerm, LoopBody); + BasicBlock *LoopBody = *CurLoop->block_begin(); + BranchInst *LoopTerm = dyn_cast(LoopBody->getTerminator()); + Value *LoopCond = matchCondition(LoopTerm, LoopBody); if (!LoopCond) return false; @@ -1665,6 +1602,7 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() { // the loop, indicating strlen calculation. 
auto *IncPtr = LoopLoad->getPointerOperand(); const SCEVAddRecExpr *LoadEv = dyn_cast(SE->getSCEV(IncPtr)); + if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) return false; @@ -1705,6 +1643,7 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() { if (!LCSSAPhi || !SE->isSCEVable(LCSSAPhi->getType())) return false; + // This matched the pointer version of the idiom if (LCSSAPhi->getIncomingValueForBlock(LoopBody) != LoopLoad->getPointerOperand()) return false; @@ -1717,35 +1656,34 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() { return false; // We can now expand the base of the str - IRBuilder<> Builder(Pre->getTerminator()); + IRBuilder<> Builder(Preheader->getTerminator()); - PHINode *LoopPhi = &*LoopBody->phis().begin(); - if (!LoopPhi || ++LoopBody->phis().begin() != LoopBody->phis().end()) + auto LoopPhiRange = LoopBody->phis(); + if (!hasNItems(LoopPhiRange, 1)) return false; - Value *PreVal = LoopBody->phis().begin()->getIncomingValueForBlock(Pre); + auto *LoopPhi = &*LoopPhiRange.begin(); + Value *PreVal = LoopPhi->getIncomingValueForBlock(Preheader); if (!PreVal) return false; Value *Expanded = nullptr; + Type *ExpandedType = nullptr; if (auto *GEP = dyn_cast(LoopLoad->getPointerOperand())) { if (GEP->getPointerOperand() != LoopPhi) return false; GetElementPtrInst *NewGEP = GetElementPtrInst::Create(GEP->getSourceElementType(), PreVal, SmallVector(GEP->indices()), - "newgep", Pre->getTerminator()); + "newgep", Preheader->getTerminator()); Expanded = NewGEP; - } else if (LoopLoad->getPointerOperand() == LoopPhi) + ExpandedType = NewGEP->getSourceElementType(); + } else if (LoopLoad->getPointerOperand() == LoopPhi) { Expanded = PreVal; + ExpandedType = LoopLoad->getType(); + } if (!Expanded) return false; - // Check that the LoopExitBB is calculating the string length and identify - // the instruction that has the string length calculation - Instruction *ResInst = getCandidateResInstr(LCSSAPhi, PreVal, OpWidth); - if (!ResInst) - 
return false; - // Ensure that the GEP has the correct index if the pointer was modified. // This can happen when the pointer in the user code, outside the loop, // walks past a certain pre-checked index of the string. @@ -1791,11 +1729,12 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() { assert(StrLenFunc && "Failed to emit strlen function."); - // Replace the subtraction instruction by the result of strlen - ResInst->replaceAllUsesWith(StrLenFunc); - - // Remove the loop-exit branch and delete dead instructions - RecursivelyDeleteTriviallyDeadInstructions(ResInst, TLI); + // Replace LCSSA Phi use with new pointer to the null terminator + SmallVector NewBaseIndex{StrLenFunc}; + GetElementPtrInst *NewEndPtr = GetElementPtrInst::Create( + ExpandedType, Expanded, NewBaseIndex, "end", Preheader->getTerminator()); + LCSSAPhi->replaceAllUsesWith(NewEndPtr); + RecursivelyDeleteDeadPHINode(LCSSAPhi); ConstantInt *NewLoopCond = LoopTerm->getSuccessor(0) == LoopBody ? Builder.getFalse() @@ -1810,7 +1749,7 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() { ORE.emit([&]() { return OptimizationRemark(DEBUG_TYPE, "recognizeAndInsertStrLen", - CurLoop->getStartLoc(), Pre) + CurLoop->getStartLoc(), Preheader) << "Transformed pointer difference into a call to strlen() function"; }); From f8057ffe3638507b0a309ac2685ec70e4ba1a0ef Mon Sep 17 00:00:00 2001 From: Henry Jiang Date: Thu, 12 Sep 2024 18:38:19 -0400 Subject: [PATCH 04/15] update tests --- llvm/test/Transforms/LoopIdiom/strlen.ll | 396 +++++++++++++++-------- 1 file changed, 270 insertions(+), 126 deletions(-) diff --git a/llvm/test/Transforms/LoopIdiom/strlen.ll b/llvm/test/Transforms/LoopIdiom/strlen.ll index 641fce0da8b78..43ed9d0980bc4 100644 --- a/llvm/test/Transforms/LoopIdiom/strlen.ll +++ b/llvm/test/Transforms/LoopIdiom/strlen.ll @@ -1,149 +1,293 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -passes='loop-idiom' < %s -S | FileCheck %s 
-target datalayout = "e-m:e-i64:64-n32:64" -target triple = "powerpc64le-unknown-linux-gnu" - -define i64 @valid_strlen_i8_test1(ptr %Str) { -; CHECK-LABEL: @valid_strlen_i8_test1( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq ptr [[STR:%.*]], null -; CHECK-NEXT: br i1 [[TOBOOL]], label [[CLEANUP:%.*]], label [[LOR_LHS_FALSE:%.*]] -; CHECK: lor.lhs.false: -; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[STR]], align 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[TMP0]], 0 -; CHECK-NEXT: br i1 [[CMP]], label [[CLEANUP]], label [[FOR_INC_PREHEADER:%.*]] -; CHECK: for.inc.preheader: -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[STR]], i64 0 -; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[SCEVGEP]]) -; CHECK-NEXT: br label [[FOR_INC:%.*]] -; CHECK: for.inc: -; CHECK-NEXT: [[SRC_09:%.*]] = phi ptr [ poison, [[FOR_INC]] ], [ [[STR]], [[FOR_INC_PREHEADER]] ] -; CHECK-NEXT: [[TOBOOL2:%.*]] = icmp eq i8 poison, 0 -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_INC]] -; CHECK: for.end: -; CHECK-NEXT: br label [[CLEANUP]] -; CHECK: cleanup: -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i64 [ [[STRLEN]], [[FOR_END]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[LOR_LHS_FALSE]] ] -; CHECK-NEXT: ret i64 [[RETVAL_0]] +declare void @use(ptr) + +define i64 @valid_strlen_1(ptr %0) { +; CHECK-LABEL: define i64 @valid_strlen_1( +; CHECK-SAME: ptr [[TMP0:%.*]]) { +; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[TMP0]]) +; CHECK-NEXT: [[DOTLCSSA:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[STRLEN]] +; CHECK-NEXT: br label %[[BB2:.*]] +; CHECK: [[BB2]]: +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i8 poison, 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr poison, i64 1 +; CHECK-NEXT: br i1 true, label %[[BB5:.*]], label %[[BB2]] +; CHECK: [[BB5]]: +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[DOTLCSSA]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[TMP0]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = sub i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: ret i64 
[[TMP14]] ; -entry: - %tobool = icmp eq ptr %Str, null - br i1 %tobool, label %cleanup, label %lor.lhs.false - -lor.lhs.false: ; preds = %entry - %0 = load i8, ptr %Str, align 1 - %cmp = icmp eq i8 %0, 0 - br i1 %cmp, label %cleanup, label %for.inc - -for.inc: ; preds = %lor.lhs.false, %for.inc - %Src.09 = phi ptr [ %incdec.ptr, %for.inc ], [ %Str, %lor.lhs.false ] - %incdec.ptr = getelementptr inbounds i8, ptr %Src.09, i64 1 - %.pr = load i8, ptr %incdec.ptr, align 1 - %tobool2 = icmp eq i8 %.pr, 0 - br i1 %tobool2, label %for.end, label %for.inc - -for.end: ; preds = %for.inc - %sub.ptr.lhs.cast = ptrtoint ptr %incdec.ptr to i64 - %sub.ptr.rhs.cast = ptrtoint ptr %Str to i64 - %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast - br label %cleanup + br label %2 -cleanup: ; preds = %lor.lhs.false, %entry, %for.end - %retval.0 = phi i64 [ %sub.ptr.sub, %for.end ], [ 0, %entry ], [ 0, %lor.lhs.false ] - ret i64 %retval.0 +2: ; preds = %2, %1 + %3 = phi ptr [ %0, %1 ], [ %6, %2 ] + %4 = load i8, ptr %3, align 1 + %5 = icmp eq i8 %4, 0 + %6 = getelementptr inbounds i8, ptr %3, i64 1 + br i1 %5, label %7, label %2 + +7: ; preds = %2 + %8 = ptrtoint ptr %3 to i64 + %9 = ptrtoint ptr %0 to i64 + %10 = sub i64 %8, %9 + ret i64 %10 } -define i64 @valid_strlen_i8_test2(ptr %Str) { -; CHECK-LABEL: @valid_strlen_i8_test2( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq ptr [[STR:%.*]], null -; CHECK-NEXT: br i1 [[TOBOOL]], label [[CLEANUP:%.*]], label [[FOR_COND_PREHEADER:%.*]] -; CHECK: for.cond.preheader: + +define i32 @valid_strlen_2(ptr %0) { +; CHECK-LABEL: define i32 @valid_strlen_2( +; CHECK-SAME: ptr [[TMP0:%.*]]) { +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; CHECK-NEXT: br i1 [[TMP2]], label %[[BB14:.*]], label %[[BB3:.*]] +; CHECK: [[BB3]]: +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP0]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label %[[BB14]], label %[[DOTPREHEADER:.*]] +; 
CHECK: [[_PREHEADER:.*:]] +; CHECK-NEXT: [[STR:%.*]] = getelementptr i8, ptr [[TMP0]], i64 0 ; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[STR]]) -; CHECK-NEXT: br label [[FOR_COND:%.*]] -; CHECK: for.cond: -; CHECK-NEXT: [[TOBOOL1:%.*]] = icmp eq i8 poison, 0 +; CHECK-NEXT: [[STR_ADDR_0_LCSSA:%.*]] = getelementptr i8, ptr [[STR]], i64 [[STRLEN]] +; CHECK-NEXT: br label %[[BB6:.*]] +; CHECK: [[BB6]]: +; CHECK-NEXT: [[TMP7:%.*]] = phi ptr [ poison, %[[BB6]] ], [ [[TMP0]], %[[DOTPREHEADER]] ] +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i8 poison, 0 +; CHECK-NEXT: br i1 true, label %[[BB9:.*]], label %[[BB6]] +; CHECK: [[BB9]]: +; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[STR_ADDR_0_LCSSA]] to i64 +; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP0]] to i64 +; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +; CHECK-NEXT: [[TMP13:%.*]] = trunc i64 [[SUB_PTR_SUB]] to i32 +; CHECK-NEXT: br label %[[BB14]] +; CHECK: [[BB14]]: +; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ [[TMP13]], %[[BB9]] ], [ 0, %[[BB3]] ], [ 0, [[TMP1:%.*]] ] +; CHECK-NEXT: ret i32 [[TMP15]] +; + %2 = icmp eq ptr %0, null + br i1 %2, label %16, label %3 + +3: ; preds = %1 + %4 = load i8, ptr %0, align 1 + %5 = icmp eq i8 %4, 0 + br i1 %5, label %16, label %6 + +6: ; preds = %3, %6 + %7 = phi ptr [ %8, %6 ], [ %0, %3 ] + %8 = getelementptr inbounds i8, ptr %7, i64 1 + %9 = load i8, ptr %8, align 1 + %10 = icmp eq i8 %9, 0 + br i1 %10, label %11, label %6 + +11: ; preds = %6 + %12 = ptrtoint ptr %8 to i64 + %13 = ptrtoint ptr %0 to i64 + %14 = sub i64 %12, %13 + %15 = trunc i64 %14 to i32 + br label %16 + +16: ; preds = %1, %3, %11 + %17 = phi i32 [ %15, %11 ], [ 0, %3 ], [ 0, %1 ] + ret i32 %17 +} + +define i64 @valid_strlen_3(ptr %str) local_unnamed_addr #0 { +; CHECK-LABEL: define i64 @valid_strlen_3( +; CHECK-SAME: ptr [[STR:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[_PREHEADER:.*:]] +; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr 
[[STR]]) +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[STR]], i64 [[STRLEN]] +; CHECK-NEXT: br label %[[WHILE_COND:.*]] +; CHECK: [[WHILE_COND]]: +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 poison, 0 ; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, ptr poison, i64 1 -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_COND]] -; CHECK: for.end: -; CHECK-NEXT: br label [[CLEANUP]] -; CHECK: cleanup: -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i64 [ [[STRLEN]], [[FOR_END]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: ret i64 [[RETVAL_0]] +; CHECK-NEXT: br i1 true, label %[[WHILE_END:.*]], label %[[WHILE_COND]] +; CHECK: [[WHILE_END]]: +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP0]] to i64 +; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[STR]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[TMP10]], [[SUB_PTR_RHS_CAST]] +; CHECK-NEXT: tail call void @use(ptr [[TMP0]]) +; CHECK-NEXT: tail call void @use(ptr [[STR]]) +; CHECK-NEXT: ret i64 [[TMP13]] ; entry: - %tobool = icmp eq ptr %Str, null - br i1 %tobool, label %cleanup, label %for.cond - -for.cond: ; preds = %entry, %for.cond - %Src.0 = phi ptr [ %incdec.ptr, %for.cond ], [ %Str, %entry ] - %0 = load i8, ptr %Src.0, align 1 - %tobool1 = icmp eq i8 %0, 0 - %incdec.ptr = getelementptr inbounds i8, ptr %Src.0, i64 1 - br i1 %tobool1, label %for.end, label %for.cond - -for.end: ; preds = %for.cond - %sub.ptr.lhs.cast = ptrtoint ptr %Src.0 to i64 - %sub.ptr.rhs.cast = ptrtoint ptr %Str to i64 + br label %while.cond + +while.cond: ; preds = %while.cond, %entry + %str.addr.0 = phi ptr [ %str, %entry ], [ %incdec.ptr, %while.cond ] + %0 = load i8, ptr %str.addr.0, align 1 + %cmp.not = icmp eq i8 %0, 0 + %incdec.ptr = getelementptr inbounds i8, ptr %str.addr.0, i64 1 + br i1 %cmp.not, label %while.end, label %while.cond + +while.end: ; preds = %while.cond + %sub.ptr.lhs.cast = ptrtoint ptr %str.addr.0 to i64 + %sub.ptr.rhs.cast = ptrtoint ptr %str to i64 %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, 
%sub.ptr.rhs.cast - br label %cleanup + tail call void @use(ptr %str.addr.0) + tail call void @use(ptr %str) + ret i64 %sub.ptr.sub +} - cleanup: ; preds = %entry, %for.end - %retval.0 = phi i64 [ %sub.ptr.sub, %for.end ], [ 0, %entry ] - ret i64 %retval.0 +define i64 @valid_strlen_4(ptr %0) { +; CHECK-LABEL: define i64 @valid_strlen_4( +; CHECK-SAME: ptr [[TMP0:%.*]]) { +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; CHECK-NEXT: br i1 [[TMP2]], label %[[BB10:.*]], label %[[DOTPREHEADER:.*]] +; CHECK: [[_PREHEADER:.*:]] +; CHECK-NEXT: [[NEWGEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 0 +; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[NEWGEP]]) +; CHECK-NEXT: [[END:%.*]] = getelementptr i8, ptr [[NEWGEP]], i64 [[STRLEN]] +; CHECK-NEXT: br label %[[BB3:.*]] +; CHECK: [[BB3]]: +; CHECK-NEXT: [[TMP4:%.*]] = phi ptr [ poison, %[[BB3]] ], [ [[TMP0]], %[[DOTPREHEADER]] ] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 poison, 0 +; CHECK-NEXT: br i1 true, label %[[BB6:.*]], label %[[BB3]] +; CHECK: [[BB6]]: +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[END]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP0]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] +; CHECK-NEXT: br label %[[BB10]] +; CHECK: [[BB10]]: +; CHECK-NEXT: [[TMP11:%.*]] = phi i64 [ [[TMP9]], %[[BB6]] ], [ 0, [[TMP1:%.*]] ] +; CHECK-NEXT: ret i64 [[TMP11]] +; + %2 = icmp eq ptr %0, null + br i1 %2, label %12, label %3 + +3: ; preds = %1, %3 + %4 = phi ptr [ %5, %3 ], [ %0, %1 ] + %5 = getelementptr inbounds i8, ptr %4, i64 1 + %6 = load i8, ptr %5, align 1 + %7 = icmp eq i8 %6, 0 + br i1 %7, label %8, label %3 + +8: ; preds = %3 + %9 = ptrtoint ptr %5 to i64 + %10 = ptrtoint ptr %0 to i64 + %11 = sub i64 %9, %10 + br label %12 + +12: ; preds = %1, %8 + %13 = phi i64 [ %11, %8 ], [ 0, %1 ] + ret i64 %13 } -define void @invalid_strlen_i8_test3(ptr %s, i32 zeroext %i) { -; CHECK-LABEL: @invalid_strlen_i8_test3( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label 
[[WHILE_COND:%.*]] -; CHECK: while.cond: -; CHECK-NEXT: [[S_ADDR_0:%.*]] = phi ptr [ [[S:%.*]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR1:%.*]], [[WHILE_COND]] ] -; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[S_ADDR_0]], align 1 -; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP0]], 0 -; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i8, ptr [[S_ADDR_0]], i64 1 -; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END:%.*]], label [[WHILE_COND]] -; CHECK: while.end: -; CHECK-NEXT: [[S_ADDR_0_LCSSA:%.*]] = phi ptr [ [[S_ADDR_0]], [[WHILE_COND]] ] -; CHECK-NEXT: [[INCDEC_PTR1_LCSSA:%.*]] = phi ptr [ [[INCDEC_PTR1]], [[WHILE_COND]] ] -; CHECK-NEXT: store i8 45, ptr [[S_ADDR_0_LCSSA]], align 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I:%.*]], 10 -; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; CHECK: if.then: -; CHECK-NEXT: store i8 65, ptr [[INCDEC_PTR1_LCSSA]], align 1 -; CHECK-NEXT: br label [[IF_END9:%.*]] -; CHECK: if.end: -; CHECK-NEXT: store i8 66, ptr [[INCDEC_PTR1_LCSSA]], align 1 -; CHECK-NEXT: br label [[IF_END9]] -; CHECK: if.end9: -; CHECK-NEXT: ret void +define i64 @valid_strlen_use(ptr %str) { +; CHECK-LABEL: define i64 @valid_strlen_use( +; CHECK-SAME: ptr [[STR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[STR]]) +; CHECK-NEXT: [[END:%.*]] = getelementptr i8, ptr [[STR]], i64 [[STRLEN]] +; CHECK-NEXT: br label %[[WHILE_COND:.*]] +; CHECK: [[WHILE_COND]]: +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i8 poison, 0 +; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, ptr poison, i64 1 +; CHECK-NEXT: br i1 true, label %[[WHILE_END:.*]], label %[[WHILE_COND]] +; CHECK: [[WHILE_END]]: +; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[END]] to i64 +; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[STR]] to i64 +; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +; CHECK-NEXT: tail call void @use(ptr noundef nonnull [[END]]) +; 
CHECK-NEXT: tail call void @use(ptr noundef [[STR]]) +; CHECK-NEXT: ret i64 [[SUB_PTR_SUB]] ; entry: br label %while.cond while.cond: ; preds = %while.cond, %entry - %s.addr.0 = phi ptr [ %s, %entry ], [ %incdec.ptr1, %while.cond ] - %0 = load i8, ptr %s.addr.0, align 1 - %tobool.not = icmp eq i8 %0, 0 - %incdec.ptr1 = getelementptr inbounds i8, ptr %s.addr.0, i64 1 - br i1 %tobool.not, label %while.end, label %while.cond + %str.addr.0 = phi ptr [ %str, %entry ], [ %incdec.ptr, %while.cond ] + %0 = load i8, ptr %str.addr.0, align 1 + %cmp.not = icmp eq i8 %0, 0 + %incdec.ptr = getelementptr inbounds i8, ptr %str.addr.0, i64 1 + br i1 %cmp.not, label %while.end, label %while.cond while.end: ; preds = %while.cond - %s.addr.0.lcssa = phi ptr [ %s.addr.0, %while.cond ] - %incdec.ptr1.lcssa = phi ptr [ %incdec.ptr1, %while.cond ] - store i8 45, ptr %s.addr.0.lcssa, align 1 - %cmp = icmp ult i32 %i, 10 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %while.end - store i8 65, ptr %incdec.ptr1.lcssa, align 1 - br label %if.end9 - -if.end: ; preds = %while.end - store i8 66, ptr %incdec.ptr1.lcssa, align 1 - br label %if.end9 - -if.end9: ; preds = %if.end, %if.then - ret void + %sub.ptr.lhs.cast = ptrtoint ptr %str.addr.0 to i64 + %sub.ptr.rhs.cast = ptrtoint ptr %str to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + tail call void @use(ptr noundef nonnull %str.addr.0) + tail call void @use(ptr noundef %str) + ret i64 %sub.ptr.sub } +define i64 @invalid_strlen_has_side_effect(ptr %0) { +; CHECK-LABEL: define i64 @invalid_strlen_has_side_effect( +; CHECK-SAME: ptr [[TMP0:%.*]]) { +; CHECK-NEXT: br label %[[BB2:.*]] +; CHECK: [[BB2]]: +; CHECK-NEXT: [[TMP3:%.*]] = phi ptr [ [[TMP0]], [[TMP1:%.*]] ], [ [[TMP6:%.*]], %[[BB2]] ] +; CHECK-NEXT: [[TMP4:%.*]] = load volatile i8, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6]] = getelementptr inbounds i8, ptr [[TMP3]], i64 1 +; CHECK-NEXT: br 
i1 [[TMP5]], label %[[BB7:.*]], label %[[BB2]] +; CHECK: [[BB7]]: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi ptr [ [[TMP3]], %[[BB2]] ] +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[DOTLCSSA]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP0]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] +; CHECK-NEXT: ret i64 [[TMP10]] +; + br label %2 + +2: ; preds = %2, %1 + %3 = phi ptr [ %0, %1 ], [ %6, %2 ] + %4 = load volatile i8, ptr %3, align 1 + %5 = icmp eq i8 %4, 0 + %6 = getelementptr inbounds i8, ptr %3, i64 1 + br i1 %5, label %7, label %2 + +7: ; preds = %2 + %8 = ptrtoint ptr %3 to i64 + %9 = ptrtoint ptr %0 to i64 + %10 = sub i64 %8, %9 + ret i64 %10 +} + +define i64 @invalid_strlen_idx_idiom(ptr %0) { +; CHECK-LABEL: define i64 @invalid_strlen_idx_idiom( +; CHECK-SAME: ptr [[TMP0:%.*]]) { +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP0]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label %[[BB13:.*]], label %[[DOTPREHEADER:.*]] +; CHECK: [[_PREHEADER:.*:]] +; CHECK-NEXT: br label %[[BB4:.*]] +; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP5:%.*]] = phi i32 [ [[TMP7:%.*]], %[[BB4]] ], [ 0, %[[DOTPREHEADER]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi ptr [ [[TMP8:%.*]], %[[BB4]] ], [ [[TMP0]], %[[DOTPREHEADER]] ] +; CHECK-NEXT: [[TMP7]] = add nuw nsw i32 [[TMP5]], 1 +; CHECK-NEXT: [[TMP8]] = getelementptr inbounds i8, ptr [[TMP6]], i64 1 +; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i8 [[TMP9]], 0 +; CHECK-NEXT: br i1 [[TMP10]], label %[[BB11:.*]], label %[[BB4]] +; CHECK: [[BB11]]: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP7]], %[[BB4]] ] +; CHECK-NEXT: [[TMP12:%.*]] = zext nneg i32 [[DOTLCSSA]] to i64 +; CHECK-NEXT: br label %[[BB13]] +; CHECK: [[BB13]]: +; CHECK-NEXT: [[TMP14:%.*]] = phi i64 [ 0, [[TMP1:%.*]] ], [ [[TMP12]], %[[BB11]] ] +; CHECK-NEXT: ret i64 [[TMP14]] +; + %2 = load i8, ptr %0, align 1 + %3 = icmp eq i8 %2, 0 + br i1 %3, label 
%13, label %4 + +4: ; preds = %1, %4 + %5 = phi i32 [ %7, %4 ], [ 0, %1 ] + %6 = phi ptr [ %8, %4 ], [ %0, %1 ] + %7 = add nuw nsw i32 %5, 1 + %8 = getelementptr inbounds i8, ptr %6, i64 1 + %9 = load i8, ptr %8, align 1 + %10 = icmp eq i8 %9, 0 + br i1 %10, label %11, label %4 + +11: ; preds = %4 + %12 = zext nneg i32 %7 to i64 + br label %13 + +13: ; preds = %11, %1 + %14 = phi i64 [ 0, %1 ], [ %12, %11 ] + ret i64 %14 +} + + From 280bc117d99b6d151c014936aeb73ef3a56dcbd7 Mon Sep 17 00:00:00 2001 From: Henry Jiang Date: Fri, 13 Sep 2024 15:12:24 -0400 Subject: [PATCH 05/15] Add wcslen idiom --- .../Transforms/Scalar/LoopIdiomRecognize.h | 3 + .../llvm/Transforms/Utils/BuildLibCalls.h | 6 ++ .../Transforms/Scalar/LoopIdiomRecognize.cpp | 36 +++++++--- llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 9 +++ llvm/test/Transforms/LoopIdiom/wcslen16.ll | 66 +++++++++++++++++ llvm/test/Transforms/LoopIdiom/wcslen32.ll | 70 +++++++++++++++++++ 6 files changed, 181 insertions(+), 9 deletions(-) create mode 100644 llvm/test/Transforms/LoopIdiom/wcslen16.ll create mode 100644 llvm/test/Transforms/LoopIdiom/wcslen32.ll diff --git a/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h b/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h index 3a9f016ce9bd6..241a3fc109360 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h @@ -37,6 +37,9 @@ struct DisableLIRP { /// When true, Strlen is disabled. static bool Strlen; + + /// When true, Wcslen is disabled. + static bool Wcslen; }; /// Performs Loop Idiom Recognize Pass. 
diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h index a8fb38e726004..50f695dbe6c07 100644 --- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h +++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h @@ -93,6 +93,12 @@ namespace llvm { Value *emitStrLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI); + /// Emit a call to the wcslen function to the builder, for the specified + /// pointer. Ptr is required to be some pointer type, and the return value has + /// 'size_t' type. + Value *emitWcsLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL, + const TargetLibraryInfo *TLI); + /// Emit a call to the strdup function to the builder, for the specified /// pointer. Ptr is required to be some pointer type, and the return value has /// 'i8*' type. diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 211a7a68ed639..5586b476513ea 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -136,6 +136,14 @@ static cl::opt cl::location(DisableLIRP::Strlen), cl::init(false), cl::ReallyHidden); +bool DisableLIRP::Wcslen; +static cl::opt + DisableLIRPWcslen("disable-" DEBUG_TYPE "-wcslen", + cl::desc("Proceed with loop idiom recognize pass, but do " + "not convert loop(s) to wcslen."), + cl::location(DisableLIRP::Wcslen), cl::init(false), + cl::ReallyHidden); + static cl::opt UseLIRCodeSizeHeurs( "use-lir-code-size-heurs", cl::desc("Use loop idiom recognition code size heuristics when compiling " @@ -1611,15 +1619,19 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() { if (!Step) return false; - unsigned int ConstIntValue = 0; + unsigned int StepSize = 0; if (ConstantInt *CI = dyn_cast(Step->getValue())) - ConstIntValue = CI->getZExtValue(); + StepSize = CI->getZExtValue(); unsigned OpWidth = OperandType->getIntegerBitWidth(); - if 
(OpWidth != ConstIntValue * 8) + unsigned WcharSize = TLI->getWCharSize(*LoopLoad->getModule()); + if (OpWidth != StepSize * 8) return false; - if (OpWidth != 8) + if (OpWidth != 8 && OpWidth != 16 && OpWidth != 32) return false; + if (OpWidth >= 16) + if (OpWidth != WcharSize * 8 || DisableLIRPWcslen) + return false; // Scan every instruction in the loop to ensure there are no side effects. for (auto &I : *LoopBody) @@ -1671,12 +1683,11 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() { if (auto *GEP = dyn_cast(LoopLoad->getPointerOperand())) { if (GEP->getPointerOperand() != LoopPhi) return false; - GetElementPtrInst *NewGEP = - GetElementPtrInst::Create(GEP->getSourceElementType(), PreVal, - SmallVector(GEP->indices()), - "newgep", Preheader->getTerminator()); + GetElementPtrInst *NewGEP = GetElementPtrInst::Create( + LoopLoad->getType(), PreVal, SmallVector(GEP->indices()), + "newgep", Preheader->getTerminator()); Expanded = NewGEP; - ExpandedType = NewGEP->getSourceElementType(); + ExpandedType = LoopLoad->getType(); } else if (LoopLoad->getPointerOperand() == LoopPhi) { Expanded = PreVal; ExpandedType = LoopLoad->getType(); @@ -1723,8 +1734,15 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() { Value *StrLenFunc = nullptr; switch (OpWidth) { case 8: + if (!TLI->has(LibFunc_strlen)) + return false; StrLenFunc = emitStrLen(Expanded, Builder, *DL, TLI); break; + case 16: + case 32: + if (!TLI->has(LibFunc_wcslen)) + return false; + StrLenFunc = emitWcsLen(Expanded, Builder, *DL, TLI); } assert(StrLenFunc && "Failed to emit strlen function."); diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 2301be6977cef..24eefc91117b4 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -1582,6 +1582,15 @@ Value *llvm::emitStrLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL, return emitLibCall(LibFunc_strlen, SizeTTy, CharPtrTy, Ptr, B, TLI); } 
+Value *llvm::emitWcsLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL, + const TargetLibraryInfo *TLI) { + assert(Ptr && Ptr->getType()->isPointerTy() && + "Argument to wcslen intrinsic must be a pointer."); + Type *PtrTy = B.getPtrTy(); + Type *SizeTTy = getSizeTTy(B, TLI); + return emitLibCall(LibFunc_wcslen, SizeTTy, PtrTy, Ptr, B, TLI); +} + Value *llvm::emitStrDup(Value *Ptr, IRBuilderBase &B, const TargetLibraryInfo *TLI) { Type *CharPtrTy = B.getPtrTy(); diff --git a/llvm/test/Transforms/LoopIdiom/wcslen16.ll b/llvm/test/Transforms/LoopIdiom/wcslen16.ll new file mode 100644 index 0000000000000..6c140ddf90d4e --- /dev/null +++ b/llvm/test/Transforms/LoopIdiom/wcslen16.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes='loop-idiom' < %s -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i64 @valid_strlen16(ptr %src) { +; CHECK-LABEL: define i64 @valid_strlen16( +; CHECK-SAME: ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[SRC]], null +; CHECK-NEXT: br i1 [[CMP]], label %[[RETURN:.*]], label %[[LOR_LHS_FALSE:.*]] +; CHECK: [[LOR_LHS_FALSE]]: +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[SRC]], align 2 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i16 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[CMP1]], label %[[RETURN]], label %[[WHILE_COND_PREHEADER:.*]] +; CHECK: [[WHILE_COND_PREHEADER]]: +; CHECK-NEXT: [[NEWGEP:%.*]] = getelementptr i16, ptr [[SRC]], i64 -1 +; CHECK-NEXT: [[WCSLEN:%.*]] = call i64 @wcslen(ptr [[NEWGEP]]) +; CHECK-NEXT: [[END:%.*]] = getelementptr i16, ptr [[NEWGEP]], i64 [[WCSLEN]] +; CHECK-NEXT: br label %[[WHILE_COND:.*]] +; CHECK: [[WHILE_COND]]: +; CHECK-NEXT: [[SRC_PN:%.*]] = phi ptr [ poison, %[[WHILE_COND]] ], [ [[SRC]], %[[WHILE_COND_PREHEADER]] ] +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i16 
poison, 0 +; CHECK-NEXT: br i1 true, label %[[WHILE_END:.*]], label %[[WHILE_COND]] +; CHECK: [[WHILE_END]]: +; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[END]] to i64 +; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[SRC]] to i64 +; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +; CHECK-NEXT: [[SUB_PTR_DIV:%.*]] = ashr exact i64 [[SUB_PTR_SUB]], 1 +; CHECK-NEXT: br label %[[RETURN]] +; CHECK: [[RETURN]]: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i64 [ [[SUB_PTR_DIV]], %[[WHILE_END]] ], [ 0, %[[LOR_LHS_FALSE]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: ret i64 [[RETVAL_0]] +; +entry: + %cmp = icmp eq ptr %src, null + br i1 %cmp, label %return, label %lor.lhs.false + +lor.lhs.false: ; preds = %entry + %0 = load i16, ptr %src, align 2 + %cmp1 = icmp eq i16 %0, 0 + br i1 %cmp1, label %return, label %while.cond + +while.cond: ; preds = %lor.lhs.false, %while.cond + %src.pn = phi ptr [ %curr.0, %while.cond ], [ %src, %lor.lhs.false ] + %curr.0 = getelementptr inbounds i8, ptr %src.pn, i64 2 + %1 = load i16, ptr %curr.0, align 2 + %tobool.not = icmp eq i16 %1, 0 + br i1 %tobool.not, label %while.end, label %while.cond + +while.end: ; preds = %while.cond + %sub.ptr.lhs.cast = ptrtoint ptr %curr.0 to i64 + %sub.ptr.rhs.cast = ptrtoint ptr %src to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + %sub.ptr.div = ashr exact i64 %sub.ptr.sub, 1 + br label %return + +return: ; preds = %entry, %lor.lhs.false, %while.end + %retval.0 = phi i64 [ %sub.ptr.div, %while.end ], [ 0, %lor.lhs.false ], [ 0, %entry ] + ret i64 %retval.0 +} + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"wchar_size", i32 2} + diff --git a/llvm/test/Transforms/LoopIdiom/wcslen32.ll b/llvm/test/Transforms/LoopIdiom/wcslen32.ll new file mode 100644 index 0000000000000..fad4c52078967 --- /dev/null +++ b/llvm/test/Transforms/LoopIdiom/wcslen32.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: 
--version 5 +; RUN: opt -passes='loop-idiom' < %s -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i64 @valid_wcslen32(ptr %src) { +; CHECK-LABEL: define i64 @valid_wcslen32( +; CHECK-SAME: ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[SRC]], null +; CHECK-NEXT: br i1 [[CMP]], label %[[RETURN:.*]], label %[[LOR_LHS_FALSE:.*]] +; CHECK: [[LOR_LHS_FALSE]]: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[CMP1]], label %[[RETURN]], label %[[WHILE_COND_PREHEADER:.*]] +; CHECK: [[WHILE_COND_PREHEADER]]: +; CHECK-NEXT: [[NEWGEP:%.*]] = getelementptr i32, ptr [[SRC]], i64 -3 +; CHECK-NEXT: [[WCSLEN:%.*]] = call i64 @wcslen(ptr [[NEWGEP]]) +; CHECK-NEXT: [[END:%.*]] = getelementptr i32, ptr [[NEWGEP]], i64 [[WCSLEN]] +; CHECK-NEXT: br label %[[WHILE_COND:.*]] +; CHECK: [[WHILE_COND]]: +; CHECK-NEXT: [[SRC_PN:%.*]] = phi ptr [ poison, %[[WHILE_COND]] ], [ [[SRC]], %[[WHILE_COND_PREHEADER]] ] +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 poison, 0 +; CHECK-NEXT: br i1 true, label %[[WHILE_END:.*]], label %[[WHILE_COND]] +; CHECK: [[WHILE_END]]: +; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[END]] to i64 +; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[SRC]] to i64 +; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +; CHECK-NEXT: [[SUB_PTR_DIV:%.*]] = ashr exact i64 [[SUB_PTR_SUB]], 2 +; CHECK-NEXT: br label %[[RETURN]] +; CHECK: [[RETURN]]: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i64 [ [[SUB_PTR_DIV]], %[[WHILE_END]] ], [ 0, %[[LOR_LHS_FALSE]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: ret i64 [[RETVAL_0]] +; +entry: + %cmp = icmp eq ptr %src, null + br i1 %cmp, label %return, label %lor.lhs.false + +lor.lhs.false: ; preds = %entry + %0 = load i32, ptr %src, align 4 + 
%cmp1 = icmp eq i32 %0, 0 + br i1 %cmp1, label %return, label %while.cond.preheader + +while.cond.preheader: ; preds = %lor.lhs.false + br label %while.cond + +while.cond: ; preds = %while.cond.preheader, %while.cond + %src.pn = phi ptr [ %curr.0, %while.cond ], [ %src, %while.cond.preheader ] + %curr.0 = getelementptr inbounds i8, ptr %src.pn, i64 4 + %1 = load i32, ptr %curr.0, align 4 + %tobool.not = icmp eq i32 %1, 0 + br i1 %tobool.not, label %while.end, label %while.cond + +while.end: ; preds = %while.cond + %curr.0.lcssa = phi ptr [ %curr.0, %while.cond ] + %sub.ptr.lhs.cast = ptrtoint ptr %curr.0.lcssa to i64 + %sub.ptr.rhs.cast = ptrtoint ptr %src to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + %sub.ptr.div = ashr exact i64 %sub.ptr.sub, 2 + br label %return + +return: ; preds = %entry, %lor.lhs.false, %while.end + %retval.0 = phi i64 [ %sub.ptr.div, %while.end ], [ 0, %lor.lhs.false ], [ 0, %entry ] + ret i64 %retval.0 +} + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"wchar_size", i32 4} + From 86497d24934ad14a9bcb9ff16d079dd68c1c7890 Mon Sep 17 00:00:00 2001 From: Henry Jiang Date: Sun, 24 Nov 2024 01:58:13 -0500 Subject: [PATCH 06/15] refactor with SCEV Expander --- .../Transforms/Scalar/LoopIdiomRecognize.cpp | 330 +++++++++--------- 1 file changed, 173 insertions(+), 157 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 5586b476513ea..bbb0105f6c693 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -253,7 +253,7 @@ class LoopIdiomRecognize { bool insertFFSIfProfitable(Intrinsic::ID IntrinID, Value *InitX, Instruction *DefX, PHINode *CntPhi, Instruction *CntInst); - bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz + bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz bool recognizeShiftUntilLessThan(); void transformLoopToCountable(Intrinsic::ID 
IntrinID, BasicBlock *PreCondBB, Instruction *CntInst, PHINode *CntPhi, @@ -621,7 +621,8 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl &SL, const SCEVAddRecExpr *FirstStoreEv = cast(SE->getSCEV(FirstStorePtr)); APInt FirstStride = getStoreStride(FirstStoreEv); - unsigned FirstStoreSize = DL->getTypeStoreSize(SL[i]->getValueOperand()->getType()); + unsigned FirstStoreSize = + DL->getTypeStoreSize(SL[i]->getValueOperand()->getType()); // See if we can optimize just this store in isolation. if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) { @@ -1112,13 +1113,14 @@ bool LoopIdiomRecognize::processLoopStridedStore( BasePtr, SplatValue, NumBytes, MaybeAlign(StoreAlignment), /*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias); } else { - assert (isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)); + assert(isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)); // Everything is emitted in default address space Type *Int8PtrTy = DestInt8PtrTy; StringRef FuncName = "memset_pattern16"; - FunctionCallee MSP = getOrInsertLibFunc(M, *TLI, LibFunc_memset_pattern16, - Builder.getVoidTy(), Int8PtrTy, Int8PtrTy, IntIdxTy); + FunctionCallee MSP = + getOrInsertLibFunc(M, *TLI, LibFunc_memset_pattern16, + Builder.getVoidTy(), Int8PtrTy, Int8PtrTy, IntIdxTy); inferNonMandatoryLibFuncAttrs(M, FuncName, *TLI); // Otherwise we should form a memset_pattern16. 
PatternValue is known to be @@ -1160,8 +1162,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( R << "Transformed loop-strided store in " << ore::NV("Function", TheStore->getFunction()) << " function into a call to " - << ore::NV("NewFunction", NewCall->getCalledFunction()) - << "() intrinsic"; + << ore::NV("NewFunction", NewCall->getCalledFunction()) << "() intrinsic"; if (!Stores.empty()) R << ore::setExtraArgs(); for (auto *I : Stores) { @@ -1467,8 +1468,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( << ore::NV("NewFunction", NewCall->getCalledFunction()) << "() intrinsic from " << ore::NV("Inst", InstRemark) << " instruction in " << ore::NV("Function", TheStore->getFunction()) - << " function" - << ore::setExtraArgs() + << " function" << ore::setExtraArgs() << ore::NV("FromBlock", TheStore->getParent()->getName()) << ore::NV("ToBlock", Preheader->getName()); }); @@ -1550,51 +1550,47 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry, return nullptr; } -/// Recognizes a strlen idiom by checking for loops that increment -/// a char pointer and then subtract with the base pointer. -/// -/// If detected, transforms the relevant code to a strlen function -/// call, and returns true; otherwise, returns false. -/// -/// The core idiom we are trying to detect is: -/// \code -/// start = str; -/// do { -/// str++; -/// } while(*str != '\0'); -/// \endcode -/// -/// The transformed output is similar to below c-code: -/// \code -/// str = start + strlen(start) -/// len = str - start -/// \endcode -/// -/// Later the pointer subtraction will be folded by InstCombine -bool LoopIdiomRecognize::recognizeAndInsertStrLen() { - if (DisableLIRPStrlen) - return false; +struct StrlenIdiom { + unsigned IdiomSize; + ConstantInt *StepSize; + const SCEV *LoadBaseEv; + Type *LoadType; +}; + +/// Trying to detect strlen idiom that increments a char pointer +/// with a single loop body bb. 
+static bool detectStrLenIdiom(const Loop *CurLoop, ScalarEvolution *SE, + const TargetLibraryInfo *TLI, + StrlenIdiom &Idiom) { + + outs() << "current loop:\n"; + CurLoop->print(outs()); + outs() << "\n"; // Give up if the loop has multiple blocks or multiple backedges. if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1) return false; - // It should have a preheader containing nothing but an unconditional branch. - auto *Preheader = CurLoop->getLoopPreheader(); - if (!Preheader || &Preheader->front() != Preheader->getTerminator()) + // It should have a preheader and a branch instruction. + BasicBlock *Preheader = CurLoop->getLoopPreheader(); + if (!Preheader) return false; - auto *EntryBI = dyn_cast(Preheader->getTerminator()); - if (!EntryBI || EntryBI->isConditional()) + BranchInst *EntryBI = dyn_cast(Preheader->getTerminator()); + if (!EntryBI) return false; - // The loop exit must be conditioned on an icmp with 0. + // The loop exit must be conditioned on an icmp with 0 the null terminator. // The icmp operand has to be a load on some SSA reg that increments // by 1 in the loop. BasicBlock *LoopBody = *CurLoop->block_begin(); + + // Skip if the body is too big as it most likely is not a strlen idiom. + if (!LoopBody || LoopBody->size() >= 10) + return false; + BranchInst *LoopTerm = dyn_cast(LoopBody->getTerminator()); Value *LoopCond = matchCondition(LoopTerm, LoopBody); - if (!LoopCond) return false; @@ -1606,23 +1602,29 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() { if (!OperandType || !OperandType->isIntegerTy()) return false; - // See if the pointer expression is an AddRec with step 1 ({n,+,1}) on - // the loop, indicating strlen calculation. + // See if the pointer expression is an AddRec with constant step a of form + // ({n,+,a}) where a is the width of the char type. 
auto *IncPtr = LoopLoad->getPointerOperand(); const SCEVAddRecExpr *LoadEv = dyn_cast(SE->getSCEV(IncPtr)); - if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) return false; + outs() << "pointer load ev: "; + LoadEv->print(outs()); + outs() << "\n"; + const SCEVConstant *Step = dyn_cast(LoadEv->getStepRecurrence(*SE)); if (!Step) return false; - unsigned int StepSize = 0; - if (ConstantInt *CI = dyn_cast(Step->getValue())) - StepSize = CI->getZExtValue(); + unsigned StepSize = 0; + ConstantInt *StepSizeCI = dyn_cast(Step->getValue()); + if (!StepSizeCI) + return false; + StepSize = StepSizeCI->getZExtValue(); + // Verify that StepSize is consistent with platform char width. unsigned OpWidth = OperandType->getIntegerBitWidth(); unsigned WcharSize = TLI->getWCharSize(*LoopLoad->getModule()); if (OpWidth != StepSize * 8) @@ -1630,7 +1632,7 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() { if (OpWidth != 8 && OpWidth != 16 && OpWidth != 32) return false; if (OpWidth >= 16) - if (OpWidth != WcharSize * 8 || DisableLIRPWcslen) + if (OpWidth != WcharSize * 8) return false; // Scan every instruction in the loop to ensure there are no side effects. @@ -1642,137 +1644,152 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() { if (!LoopExitBB) return false; - // Check that the loop exit block is valid: - // It needs to have exactly one LCSSA Phi which is an AddRec. 
- PHINode *LCSSAPhi = nullptr; for (PHINode &PN : LoopExitBB->phis()) { - if (!LCSSAPhi && PN.getNumIncomingValues() == 1) - LCSSAPhi = &PN; - else + const SCEV *Ev = SE->getSCEV(&PN); + outs() << "loop exit block scev exprs: "; + PN.print(outs()); + if (Ev) + Ev->print(outs()); + outs() << "\n"; + + if (!Ev) return false; - } - if (!LCSSAPhi || !SE->isSCEVable(LCSSAPhi->getType())) - return false; - - // This matched the pointer version of the idiom - if (LCSSAPhi->getIncomingValueForBlock(LoopBody) != - LoopLoad->getPointerOperand()) - return false; + // Since we verified that the loop trip count will be a valid strlen idiom, + // we can expand all lcssa phi with {n,+,1} as (n + strlen) and use + // SCEVExpander materialize the loop output. + const SCEVAddRecExpr *AddRecEv = dyn_cast(Ev); + if (!AddRecEv || !AddRecEv->isAffine()) + return false; - const SCEVAddRecExpr *LCSSAEv = - dyn_cast(SE->getSCEV(LCSSAPhi->getIncomingValue(0))); + // We only want RecAddExpr with recurrence step that are constant. This + // is good enough for all the idioms we want to recognize. Later we expand + // the recurrence as {base,+,a} -> (base + a * strlen) and materialize + if (!dyn_cast(AddRecEv->getStepRecurrence(*SE))) + return false; + } - if (!LCSSAEv || !dyn_cast(SE->getPointerBase(LCSSAEv)) || - !LCSSAEv->isAffine()) - return false; + Idiom.LoadBaseEv = LoadEv->getStart(); + Idiom.IdiomSize = OpWidth; + Idiom.StepSize = StepSizeCI; + Idiom.LoadType = OperandType; + return true; +} - // We can now expand the base of the str - IRBuilder<> Builder(Preheader->getTerminator()); +/// Recognizes a strlen idiom by checking for loops that increment +/// a char pointer and then subtract with the base pointer. +/// +/// If detected, transforms the relevant code to a strlen function +/// call, and returns true; otherwise, returns false. 
+/// +/// The core idiom we are trying to detect is: +/// \code +/// start = str; +/// do { +/// str++; +/// } while(*str != '\0'); +/// \endcode +/// +/// The transformed output is similar to below c-code: +/// \code +/// str = start + strlen(start) +/// len = str - start +/// \endcode +/// +/// Later the pointer subtraction will be folded by InstCombine +bool LoopIdiomRecognize::recognizeAndInsertStrLen() { + /* + const auto *First = CurLoop->block_begin(); + if (First != CurLoop->block_end()) { + auto *F = (*First)->getParent(); + outs() << "\n\n\n\n\n========== NEW LOOP ============\n"; + F->print(outs()); + } + */ - auto LoopPhiRange = LoopBody->phis(); - if (!hasNItems(LoopPhiRange, 1)) - return false; - auto *LoopPhi = &*LoopPhiRange.begin(); - Value *PreVal = LoopPhi->getIncomingValueForBlock(Preheader); - if (!PreVal) + // TODO: check for disable options + StrlenIdiom Idiom; + if (!detectStrLenIdiom(CurLoop, SE, TLI, Idiom)) return false; - Value *Expanded = nullptr; - Type *ExpandedType = nullptr; - if (auto *GEP = dyn_cast(LoopLoad->getPointerOperand())) { - if (GEP->getPointerOperand() != LoopPhi) - return false; - GetElementPtrInst *NewGEP = GetElementPtrInst::Create( - LoopLoad->getType(), PreVal, SmallVector(GEP->indices()), - "newgep", Preheader->getTerminator()); - Expanded = NewGEP; - ExpandedType = LoopLoad->getType(); - } else if (LoopLoad->getPointerOperand() == LoopPhi) { - Expanded = PreVal; - ExpandedType = LoopLoad->getType(); - } - if (!Expanded) - return false; - - // Ensure that the GEP has the correct index if the pointer was modified. - // This can happen when the pointer in the user code, outside the loop, - // walks past a certain pre-checked index of the string. 
- if (auto *GEP = dyn_cast(Expanded)) { - if (GEP->getNumOperands() != 2) - return false; - - ConstantInt *I0 = dyn_cast(GEP->getOperand(1)); - if (!I0) - return false; - - int64_t Index = I0->getSExtValue(); // GEP index - auto *SAdd = dyn_cast(LoadEv->getStart()); - if (!SAdd || SAdd->getNumOperands() != 2) - return false; + // outs() << "idiom is good\n\n"; - auto *SAdd0 = dyn_cast(SAdd->getOperand(0)); - if (!SAdd0) - return false; - - ConstantInt *CInt = SAdd0->getValue(); // SCEV index - assert(CInt && "Expecting CInt to be valid."); - int64_t Offset = CInt->getSExtValue(); + BasicBlock *Preheader = CurLoop->getLoopPreheader(); + BasicBlock *LoopExitBB = CurLoop->getExitBlock(); - // Update the index based on the Offset - assert((Offset * 8) % GEP->getSourceElementType()->getIntegerBitWidth() == - 0 && - "Invalid offset"); - int64_t NewIndex = - (Offset * 8) / GEP->getSourceElementType()->getIntegerBitWidth() - - Index; - Value *NewIndexVal = - ConstantInt::get(GEP->getOperand(1)->getType(), NewIndex); - GEP->setOperand(1, NewIndexVal); - } + IRBuilder<> Builder(Preheader->getTerminator()); + SCEVExpander Expander(*SE, Preheader->getModule()->getDataLayout(), "scev"); + Value *MaterialzedBase = Expander.expandCodeFor( + Idiom.LoadBaseEv, Idiom.LoadBaseEv->getType(), Builder.GetInsertPoint()); Value *StrLenFunc = nullptr; - switch (OpWidth) { + switch (Idiom.IdiomSize) { case 8: - if (!TLI->has(LibFunc_strlen)) + if (!isLibFuncEmittable(Preheader->getModule(), TLI, LibFunc_strlen)) return false; - StrLenFunc = emitStrLen(Expanded, Builder, *DL, TLI); + StrLenFunc = emitStrLen(MaterialzedBase, Builder, *DL, TLI); break; case 16: case 32: - if (!TLI->has(LibFunc_wcslen)) + if (!isLibFuncEmittable(Preheader->getModule(), TLI, LibFunc_wcslen)) return false; - StrLenFunc = emitWcsLen(Expanded, Builder, *DL, TLI); + StrLenFunc = emitWcsLen(MaterialzedBase, Builder, *DL, TLI); } - assert(StrLenFunc && "Failed to emit strlen function."); - // Replace LCSSA Phi use 
with new pointer to the null terminator - SmallVector NewBaseIndex{StrLenFunc}; - GetElementPtrInst *NewEndPtr = GetElementPtrInst::Create( - ExpandedType, Expanded, NewBaseIndex, "end", Preheader->getTerminator()); - LCSSAPhi->replaceAllUsesWith(NewEndPtr); - RecursivelyDeleteDeadPHINode(LCSSAPhi); - - ConstantInt *NewLoopCond = LoopTerm->getSuccessor(0) == LoopBody - ? Builder.getFalse() - : Builder.getTrue(); - LoopTerm->setCondition(NewLoopCond); - - deleteDeadInstruction(cast(LoopCond)); - deleteDeadInstruction(cast(IncPtr)); + const SCEV *StrlenEv = SE->getSCEV(StrLenFunc); + SmallVector Cleanup; + for (PHINode &PN : LoopExitBB->phis()) { + const SCEV *Ev = SE->getSCEV(&PN); + const SCEVAddRecExpr *AddRecEv = dyn_cast(Ev); + const SCEVConstant *Step = + dyn_cast(AddRecEv->getStepRecurrence(*SE)); + const SCEV *Base = AddRecEv->getStart(); + + /* + outs() << "creating new mult scev: "; + Base->getType()->print(outs()); + outs() << " "; + Step->getType()->print(outs()); + outs() << " "; + StrlenEv->getType()->print(outs()); + outs() << "\n"; + */ + + // It is safe to truncate to base since if base is narrower than size_t + // the equivalent user code will have to truncate anyways. 
+ const SCEV *NewEv = SE->getAddExpr( + Base, SE->getMulExpr(Step, SE->getTruncateOrSignExtend( + StrlenEv, Base->getType()))); + + /* + outs() << "new ev exprs: "; + PN.print(outs()); + if (NewEv) + NewEv->print(outs()); + outs() << "\n"; + */ + + Expander.clear(); + Value *MaterializedPHI = Expander.expandCodeFor(NewEv, NewEv->getType(), + Builder.GetInsertPoint()); + PN.replaceAllUsesWith(MaterializedPHI); + Cleanup.push_back(&PN); + } + + for (PHINode *PN : Cleanup) { + RecursivelyDeleteDeadPHINode(PN); + } SE->forgetLoop(CurLoop); - LLVM_DEBUG(dbgs() << " Formed strlen: " << *StrLenFunc << "\n"); - + ++NumStrLen; + LLVM_DEBUG(dbgs() << " Formed strlen idiom: " << *StrLenFunc << "\n"); ORE.emit([&]() { return OptimizationRemark(DEBUG_TYPE, "recognizeAndInsertStrLen", CurLoop->getStartLoc(), Preheader) - << "Transformed pointer difference into a call to strlen() function"; + << "Transformed strlen loop idiom"; }); - ++NumStrLen; - return true; } @@ -1983,8 +2000,7 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, ConstantInt *Dec = dyn_cast(SubOneOp->getOperand(1)); if (!Dec || !((SubOneOp->getOpcode() == Instruction::Sub && Dec->isOne()) || - (SubOneOp->getOpcode() == Instruction::Add && - Dec->isMinusOne()))) { + (SubOneOp->getOpcode() == Instruction::Add && Dec->isMinusOne()))) { return false; } } @@ -2095,8 +2111,8 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL, // step 2: detect instructions corresponding to "x.next = x >> 1 or x << 1" if (!DefX || !DefX->isShift()) return false; - IntrinID = DefX->getOpcode() == Instruction::Shl ? Intrinsic::cttz : - Intrinsic::ctlz; + IntrinID = + DefX->getOpcode() == Instruction::Shl ? 
Intrinsic::cttz : Intrinsic::ctlz; ConstantInt *Shft = dyn_cast(DefX->getOperand(1)); if (!Shft || !Shft->isOne()) return false; @@ -2599,9 +2615,8 @@ void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB, TcPhi->insertBefore(Body->begin()); Builder.SetInsertPoint(LbCond); - Instruction *TcDec = cast( - Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1), - "tcdec", false, true)); + Instruction *TcDec = cast(Builder.CreateSub( + TcPhi, ConstantInt::get(Ty, 1), "tcdec", false, true)); TcPhi->addIncoming(TripCnt, PreHead); TcPhi->addIncoming(TcDec, Body); @@ -3231,7 +3246,8 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() { // intrinsic we'll use are not cheap. Note that we are okay with *just* // making the loop countable, even if nothing else changes. IntrinsicCostAttributes Attrs( - IntrID, Ty, {PoisonValue::get(Ty), /*is_zero_poison=*/Builder.getFalse()}); + IntrID, Ty, + {PoisonValue::get(Ty), /*is_zero_poison=*/Builder.getFalse()}); InstructionCost Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind); if (Cost > TargetTransformInfo::TCC_Basic) { LLVM_DEBUG(dbgs() << DEBUG_TYPE From 34fefa5d1e230174c0f5eea0e3391c871c03a026 Mon Sep 17 00:00:00 2001 From: Henry Jiang Date: Sun, 24 Nov 2024 15:55:14 -0500 Subject: [PATCH 07/15] Add more tests --- .../Transforms/Scalar/LoopIdiomRecognize.cpp | 23 +- llvm/test/Transforms/LoopIdiom/strlen.ll | 611 +++++++++++------- 2 files changed, 388 insertions(+), 246 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index bbb0105f6c693..521a448ea5dfc 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -20,8 +20,7 @@ // // TODO List: // -// Future loop memory idioms to recognize: -// memcmp, strlen, etc. +// Future loop memory idioms to recognize: memcmp, etc. 
// // This could recognize common matrix multiplies and dot product idioms and // replace them with calls to BLAS (if linked in??). @@ -1562,13 +1561,16 @@ struct StrlenIdiom { static bool detectStrLenIdiom(const Loop *CurLoop, ScalarEvolution *SE, const TargetLibraryInfo *TLI, StrlenIdiom &Idiom) { - + /* outs() << "current loop:\n"; CurLoop->print(outs()); outs() << "\n"; + */ - // Give up if the loop has multiple blocks or multiple backedges. - if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1) + // Give up if the loop has multiple blocks, multiple backedges, or + // multiple exit blocks + if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1 || + !CurLoop->getUniqueExitBlock()) return false; // It should have a preheader and a branch instruction. @@ -1586,7 +1588,7 @@ static bool detectStrLenIdiom(const Loop *CurLoop, ScalarEvolution *SE, BasicBlock *LoopBody = *CurLoop->block_begin(); // Skip if the body is too big as it most likely is not a strlen idiom. 
- if (!LoopBody || LoopBody->size() >= 10) + if (!LoopBody || LoopBody->size() >= 15) return false; BranchInst *LoopTerm = dyn_cast(LoopBody->getTerminator()); @@ -1609,9 +1611,11 @@ static bool detectStrLenIdiom(const Loop *CurLoop, ScalarEvolution *SE, if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) return false; + /* outs() << "pointer load ev: "; LoadEv->print(outs()); outs() << "\n"; + */ const SCEVConstant *Step = dyn_cast(LoadEv->getStepRecurrence(*SE)); @@ -1645,12 +1649,17 @@ static bool detectStrLenIdiom(const Loop *CurLoop, ScalarEvolution *SE, return false; for (PHINode &PN : LoopExitBB->phis()) { + if (!SE->isSCEVable(PN.getType())) + return false; + const SCEV *Ev = SE->getSCEV(&PN); + /* outs() << "loop exit block scev exprs: "; PN.print(outs()); if (Ev) Ev->print(outs()); outs() << "\n"; + */ if (!Ev) return false; @@ -1777,6 +1786,8 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() { Cleanup.push_back(&PN); } + // All LCSSA Loop Phi are dead, the left over loop body can be cleaned up by + // later passes for (PHINode *PN : Cleanup) { RecursivelyDeleteDeadPHINode(PN); } diff --git a/llvm/test/Transforms/LoopIdiom/strlen.ll b/llvm/test/Transforms/LoopIdiom/strlen.ll index 43ed9d0980bc4..0dc833ec0e35f 100644 --- a/llvm/test/Transforms/LoopIdiom/strlen.ll +++ b/llvm/test/Transforms/LoopIdiom/strlen.ll @@ -1,293 +1,424 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes='loop-idiom' < %s -S | FileCheck %s +; RUN: opt -passes='loop(loop-idiom),verify' < %s -S | FileCheck %s +declare void @other() declare void @use(ptr) +declare void @usei(i32) +declare void @usel(i64) -define i64 @valid_strlen_1(ptr %0) { -; CHECK-LABEL: define i64 @valid_strlen_1( -; CHECK-SAME: ptr [[TMP0:%.*]]) { -; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[TMP0]]) -; CHECK-NEXT: [[DOTLCSSA:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[STRLEN]] -; CHECK-NEXT: br label %[[BB2:.*]] -; 
CHECK: [[BB2]]: -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i8 poison, 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr poison, i64 1 -; CHECK-NEXT: br i1 true, label %[[BB5:.*]], label %[[BB2]] -; CHECK: [[BB5]]: -; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[DOTLCSSA]] to i64 -; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[TMP0]] to i64 -; CHECK-NEXT: [[TMP14:%.*]] = sub i64 [[TMP12]], [[TMP13]] -; CHECK-NEXT: ret i64 [[TMP14]] -; - br label %2 - -2: ; preds = %2, %1 - %3 = phi ptr [ %0, %1 ], [ %6, %2 ] - %4 = load i8, ptr %3, align 1 - %5 = icmp eq i8 %4, 0 - %6 = getelementptr inbounds i8, ptr %3, i64 1 - br i1 %5, label %7, label %2 - -7: ; preds = %2 - %8 = ptrtoint ptr %3 to i64 - %9 = ptrtoint ptr %0 to i64 - %10 = sub i64 %8, %9 - ret i64 %10 -} - - -define i32 @valid_strlen_2(ptr %0) { -; CHECK-LABEL: define i32 @valid_strlen_2( -; CHECK-SAME: ptr [[TMP0:%.*]]) { -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; CHECK-NEXT: br i1 [[TMP2]], label %[[BB14:.*]], label %[[BB3:.*]] -; CHECK: [[BB3]]: -; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP0]], align 1 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0 -; CHECK-NEXT: br i1 [[TMP5]], label %[[BB14]], label %[[DOTPREHEADER:.*]] -; CHECK: [[_PREHEADER:.*:]] -; CHECK-NEXT: [[STR:%.*]] = getelementptr i8, ptr [[TMP0]], i64 0 -; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[STR]]) -; CHECK-NEXT: [[STR_ADDR_0_LCSSA:%.*]] = getelementptr i8, ptr [[STR]], i64 [[STRLEN]] -; CHECK-NEXT: br label %[[BB6:.*]] -; CHECK: [[BB6]]: -; CHECK-NEXT: [[TMP7:%.*]] = phi ptr [ poison, %[[BB6]] ], [ [[TMP0]], %[[DOTPREHEADER]] ] -; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i8 poison, 0 -; CHECK-NEXT: br i1 true, label %[[BB9:.*]], label %[[BB6]] -; CHECK: [[BB9]]: -; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[STR_ADDR_0_LCSSA]] to i64 -; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP0]] to i64 -; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] -; 
CHECK-NEXT: [[TMP13:%.*]] = trunc i64 [[SUB_PTR_SUB]] to i32 -; CHECK-NEXT: br label %[[BB14]] -; CHECK: [[BB14]]: -; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ [[TMP13]], %[[BB9]] ], [ 0, %[[BB3]] ], [ 0, [[TMP1:%.*]] ] -; CHECK-NEXT: ret i32 [[TMP15]] -; - %2 = icmp eq ptr %0, null - br i1 %2, label %16, label %3 - -3: ; preds = %1 - %4 = load i8, ptr %0, align 1 - %5 = icmp eq i8 %4, 0 - br i1 %5, label %16, label %6 - -6: ; preds = %3, %6 - %7 = phi ptr [ %8, %6 ], [ %0, %3 ] - %8 = getelementptr inbounds i8, ptr %7, i64 1 - %9 = load i8, ptr %8, align 1 - %10 = icmp eq i8 %9, 0 - br i1 %10, label %11, label %6 - -11: ; preds = %6 - %12 = ptrtoint ptr %8 to i64 - %13 = ptrtoint ptr %0 to i64 - %14 = sub i64 %12, %13 - %15 = trunc i64 %14 to i32 - br label %16 - -16: ; preds = %1, %3, %11 - %17 = phi i32 [ %15, %11 ], [ 0, %3 ], [ 0, %1 ] - ret i32 %17 -} - -define i64 @valid_strlen_3(ptr %str) local_unnamed_addr #0 { -; CHECK-LABEL: define i64 @valid_strlen_3( -; CHECK-SAME: ptr [[STR:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[_PREHEADER:.*:]] +; size_t basic_strlen(const char* str) { +; while (*str != '\0') { +; ++str; +; } +; return str - base; +; } +define i64 @valid_basic_strlen(ptr %str) { +; CHECK-LABEL: define i64 @valid_basic_strlen( +; CHECK-SAME: ptr [[STR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[STR]]) -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[STR]], i64 [[STRLEN]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[STR]], i64 [[STRLEN]] ; CHECK-NEXT: br label %[[WHILE_COND:.*]] ; CHECK: [[WHILE_COND]]: -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 poison, 0 -; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, ptr poison, i64 1 -; CHECK-NEXT: br i1 true, label %[[WHILE_END:.*]], label %[[WHILE_COND]] +; CHECK-NEXT: [[STR_ADDR_0:%.*]] = phi ptr [ [[STR]], %[[ENTRY]] ], [ [[INCDEC_PTR:%.*]], %[[WHILE_COND]] ] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[STR_ADDR_0]], align 1 +; 
CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr i8, ptr [[STR_ADDR_0]], i64 1 +; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_COND]] ; CHECK: [[WHILE_END]]: -; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP0]] to i64 +; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[SCEVGEP]] to i64 ; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[STR]] to i64 -; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[TMP10]], [[SUB_PTR_RHS_CAST]] -; CHECK-NEXT: tail call void @use(ptr [[TMP0]]) -; CHECK-NEXT: tail call void @use(ptr [[STR]]) -; CHECK-NEXT: ret i64 [[TMP13]] +; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +; CHECK-NEXT: ret i64 [[SUB_PTR_SUB]] ; entry: br label %while.cond -while.cond: ; preds = %while.cond, %entry +while.cond: %str.addr.0 = phi ptr [ %str, %entry ], [ %incdec.ptr, %while.cond ] %0 = load i8, ptr %str.addr.0, align 1 %cmp.not = icmp eq i8 %0, 0 - %incdec.ptr = getelementptr inbounds i8, ptr %str.addr.0, i64 1 + %incdec.ptr = getelementptr i8, ptr %str.addr.0, i64 1 br i1 %cmp.not, label %while.end, label %while.cond -while.end: ; preds = %while.cond +while.end: %sub.ptr.lhs.cast = ptrtoint ptr %str.addr.0 to i64 %sub.ptr.rhs.cast = ptrtoint ptr %str to i64 %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast - tail call void @use(ptr %str.addr.0) - tail call void @use(ptr %str) ret i64 %sub.ptr.sub } -define i64 @valid_strlen_4(ptr %0) { -; CHECK-LABEL: define i64 @valid_strlen_4( -; CHECK-SAME: ptr [[TMP0:%.*]]) { -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; CHECK-NEXT: br i1 [[TMP2]], label %[[BB10:.*]], label %[[DOTPREHEADER:.*]] -; CHECK: [[_PREHEADER:.*:]] -; CHECK-NEXT: [[NEWGEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 0 -; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[NEWGEP]]) -; CHECK-NEXT: [[END:%.*]] = getelementptr i8, ptr [[NEWGEP]], i64 [[STRLEN]] -; CHECK-NEXT: br label %[[BB3:.*]] -; CHECK: 
[[BB3]]: -; CHECK-NEXT: [[TMP4:%.*]] = phi ptr [ poison, %[[BB3]] ], [ [[TMP0]], %[[DOTPREHEADER]] ] -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 poison, 0 -; CHECK-NEXT: br i1 true, label %[[BB6:.*]], label %[[BB3]] -; CHECK: [[BB6]]: -; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[END]] to i64 -; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP0]] to i64 -; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -; CHECK-NEXT: br label %[[BB10]] -; CHECK: [[BB10]]: -; CHECK-NEXT: [[TMP11:%.*]] = phi i64 [ [[TMP9]], %[[BB6]] ], [ 0, [[TMP1:%.*]] ] -; CHECK-NEXT: ret i64 [[TMP11]] +; int valid_basic_strlen_rotated(const char* str) { +; const char* base = str; +; if (!*str) return 0; +; do { +; ++str; +; } while (*str); +; return str - base; +; } +define i32 @valid_basic_strlen_rotated(ptr %str) { +; CHECK-LABEL: define i32 @valid_basic_strlen_rotated( +; CHECK-SAME: ptr [[STR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[STR]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[CLEANUP:.*]], label %[[DO_BODY_PREHEADER:.*]] +; CHECK: [[DO_BODY_PREHEADER]]: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[STR]], i64 1 +; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[SCEVGEP]]) +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[STRLEN]], 1 +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[STR]], i64 [[TMP1]] +; CHECK-NEXT: br label %[[DO_BODY:.*]] +; CHECK: [[DO_BODY]]: +; CHECK-NEXT: [[STR_ADDR_0:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], %[[DO_BODY]] ], [ [[STR]], %[[DO_BODY_PREHEADER]] ] +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds nuw i8, ptr [[STR_ADDR_0]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1 +; CHECK-NEXT: [[TOBOOL1_NOT:%.*]] = icmp eq i8 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[TOBOOL1_NOT]], label %[[DO_END:.*]], label %[[DO_BODY]] +; CHECK: [[DO_END]]: +; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[SCEVGEP1]] to i64 
+; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[STR]] to i64 +; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[SUB_PTR_SUB]] to i32 +; CHECK-NEXT: br label %[[CLEANUP]] +; CHECK: [[CLEANUP]]: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CONV]], %[[DO_END]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: ret i32 [[RETVAL_0]] ; - %2 = icmp eq ptr %0, null - br i1 %2, label %12, label %3 +entry: + %0 = load i8, ptr %str, align 1 + %tobool.not = icmp eq i8 %0, 0 + br i1 %tobool.not, label %cleanup, label %do.body + +do.body: + %str.addr.0 = phi ptr [ %incdec.ptr, %do.body ], [ %str, %entry ] + %incdec.ptr = getelementptr inbounds nuw i8, ptr %str.addr.0, i64 1 + %1 = load i8, ptr %incdec.ptr, align 1 + %tobool1.not = icmp eq i8 %1, 0 + br i1 %tobool1.not, label %do.end, label %do.body -3: ; preds = %1, %3 - %4 = phi ptr [ %5, %3 ], [ %0, %1 ] - %5 = getelementptr inbounds i8, ptr %4, i64 1 - %6 = load i8, ptr %5, align 1 - %7 = icmp eq i8 %6, 0 - br i1 %7, label %8, label %3 +do.end: + %sub.ptr.lhs.cast = ptrtoint ptr %incdec.ptr to i64 + %sub.ptr.rhs.cast = ptrtoint ptr %str to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + %conv = trunc i64 %sub.ptr.sub to i32 + br label %cleanup + +cleanup: + %retval.0 = phi i32 [ %conv, %do.end ], [ 0, %entry ] + ret i32 %retval.0 +} + +; int valid_strlen_with_aux_indvar(const char* str) { +; int count = 0; +; int count_offset = -10; +; int count_multiple = 0; +; +; while (*str) { +; ++str; +; ++count; +; ++count_offset; +; count_multiple += 2; +; ++foo; +; } +; +; usei(count); +; usei(count_offset); +; usei(count_multiple); +; use(str); +; use(foo); +; } +define dso_local void @valid_strlen_with_aux_indvar(ptr noundef %str, ptr noundef %foo) local_unnamed_addr { +; CHECK-LABEL: define dso_local void @valid_strlen_with_aux_indvar( +; CHECK-SAME: ptr noundef [[STR:%.*]], ptr noundef [[FOO:%.*]]) local_unnamed_addr { +; CHECK-NEXT: 
[[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[STR]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT9:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT9]], label %[[WHILE_END:.*]], label %[[WHILE_BODY_PREHEADER:.*]] +; CHECK: [[WHILE_BODY_PREHEADER]]: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[STR]], i64 1 +; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[SCEVGEP]]) +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[STRLEN]], 1 +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[STR]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[STRLEN]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[STRLEN]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], -9 +; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[STRLEN]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = shl i32 [[TMP6]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 2 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[STRLEN]], 1 +; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[FOO]], i64 [[TMP9]] +; CHECK-NEXT: br label %[[WHILE_BODY:.*]] +; CHECK: [[WHILE_BODY]]: +; CHECK-NEXT: [[COUNT_MULTIPLE_014:%.*]] = phi i32 [ [[ADD:%.*]], %[[WHILE_BODY]] ], [ 0, %[[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[COUNT_OFFSET_013:%.*]] = phi i32 [ [[INC1:%.*]], %[[WHILE_BODY]] ], [ -10, %[[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[COUNT_012:%.*]] = phi i32 [ [[INC:%.*]], %[[WHILE_BODY]] ], [ 0, %[[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[FOO_ADDR_011:%.*]] = phi ptr [ [[INCDEC_PTR2:%.*]], %[[WHILE_BODY]] ], [ [[FOO]], %[[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[STR_ADDR_010:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], %[[WHILE_BODY]] ], [ [[STR]], %[[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds nuw i8, ptr [[STR_ADDR_010]], i64 1 +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[COUNT_012]], 1 +; CHECK-NEXT: [[INC1]] = add nsw i32 [[COUNT_OFFSET_013]], 1 +; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[COUNT_MULTIPLE_014]], 2 +; 
CHECK-NEXT: [[INCDEC_PTR2]] = getelementptr inbounds nuw i8, ptr [[FOO_ADDR_011]], i64 1 +; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP10]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END_LOOPEXIT:.*]], label %[[WHILE_BODY]] +; CHECK: [[WHILE_END_LOOPEXIT]]: +; CHECK-NEXT: br label %[[WHILE_END]] +; CHECK: [[WHILE_END]]: +; CHECK-NEXT: [[STR_ADDR_0_LCSSA:%.*]] = phi ptr [ [[STR]], %[[ENTRY]] ], [ [[SCEVGEP1]], %[[WHILE_END_LOOPEXIT]] ] +; CHECK-NEXT: [[FOO_ADDR_0_LCSSA:%.*]] = phi ptr [ [[FOO]], %[[ENTRY]] ], [ [[SCEVGEP2]], %[[WHILE_END_LOOPEXIT]] ] +; CHECK-NEXT: [[COUNT_0_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP3]], %[[WHILE_END_LOOPEXIT]] ] +; CHECK-NEXT: [[COUNT_OFFSET_0_LCSSA:%.*]] = phi i32 [ -10, %[[ENTRY]] ], [ [[TMP5]], %[[WHILE_END_LOOPEXIT]] ] +; CHECK-NEXT: [[COUNT_MULTIPLE_0_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP8]], %[[WHILE_END_LOOPEXIT]] ] +; CHECK-NEXT: tail call void @usei(i32 noundef [[COUNT_0_LCSSA]]) +; CHECK-NEXT: tail call void @usei(i32 noundef [[COUNT_OFFSET_0_LCSSA]]) +; CHECK-NEXT: tail call void @usei(i32 noundef [[COUNT_MULTIPLE_0_LCSSA]]) +; CHECK-NEXT: tail call void @use(ptr noundef nonnull [[STR_ADDR_0_LCSSA]]) +; CHECK-NEXT: tail call void @use(ptr noundef [[FOO_ADDR_0_LCSSA]]) +; CHECK-NEXT: ret void +; +entry: + %0 = load i8, ptr %str, align 1 + %tobool.not9 = icmp eq i8 %0, 0 + br i1 %tobool.not9, label %while.end, label %while.body -8: ; preds = %3 - %9 = ptrtoint ptr %5 to i64 - %10 = ptrtoint ptr %0 to i64 - %11 = sub i64 %9, %10 - br label %12 +while.body: + %count_multiple.014 = phi i32 [ %add, %while.body ], [ 0, %entry ] + %count_offset.013 = phi i32 [ %inc1, %while.body ], [ -10, %entry ] + %count.012 = phi i32 [ %inc, %while.body ], [ 0, %entry ] + %foo.addr.011 = phi ptr [ %incdec.ptr2, %while.body ], [ %foo, %entry ] + %str.addr.010 = phi ptr [ %incdec.ptr, %while.body ], [ %str, %entry ] + %incdec.ptr = 
getelementptr inbounds nuw i8, ptr %str.addr.010, i64 1 + %inc = add nuw nsw i32 %count.012, 1 + %inc1 = add nsw i32 %count_offset.013, 1 + %add = add nuw nsw i32 %count_multiple.014, 2 + %incdec.ptr2 = getelementptr inbounds nuw i8, ptr %foo.addr.011, i64 1 + %1 = load i8, ptr %incdec.ptr, align 1 + %tobool.not = icmp eq i8 %1, 0 + br i1 %tobool.not, label %while.end, label %while.body -12: ; preds = %1, %8 - %13 = phi i64 [ %11, %8 ], [ 0, %1 ] - ret i64 %13 +while.end: + %str.addr.0.lcssa = phi ptr [ %str, %entry ], [ %incdec.ptr, %while.body ] + %foo.addr.0.lcssa = phi ptr [ %foo, %entry ], [ %incdec.ptr2, %while.body ] + %count.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ] + %count_offset.0.lcssa = phi i32 [ -10, %entry ], [ %inc1, %while.body ] + %count_multiple.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.body ] + tail call void @usei(i32 noundef %count.0.lcssa) #3 + tail call void @usei(i32 noundef %count_offset.0.lcssa) #3 + tail call void @usei(i32 noundef %count_multiple.0.lcssa) #3 + tail call void @use(ptr noundef nonnull %str.addr.0.lcssa) #3 + tail call void @use(ptr noundef %foo.addr.0.lcssa) #3 + ret void } -define i64 @valid_strlen_use(ptr %str) { -; CHECK-LABEL: define i64 @valid_strlen_use( +; int valid_strlen_index(const char* str) { +; int i = 0; +; while (str[i]) { +; ++i; +; } +; return i; +; } +define i32 @valid_strlen_index(ptr %str) { +; CHECK-LABEL: define i32 @valid_strlen_index( ; CHECK-SAME: ptr [[STR:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[STR]]) -; CHECK-NEXT: [[END:%.*]] = getelementptr i8, ptr [[STR]], i64 [[STRLEN]] ; CHECK-NEXT: br label %[[WHILE_COND:.*]] ; CHECK: [[WHILE_COND]]: -; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i8 poison, 0 -; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, ptr poison, i64 1 -; CHECK-NEXT: br i1 true, label %[[WHILE_END:.*]], label %[[WHILE_COND]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 
[[INDVARS_IV_NEXT:%.*]], %[[WHILE_COND]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[STR]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_COND]] ; CHECK: [[WHILE_END]]: -; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[END]] to i64 -; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[STR]] to i64 -; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] -; CHECK-NEXT: tail call void @use(ptr noundef nonnull [[END]]) -; CHECK-NEXT: tail call void @use(ptr noundef [[STR]]) -; CHECK-NEXT: ret i64 [[SUB_PTR_SUB]] +; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw nsw i64 [[STRLEN]] to i32 +; CHECK-NEXT: ret i32 [[TMP1]] ; entry: br label %while.cond -while.cond: ; preds = %while.cond, %entry - %str.addr.0 = phi ptr [ %str, %entry ], [ %incdec.ptr, %while.cond ] - %0 = load i8, ptr %str.addr.0, align 1 - %cmp.not = icmp eq i8 %0, 0 - %incdec.ptr = getelementptr inbounds i8, ptr %str.addr.0, i64 1 - br i1 %cmp.not, label %while.end, label %while.cond +while.cond: + %indvars.iv = phi i64 [ %indvars.iv.next, %while.cond ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %str, i64 %indvars.iv + %0 = load i8, ptr %arrayidx, align 1 + %tobool.not = icmp eq i8 %0, 0 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br i1 %tobool.not, label %while.end, label %while.cond -while.end: ; preds = %while.cond - %sub.ptr.lhs.cast = ptrtoint ptr %str.addr.0 to i64 - %sub.ptr.rhs.cast = ptrtoint ptr %str to i64 - %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast - tail call void @use(ptr noundef nonnull %str.addr.0) - tail call void @use(ptr noundef %str) - ret i64 %sub.ptr.sub +while.end: + %1 = trunc nuw nsw i64 %indvars.iv to i32 + ret i32 %1 } 
-define i64 @invalid_strlen_has_side_effect(ptr %0) { -; CHECK-LABEL: define i64 @invalid_strlen_has_side_effect( -; CHECK-SAME: ptr [[TMP0:%.*]]) { -; CHECK-NEXT: br label %[[BB2:.*]] -; CHECK: [[BB2]]: -; CHECK-NEXT: [[TMP3:%.*]] = phi ptr [ [[TMP0]], [[TMP1:%.*]] ], [ [[TMP6:%.*]], %[[BB2]] ] -; CHECK-NEXT: [[TMP4:%.*]] = load volatile i8, ptr [[TMP3]], align 1 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0 -; CHECK-NEXT: [[TMP6]] = getelementptr inbounds i8, ptr [[TMP3]], i64 1 -; CHECK-NEXT: br i1 [[TMP5]], label %[[BB7:.*]], label %[[BB2]] -; CHECK: [[BB7]]: -; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi ptr [ [[TMP3]], %[[BB2]] ] -; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[DOTLCSSA]] to i64 -; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP0]] to i64 -; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] -; CHECK-NEXT: ret i64 [[TMP10]] +; void valid_strlen_offset(const my_char* str) { +; if (*(str++) == '\0') return; +; if (*(str++) == '\0') return; +; if (*(str++) == '\0') return; +; while (*str) { +; ++str; +; } +; use(str); +; } +define dso_local void @valid_strlen_offset(ptr noundef %str) local_unnamed_addr { +; CHECK-LABEL: define dso_local void @valid_strlen_offset( +; CHECK-SAME: ptr noundef [[STR:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[STR]], align 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[CMP]], label %[[RETURN:.*]], label %[[IF_END:.*]] +; CHECK: [[IF_END]]: +; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[STR]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1 +; CHECK-NEXT: [[CMP4:%.*]] = icmp eq i8 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[CMP4]], label %[[RETURN]], label %[[IF_END7:.*]] +; CHECK: [[IF_END7]]: +; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw i8, ptr [[STR]], i64 2 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR2]], align 1 +; CHECK-NEXT: [[CMP10:%.*]] = icmp eq i8 
[[TMP2]], 0 +; CHECK-NEXT: br i1 [[CMP10]], label %[[RETURN]], label %[[WHILE_COND_PREHEADER:.*]] +; CHECK: [[WHILE_COND_PREHEADER]]: +; CHECK-NEXT: [[INCDEC_PTR8:%.*]] = getelementptr i8, ptr [[STR]], i64 3 +; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[INCDEC_PTR8]]) +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[STRLEN]], 3 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[STR]], i64 [[TMP3]] +; CHECK-NEXT: br label %[[WHILE_COND:.*]] +; CHECK: [[WHILE_COND]]: +; CHECK-NEXT: [[STR_ADDR_0:%.*]] = phi ptr [ [[INCDEC_PTR14:%.*]], %[[WHILE_COND]] ], [ [[INCDEC_PTR8]], %[[WHILE_COND_PREHEADER]] ] +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[STR_ADDR_0]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP4]], 0 +; CHECK-NEXT: [[INCDEC_PTR14]] = getelementptr inbounds nuw i8, ptr [[STR_ADDR_0]], i64 1 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_COND]] +; CHECK: [[WHILE_END]]: +; CHECK-NEXT: tail call void @use(ptr noundef nonnull [[SCEVGEP]]) +; CHECK-NEXT: br label %[[RETURN]] +; CHECK: [[RETURN]]: +; CHECK-NEXT: ret void ; - br label %2 +entry: + %0 = load i8, ptr %str, align 1 + %cmp = icmp eq i8 %0, 0 + br i1 %cmp, label %return, label %if.end + +if.end: + %incdec.ptr = getelementptr inbounds nuw i8, ptr %str, i64 1 + %1 = load i8, ptr %incdec.ptr, align 1 + %cmp4 = icmp eq i8 %1, 0 + br i1 %cmp4, label %return, label %if.end7 -2: ; preds = %2, %1 - %3 = phi ptr [ %0, %1 ], [ %6, %2 ] - %4 = load volatile i8, ptr %3, align 1 - %5 = icmp eq i8 %4, 0 - %6 = getelementptr inbounds i8, ptr %3, i64 1 - br i1 %5, label %7, label %2 +if.end7: + %incdec.ptr2 = getelementptr inbounds nuw i8, ptr %str, i64 2 + %2 = load i8, ptr %incdec.ptr2, align 1 + %cmp10 = icmp eq i8 %2, 0 + br i1 %cmp10, label %return, label %while.cond.preheader + +while.cond.preheader: + %incdec.ptr8 = getelementptr inbounds nuw i8, ptr %str, i64 3 + br label %while.cond -7: ; preds = %2 - %8 = ptrtoint ptr %3 to i64 - %9 = ptrtoint ptr %0 to i64 - 
%10 = sub i64 %8, %9 - ret i64 %10 +while.cond: + %str.addr.0 = phi ptr [ %incdec.ptr14, %while.cond ], [ %incdec.ptr8, %while.cond.preheader ] + %3 = load i8, ptr %str.addr.0, align 1 + %tobool.not = icmp eq i8 %3, 0 + %incdec.ptr14 = getelementptr inbounds nuw i8, ptr %str.addr.0, i64 1 + br i1 %tobool.not, label %while.end, label %while.cond + +while.end: + tail call void @use(ptr noundef nonnull %str.addr.0) #3 + br label %return + +return: + ret void } -define i64 @invalid_strlen_idx_idiom(ptr %0) { -; CHECK-LABEL: define i64 @invalid_strlen_idx_idiom( -; CHECK-SAME: ptr [[TMP0:%.*]]) { -; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP0]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[TMP3]], label %[[BB13:.*]], label %[[DOTPREHEADER:.*]] -; CHECK: [[_PREHEADER:.*:]] -; CHECK-NEXT: br label %[[BB4:.*]] -; CHECK: [[BB4]]: -; CHECK-NEXT: [[TMP5:%.*]] = phi i32 [ [[TMP7:%.*]], %[[BB4]] ], [ 0, %[[DOTPREHEADER]] ] -; CHECK-NEXT: [[TMP6:%.*]] = phi ptr [ [[TMP8:%.*]], %[[BB4]] ], [ [[TMP0]], %[[DOTPREHEADER]] ] -; CHECK-NEXT: [[TMP7]] = add nuw nsw i32 [[TMP5]], 1 -; CHECK-NEXT: [[TMP8]] = getelementptr inbounds i8, ptr [[TMP6]], i64 1 -; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i8 [[TMP9]], 0 -; CHECK-NEXT: br i1 [[TMP10]], label %[[BB11:.*]], label %[[BB4]] -; CHECK: [[BB11]]: -; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP7]], %[[BB4]] ] -; CHECK-NEXT: [[TMP12:%.*]] = zext nneg i32 [[DOTLCSSA]] to i64 -; CHECK-NEXT: br label %[[BB13]] -; CHECK: [[BB13]]: -; CHECK-NEXT: [[TMP14:%.*]] = phi i64 [ 0, [[TMP1:%.*]] ], [ [[TMP12]], %[[BB11]] ] -; CHECK-NEXT: ret i64 [[TMP14]] +; void valid_nested_idiom(const char** strs, int n) { +; for (int i = 0; i < n; ++i) { +; const char* s = strs[i]; +; int count = 0; +; while (*s) { +; ++s; +; ++count; +; } +; usei(count); +; } +; } +define void @valid_nested_idiom(ptr %strs, i32 %n) { +; CHECK-LABEL: define void @nested_idiom( +; 
CHECK-SAME: ptr [[STRS:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP9]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64 +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]: +; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[WHILE_END:.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[STRS]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT6:%.*]] = icmp eq i8 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT6]], label %[[WHILE_END]], label %[[WHILE_BODY_PREHEADER:.*]] +; CHECK: [[WHILE_BODY_PREHEADER]]: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 1 +; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[SCEVGEP]]) +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[STRLEN]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1 +; CHECK-NEXT: br label %[[WHILE_BODY:.*]] +; CHECK: [[WHILE_BODY]]: +; CHECK-NEXT: [[COUNT_08:%.*]] = phi i32 [ [[INC:%.*]], %[[WHILE_BODY]] ], [ 0, %[[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[S_07:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], %[[WHILE_BODY]] ], [ [[TMP0]], %[[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds nuw i8, ptr [[S_07]], i64 1 +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[COUNT_08]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END_LOOPEXIT:.*]], label %[[WHILE_BODY]] +; CHECK: 
[[WHILE_END_LOOPEXIT]]: +; CHECK-NEXT: br label %[[WHILE_END]] +; CHECK: [[WHILE_END]]: +; CHECK-NEXT: [[COUNT_0_LCSSA:%.*]] = phi i32 [ 0, %[[FOR_BODY]] ], [ [[TMP3]], %[[WHILE_END_LOOPEXIT]] ] +; CHECK-NEXT: tail call void @usei(i32 [[COUNT_0_LCSSA]]) +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], label %[[FOR_BODY]] ; - %2 = load i8, ptr %0, align 1 - %3 = icmp eq i8 %2, 0 - br i1 %3, label %13, label %4 +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body.preheader, label %for.cond.cleanup -4: ; preds = %1, %4 - %5 = phi i32 [ %7, %4 ], [ 0, %1 ] - %6 = phi ptr [ %8, %4 ], [ %0, %1 ] - %7 = add nuw nsw i32 %5, 1 - %8 = getelementptr inbounds i8, ptr %6, i64 1 - %9 = load i8, ptr %8, align 1 - %10 = icmp eq i8 %9, 0 - br i1 %10, label %11, label %4 +for.body.preheader: + %wide.trip.count = zext nneg i32 %n to i64 + br label %for.body -11: ; preds = %4 - %12 = zext nneg i32 %7 to i64 - br label %13 +for.cond.cleanup: + ret void -13: ; preds = %11, %1 - %14 = phi i64 [ 0, %1 ], [ %12, %11 ] - ret i64 %14 -} +for.body: + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %while.end ] + %arrayidx = getelementptr inbounds ptr, ptr %strs, i64 %indvars.iv + %0 = load ptr, ptr %arrayidx, align 8 + %1 = load i8, ptr %0, align 1 + %tobool.not6 = icmp eq i8 %1, 0 + br i1 %tobool.not6, label %while.end, label %while.body +while.body: + %count.08 = phi i32 [ %inc, %while.body ], [ 0, %for.body ] + %s.07 = phi ptr [ %incdec.ptr, %while.body ], [ %0, %for.body ] + %incdec.ptr = getelementptr inbounds nuw i8, ptr %s.07, i64 1 + %inc = add nuw nsw i32 %count.08, 1 + %2 = load i8, ptr %incdec.ptr, align 1 + %tobool.not = icmp eq i8 %2, 0 + br i1 %tobool.not, label %while.end, label %while.body +while.end: + %count.0.lcssa = phi i32 [ 0, %for.body ], [ %inc, 
%while.body ] + tail call void @usei(i32 %count.0.lcssa) #2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} From 35a2fc63c674fb158c4fdfd183296397992fc27f Mon Sep 17 00:00:00 2001 From: Henry Jiang Date: Sun, 2 Feb 2025 01:00:48 -0500 Subject: [PATCH 08/15] Refactor strlen detection --- .../Transforms/Scalar/LoopIdiomRecognize.cpp | 279 ++++++++---------- 1 file changed, 126 insertions(+), 153 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 521a448ea5dfc..49f3de21e66b1 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1549,141 +1549,141 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry, return nullptr; } -struct StrlenIdiom { - unsigned IdiomSize; - ConstantInt *StepSize; - const SCEV *LoadBaseEv; - Type *LoadType; -}; - -/// Trying to detect strlen idiom that increments a char pointer -/// with a single loop body bb. -static bool detectStrLenIdiom(const Loop *CurLoop, ScalarEvolution *SE, - const TargetLibraryInfo *TLI, - StrlenIdiom &Idiom) { - /* - outs() << "current loop:\n"; - CurLoop->print(outs()); - outs() << "\n"; - */ - - // Give up if the loop has multiple blocks, multiple backedges, or - // multiple exit blocks - if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1 || - !CurLoop->getUniqueExitBlock()) - return false; +namespace { - // It should have a preheader and a branch instruction. 
- BasicBlock *Preheader = CurLoop->getLoopPreheader(); - if (!Preheader) - return false; +class StrlenVerifier { +public: + explicit StrlenVerifier(const Loop *CurLoop, ScalarEvolution *SE, + const TargetLibraryInfo *TLI) + : CurLoop(CurLoop), SE(SE), TLI(TLI) {} + + bool isValidStrlenIdiom() { + // Give up if the loop has multiple blocks, multiple backedges, or + // multiple exit blocks + if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1 || + !CurLoop->getUniqueExitBlock()) + return false; - BranchInst *EntryBI = dyn_cast(Preheader->getTerminator()); - if (!EntryBI) - return false; + // It should have a preheader and a branch instruction. + BasicBlock *Preheader = CurLoop->getLoopPreheader(); + if (!Preheader) + return false; - // The loop exit must be conditioned on an icmp with 0 the null terminator. - // The icmp operand has to be a load on some SSA reg that increments - // by 1 in the loop. - BasicBlock *LoopBody = *CurLoop->block_begin(); + BranchInst *EntryBI = dyn_cast(Preheader->getTerminator()); + if (!EntryBI) + return false; - // Skip if the body is too big as it most likely is not a strlen idiom. - if (!LoopBody || LoopBody->size() >= 15) - return false; + // The loop exit must be conditioned on an icmp with 0 the null terminator. + // The icmp operand has to be a load on some SSA reg that increments + // by 1 in the loop. + BasicBlock *LoopBody = *CurLoop->block_begin(); - BranchInst *LoopTerm = dyn_cast(LoopBody->getTerminator()); - Value *LoopCond = matchCondition(LoopTerm, LoopBody); - if (!LoopCond) - return false; + // Skip if the body is too big as it most likely is not a strlen idiom. 
+ if (!LoopBody || LoopBody->size() >= 15) + return false; - auto *LoopLoad = dyn_cast(LoopCond); - if (!LoopLoad || LoopLoad->getPointerAddressSpace() != 0) - return false; + BranchInst *LoopTerm = dyn_cast(LoopBody->getTerminator()); + Value *LoopCond = matchCondition(LoopTerm, LoopBody); + if (!LoopCond) + return false; - Type *OperandType = LoopLoad->getType(); - if (!OperandType || !OperandType->isIntegerTy()) - return false; + auto *LoopLoad = dyn_cast(LoopCond); + if (!LoopLoad || LoopLoad->getPointerAddressSpace() != 0) + return false; - // See if the pointer expression is an AddRec with constant step a of form - // ({n,+,a}) where a is the width of the char type. - auto *IncPtr = LoopLoad->getPointerOperand(); - const SCEVAddRecExpr *LoadEv = dyn_cast(SE->getSCEV(IncPtr)); - if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) - return false; + OperandType = LoopLoad->getType(); + if (!OperandType || !OperandType->isIntegerTy()) + return false; - /* - outs() << "pointer load ev: "; - LoadEv->print(outs()); - outs() << "\n"; - */ + // See if the pointer expression is an AddRec with constant step a of form + // ({n,+,a}) where a is the width of the char type. + Value *IncPtr = LoopLoad->getPointerOperand(); + const SCEVAddRecExpr *LoadEv = + dyn_cast(SE->getSCEV(IncPtr)); + if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) + return false; + LoadBaseEv = LoadEv->getStart(); - const SCEVConstant *Step = - dyn_cast(LoadEv->getStepRecurrence(*SE)); - if (!Step) - return false; + LLVM_DEBUG({ + dbgs() << "pointer load scev: "; + LoadEv->print(outs()); + dbgs() << "\n"; + }); - unsigned StepSize = 0; - ConstantInt *StepSizeCI = dyn_cast(Step->getValue()); - if (!StepSizeCI) - return false; - StepSize = StepSizeCI->getZExtValue(); + const SCEVConstant *Step = + dyn_cast(LoadEv->getStepRecurrence(*SE)); + if (!Step) + return false; - // Verify that StepSize is consistent with platform char width. 
- unsigned OpWidth = OperandType->getIntegerBitWidth(); - unsigned WcharSize = TLI->getWCharSize(*LoopLoad->getModule()); - if (OpWidth != StepSize * 8) - return false; - if (OpWidth != 8 && OpWidth != 16 && OpWidth != 32) - return false; - if (OpWidth >= 16) - if (OpWidth != WcharSize * 8) + unsigned StepSize = 0; + StepSizeCI = dyn_cast(Step->getValue()); + if (!StepSizeCI) return false; + StepSize = StepSizeCI->getZExtValue(); - // Scan every instruction in the loop to ensure there are no side effects. - for (auto &I : *LoopBody) - if (I.mayHaveSideEffects()) + // Verify that StepSize is consistent with platform char width. + OpWidth = OperandType->getIntegerBitWidth(); + unsigned WcharSize = TLI->getWCharSize(*LoopLoad->getModule()); + if (OpWidth != StepSize * 8) return false; + if (OpWidth != 8 && OpWidth != 16 && OpWidth != 32) + return false; + if (OpWidth >= 16) + if (OpWidth != WcharSize * 8) + return false; - auto *LoopExitBB = CurLoop->getExitBlock(); - if (!LoopExitBB) - return false; + // Scan every instruction in the loop to ensure there are no side effects. + for (auto &I : *LoopBody) + if (I.mayHaveSideEffects()) + return false; - for (PHINode &PN : LoopExitBB->phis()) { - if (!SE->isSCEVable(PN.getType())) + BasicBlock *LoopExitBB = CurLoop->getExitBlock(); + if (!LoopExitBB) return false; - const SCEV *Ev = SE->getSCEV(&PN); - /* - outs() << "loop exit block scev exprs: "; - PN.print(outs()); - if (Ev) - Ev->print(outs()); - outs() << "\n"; - */ - - if (!Ev) - return false; + for (PHINode &PN : LoopExitBB->phis()) { + if (!SE->isSCEVable(PN.getType())) + return false; - // Since we verified that the loop trip count will be a valid strlen idiom, - // we can expand all lcssa phi with {n,+,1} as (n + strlen) and use - // SCEVExpander materialize the loop output. 
- const SCEVAddRecExpr *AddRecEv = dyn_cast(Ev); - if (!AddRecEv || !AddRecEv->isAffine()) - return false; + const SCEV *Ev = SE->getSCEV(&PN); + if (!Ev) + return false; - // We only want RecAddExpr with recurrence step that are constant. This - // is good enough for all the idioms we want to recognize. Later we expand - // the recurrence as {base,+,a} -> (base + a * strlen) and materialize - if (!dyn_cast(AddRecEv->getStepRecurrence(*SE))) - return false; + LLVM_DEBUG({ + dbgs() << "loop exit phi scev: "; + Ev->print(dbgs()); + dbgs() << "\n"; + }); + + // Since we verified that the loop trip count will be a valid strlen + // idiom, we can expand all lcssa phi with {n,+,1} as (n + strlen) and use + // SCEVExpander materialize the loop output. + const SCEVAddRecExpr *AddRecEv = dyn_cast(Ev); + if (!AddRecEv || !AddRecEv->isAffine()) + return false; + + // We only want RecAddExpr with recurrence step that are constant. This + // is good enough for all the idioms we want to recognize. Later we expand + // the recurrence as {base,+,a} -> (base + a * strlen) and materialize + if (!dyn_cast(AddRecEv->getStepRecurrence(*SE))) + return false; + } + + return true; } - Idiom.LoadBaseEv = LoadEv->getStart(); - Idiom.IdiomSize = OpWidth; - Idiom.StepSize = StepSizeCI; - Idiom.LoadType = OperandType; - return true; -} +public: + const Loop *CurLoop; + ScalarEvolution *SE; + const TargetLibraryInfo *TLI; + + unsigned OpWidth; + ConstantInt *StepSizeCI; + const SCEV *LoadBaseEv; + Type *OperandType; +}; + +} // namespace /// Recognizes a strlen idiom by checking for loops that increment /// a char pointer and then subtract with the base pointer. 
@@ -1707,21 +1707,13 @@ static bool detectStrLenIdiom(const Loop *CurLoop, ScalarEvolution *SE, /// /// Later the pointer subtraction will be folded by InstCombine bool LoopIdiomRecognize::recognizeAndInsertStrLen() { - /* - const auto *First = CurLoop->block_begin(); - if (First != CurLoop->block_end()) { - auto *F = (*First)->getParent(); - outs() << "\n\n\n\n\n========== NEW LOOP ============\n"; - F->print(outs()); - } - */ - - // TODO: check for disable options - StrlenIdiom Idiom; - if (!detectStrLenIdiom(CurLoop, SE, TLI, Idiom)) + if (DisableLIRP::All) return false; - // outs() << "idiom is good\n\n"; + StrlenVerifier Verifier(CurLoop, SE, TLI); + + if (!Verifier.isValidStrlenIdiom()) + return false; BasicBlock *Preheader = CurLoop->getLoopPreheader(); BasicBlock *LoopExitBB = CurLoop->getExitBlock(); @@ -1729,18 +1721,17 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() { IRBuilder<> Builder(Preheader->getTerminator()); SCEVExpander Expander(*SE, Preheader->getModule()->getDataLayout(), "scev"); Value *MaterialzedBase = Expander.expandCodeFor( - Idiom.LoadBaseEv, Idiom.LoadBaseEv->getType(), Builder.GetInsertPoint()); + Verifier.LoadBaseEv, Verifier.LoadBaseEv->getType(), + Builder.GetInsertPoint()); Value *StrLenFunc = nullptr; - switch (Idiom.IdiomSize) { - case 8: + if (Verifier.OpWidth == 8) { if (!isLibFuncEmittable(Preheader->getModule(), TLI, LibFunc_strlen)) return false; StrLenFunc = emitStrLen(MaterialzedBase, Builder, *DL, TLI); - break; - case 16: - case 32: - if (!isLibFuncEmittable(Preheader->getModule(), TLI, LibFunc_wcslen)) + } else { + if (!isLibFuncEmittable(Preheader->getModule(), TLI, LibFunc_wcslen) && + !DisableLIRP::Wcslen) return false; StrLenFunc = emitWcsLen(MaterialzedBase, Builder, *DL, TLI); } @@ -1755,39 +1746,21 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() { dyn_cast(AddRecEv->getStepRecurrence(*SE)); const SCEV *Base = AddRecEv->getStart(); - /* - outs() << "creating new mult scev: "; - 
Base->getType()->print(outs()); - outs() << " "; - Step->getType()->print(outs()); - outs() << " "; - StrlenEv->getType()->print(outs()); - outs() << "\n"; - */ - // It is safe to truncate to base since if base is narrower than size_t // the equivalent user code will have to truncate anyways. const SCEV *NewEv = SE->getAddExpr( Base, SE->getMulExpr(Step, SE->getTruncateOrSignExtend( StrlenEv, Base->getType()))); - /* - outs() << "new ev exprs: "; - PN.print(outs()); - if (NewEv) - NewEv->print(outs()); - outs() << "\n"; - */ - - Expander.clear(); Value *MaterializedPHI = Expander.expandCodeFor(NewEv, NewEv->getType(), Builder.GetInsertPoint()); + Expander.clear(); PN.replaceAllUsesWith(MaterializedPHI); Cleanup.push_back(&PN); } - // All LCSSA Loop Phi are dead, the left over loop body can be cleaned up by - // later passes + // All LCSSA Loop Phi are dead, the left over dead loop body can be cleaned + // up by later passes for (PHINode *PN : Cleanup) { RecursivelyDeleteDeadPHINode(PN); } From a4b7cd67dd1fe340a6275240d80a0c1fa1004887 Mon Sep 17 00:00:00 2001 From: Henry Jiang Date: Sun, 2 Feb 2025 13:14:28 -0500 Subject: [PATCH 09/15] update tests --- llvm/test/Transforms/LoopIdiom/strlen.ll | 192 ++++++++++++++++++++- llvm/test/Transforms/LoopIdiom/wcslen16.ll | 72 +++++++- llvm/test/Transforms/LoopIdiom/wcslen32.ll | 74 +++++++- 3 files changed, 326 insertions(+), 12 deletions(-) diff --git a/llvm/test/Transforms/LoopIdiom/strlen.ll b/llvm/test/Transforms/LoopIdiom/strlen.ll index 0dc833ec0e35f..137a17f541cd4 100644 --- a/llvm/test/Transforms/LoopIdiom/strlen.ll +++ b/llvm/test/Transforms/LoopIdiom/strlen.ll @@ -345,7 +345,7 @@ return: ; } ; } define void @valid_nested_idiom(ptr %strs, i32 %n) { -; CHECK-LABEL: define void @nested_idiom( +; CHECK-LABEL: define void @valid_nested_idiom( ; CHECK-SAME: ptr [[STRS:%.*]], i32 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N]], 0 @@ -422,3 +422,193 @@ while.end: %exitcond.not = 
icmp eq i64 %indvars.iv.next, %wide.trip.count br i1 %exitcond.not, label %for.cond.cleanup, label %for.body } + +define i64 @invalid_strlen_has_side_effects(ptr %str) { +; CHECK-LABEL: define i64 @invalid_strlen_has_side_effects( +; CHECK-SAME: ptr [[STR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[WHILE_COND:.*]] +; CHECK: [[WHILE_COND]]: +; CHECK-NEXT: [[STR_ADDR_0:%.*]] = phi ptr [ [[STR]], %[[ENTRY]] ], [ [[INCDEC_PTR:%.*]], %[[WHILE_COND]] ] +; CHECK-NEXT: [[TMP0:%.*]] = load volatile i8, ptr [[STR_ADDR_0]], align 1 +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr i8, ptr [[STR_ADDR_0]], i64 1 +; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_COND]] +; CHECK: [[WHILE_END]]: +; CHECK-NEXT: [[STR_ADDR_0_LCSSA:%.*]] = phi ptr [ [[STR_ADDR_0]], %[[WHILE_COND]] ] +; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[STR_ADDR_0_LCSSA]] to i64 +; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[STR]] to i64 +; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +; CHECK-NEXT: ret i64 [[SUB_PTR_SUB]] +; +entry: + br label %while.cond + +while.cond: + %str.addr.0 = phi ptr [ %str, %entry ], [ %incdec.ptr, %while.cond ] + %0 = load volatile i8, ptr %str.addr.0, align 1 + %cmp.not = icmp eq i8 %0, 0 + %incdec.ptr = getelementptr i8, ptr %str.addr.0, i64 1 + br i1 %cmp.not, label %while.end, label %while.cond + +while.end: + %sub.ptr.lhs.cast = ptrtoint ptr %str.addr.0 to i64 + %sub.ptr.rhs.cast = ptrtoint ptr %str to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + ret i64 %sub.ptr.sub +} + + +define i8 @invalid_exit_phi_scev(ptr %str) { +; CHECK-LABEL: define i8 @invalid_exit_phi_scev( +; CHECK-SAME: ptr [[STR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[WHILE_COND:.*]] +; CHECK: [[WHILE_COND]]: +; CHECK-NEXT: [[STR_ADDR_0:%.*]] = phi ptr [ [[STR]], %[[ENTRY]] ], [ [[INCDEC_PTR:%.*]], 
%[[WHILE_COND]] ] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[STR_ADDR_0]], align 1 +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr i8, ptr [[STR_ADDR_0]], i64 1 +; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_COND]] +; CHECK: [[WHILE_END]]: +; CHECK-NEXT: [[STR_ADDR_0_LCSSA:%.*]] = phi ptr [ [[STR_ADDR_0]], %[[WHILE_COND]] ] +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i8 [ [[TMP0]], %[[WHILE_COND]] ] +; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[STR_ADDR_0_LCSSA]] to i64 +; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[STR]] to i64 +; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +; CHECK-NEXT: ret i8 [[DOTLCSSA]] +; +entry: + br label %while.cond + +while.cond: + %str.addr.0 = phi ptr [ %str, %entry ], [ %incdec.ptr, %while.cond ] + %0 = load i8, ptr %str.addr.0, align 1 + %cmp.not = icmp eq i8 %0, 0 + %incdec.ptr = getelementptr i8, ptr %str.addr.0, i64 1 + br i1 %cmp.not, label %while.end, label %while.cond + +while.end: + %sub.ptr.lhs.cast = ptrtoint ptr %str.addr.0 to i64 + %sub.ptr.rhs.cast = ptrtoint ptr %str to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + + ; %0.lcssa has invalid scev rec {%0} expected to be {%str,+,constant} + ret i8 %0 +} + + + +define i64 @invalid_branch_cond(ptr %str) { +; CHECK-LABEL: define i64 @invalid_branch_cond( +; CHECK-SAME: ptr [[STR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[WHILE_COND:.*]] +; CHECK: [[WHILE_COND]]: +; CHECK-NEXT: [[STR_ADDR_0:%.*]] = phi ptr [ [[STR]], %[[ENTRY]] ], [ [[INCDEC_PTR:%.*]], %[[WHILE_COND]] ] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[STR_ADDR_0]], align 1 +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i8 [[TMP0]], 10 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr i8, ptr [[STR_ADDR_0]], i64 1 +; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_COND]] +; CHECK: [[WHILE_END]]: +; CHECK-NEXT: 
[[STR_ADDR_0_LCSSA:%.*]] = phi ptr [ [[STR_ADDR_0]], %[[WHILE_COND]] ] +; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[STR_ADDR_0_LCSSA]] to i64 +; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[STR]] to i64 +; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +; CHECK-NEXT: ret i64 [[SUB_PTR_SUB]] +; +entry: + br label %while.cond + +while.cond: + %str.addr.0 = phi ptr [ %str, %entry ], [ %incdec.ptr, %while.cond ] + %0 = load i8, ptr %str.addr.0, align 1 + + ; We compare against '\n' instead of '\0' + %cmp.not = icmp eq i8 %0, 10 + + %incdec.ptr = getelementptr i8, ptr %str.addr.0, i64 1 + br i1 %cmp.not, label %while.end, label %while.cond + +while.end: + %sub.ptr.lhs.cast = ptrtoint ptr %str.addr.0 to i64 + %sub.ptr.rhs.cast = ptrtoint ptr %str to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + ret i64 %sub.ptr.sub +} + +define i64 @invalid_unknown_step_size(ptr %str, i64 %step) { +; CHECK-LABEL: define i64 @invalid_unknown_step_size( +; CHECK-SAME: ptr [[STR:%.*]], i64 [[STEP:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[WHILE_COND:.*]] +; CHECK: [[WHILE_COND]]: +; CHECK-NEXT: [[STR_ADDR_0:%.*]] = phi ptr [ [[STR]], %[[ENTRY]] ], [ [[INCDEC_PTR:%.*]], %[[WHILE_COND]] ] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[STR_ADDR_0]], align 1 +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr i8, ptr [[STR_ADDR_0]], i64 [[STEP]] +; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_COND]] +; CHECK: [[WHILE_END]]: +; CHECK-NEXT: [[STR_ADDR_0_LCSSA:%.*]] = phi ptr [ [[STR_ADDR_0]], %[[WHILE_COND]] ] +; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[STR_ADDR_0_LCSSA]] to i64 +; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[STR]] to i64 +; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +; CHECK-NEXT: ret i64 [[SUB_PTR_SUB]] +; +entry: + br label %while.cond + 
+while.cond: + %str.addr.0 = phi ptr [ %str, %entry ], [ %incdec.ptr, %while.cond ] + %0 = load i8, ptr %str.addr.0, align 1 + %cmp.not = icmp eq i8 %0, 0 + %incdec.ptr = getelementptr i8, ptr %str.addr.0, i64 %step + br i1 %cmp.not, label %while.end, label %while.cond + +while.end: + %sub.ptr.lhs.cast = ptrtoint ptr %str.addr.0 to i64 + %sub.ptr.rhs.cast = ptrtoint ptr %str to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + ret i64 %sub.ptr.sub +} + +declare ptr @pure(ptr) #0; +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) } + +define i64 @invalid_add_rec(ptr %str) { +; CHECK-LABEL: define i64 @invalid_add_rec( +; CHECK-SAME: ptr [[STR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[WHILE_COND:.*]] +; CHECK: [[WHILE_COND]]: +; CHECK-NEXT: [[STR_ADDR_0:%.*]] = phi ptr [ [[STR]], %[[ENTRY]] ], [ [[INCDEC_PTR:%.*]], %[[WHILE_COND]] ] +; CHECK-NEXT: [[INDIRECT:%.*]] = tail call ptr @pure(ptr [[STR_ADDR_0]]) +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[INDIRECT]], align 1 +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr i8, ptr [[STR_ADDR_0]], i64 1 +; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_COND]] +; CHECK: [[WHILE_END]]: +; CHECK-NEXT: [[STR_ADDR_0_LCSSA:%.*]] = phi ptr [ [[STR_ADDR_0]], %[[WHILE_COND]] ] +; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[STR_ADDR_0_LCSSA]] to i64 +; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[STR]] to i64 +; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +; CHECK-NEXT: ret i64 [[SUB_PTR_SUB]] +; +entry: + br label %while.cond + +while.cond: + %str.addr.0 = phi ptr [ %str, %entry ], [ %incdec.ptr, %while.cond ] + %indirect = tail call ptr @pure(ptr %str.addr.0) + %0 = load i8, ptr %indirect, align 1 + %cmp.not = icmp eq i8 %0, 0 + %incdec.ptr = getelementptr i8, ptr %str.addr.0, i64 1 + br i1 %cmp.not, label %while.end, 
label %while.cond + +while.end: + %sub.ptr.lhs.cast = ptrtoint ptr %str.addr.0 to i64 + %sub.ptr.rhs.cast = ptrtoint ptr %str to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + ret i64 %sub.ptr.sub +} + diff --git a/llvm/test/Transforms/LoopIdiom/wcslen16.ll b/llvm/test/Transforms/LoopIdiom/wcslen16.ll index 6c140ddf90d4e..d3b0b8d208cd8 100644 --- a/llvm/test/Transforms/LoopIdiom/wcslen16.ll +++ b/llvm/test/Transforms/LoopIdiom/wcslen16.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes='loop-idiom' < %s -S | FileCheck %s +; RUN: opt -passes='loop(loop-idiom),verify' < %s -S | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -15,14 +15,18 @@ define i64 @valid_strlen16(ptr %src) { ; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i16 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[CMP1]], label %[[RETURN]], label %[[WHILE_COND_PREHEADER:.*]] ; CHECK: [[WHILE_COND_PREHEADER]]: -; CHECK-NEXT: [[NEWGEP:%.*]] = getelementptr i16, ptr [[SRC]], i64 -1 +; CHECK-NEXT: [[NEWGEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 2 ; CHECK-NEXT: [[WCSLEN:%.*]] = call i64 @wcslen(ptr [[NEWGEP]]) -; CHECK-NEXT: [[END:%.*]] = getelementptr i16, ptr [[NEWGEP]], i64 [[WCSLEN]] +; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[WCSLEN]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 2 +; CHECK-NEXT: [[END:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP2]] ; CHECK-NEXT: br label %[[WHILE_COND:.*]] ; CHECK: [[WHILE_COND]]: -; CHECK-NEXT: [[SRC_PN:%.*]] = phi ptr [ poison, %[[WHILE_COND]] ], [ [[SRC]], %[[WHILE_COND_PREHEADER]] ] -; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i16 poison, 0 -; CHECK-NEXT: br i1 true, label %[[WHILE_END:.*]], label %[[WHILE_COND]] +; CHECK-NEXT: [[SRC_PN:%.*]] = phi ptr [ [[CURR_0:%.*]], %[[WHILE_COND]] ], [ [[SRC]], %[[WHILE_COND_PREHEADER]] ] +; CHECK-NEXT: [[CURR_0]] = getelementptr 
inbounds i8, ptr [[SRC_PN]], i64 2 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[CURR_0]], align 2 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i16 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_COND]] ; CHECK: [[WHILE_END]]: ; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[END]] to i64 ; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[SRC]] to i64 @@ -61,6 +65,62 @@ return: ; preds = %entry, %lor.lhs.fal ret i64 %retval.0 } +define i64 @invalid_char_size(ptr %src) { +; CHECK-LABEL: define i64 @invalid_char_size( +; CHECK-SAME: ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[SRC]], null +; CHECK-NEXT: br i1 [[CMP]], label %[[RETURN:.*]], label %[[LOR_LHS_FALSE:.*]] +; CHECK: [[LOR_LHS_FALSE]]: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 2 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[CMP1]], label %[[RETURN]], label %[[WHILE_COND_PREHEADER:.*]] +; CHECK: [[WHILE_COND_PREHEADER]]: +; CHECK-NEXT: br label %[[WHILE_COND:.*]] +; CHECK: [[WHILE_COND]]: +; CHECK-NEXT: [[SRC_PN:%.*]] = phi ptr [ [[CURR_0:%.*]], %[[WHILE_COND]] ], [ [[SRC]], %[[WHILE_COND_PREHEADER]] ] +; CHECK-NEXT: [[CURR_0]] = getelementptr inbounds i8, ptr [[SRC_PN]], i64 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[CURR_0]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_COND]] +; CHECK: [[WHILE_END]]: +; CHECK-NEXT: [[CURR_0_LCSSA:%.*]] = phi ptr [ [[CURR_0]], %[[WHILE_COND]] ] +; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[CURR_0_LCSSA]] to i64 +; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[SRC]] to i64 +; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +; CHECK-NEXT: [[SUB_PTR_DIV:%.*]] = ashr exact i64 [[SUB_PTR_SUB]], 2 +; CHECK-NEXT: br label %[[RETURN]] +; CHECK: [[RETURN]]: +; CHECK-NEXT: 
[[RETVAL_0:%.*]] = phi i64 [ [[SUB_PTR_DIV]], %[[WHILE_END]] ], [ 0, %[[LOR_LHS_FALSE]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: ret i64 [[RETVAL_0]] +; +entry: + %cmp = icmp eq ptr %src, null + br i1 %cmp, label %return, label %lor.lhs.false + +lor.lhs.false: ; preds = %entry + %0 = load i32, ptr %src, align 2 + %cmp1 = icmp eq i32 %0, 0 + br i1 %cmp1, label %return, label %while.cond + +while.cond: ; preds = %lor.lhs.false, %while.cond + %src.pn = phi ptr [ %curr.0, %while.cond ], [ %src, %lor.lhs.false ] + %curr.0 = getelementptr inbounds i8, ptr %src.pn, i64 4 + %1 = load i32, ptr %curr.0, align 4 + %tobool.not = icmp eq i32 %1, 0 + br i1 %tobool.not, label %while.end, label %while.cond + +while.end: ; preds = %while.cond + %sub.ptr.lhs.cast = ptrtoint ptr %curr.0 to i64 + %sub.ptr.rhs.cast = ptrtoint ptr %src to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + %sub.ptr.div = ashr exact i64 %sub.ptr.sub, 2 + br label %return + +return: ; preds = %entry, %lor.lhs.false, %while.end + %retval.0 = phi i64 [ %sub.ptr.div, %while.end ], [ 0, %lor.lhs.false ], [ 0, %entry ] + ret i64 %retval.0 +} !llvm.module.flags = !{!0} !0 = !{i32 1, !"wchar_size", i32 2} diff --git a/llvm/test/Transforms/LoopIdiom/wcslen32.ll b/llvm/test/Transforms/LoopIdiom/wcslen32.ll index fad4c52078967..76936b537fad7 100644 --- a/llvm/test/Transforms/LoopIdiom/wcslen32.ll +++ b/llvm/test/Transforms/LoopIdiom/wcslen32.ll @@ -15,14 +15,18 @@ define i64 @valid_wcslen32(ptr %src) { ; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[CMP1]], label %[[RETURN]], label %[[WHILE_COND_PREHEADER:.*]] ; CHECK: [[WHILE_COND_PREHEADER]]: -; CHECK-NEXT: [[NEWGEP:%.*]] = getelementptr i32, ptr [[SRC]], i64 -3 +; CHECK-NEXT: [[NEWGEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 4 ; CHECK-NEXT: [[WCSLEN:%.*]] = call i64 @wcslen(ptr [[NEWGEP]]) -; CHECK-NEXT: [[END:%.*]] = getelementptr i32, ptr [[NEWGEP]], i64 [[WCSLEN]] +; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[WCSLEN]], 2 +; 
CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 4 +; CHECK-NEXT: [[END:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP2]] ; CHECK-NEXT: br label %[[WHILE_COND:.*]] ; CHECK: [[WHILE_COND]]: -; CHECK-NEXT: [[SRC_PN:%.*]] = phi ptr [ poison, %[[WHILE_COND]] ], [ [[SRC]], %[[WHILE_COND_PREHEADER]] ] -; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 poison, 0 -; CHECK-NEXT: br i1 true, label %[[WHILE_END:.*]], label %[[WHILE_COND]] +; CHECK-NEXT: [[SRC_PN:%.*]] = phi ptr [ [[CURR_0:%.*]], %[[WHILE_COND]] ], [ [[SRC]], %[[WHILE_COND_PREHEADER]] ] +; CHECK-NEXT: [[CURR_0]] = getelementptr inbounds i8, ptr [[SRC_PN]], i64 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[CURR_0]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_COND]] ; CHECK: [[WHILE_END]]: ; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[END]] to i64 ; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[SRC]] to i64 @@ -65,6 +69,66 @@ return: ; preds = %entry, %lor.lhs.fal ret i64 %retval.0 } +define i64 @invalid_char_size(ptr %src) { +; CHECK-LABEL: define i64 @invalid_char_size( +; CHECK-SAME: ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[SRC]], null +; CHECK-NEXT: br i1 [[CMP]], label %[[RETURN:.*]], label %[[LOR_LHS_FALSE:.*]] +; CHECK: [[LOR_LHS_FALSE]]: +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[SRC]], align 2 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i16 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[CMP1]], label %[[RETURN]], label %[[WHILE_COND_PREHEADER:.*]] +; CHECK: [[WHILE_COND_PREHEADER]]: +; CHECK-NEXT: br label %[[WHILE_COND:.*]] +; CHECK: [[WHILE_COND]]: +; CHECK-NEXT: [[SRC_PN:%.*]] = phi ptr [ [[CURR_0:%.*]], %[[WHILE_COND]] ], [ [[SRC]], %[[WHILE_COND_PREHEADER]] ] +; CHECK-NEXT: [[CURR_0]] = getelementptr inbounds i8, ptr [[SRC_PN]], i64 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[CURR_0]], align 2 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i16 
[[TMP1]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_COND]] +; CHECK: [[WHILE_END]]: +; CHECK-NEXT: [[CURR_0_LCSSA:%.*]] = phi ptr [ [[CURR_0]], %[[WHILE_COND]] ] +; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[CURR_0_LCSSA]] to i64 +; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[SRC]] to i64 +; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +; CHECK-NEXT: [[SUB_PTR_DIV:%.*]] = ashr exact i64 [[SUB_PTR_SUB]], 1 +; CHECK-NEXT: br label %[[RETURN]] +; CHECK: [[RETURN]]: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i64 [ [[SUB_PTR_DIV]], %[[WHILE_END]] ], [ 0, %[[LOR_LHS_FALSE]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: ret i64 [[RETVAL_0]] +; +entry: + %cmp = icmp eq ptr %src, null + br i1 %cmp, label %return, label %lor.lhs.false + +lor.lhs.false: ; preds = %entry + %0 = load i16, ptr %src, align 2 + %cmp1 = icmp eq i16 %0, 0 + br i1 %cmp1, label %return, label %while.cond.preheader + +while.cond.preheader: ; preds = %lor.lhs.false + br label %while.cond + +while.cond: ; preds = %while.cond.preheader, %while.cond + %src.pn = phi ptr [ %curr.0, %while.cond ], [ %src, %while.cond.preheader ] + %curr.0 = getelementptr inbounds i8, ptr %src.pn, i64 2 + %1 = load i16, ptr %curr.0, align 2 + %tobool.not = icmp eq i16 %1, 0 + br i1 %tobool.not, label %while.end, label %while.cond + +while.end: ; preds = %while.cond + %curr.0.lcssa = phi ptr [ %curr.0, %while.cond ] + %sub.ptr.lhs.cast = ptrtoint ptr %curr.0.lcssa to i64 + %sub.ptr.rhs.cast = ptrtoint ptr %src to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + %sub.ptr.div = ashr exact i64 %sub.ptr.sub, 1 + br label %return + +return: ; preds = %entry, %lor.lhs.false, %while.end + %retval.0 = phi i64 [ %sub.ptr.div, %while.end ], [ 0, %lor.lhs.false ], [ 0, %entry ] + ret i64 %retval.0 +} !llvm.module.flags = !{!0} !0 = !{i32 1, !"wchar_size", i32 4} From 05e21242f55f47213a116f19e40203e764ccfc61 Mon Sep 17 00:00:00 2001 From: 
Henry Jiang Date: Sun, 2 Feb 2025 15:11:33 -0500 Subject: [PATCH 10/15] improve comments --- .../Transforms/Scalar/LoopIdiomRecognize.cpp | 66 +++++++++++-------- 1 file changed, 40 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 49f3de21e66b1..1e72dc9850474 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1515,6 +1515,16 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() { recognizeShiftUntilLessThan() || recognizeAndInsertStrLen(); } +/// Check if a Value is either a nullptr or a constant int zero +static bool isZeroConstant(const Value *Val) { + if (isa(Val)) + return true; + const ConstantInt *CmpZero = dyn_cast(Val); + if (!CmpZero || !CmpZero->isZero()) + return false; + return true; +} + /// Check if the given conditional branch is based on the comparison between /// a variable and zero, and if the variable is non-zero or zero (JmpOnZero is /// true), the control yields to the loop entry. If the branch matches the @@ -1530,11 +1540,8 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry, if (!Cond) return nullptr; - if (!isa(Cond->getOperand(1))) { - ConstantInt *CmpZero = dyn_cast(Cond->getOperand(1)); - if (!CmpZero || !CmpZero->isZero()) - return nullptr; - } + if (!isZeroConstant(Cond->getOperand(1))) + return nullptr; BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); @@ -1587,7 +1594,7 @@ class StrlenVerifier { if (!LoopCond) return false; - auto *LoopLoad = dyn_cast(LoopCond); + LoadInst *LoopLoad = dyn_cast(LoopCond); if (!LoopLoad || LoopLoad->getPointerAddressSpace() != 0) return false; @@ -1633,7 +1640,7 @@ class StrlenVerifier { return false; // Scan every instruction in the loop to ensure there are no side effects. 
- for (auto &I : *LoopBody) + for (Instruction &I : *LoopBody) if (I.mayHaveSideEffects()) return false; @@ -1685,27 +1692,30 @@ class StrlenVerifier { } // namespace -/// Recognizes a strlen idiom by checking for loops that increment -/// a char pointer and then subtract with the base pointer. +/// The Strlen Idiom we are trying to detect has the following structure /// -/// If detected, transforms the relevant code to a strlen function -/// call, and returns true; otherwise, returns false. +/// preheader: +/// ... +/// br label %body, ... /// -/// The core idiom we are trying to detect is: -/// \code -/// start = str; -/// do { -/// str++; -/// } while(*str != '\0'); -/// \endcode +/// body: +/// ... ; %0 is incremented by a gep +/// %1 = load i8, ptr %0, align 1 +/// %2 = icmp eq i8 %1, 0 +/// br i1 %2, label %exit, label %body /// -/// The transformed output is similar to below c-code: -/// \code -/// str = start + strlen(start) -/// len = str - start -/// \endcode +/// exit: +/// %lcssa = phi [%0, %body], ... +/// +/// We expect the strlen idiom to have a load of a character type that +/// is compared against '\0', and such load pointer operand must have scev +/// expression of the form {%str,+,c} where c is a ConstantInt of the +/// appropriate character width for the idiom, and %str is the base of the string +/// And, that all lcssa phis have the form {...,+,n} where n is a constant, /// -/// Later the pointer subtraction will be folded by InstCombine +/// When transforming the output of the strlen idiom, the lccsa phi are +/// expanded using SCEVExpander as {base scev,+,a} -> (base scev + a * strlen) +/// and all subsequent uses are replaced.
bool LoopIdiomRecognize::recognizeAndInsertStrLen() { if (DisableLIRP::All) return false; @@ -1740,6 +1750,10 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() { const SCEV *StrlenEv = SE->getSCEV(StrLenFunc); SmallVector Cleanup; for (PHINode &PN : LoopExitBB->phis()) { + // We can now materialize the loop output as all phi have scev {base,+,a}. + // We expand the phi as: + // %strlen = call i64 @strlen(%str) + // %phi.new = base expression + step * %strlen const SCEV *Ev = SE->getSCEV(&PN); const SCEVAddRecExpr *AddRecEv = dyn_cast(Ev); const SCEVConstant *Step = @@ -1759,7 +1773,7 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() { Cleanup.push_back(&PN); } - // All LCSSA Loop Phi are dead, the left over dead loop body can be cleaned + // All LCSSA Loop Phi are dead, the left over dead loop body can be cleaned // up by later passes for (PHINode *PN : Cleanup) { RecursivelyDeleteDeadPHINode(PN); @@ -1771,7 +1785,7 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() { ORE.emit([&]() { return OptimizationRemark(DEBUG_TYPE, "recognizeAndInsertStrLen", CurLoop->getStartLoc(), Preheader) - << "Transformed strlen loop idiom"; + << "Transformed " << StrLenFunc->getName() << " loop idiom"; }); return true; From e6faac5390b351813b27cd373825d9003911ff88 Mon Sep 17 00:00:00 2001 From: Henry Jiang Date: Sun, 2 Feb 2025 15:20:40 -0500 Subject: [PATCH 11/15] revert formatting --- .../Transforms/Scalar/LoopIdiomRecognize.cpp | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 1e72dc9850474..f1f7a2571f08d 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -252,7 +252,7 @@ class LoopIdiomRecognize { bool insertFFSIfProfitable(Intrinsic::ID IntrinID, Value *InitX, Instruction *DefX, PHINode *CntPhi, Instruction *CntInst); - bool recognizeAndInsertFFS(); /// 
Find First Set: ctlz or cttz + bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz bool recognizeShiftUntilLessThan(); void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB, Instruction *CntInst, PHINode *CntPhi, @@ -620,8 +620,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl &SL, const SCEVAddRecExpr *FirstStoreEv = cast(SE->getSCEV(FirstStorePtr)); APInt FirstStride = getStoreStride(FirstStoreEv); - unsigned FirstStoreSize = - DL->getTypeStoreSize(SL[i]->getValueOperand()->getType()); + unsigned FirstStoreSize = DL->getTypeStoreSize(SL[i]->getValueOperand()->getType()); // See if we can optimize just this store in isolation. if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) { @@ -1112,14 +1111,13 @@ bool LoopIdiomRecognize::processLoopStridedStore( BasePtr, SplatValue, NumBytes, MaybeAlign(StoreAlignment), /*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias); } else { - assert(isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)); + assert (isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)); // Everything is emitted in default address space Type *Int8PtrTy = DestInt8PtrTy; StringRef FuncName = "memset_pattern16"; - FunctionCallee MSP = - getOrInsertLibFunc(M, *TLI, LibFunc_memset_pattern16, - Builder.getVoidTy(), Int8PtrTy, Int8PtrTy, IntIdxTy); + FunctionCallee MSP = getOrInsertLibFunc(M, *TLI, LibFunc_memset_pattern16, + Builder.getVoidTy(), Int8PtrTy, Int8PtrTy, IntIdxTy); inferNonMandatoryLibFuncAttrs(M, FuncName, *TLI); // Otherwise we should form a memset_pattern16. 
PatternValue is known to be @@ -1161,7 +1159,8 @@ bool LoopIdiomRecognize::processLoopStridedStore( R << "Transformed loop-strided store in " << ore::NV("Function", TheStore->getFunction()) << " function into a call to " - << ore::NV("NewFunction", NewCall->getCalledFunction()) << "() intrinsic"; + << ore::NV("NewFunction", NewCall->getCalledFunction()) + << "() intrinsic"; if (!Stores.empty()) R << ore::setExtraArgs(); for (auto *I : Stores) { @@ -1467,7 +1466,8 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( << ore::NV("NewFunction", NewCall->getCalledFunction()) << "() intrinsic from " << ore::NV("Inst", InstRemark) << " instruction in " << ore::NV("Function", TheStore->getFunction()) - << " function" << ore::setExtraArgs() + << " function" + << ore::setExtraArgs() << ore::NV("FromBlock", TheStore->getParent()->getName()) << ore::NV("ToBlock", Preheader->getName()); }); @@ -1998,7 +1998,8 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, ConstantInt *Dec = dyn_cast(SubOneOp->getOperand(1)); if (!Dec || !((SubOneOp->getOpcode() == Instruction::Sub && Dec->isOne()) || - (SubOneOp->getOpcode() == Instruction::Add && Dec->isMinusOne()))) { + (SubOneOp->getOpcode() == Instruction::Add && + Dec->isMinusOne()))) { return false; } } @@ -2109,8 +2110,8 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL, // step 2: detect instructions corresponding to "x.next = x >> 1 or x << 1" if (!DefX || !DefX->isShift()) return false; - IntrinID = - DefX->getOpcode() == Instruction::Shl ? Intrinsic::cttz : Intrinsic::ctlz; + IntrinID = DefX->getOpcode() == Instruction::Shl ? 
Intrinsic::cttz : + Intrinsic::ctlz; ConstantInt *Shft = dyn_cast(DefX->getOperand(1)); if (!Shft || !Shft->isOne()) return false; @@ -2613,8 +2614,9 @@ void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB, TcPhi->insertBefore(Body->begin()); Builder.SetInsertPoint(LbCond); - Instruction *TcDec = cast(Builder.CreateSub( - TcPhi, ConstantInt::get(Ty, 1), "tcdec", false, true)); + Instruction *TcDec = cast( + Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1), + "tcdec", false, true)); TcPhi->addIncoming(TripCnt, PreHead); TcPhi->addIncoming(TcDec, Body); @@ -3244,8 +3246,7 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() { // intrinsic we'll use are not cheap. Note that we are okay with *just* // making the loop countable, even if nothing else changes. IntrinsicCostAttributes Attrs( - IntrID, Ty, - {PoisonValue::get(Ty), /*is_zero_poison=*/Builder.getFalse()}); + IntrID, Ty, {PoisonValue::get(Ty), /*is_zero_poison=*/Builder.getFalse()}); InstructionCost Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind); if (Cost > TargetTransformInfo::TCC_Basic) { LLVM_DEBUG(dbgs() << DEBUG_TYPE From 40ee5fab593b05e1930fecc0f33650830cbf0828 Mon Sep 17 00:00:00 2001 From: Henry Jiang Date: Sun, 2 Feb 2025 15:25:21 -0500 Subject: [PATCH 12/15] remove DEBUG_TYPE from disable flags --- llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index f1f7a2571f08d..9861c643d3c8b 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -129,7 +129,7 @@ static cl::opt bool DisableLIRP::Strlen; static cl::opt - DisableLIRPStrlen("disable-" DEBUG_TYPE "-strlen", + DisableLIRPStrlen("disable-loop-idiom-strlen", cl::desc("Proceed with loop idiom recognize pass, but do " "not convert loop(s) to strlen."), cl::location(DisableLIRP::Strlen), 
cl::init(false), @@ -137,7 +137,7 @@ static cl::opt bool DisableLIRP::Wcslen; static cl::opt - DisableLIRPWcslen("disable-" DEBUG_TYPE "-wcslen", + DisableLIRPWcslen("disable-loop-idiom-wcslen", cl::desc("Proceed with loop idiom recognize pass, but do " "not convert loop(s) to wcslen."), cl::location(DisableLIRP::Wcslen), cl::init(false), From 4e8851b7cb2387dcb8f748d71206179d7536b7d2 Mon Sep 17 00:00:00 2001 From: Henry Jiang Date: Sun, 2 Feb 2025 19:00:02 -0500 Subject: [PATCH 13/15] add more documentation --- .../Transforms/Scalar/LoopIdiomRecognize.cpp | 37 ++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 9861c643d3c8b..580cc9d71d9fd 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1715,7 +1715,42 @@ class StrlenVerifier { /// /// When transforming the output of the strlen idiom, the lccsa phi are /// expanded using SCEVExpander as {base scev,+,a} -> (base scev + a * strlen) -/// and all subsequent uses are replaced. +/// and all subsequent uses are replaced. For example, +/// +/// \code{.c} +/// const char* base = str; +/// while (*str != '\0') +/// ++str; +/// size_t result = str - base; +/// \endcode +/// +/// will be transformed as as follow: The idiom will be replaced by a strlen +/// computation to compute the address of the null terminator of the string. 
+/// +/// \code{.c} +/// const char* base = str; +/// const char* end = base + strlen(str); +/// size_t result = end - base; +/// \endcode +/// +/// In the case we index by an induction variable, as long as the induction +/// variable has a constant int increment, we can replace all such indvars +/// with the closed form computation of strlen +/// +/// \code{.c} +/// size_t i = 0; +/// while (str[i] != '\0') +/// ++i; +/// size_t result = i; +/// \endcode +/// +/// Will be replaced by +/// +/// \code{.c} +/// size_t i = 0 + strlen(str); +/// size_t result = i; +/// \endcode +/// bool LoopIdiomRecognize::recognizeAndInsertStrLen() { if (DisableLIRP::All) return false; From dee251862f9a4a4dcc88634a57e84629a4f98042 Mon Sep 17 00:00:00 2001 From: Henry Jiang Date: Wed, 5 Feb 2025 11:26:37 -0500 Subject: [PATCH 14/15] address review comments Co-authored-by: Michael Kruse --- llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 580cc9d71d9fd..3b9200096017d 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -97,7 +97,7 @@ using namespace llvm; STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores"); -STATISTIC(NumStrLen, "Number of strlen's formed from loop loads"); +STATISTIC(NumStrLen, "Number of strlen's and wcslen's formed from loop loads"); STATISTIC( NumShiftUntilBitTest, "Number of uncountable loops recognized as 'shift until bitttest' idiom"); @@ -1669,9 +1669,9 @@ class StrlenVerifier { if (!AddRecEv || !AddRecEv->isAffine()) return false; - // We only want RecAddExpr with recurrence step that are constant. 
This - // is good enough for all the idioms we want to recognize. Later we expand - // the recurrence as {base,+,a} -> (base + a * strlen) and materialize + // We only want RecAddExpr with recurrence step that is constant. This + // is good enough for all the idioms we want to recognize. Later we expand and materialize + // the recurrence as {base,+,a} -> (base + a * strlen) if (!dyn_cast(AddRecEv->getStepRecurrence(*SE))) return false; } @@ -1724,7 +1724,7 @@ class StrlenVerifier { /// size_t result = str - base; /// \endcode /// -/// will be transformed as as follow: The idiom will be replaced by a strlen +/// will be transformed as follows: The idiom will be replaced by a strlen /// computation to compute the address of the null terminator of the string. /// /// \code{.c} @@ -1764,7 +1764,7 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() { BasicBlock *LoopExitBB = CurLoop->getExitBlock(); IRBuilder<> Builder(Preheader->getTerminator()); - SCEVExpander Expander(*SE, Preheader->getModule()->getDataLayout(), "scev"); + SCEVExpander Expander(*SE, Preheader->getModule()->getDataLayout(), "strlen_idiom"); Value *MaterialzedBase = Expander.expandCodeFor( Verifier.LoadBaseEv, Verifier.LoadBaseEv->getType(), Builder.GetInsertPoint()); @@ -1810,9 +1810,8 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() { // All LCSSA Loop Phi are dead, the left over dead loop body can be cleaned // up by later passes - for (PHINode *PN : Cleanup) { + for (PHINode *PN : Cleanup) RecursivelyDeleteDeadPHINode(PN); - } SE->forgetLoop(CurLoop); ++NumStrLen; From 528daead9940800f8a8c5b1850c2792efa4dc7cd Mon Sep 17 00:00:00 2001 From: Henry Jiang Date: Mon, 10 Feb 2025 12:16:24 -0500 Subject: [PATCH 15/15] address fmt --- .../Transforms/Scalar/LoopIdiomRecognize.cpp | 23 +++++++++++-------- llvm/test/Transforms/LoopIdiom/wcslen16.ll | 2 +- llvm/test/Transforms/LoopIdiom/wcslen32.ll | 2 +- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git 
a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 3b9200096017d..22d1165b37b83 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -135,13 +135,17 @@ static cl::opt cl::location(DisableLIRP::Strlen), cl::init(false), cl::ReallyHidden); +/// Some target libraries have a significant call overhead for `wcslen`, +/// which can degrade performance when the input string is not long enough +/// to justify the cost. To avoid unnecessary performance penalties, +/// we disable it by default. bool DisableLIRP::Wcslen; static cl::opt - DisableLIRPWcslen("disable-loop-idiom-wcslen", - cl::desc("Proceed with loop idiom recognize pass, but do " - "not convert loop(s) to wcslen."), - cl::location(DisableLIRP::Wcslen), cl::init(false), - cl::ReallyHidden); + EnableLIRPWcslen("enable-loop-idiom-wcslen", + cl::desc("Proceed with loop idiom recognize pass, " + "enable conversion of loop(s) to wcslen."), + cl::location(DisableLIRP::Wcslen), cl::init(true), + cl::ReallyHidden); static cl::opt UseLIRCodeSizeHeurs( "use-lir-code-size-heurs", @@ -1670,8 +1674,8 @@ class StrlenVerifier { return false; // We only want RecAddExpr with recurrence step that is constant. This - // is good enough for all the idioms we want to recognize. Later we expand and materialize - // the recurrence as {base,+,a} -> (base + a * strlen) + // is good enough for all the idioms we want to recognize. 
Later we expand + // and materialize the recurrence as {base,+,a} -> (base + a * strlen) if (!dyn_cast(AddRecEv->getStepRecurrence(*SE))) return false; } @@ -1764,7 +1768,8 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() { BasicBlock *LoopExitBB = CurLoop->getExitBlock(); IRBuilder<> Builder(Preheader->getTerminator()); - SCEVExpander Expander(*SE, Preheader->getModule()->getDataLayout(), "strlen_idiom"); + SCEVExpander Expander(*SE, Preheader->getModule()->getDataLayout(), + "strlen_idiom"); Value *MaterialzedBase = Expander.expandCodeFor( Verifier.LoadBaseEv, Verifier.LoadBaseEv->getType(), Builder.GetInsertPoint()); @@ -1810,7 +1815,7 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() { // All LCSSA Loop Phi are dead, the left over dead loop body can be cleaned // up by later passes - for (PHINode *PN : Cleanup) + for (PHINode *PN : Cleanup) RecursivelyDeleteDeadPHINode(PN); SE->forgetLoop(CurLoop); diff --git a/llvm/test/Transforms/LoopIdiom/wcslen16.ll b/llvm/test/Transforms/LoopIdiom/wcslen16.ll index d3b0b8d208cd8..4f53442a9a0a4 100644 --- a/llvm/test/Transforms/LoopIdiom/wcslen16.ll +++ b/llvm/test/Transforms/LoopIdiom/wcslen16.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes='loop(loop-idiom),verify' < %s -S | FileCheck %s +; RUN: opt -passes='loop(loop-idiom),verify' -enable-loop-idiom-wcslen < %s -S | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/LoopIdiom/wcslen32.ll b/llvm/test/Transforms/LoopIdiom/wcslen32.ll index 76936b537fad7..d083b534d660a 100644 --- a/llvm/test/Transforms/LoopIdiom/wcslen32.ll +++ b/llvm/test/Transforms/LoopIdiom/wcslen32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes='loop-idiom' < %s -S | FileCheck %s 
+; RUN: opt -passes='loop(loop-idiom),verify' -enable-loop-idiom-wcslen < %s -S | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu"