diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h index 93b0a8d3cef04..ae86f353f16d9 100644 --- a/llvm/include/llvm/Analysis/InlineCost.h +++ b/llvm/include/llvm/Analysis/InlineCost.h @@ -336,7 +336,11 @@ LLVM_ABI std::optional<InlineCostFeatures> getInliningCostFeatures( ProfileSummaryInfo *PSI = nullptr, OptimizationRemarkEmitter *ORE = nullptr); -/// Minimal filter to detect invalid constructs for inlining. +/// Check if it is mechanically possible to inline the function \p Callee, based +/// on the contents of the function. +/// +/// See also \p CanInlineCallSite as an additional precondition necessary to +/// perform a valid inline in a particular use context. LLVM_ABI InlineResult isInlineViable(Function &Callee); // This pass is used to annotate instructions during the inline process for diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index 6b56230a6e1d4..cfa06a5be79fd 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -279,6 +279,9 @@ class InlineFunctionInfo { /// `InlinedCalls` above is used. SmallVector<CallBase *, 8> InlinedCallSites; + Value *ConvergenceControlToken = nullptr; + Instruction *CallSiteEHPad = nullptr; + /// Update profile for callee as well as cloned version. We need to do this /// for regular inlining, but not for inlining from sample profile loader. bool UpdateProfile; @@ -287,9 +290,36 @@ StaticAllocas.clear(); InlinedCalls.clear(); InlinedCallSites.clear(); + ConvergenceControlToken = nullptr; + CallSiteEHPad = nullptr; } }; +/// Check if it is legal to perform inlining of the function called by \p CB +/// into the caller at this particular use, and set fields in \p IFI. +/// +/// This does not consider whether the callee function itself is viable for +/// inlining; for that, see isInlineViable. +LLVM_ABI InlineResult CanInlineCallSite(const CallBase &CB, + InlineFunctionInfo &IFI); + +/// This should generally not be used; use InlineFunction instead. +/// +/// Perform mechanical inlining of \p CB into the caller. +/// +/// This does not perform any legality or profitability checks for the +/// inlining. This assumes that CanInlineCallSite was already called, populated +/// \p IFI, and returned InlineResult::success. +/// +/// Also assumes that isInlineViable returned InlineResult::success for the +/// called function. +LLVM_ABI void InlineFunctionImpl(CallBase &CB, InlineFunctionInfo &IFI, + bool MergeAttributes = false, + AAResults *CalleeAAR = nullptr, + bool InsertLifetime = true, + Function *ForwardVarArgsTo = nullptr, + OptimizationRemarkEmitter *ORE = nullptr); + /// This function inlines the called function into the basic /// block of the caller. This returns false if it is not possible to inline /// this call. The program is still in a well defined state if this occurs @@ -328,7 +358,8 @@ LLVM_ABI InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes = false, AAResults *CalleeAAR = nullptr, bool InsertLifetime = true, - Function *ForwardVarArgsTo = nullptr); + Function *ForwardVarArgsTo = nullptr, + OptimizationRemarkEmitter *ORE = nullptr); /// Clones a loop \p OrigLoop. Returns the loop and the blocks in \p /// Blocks.
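For orientation, the intended use of the new two-phase API is "query, then transform": the legality check populates the InlineFunctionInfo context that the mechanical step later consumes. A minimal sketch of a caller follows (a hypothetical helper, not part of this patch; it mirrors the updated llvm::InlineFunction wrapper further below):

  #include "llvm/Transforms/Utils/Cloning.h"
  using namespace llvm;

  static InlineResult tryInlineCallSite(CallBase &CB) {
    InlineFunctionInfo IFI;
    // Phase 1: check legality at this particular use; on success this also
    // records the convergence control token and EH pad context in IFI.
    InlineResult Res = CanInlineCallSite(CB, IFI);
    // Phase 2: perform the mechanical inline. This assumes isInlineViable
    // already returned success for the callee.
    if (Res.isSuccess())
      InlineFunctionImpl(CB, IFI);
    return Res;
  }
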
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index ed3dca2f7c307..59a47a9bb0d3e 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -2361,15 +2361,13 @@ remapIndices(Function &Caller, BasicBlock *StartBB, // Updating the contextual profile after an inlining means, at a high level, // copying over the data of the callee, **intentionally without any value // scaling**, and copying over the callees of the inlined callee. -llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, - PGOContextualProfile &CtxProf, - bool MergeAttributes, - AAResults *CalleeAAR, - bool InsertLifetime, - Function *ForwardVarArgsTo) { +llvm::InlineResult llvm::InlineFunction( + CallBase &CB, InlineFunctionInfo &IFI, PGOContextualProfile &CtxProf, + bool MergeAttributes, AAResults *CalleeAAR, bool InsertLifetime, + Function *ForwardVarArgsTo, OptimizationRemarkEmitter *ORE) { if (!CtxProf.isInSpecializedModule()) return InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime, - ForwardVarArgsTo); + ForwardVarArgsTo, ORE); auto &Caller = *CB.getCaller(); auto &Callee = *CB.getCalledFunction(); @@ -2387,7 +2385,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, const auto NumCalleeCallsites = CtxProf.getNumCallsites(Callee); auto Ret = InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime, - ForwardVarArgsTo); + ForwardVarArgsTo, ORE); if (!Ret.isSuccess()) return Ret; @@ -2457,20 +2455,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, return Ret; } -/// This function inlines the called function into the basic block of the -/// caller. This returns false if it is not possible to inline this call. -/// The program is still in a well defined state if this occurs though. -/// -/// Note that this only does one level of inlining. For example, if the -/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now -/// exists in the instruction stream. Similarly this will inline a recursive -/// function by one level. -llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, - bool MergeAttributes, - AAResults *CalleeAAR, - bool InsertLifetime, - Function *ForwardVarArgsTo, - OptimizationRemarkEmitter *ORE) { +llvm::InlineResult llvm::CanInlineCallSite(const CallBase &CB, + InlineFunctionInfo &IFI) { assert(CB.getParent() && CB.getFunction() && "Instruction not in function!"); // FIXME: we don't inline callbr yet. @@ -2487,7 +2473,6 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // The inliner does not know how to inline through calls with operand bundles // in general ... - Value *ConvergenceControlToken = nullptr; if (CB.hasOperandBundles()) { for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) { auto OBUse = CB.getOperandBundleAt(i); @@ -2503,7 +2488,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, if (Tag == LLVMContext::OB_kcfi) continue; if (Tag == LLVMContext::OB_convergencectrl) { - ConvergenceControlToken = OBUse.Inputs[0].get(); + IFI.ConvergenceControlToken = OBUse.Inputs[0].get(); continue; } @@ -2521,28 +2506,22 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // fully implements convergence control tokens, there is no mixing of // controlled and uncontrolled convergent operations in the whole program. 
if (CB.isConvergent()) { - if (!ConvergenceControlToken && + if (!IFI.ConvergenceControlToken && getConvergenceEntry(CalledFunc->getEntryBlock())) { return InlineResult::failure( "convergent call needs convergencectrl operand"); } } - // If the call to the callee cannot throw, set the 'nounwind' flag on any - // calls that we inline. - bool MarkNoUnwind = CB.doesNotThrow(); - - BasicBlock *OrigBB = CB.getParent(); - Function *Caller = OrigBB->getParent(); + const BasicBlock *OrigBB = CB.getParent(); + const Function *Caller = OrigBB->getParent(); // GC poses two hazards to inlining, which only occur when the callee has GC: // 1. If the caller has no GC, then the callee's GC must be propagated to the // caller. // 2. If the caller has a differing GC, it is invalid to inline. if (CalledFunc->hasGC()) { - if (!Caller->hasGC()) - Caller->setGC(CalledFunc->getGC()); - else if (CalledFunc->getGC() != Caller->getGC()) + if (Caller->hasGC() && CalledFunc->getGC() != Caller->getGC()) return InlineResult::failure("incompatible GC"); } @@ -2560,34 +2539,31 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, ? Caller->getPersonalityFn()->stripPointerCasts() : nullptr; if (CalledPersonality) { - if (!CallerPersonality) - Caller->setPersonalityFn(CalledPersonality); // If the personality functions match, then we can perform the // inlining. Otherwise, we can't inline. // TODO: This isn't 100% true. Some personality functions are proper // supersets of others and can be used in place of the other. - else if (CalledPersonality != CallerPersonality) + if (CallerPersonality && CalledPersonality != CallerPersonality) return InlineResult::failure("incompatible personality"); } // We need to figure out which funclet the callsite was in so that we may // properly nest the callee. - Instruction *CallSiteEHPad = nullptr; if (CallerPersonality) { EHPersonality Personality = classifyEHPersonality(CallerPersonality); if (isScopedEHPersonality(Personality)) { std::optional<OperandBundleUse> ParentFunclet = CB.getOperandBundle(LLVMContext::OB_funclet); if (ParentFunclet) - CallSiteEHPad = cast<FuncletPadInst>(ParentFunclet->Inputs.front()); + IFI.CallSiteEHPad = cast<FuncletPadInst>(ParentFunclet->Inputs.front()); // OK, the inlining site is legal. What about the target function? - if (CallSiteEHPad) { + if (IFI.CallSiteEHPad) { if (Personality == EHPersonality::MSVC_CXX) { // The MSVC personality cannot tolerate catches getting inlined into // cleanup funclets. - if (isa<CleanupPadInst>(CallSiteEHPad)) { + if (isa<CleanupPadInst>(IFI.CallSiteEHPad)) { // Ok, the call site is within a cleanuppad. Let's check the callee // for catchpads. for (const BasicBlock &CalledBB : *CalledFunc) { @@ -2607,13 +2583,34 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, } } + return InlineResult::success(); +} + +/// This function inlines the called function into the basic block of the +/// caller. It performs only the mechanical transformation, and assumes that +/// CanInlineCallSite has already succeeded for this call site. +/// +/// Note that this only does one level of inlining. For example, if the +/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now +/// exists in the instruction stream. Similarly this will inline a recursive +/// function by one level.
+void llvm::InlineFunctionImpl(CallBase &CB, InlineFunctionInfo &IFI, + bool MergeAttributes, AAResults *CalleeAAR, + bool InsertLifetime, Function *ForwardVarArgsTo, + OptimizationRemarkEmitter *ORE) { + BasicBlock *OrigBB = CB.getParent(); + Function *Caller = OrigBB->getParent(); + Function *CalledFunc = CB.getCalledFunction(); + assert(CalledFunc && !CalledFunc->isDeclaration() && + "CanInlineCallSite should have verified direct call to definition"); + // Determine if we are dealing with a call in an EHPad which does not unwind // to caller. bool EHPadForCallUnwindsLocally = false; - if (CallSiteEHPad && isa<CallInst>(CB)) { + if (IFI.CallSiteEHPad && isa<CallInst>(CB)) { UnwindDestMemoTy FuncletUnwindMap; Value *CallSiteUnwindDestToken = - getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap); + getUnwindDestToken(IFI.CallSiteEHPad, FuncletUnwindMap); EHPadForCallUnwindsLocally = CallSiteUnwindDestToken && @@ -2630,6 +2627,30 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, ClonedCodeInfo InlinedFunctionInfo; Function::iterator FirstNewBlock; + // GC poses two hazards to inlining, which only occur when the callee has GC: + // 1. If the caller has no GC, then the callee's GC must be propagated to the + // caller. + // 2. If the caller has a differing GC, it is invalid to inline. + if (CalledFunc->hasGC()) { + if (!Caller->hasGC()) + Caller->setGC(CalledFunc->getGC()); + else { + assert(CalledFunc->getGC() == Caller->getGC() && + "CanInlineCallSite should have verified compatible GCs"); + } + } + + if (CalledFunc->hasPersonalityFn()) { + Constant *CalledPersonality = + CalledFunc->getPersonalityFn()->stripPointerCasts(); + if (!Caller->hasPersonalityFn()) { + Caller->setPersonalityFn(CalledPersonality); + } else + assert(Caller->getPersonalityFn()->stripPointerCasts() == + CalledPersonality && + "CanInlineCallSite should have verified compatible personality"); + } + { // Scope to destroy VMap after cloning. ValueToValueMapTy VMap; struct ByValInit { @@ -2819,10 +2840,10 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, IFI.GetAssumptionCache(*Caller).registerAssumption(II); } - if (ConvergenceControlToken) { + if (IFI.ConvergenceControlToken) { IntrinsicInst *IntrinsicCall = getConvergenceEntry(*FirstNewBlock); if (IntrinsicCall) { - IntrinsicCall->replaceAllUsesWith(ConvergenceControlToken); + IntrinsicCall->replaceAllUsesWith(IFI.ConvergenceControlToken); IntrinsicCall->eraseFromParent(); } } @@ -2869,6 +2890,10 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, } } + // If the call to the callee cannot throw, set the 'nounwind' flag on any + // calls that we inline. + bool MarkNoUnwind = CB.doesNotThrow(); + SmallVector<Value *, 8> VarArgsToForward; SmallVector<AttributeSet, 8> VarArgsAttrs; for (unsigned i = CalledFunc->getFunctionType()->getNumParams(); @@ -3055,12 +3080,12 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // Update the lexical scopes of the new funclets and callsites. // Anything that had 'none' as its parent is now nested inside the callsite's // EHPad. - if (CallSiteEHPad) { + if (IFI.CallSiteEHPad) { for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); BB != E; ++BB) { // Add bundle operands to inlined call sites.
- PropagateOperandBundles(BB, CallSiteEHPad); + PropagateOperandBundles(BB, IFI.CallSiteEHPad); // It is problematic if the inlinee has a cleanupret which unwinds to // caller and we inline it into a call site which doesn't unwind but into @@ -3076,11 +3101,11 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) { if (isa<ConstantTokenNone>(CatchSwitch->getParentPad())) - CatchSwitch->setParentPad(CallSiteEHPad); + CatchSwitch->setParentPad(IFI.CallSiteEHPad); } else { auto *FPI = cast<FuncletPadInst>(I); if (isa<ConstantTokenNone>(FPI->getParentPad())) - FPI->setParentPad(CallSiteEHPad); + FPI->setParentPad(IFI.CallSiteEHPad); } } } @@ -3236,7 +3261,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, AttributeFuncs::mergeAttributesForInlining(*Caller, *CalledFunc); // We are now done with the inlining. - return InlineResult::success(); + return; } // Otherwise, we have the normal case, of more than one block to inline or @@ -3404,6 +3429,19 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, if (MergeAttributes) AttributeFuncs::mergeAttributesForInlining(*Caller, *CalledFunc); +} - return InlineResult::success(); +llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, + bool MergeAttributes, + AAResults *CalleeAAR, + bool InsertLifetime, + Function *ForwardVarArgsTo, + OptimizationRemarkEmitter *ORE) { + llvm::InlineResult Result = CanInlineCallSite(CB, IFI); + if (Result.isSuccess()) { + InlineFunctionImpl(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime, + ForwardVarArgsTo, ORE); + } + + return Result; } diff --git a/llvm/test/tools/llvm-reduce/inline-call-sites-cost.ll b/llvm/test/tools/llvm-reduce/inline-call-sites-cost.ll new file mode 100644 index 0000000000000..fc25ca45824dc --- /dev/null +++ b/llvm/test/tools/llvm-reduce/inline-call-sites-cost.ll @@ -0,0 +1,95 @@ +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=inline-call-sites -reduce-callsite-inline-threshold=3 --test FileCheck --test-arg --check-prefix=CHECK --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck -check-prefixes=RESULT,CHECK %s < %t + +declare void @extern_b() +declare void @extern_a() + +; RESULT: @gv_init = global ptr @no_inline_noncall_user +@gv_init = global ptr @no_inline_noncall_user + + +; CHECK-LABEL: define void @no_inline_noncall_user( +define void @no_inline_noncall_user() { + call void @extern_a() + call void @extern_a() + call void @extern_a() + call void @extern_a() + ret void +} + +; RESULT-LABEL: define void @noncall_user_call() { +; RESULT-NEXT: call void @no_inline_noncall_user() +; RESULT-NEXT: ret void define void @noncall_user_call() { + call void @no_inline_noncall_user() + ret void +} + +; RESULT-LABEL: define void @big_callee_small_caller_callee() { define void @big_callee_small_caller_callee() { + call void @extern_a() + call void @extern_a() + call void @extern_a() + call void @extern_a() + ret void +} + +; RESULT-LABEL: define void @big_callee_small_caller_caller() { +; RESULT-NEXT: call void @extern_b() +; RESULT-NEXT: call void @extern_a() +; RESULT-NEXT: call void @extern_a() +; RESULT-NEXT: call void @extern_a() +; RESULT-NEXT: call void @extern_a() +; RESULT-NEXT: ret void define void @big_callee_small_caller_caller() { + call void @extern_b() + call void @big_callee_small_caller_callee() + ret void +} + +; RESULT-LABEL: define void @small_callee_big_caller_callee() { +; RESULT-NEXT: call void @extern_a() +; RESULT-NEXT: ret void define void
@small_callee_big_caller_callee() { + call void @extern_a() + ret void +} + +; RESULT-LABEL: define void @small_callee_big_caller_caller() { +; RESULT-NEXT: call void @extern_b() +; RESULT-NEXT: call void @extern_a() +; RESULT-NEXT: call void @extern_b() +; RESULT-NEXT: call void @extern_b() +; RESULT-NEXT: ret void +define void @small_callee_big_caller_caller() { + call void @extern_b() + call void @small_callee_big_caller_callee() + call void @extern_b() + call void @extern_b() + ret void +} + +; RESULT-LABEL: define void @big_callee_big_caller_callee() { +define void @big_callee_big_caller_callee() { + call void @extern_a() + call void @extern_a() + call void @extern_a() + call void @extern_a() + ret void +} + +; RESULT-LABEL: define void @big_callee_big_caller_caller() { +; RESULT-NEXT: call void @extern_b() +; RESULT-NEXT: call void @big_callee_big_caller_callee() +; RESULT-NEXT: call void @extern_b() +; RESULT-NEXT: call void @extern_b() +; RESULT-NEXT: call void @extern_b() +; RESULT-NEXT: ret void +define void @big_callee_big_caller_caller() { + call void @extern_b() + call void @big_callee_big_caller_callee() + call void @extern_b() + call void @extern_b() + call void @extern_b() + ret void +} diff --git a/llvm/test/tools/llvm-reduce/inline-call-sites.ll b/llvm/test/tools/llvm-reduce/inline-call-sites.ll new file mode 100644 index 0000000000000..0e7cf79df784c --- /dev/null +++ b/llvm/test/tools/llvm-reduce/inline-call-sites.ll @@ -0,0 +1,765 @@ +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=inline-call-sites -reduce-callsite-inline-threshold=-1 --test FileCheck --test-arg --check-prefixes=CHECK,INTERESTING --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck -check-prefixes=RESULT,CHECK %s < %t + +; RESULT: @gv = global [2 x ptr] [ptr @only_gv_user, ptr @simple_callee] +@gv = global [2 x ptr] [ptr @only_gv_user, ptr @simple_callee] + +; RESULT: @indirectbr.L = internal unnamed_addr constant [3 x ptr] [ptr blockaddress(@callee_with_indirectbr, %L1), ptr blockaddress(@callee_with_indirectbr, %L2), ptr null], align 8 +@indirectbr.L = internal unnamed_addr constant [3 x ptr] [ptr blockaddress(@callee_with_indirectbr, %L1), ptr blockaddress(@callee_with_indirectbr, %L2), ptr null], align 8 + + +; CHECK-LABEL: define void @simple_callee( +; RESULT-NEXT: store i32 123, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @simple_callee(ptr %arg) { + store i32 123, ptr %arg + ret void +} + +; CHECK-LABEL: define void @simple_caller( +; RESULT-NEXT: store i32 123, ptr %outer.arg, align 4 +; RESULT-NEXT: ret void +define void @simple_caller(ptr %outer.arg) { + call void @simple_callee(ptr %outer.arg) + ret void +} + +; CHECK-LABEL: define void @multi_simple_caller( +; RESULT-NEXT: store i32 123, ptr %outer.arg, align 4 +; RESULT-NEXT: store i32 123, ptr %outer.arg, align 4 +; RESULT-NEXT: store i32 123, ptr null, align 4 +; RESULT-NEXT: ret void +define void @multi_simple_caller(ptr %outer.arg) { + call void @simple_callee(ptr %outer.arg) + call void @simple_callee(ptr %outer.arg) + call void @simple_callee(ptr null) + ret void +} + +; CHECK-LABEL: define void @only_gv_user( +; RESULT-NEXT: store i32 666, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @only_gv_user(ptr %arg) { + store i32 666, ptr %arg + ret void +} + +; CHECK-LABEL: define void @recursive( +; RESULT-NEXT: call void @recursive(ptr %arg) +; RESULT-NEXT: ret void +define void @recursive(ptr %arg) { + call void @recursive(ptr %arg) + ret void +} + +; CHECK-LABEL: define void 
@recursive_with_wrong_callsite_type( +; RESULT-NEXT: call void @recursive_with_wrong_callsite_type(ptr %arg, i32 2) +; RESULT-NEXT: ret void +define void @recursive_with_wrong_callsite_type(ptr %arg) { + call void @recursive_with_wrong_callsite_type(ptr %arg, i32 2) + ret void +} + +; CHECK-LABEL: define void @non_callee_use( +; RESULT-NEXT: store i32 567, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @non_callee_use(ptr %arg) { + store i32 567, ptr %arg + ret void +} + +declare void @extern_ptr_use(ptr) + +; CHECK-LABEL: define void @non_callee_user( +; RESULT-NEXT: call void @extern_ptr_use(ptr @non_callee_use) +; RESULT-NEXT: ret void +define void @non_callee_user() { + call void @extern_ptr_use(ptr @non_callee_use) + ret void +} + +; CHECK-LABEL: define void @non_call_inst_use( +define void @non_call_inst_use(ptr %arg) { + store i32 999, ptr %arg + ret void +} + +; CHECK-LABEL: define void @non_call_inst_user( +; RESULT-NEXT: store ptr @non_call_inst_use, ptr %arg, align 8 +; RESULT-NEXT: ret void +define void @non_call_inst_user(ptr %arg) { + store ptr @non_call_inst_use, ptr %arg + ret void +} + +; CHECK-LABEL: define i32 @used_wrong_call_type( +; RESULT-NEXT: store i32 123, ptr %arg, align 4 +; RESULT-NEXT: ret i32 8 +define i32 @used_wrong_call_type(ptr %arg) { + store i32 123, ptr %arg + ret i32 8 +} + +; Inlining doesn't support the UB cases +; CHECK-LABEL: define void @use_wrong_call_type( +; RESULT-NEXT: call void @used_wrong_call_type(ptr %outer.arg) +; RESULT-NEXT: ret void +define void @use_wrong_call_type(ptr %outer.arg) { + call void @used_wrong_call_type(ptr %outer.arg) + ret void +} + +; INTERESTING-LABEL: define void @incompatible_gc_callee( + +; RESULT-LABEL: define void @incompatible_gc_callee(ptr %arg) gc "gc0" { +; RESULT-NEXT: store i32 10000, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @incompatible_gc_callee(ptr %arg) gc "gc0" { + store i32 10000, ptr %arg + ret void +} + +; INTERESTING-LABEL: define void @incompatible_gc_caller( + +; RESULT-LABEL: define void @incompatible_gc_caller(ptr %outer.arg) gc "gc1" { +; RESULT-NEXT: call void @incompatible_gc_callee(ptr %outer.arg) +; RESULT-NEXT: ret void +define void @incompatible_gc_caller(ptr %outer.arg) gc "gc1" { + call void @incompatible_gc_callee(ptr %outer.arg) + ret void +} + +; INTERESTING-LABEL: define void @propagate_callee_gc( + +; RESULT-LABEL: define void @propagate_callee_gc(ptr %arg) gc "propagate-gc" { +; RESULT-NEXT: store i32 10000, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @propagate_callee_gc(ptr %arg) gc "propagate-gc" { + store i32 10000, ptr %arg + ret void +} + +; INTERESTING-LABEL: define void @propagate_caller_gc( + +; RESULT-LABEL: define void @propagate_caller_gc(ptr %arg) gc "propagate-gc" { +; RESULT-NEXT: store i32 10000, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @propagate_caller_gc(ptr %arg) { + call void @propagate_callee_gc(ptr %arg) + ret void +} + +declare i32 @__gxx_personality_v0(...) 
+ +; INTERESTING-LABEL: define void @propagate_callee_personality( + +; RESULT-LABEL: define void @propagate_callee_personality(ptr %arg) personality ptr @__gxx_personality_v0 { +; RESULT-NEXT: store i32 2000, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @propagate_callee_personality(ptr %arg) personality ptr @__gxx_personality_v0 { + store i32 2000, ptr %arg + ret void +} + +; INTERESTING-LABEL: define void @propagate_caller_personality( + +; RESULT-LABEL: define void @propagate_caller_personality(ptr %arg) personality ptr @__gxx_personality_v0 { +; RESULT-NEXT: store i32 2000, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @propagate_caller_personality(ptr %arg) { + call void @propagate_callee_personality(ptr %arg) + ret void +} + +; CHECK-LABEL: define void @callee_with_indirectbr( +define void @callee_with_indirectbr() { +entry: + br label %L1 + +L1: ; preds = %entry, %L1 + %i = phi i32 [ 0, %entry ], [ %inc, %L1 ] + %inc = add i32 %i, 1 + %idxprom = zext i32 %i to i64 + %arrayidx = getelementptr inbounds [3 x ptr], ptr @indirectbr.L, i64 0, i64 %idxprom + %brtarget = load ptr, ptr %arrayidx, align 8 + indirectbr ptr %brtarget, [label %L1, label %L2] + +L2: ; preds = %L1 + ret void +} + +; CHECK-LABEL: define void @calls_func_with_indirectbr( + +; RESULT: L1.i: +; RESULT-NEXT: %i.i = phi i32 [ 0, %call ], [ %inc.i, %L1.i ] +; RESULT-NEXT: %inc.i = add i32 %i.i, 1 +; RESULT-NEXT: %idxprom.i = zext i32 %i.i to i64 +; RESULT-NEXT: %arrayidx.i = getelementptr inbounds [3 x ptr], ptr @indirectbr.L, i64 0, i64 %idxprom.i +; RESULT-NEXT: %brtarget.i = load ptr, ptr %arrayidx.i, align 8 +; RESULT-NEXT: indirectbr ptr %brtarget.i, [label %L1.i, label %callee_with_indirectbr.exit] + +define void @calls_func_with_indirectbr(i1 %arg0) { +entry: + br i1 %arg0, label %call, label %ret + +call: + call void @callee_with_indirectbr() + br label %ret + +ret: + ret void +} + + +; CHECK-LABEL: define ptr @callee_with_blockaddress_use( +; RESULT: L2: +; RESULT-NEXT: store ptr blockaddress(@callee_with_blockaddress_use, %L1), ptr %alloca, align 8 +; RESULT-NEXT: store ptr blockaddress(@callee_with_blockaddress_use, %L2), ptr %alloca, align 8 +; RESULT-NEXT: store ptr blockaddress(@callee_with_blockaddress_use, %L3), ptr %alloca, align 8 +; RESULT-NEXT: %cond1 = load volatile i1, ptr addrspace(1) null +; RESULT-NEXT: br i1 %cond1, label %L1, label %L3 +define ptr @callee_with_blockaddress_use() { +entry: + %alloca = alloca ptr + %cond0 = load volatile i1, ptr addrspace(1) null + br i1 %cond0, label %L1, label %L2 + +L1: + br label %L2 + +L2: + ; reference an earlier block + store ptr blockaddress(@callee_with_blockaddress_use, %L1), ptr %alloca + + ; reference the block itself from the block + store ptr blockaddress(@callee_with_blockaddress_use, %L2), ptr %alloca + + ; reference a later block + store ptr blockaddress(@callee_with_blockaddress_use, %L3), ptr %alloca + + %cond1 = load volatile i1, ptr addrspace(1) null + br i1 %cond1, label %L1, label %L3 + +L3: + %load = load ptr, ptr %alloca + ret ptr %load +} + +; FIXME: This is not correctly remapping the blockaddress use +; CHECK-LABEL: define void @calls_func_with_blockaddress_use( +; RESULT: entry: +; RESULT-NEXT: %alloca.i = alloca ptr, align 8 +; RESULT-NEXT: store i32 1000, ptr null, align 4 +; RESULT-NEXT: br i1 %arg0, label %call, label %ret + +; RESULT: call: +; RESULT-NEXT: store i32 2000, ptr null, align 4 +; RESULT-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr %alloca.i) +; RESULT-NEXT: %cond0.i = load volatile i1, ptr 
addrspace(1) null, align 1 +; RESULT-NEXT: br i1 %cond0.i, label %L1.i, label %L2.i + +; RESULT: L1.i: ; preds = %L2.i, %call +; RESULT-NEXT: br label %L2.i + +; RESULT: L2.i: ; preds = %L1.i, %call +; RESULT-NEXT: store ptr blockaddress(@callee_with_blockaddress_use, %L1), ptr %alloca.i, align 8 +; RESULT-NEXT: store ptr blockaddress(@calls_func_with_blockaddress_use, %L2.i), ptr %alloca.i, align 8 +; RESULT-NEXT: store ptr blockaddress(@callee_with_blockaddress_use, %L3), ptr %alloca.i, align 8 +; RESULT-NEXT: %cond1.i = load volatile i1, ptr addrspace(1) null, align 1 +; RESULT-NEXT: br i1 %cond1.i, label %L1.i, label %callee_with_blockaddress_use.exit + +; RESULT: callee_with_blockaddress_use.exit: ; preds = %L2.i +; RESULT-NEXT: %load.i = load ptr, ptr %alloca.i, align 8 +; RESULT-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %alloca.i) +; RESULT-NEXT: store i32 3000, ptr null, align 4 +; RESULT-NEXT: br label %ret + +; RESULT: ret: ; preds = %callee_with_blockaddress_use.exit, %entry +; RESULT-NEXT: store i32 4000, ptr null, align 4 +; RESULT-NEXT: ret void +define void @calls_func_with_blockaddress_use(i1 %arg0) { +entry: + store i32 1000, ptr null + br i1 %arg0, label %call, label %ret + +call: + store i32 2000, ptr null + call ptr @callee_with_blockaddress_use() + store i32 3000, ptr null + br label %ret + +ret: + store i32 4000, ptr null + ret void +} + +; CHECK-LABEL: define void @callee_with_fallthrough_blockaddress_use( +; RESULT: L2: +; RESULT-NEXT: store ptr blockaddress(@callee_with_fallthrough_blockaddress_use, %L1), ptr %alloca, align 8 +; RESULT-NEXT: store ptr blockaddress(@callee_with_fallthrough_blockaddress_use, %L2), ptr %alloca, align 8 +; RESULT-NEXT: store ptr blockaddress(@callee_with_fallthrough_blockaddress_use, %L3), ptr %alloca, align 8 +; RESULT-NEXT: br label %L3 +define void @callee_with_fallthrough_blockaddress_use() { +entry: + %alloca = alloca ptr + br label %L1 + +L1: + store i32 999, ptr null + br label %L2 + +L2: ; preds = %entry, %L1 + ; reference a block before this block + store ptr blockaddress(@callee_with_fallthrough_blockaddress_use, %L1), ptr %alloca + + ; reference the block itself from the block + store ptr blockaddress(@callee_with_fallthrough_blockaddress_use, %L2), ptr %alloca + + ; reference a block after this block + store ptr blockaddress(@callee_with_fallthrough_blockaddress_use, %L3), ptr %alloca + br label %L3 + +L3: ; preds = %L1 + %load = load ptr, ptr %alloca + ret void +} + + +; CHECK-LABEL: define void @calls_func_with_fallthrough_blockaddress_use( +; RESULT: entry: +; RESULT-NEXT: %alloca.i = alloca ptr, align 8 +; RESULT-NEXT: store i32 1000, ptr null +; RESULT-NEXT: br i1 %arg0, label %call, label %ret + +; RESULT: call: +; RESULT-NEXT: store i32 2000, ptr null, align 4 +; RESULT-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr %alloca.i) +; RESULT-NEXT: br label %L1.i + +; RESULT: L1.i: ; preds = %call +; RESULT-NEXT: store i32 999, ptr null, align 4 +; RESULT-NEXT: br label %L2.i + +; RESULT: L2.i: +; RESULT-NEXT: store ptr blockaddress(@calls_func_with_fallthrough_blockaddress_use, %L1.i), ptr %alloca.i, align 8 +; RESULT-NEXT: store ptr blockaddress(@calls_func_with_fallthrough_blockaddress_use, %L2.i), ptr %alloca.i, align 8 +; RESULT-NEXT: store ptr blockaddress(@callee_with_fallthrough_blockaddress_use, %L3), ptr %alloca.i, align 8 +; RESULT-NEXT: br label %callee_with_fallthrough_blockaddress_use.exit + +; RESULT: callee_with_fallthrough_blockaddress_use.exit: ; preds = %L2.i +; RESULT-NEXT: %load.i = load 
ptr, ptr %alloca.i, align 8 +; RESULT-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %alloca.i) +; RESULT-NEXT: store i32 3000, ptr null, align 4 +; RESULT-NEXT: br label %ret + +; RESULT: ret: +; RESULT-NEXT: store i32 4000, ptr null, align 4 +; RESULT-NEXT: ret void +define void @calls_func_with_fallthrough_blockaddress_use(i1 %arg0) { +entry: + store i32 1000, ptr null + br i1 %arg0, label %call, label %ret + +call: + store i32 2000, ptr null + call void @callee_with_fallthrough_blockaddress_use() + store i32 3000, ptr null + br label %ret + +ret: + store i32 4000, ptr null + ret void +} + +declare i32 @extern_returns_twice() returns_twice + +; CHECK-LABEL: define i32 @callee_returns_twice( +; RESULT-NEXT: %call = call i32 @extern_returns_twice() +; RESULT-NEXT: %add = add nsw i32 1, %call +; RESULT-NEXT: ret i32 %add +define i32 @callee_returns_twice() { + %call = call i32 @extern_returns_twice() + %add = add nsw i32 1, %call + ret i32 %add +} + +; CHECK-LABEL: define i32 @caller_returns_twice_calls_callee_returns_twice( +; RESULT-NEXT: %call.i = call i32 @extern_returns_twice() +; RESULT-NEXT: %add.i = add nsw i32 1, %call.i +; RESULT-NEXT: %add = add nsw i32 1, %add.i +; RESULT-NEXT: ret i32 %add + define i32 @caller_returns_twice_calls_callee_returns_twice() returns_twice { + %call = call i32 @callee_returns_twice() + %add = add nsw i32 1, %call + ret i32 %add +} + +; Inliner usually blocks inlining of returns_twice functions into +; non-returns_twice functions +; CHECK-LABEL: define i32 @regular_caller_calls_callee_returns_twice() { +; RESULT-NEXT: %call.i = call i32 @extern_returns_twice() +; RESULT-NEXT: %add.i = add nsw i32 1, %call.i +; RESULT-NEXT: %add = add nsw i32 1, %add.i +; RESULT-NEXT: ret i32 %add +define i32 @regular_caller_calls_callee_returns_twice() { + %call = call i32 @callee_returns_twice() + %add = add nsw i32 1, %call + ret i32 %add +} + +; CHECK-LABEL: define void @caller_with_vastart( +; RESULT-NEXT: %ap = alloca ptr, align 4 +; RESULT-NEXT: %ap2 = alloca ptr, align 4 +; RESULT-NEXT: call void @llvm.va_start.p0(ptr nonnull %ap) +; RESULT-NEXT: call void @llvm.va_end.p0(ptr nonnull %ap) +; RESULT-NEXT: call void @llvm.va_start.p0(ptr nonnull %ap) +; RESULT-NEXT: call void @llvm.va_end.p0(ptr nonnull %ap) +; RESULT-NEXT: ret void +define void @caller_with_vastart(ptr noalias nocapture readnone %args, ...) { + %ap = alloca ptr, align 4 + %ap2 = alloca ptr, align 4 + call void @llvm.va_start.p0(ptr nonnull %ap) + call fastcc void @callee_with_vaend(ptr nonnull %ap) + call void @llvm.va_start.p0(ptr nonnull %ap) + call fastcc void @callee_with_vaend_alwaysinline(ptr nonnull %ap) + ret void +} + +; CHECK-LABEL: define fastcc void @callee_with_vaend( +; RESULT-NEXT: tail call void @llvm.va_end.p0(ptr %a) +; RESULT-NEXT: ret void +define fastcc void @callee_with_vaend(ptr %a) { + tail call void @llvm.va_end.p0(ptr %a) + ret void +} + +; CHECK-LABEL: define internal fastcc void @callee_with_vaend_alwaysinline( +; RESULT-NEXT: tail call void @llvm.va_end.p0(ptr %a) +; RESULT-NEXT: ret void +define internal fastcc void @callee_with_vaend_alwaysinline(ptr %a) alwaysinline { + tail call void @llvm.va_end.p0(ptr %a) + ret void +} + +; CHECK-LABEL: define i32 @callee_with_va_start( +define i32 @callee_with_va_start(ptr %a, ...) 
{ + %vargs = alloca ptr, align 8 + tail call void @llvm.va_start.p0(ptr %a) + %va1 = va_arg ptr %vargs, i32 + call void @llvm.va_end(ptr %vargs) + ret i32 %va1 +} + +; CHECK-LABEL: define i32 @callee_vastart_caller( +; RESULT-NEXT: %vargs.i = alloca ptr, align 8 +; RESULT-NEXT: %ap = alloca ptr, align 4 +; RESULT-NEXT: %b = load i32, ptr null, align 4 +; RESULT-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr %vargs.i) +; RESULT-NEXT: call void @llvm.va_start.p0(ptr nonnull %ap) +; RESULT-NEXT: %va1.i = va_arg ptr %vargs.i, i32 +; RESULT-NEXT: call void @llvm.va_end.p0(ptr %vargs.i) +; RESULT-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %vargs.i) +; RESULT-NEXT: ret i32 %va1.i +define i32 @callee_vastart_caller(ptr noalias nocapture readnone %args, ...) { + %ap = alloca ptr, align 4 + %b = load i32, ptr null + %result = call i32 (ptr, ...) @callee_with_va_start(ptr nonnull %ap, i32 %b) + ret i32 %result +} + +declare void @llvm.localescape(...) + +; CHECK-LABEL: define internal void @callee_uses_localrecover( +define internal void @callee_uses_localrecover(ptr %fp) { + %a.i8 = call ptr @llvm.localrecover(ptr @callee_uses_localescape, ptr %fp, i32 0) + store i32 42, ptr %a.i8 + ret void +} + +; CHECK-LABEL: define i32 @callee_uses_localescape( +; RESULT-NEXT: %a = alloca i32, align 4 +; RESULT-NEXT: call void (...) @llvm.localescape(ptr %a) +; RESULT-NEXT: %fp = call ptr @llvm.frameaddress.p0(i32 0) +; RESULT-NEXT: %a.i8.i = call ptr @llvm.localrecover(ptr @callee_uses_localescape, ptr %fp, i32 0) +; RESULT-NEXT: store i32 42, ptr %a.i8.i, align 4 +; RESULT-NEXT: %r = load i32, ptr %a, align 4 +; RESULT-NEXT: ret i32 %r +define i32 @callee_uses_localescape() alwaysinline { + %a = alloca i32 + call void (...) @llvm.localescape(ptr %a) + %fp = call ptr @llvm.frameaddress(i32 0) + tail call void @callee_uses_localrecover(ptr %fp) + %r = load i32, ptr %a + ret i32 %r +} + +; CHECK-LABEL: define i32 @callee_uses_localescape_caller( +; RESULT-NEXT: %a.i = alloca i32, align 4 +; RESULT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %a.i) +; RESULT-NEXT: call void (...) @llvm.localescape(ptr %a.i) +; RESULT-NEXT: %fp.i = call ptr @llvm.frameaddress.p0(i32 0) +; RESULT-NEXT: %a.i8.i.i = call ptr @llvm.localrecover(ptr @callee_uses_localescape, ptr %fp.i, i32 0) +; RESULT-NEXT: store i32 42, ptr %a.i8.i.i, align 4 +; RESULT-NEXT: %r.i = load i32, ptr %a.i, align 4 +; RESULT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %a.i) +; RESULT-NEXT: ret i32 %r.i +define i32 @callee_uses_localescape_caller() { + %r = tail call i32 @callee_uses_localescape() + ret i32 %r +} + +declare void @llvm.icall.branch.funnel(...) + +; CHECK-LABEL: define void @callee_uses_branch_funnel( +; RESULT-NEXT: musttail call void (...) @llvm.icall.branch.funnel(...) +; RESULT-NEXT: ret void +define void @callee_uses_branch_funnel(...) { + musttail call void (...) @llvm.icall.branch.funnel(...) + ret void +} + +; FIXME: This should fail the verifier after inlining +; CHECK-LABEL: define void @callee_branch_funnel_musttail_caller( +; RESULT-NEXT: call void (...) @llvm.icall.branch.funnel() +; RESULT-NEXT: ret void +define void @callee_branch_funnel_musttail_caller() { + call void (...) 
@callee_uses_branch_funnel() + ret void +} + +; Ignore noinline on the callee function +; CHECK-LABEL: define void @noinline_callee( +; RESULT-NEXT: store i32 123, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @noinline_callee(ptr %arg) { + store i32 123, ptr %arg + ret void +} + +; CHECK-LABEL: define void @calls_noinline_func( +; RESULT-NEXT: store i32 123, ptr %outer.arg, align 4 +; RESULT-NEXT: ret void +define void @calls_noinline_func(ptr %outer.arg) { + call void @noinline_callee(ptr %outer.arg) + ret void +} + +; Ignore noinline on the callsite +; CHECK-LABEL: define void @calls_noinline_callsite( +; RESULT-NEXT: store i32 123, ptr %outer.arg, align 4 +; RESULT-NEXT: ret void +define void @calls_noinline_callsite(ptr %outer.arg) { + call void @simple_callee(ptr %outer.arg) noinline + ret void +} + +; Ignore optnone +; CHECK-LABEL: define void @optnone_callee( +; RESULT-NEXT: store i32 5555, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @optnone_callee(ptr %arg) optnone noinline { + store i32 5555, ptr %arg + ret void +} + +; CHECK-LABEL: define void @calls_optnone_callee( +; RESULT-NEXT: store i32 5555, ptr %outer.arg, align 4 +; RESULT-NEXT: ret void +define void @calls_optnone_callee(ptr %outer.arg) { + call void @optnone_callee(ptr %outer.arg) + ret void +} + +; CHECK-LABEL: define void @optnone_caller( +; RESULT-NEXT: store i32 123, ptr %outer.arg, align 4 +; RESULT-NEXT: ret void +define void @optnone_caller(ptr %outer.arg) optnone noinline { + call void @simple_callee(ptr %outer.arg) + ret void +} + +; CHECK-LABEL: define weak void @interposable_callee( +; RESULT-NEXT: store i32 2024, ptr %arg, align 4 +; RESULT-NEXT: ret void +define weak void @interposable_callee(ptr %arg) { + store i32 2024, ptr %arg + ret void +} + +; Ignore interposable linkage +; CHECK-LABEL: @calls_interposable_callee( +; RESULT-NEXT: store i32 2024, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @calls_interposable_callee(ptr %arg) { + call void @interposable_callee(ptr %arg) + ret void +} + +; Ignore null_pointer_is_valid +; CHECK-LABEL: @null_pointer_is_valid_callee( +; RESULT-NEXT: store i32 42069, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @null_pointer_is_valid_callee(ptr %arg) null_pointer_is_valid { + store i32 42069, ptr %arg + ret void +} + +; CHECK-LABEL: @calls_null_pointer_is_valid_callee( +; RESULT-NEXT: store i32 42069, ptr %arg, align 4 +; RESULT-NEXT: ret void +define void @calls_null_pointer_is_valid_callee(ptr %arg) { + call void @null_pointer_is_valid_callee(ptr %arg) + ret void +} + +; CHECK-LABEL: @byval_arg_uses_non_alloca_addrspace( +; RESULT-NEXT: %load = load i32, ptr addrspace(1) %arg, align 4 +; RESULT-NEXT: ret i32 %load +define i32 @byval_arg_uses_non_alloca_addrspace(ptr addrspace(1) byval(i32) %arg) { + %load = load i32, ptr addrspace(1) %arg + ret i32 %load +} + +; CHECK-LABEL: @calls_byval_arg_uses_non_alloca_addrspace( +; RESULT-NEXT: %arg1 = alloca i32, align 4, addrspace(1) +; RESULT-NEXT: call void @llvm.lifetime.start.p1(i64 4, ptr addrspace(1) %arg1) +; RESULT-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %arg1, ptr addrspace(1) %arg, i64 4, i1 false) +; RESULT-NEXT: %load.i = load i32, ptr addrspace(1) %arg1, align 4 +; RESULT-NEXT: call void @llvm.lifetime.end.p1(i64 4, ptr addrspace(1) %arg1) +; RESULT-NEXT: ret i32 %load.i +define i32 @calls_byval_arg_uses_non_alloca_addrspace(ptr addrspace(1) %arg) { + %call = call i32 @byval_arg_uses_non_alloca_addrspace(ptr addrspace(1) byval(i32) %arg) + ret i32 
%call +} + +; CHECK-LABEL: define void @callee_stacksize( +; RESULT-NEXT: %alloca = alloca [4096 x i32] +; RESULT-NEXT: store i32 12345678, ptr %arg +; RESULT-NEXT: store i32 0, ptr %alloca +; RESULT-NEXT: ret void +define void @callee_stacksize(ptr %arg) "inline-max-stacksize"="4" { + %alloca = alloca [4096 x i32] + store i32 12345678, ptr %arg + store i32 0, ptr %alloca + ret void +} + +; CHECK-LABEL: define void @caller_stacksize( +; RESULT-NEXT: %alloca.i = alloca [4096 x i32], align 4 +; RESULT-NEXT: call void @llvm.lifetime.start.p0(i64 16384, ptr %alloca.i) +; RESULT-NEXT: store i32 12345678, ptr %arg, align 4 +; RESULT-NEXT: store i32 0, ptr %alloca.i, align 4 +; RESULT-NEXT: call void @llvm.lifetime.end.p0(i64 16384, ptr %alloca.i) +; RESULT-NEXT: ret void +define void @caller_stacksize(ptr %arg) { + call void @callee_stacksize(ptr %arg) + ret void +} + +; CHECK-LABEL: define void @callee_dynamic_alloca( +; RESULT-NEXT: %alloca = alloca i32, i32 %n, align 4 +; RESULT-NEXT: store i32 12345678, ptr %arg, align 4 +; RESULT-NEXT: store i32 0, ptr %alloca, align 4 +; RESULT-NEXT: ret void +define void @callee_dynamic_alloca(ptr %arg, i32 %n) "inline-max-stacksize"="4" { + %alloca = alloca i32, i32 %n + store i32 12345678, ptr %arg + store i32 0, ptr %alloca + ret void +} + +; CHECK-LABEL: define void @caller_dynamic_alloca( +; RESULT-NEXT: %savedstack = call ptr @llvm.stacksave.p0() +; RESULT-NEXT: %alloca.i = alloca i32, i32 %size, align 4 +; RESULT-NEXT: store i32 12345678, ptr %arg, align 4 +; RESULT-NEXT: store i32 0, ptr %alloca.i, align 4 +; RESULT-NEXT: call void @llvm.stackrestore.p0(ptr %savedstack) +; RESULT-NEXT: ret void +define void @caller_dynamic_alloca(ptr %arg, i32 %size) { + call void @callee_dynamic_alloca(ptr %arg, i32 %size) + ret void +} + +declare void @extern_noduplicate() noduplicate + +; CHECK-LABEL: define void @callee_noduplicate_calls( +; RESULT-NEXT: call void @extern_noduplicate() +; RESULT-NEXT: call void @extern_noduplicate() +; RESULT-NEXT: ret void +define void @callee_noduplicate_calls() { + call void @extern_noduplicate() + call void @extern_noduplicate() + ret void +} + +; Ignore noduplicate restrictions +; CHECK-LABEL: define void @caller_noduplicate_calls_callee( +; RESULT-NEXT: call void @extern_noduplicate() +; RESULT-NEXT: call void @extern_noduplicate() +; RESULT-NEXT: call void @extern_noduplicate() +; RESULT-NEXT: call void @extern_noduplicate() +; RESULT-NEXT: ret void +define void @caller_noduplicate_calls_callee() { + call void @callee_noduplicate_calls() + call void @callee_noduplicate_calls() + ret void +} + +; CHECK-LABEL: define void @sanitize_address_callee( +; RESULT-NEXT: store i32 333, ptr %arg +; RESULT-NEXT: ret void +define void @sanitize_address_callee(ptr %arg) sanitize_address { + store i32 333, ptr %arg + ret void +} + +; CHECK-LABEL: define void @no_sanitize_address_caller( +; RESULT-NEXT: store i32 333, ptr %arg +; RESULT-NEXT: ret void +define void @no_sanitize_address_caller(ptr %arg) { + call void @sanitize_address_callee(ptr %arg) + ret void +} + +; CHECK-LABEL: define float @nonstrictfp_callee( +; RESULT-NEXT: %add = fadd float %a, %a +; RESULT-NEXT: ret float %add +define float @nonstrictfp_callee(float %a) { + %add = fadd float %a, %a + ret float %add +} + +; CHECK-LABEL: define float @strictfp_caller( +; RESULT-NEXT: call float @llvm.experimental.constrained.fadd.f32( +; RESULT-NEXT: call float @llvm.experimental.constrained.fadd.f32( +; RESULT-NEXT: ret float %add +define float @strictfp_caller(float %a) 
strictfp { + %call = call float @nonstrictfp_callee(float %a) strictfp + %add = call float @llvm.experimental.constrained.fadd.f32(float %call, float 2.0, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret float %add +} + +; CHECK-LABEL: define float @strictfp_callee( +; RESULT-NEXT: call float @llvm.experimental.constrained.fadd.f32( +; RESULT-NEXT: ret float +define float @strictfp_callee(float %a) strictfp { + %add = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret float %add +} + +; FIXME: This should not inline. The inlined case should fail the +; verifier, but it does not. +; CHECK-LABEL: define float @nonstrictfp_caller( +; RESULT-NEXT: call float @llvm.experimental.constrained.fadd.f32( +; RESULT-NEXT: fadd float +; RESULT-NEXT: ret float +define float @nonstrictfp_caller(float %a) { + %call = call float @strictfp_callee(float %a) + %add1 = fadd float %call, 2.0 + ret float %add1 +} + +define void @caller_also_has_non_callee_use() { + call void @simple_callee(ptr @simple_callee) + ret void +} diff --git a/llvm/tools/llvm-reduce/CMakeLists.txt b/llvm/tools/llvm-reduce/CMakeLists.txt index 7be90bc87392b..c8673b42bee74 100644 --- a/llvm/tools/llvm-reduce/CMakeLists.txt +++ b/llvm/tools/llvm-reduce/CMakeLists.txt @@ -39,6 +39,7 @@ add_llvm_tool(llvm-reduce deltas/ReduceGlobalValues.cpp deltas/ReduceGlobalVarInitializers.cpp deltas/ReduceGlobalVars.cpp + deltas/ReduceInlineCallSites.cpp deltas/ReduceInstructions.cpp deltas/ReduceInstructionFlags.cpp deltas/ReduceInvokes.cpp diff --git a/llvm/tools/llvm-reduce/DeltaManager.cpp b/llvm/tools/llvm-reduce/DeltaManager.cpp index f5c62768e99ef..9b13202f9fec4 100644 --- a/llvm/tools/llvm-reduce/DeltaManager.cpp +++ b/llvm/tools/llvm-reduce/DeltaManager.cpp @@ -28,6 +28,7 @@ #include "deltas/ReduceGlobalVarInitializers.h" #include "deltas/ReduceGlobalVars.h" #include "deltas/ReduceIRReferences.h" +#include "deltas/ReduceInlineCallSites.h" #include "deltas/ReduceInstructionFlags.h" #include "deltas/ReduceInstructionFlagsMIR.h" #include "deltas/ReduceInstructions.h" diff --git a/llvm/tools/llvm-reduce/DeltaPasses.def b/llvm/tools/llvm-reduce/DeltaPasses.def index 3aed0ccd74b84..845b1061592ef 100644 --- a/llvm/tools/llvm-reduce/DeltaPasses.def +++ b/llvm/tools/llvm-reduce/DeltaPasses.def @@ -58,7 +58,7 @@ DELTA_PASS_IR("volatile", reduceVolatileInstructionsDeltaPass, "Reducing Volatil DELTA_PASS_IR("atomic-ordering", reduceAtomicOrderingDeltaPass, "Reducing Atomic Ordering") DELTA_PASS_IR("syncscopes", reduceAtomicSyncScopesDeltaPass, "Reducing Atomic Sync Scopes") DELTA_PASS_IR("instruction-flags", reduceInstructionFlagsDeltaPass, "Reducing Instruction Flags") - +DELTA_PASS_IR("inline-call-sites", reduceInlineCallSitesDeltaPass, "Inlining callsites") #ifndef DELTA_PASS_MIR #define DELTA_PASS_MIR(NAME, FUNC, DESC) diff --git a/llvm/tools/llvm-reduce/deltas/ReduceInlineCallSites.cpp b/llvm/tools/llvm-reduce/deltas/ReduceInlineCallSites.cpp new file mode 100644 index 0000000000000..cfef3672355ab --- /dev/null +++ b/llvm/tools/llvm-reduce/deltas/ReduceInlineCallSites.cpp @@ -0,0 +1,103 @@ +//===- ReduceInlineCallSites.cpp ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ReduceInlineCallSites.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/Cloning.h" + +using namespace llvm; + +extern cl::OptionCategory LLVMReduceOptions; + +static cl::opt<int> CallsiteInlineThreshold( + "reduce-callsite-inline-threshold", + cl::desc("Number of instructions in a function to unconditionally inline " + "(-1 for inline all)"), + cl::init(5), cl::cat(LLVMReduceOptions)); + +static bool functionHasMoreThanNonTerminatorInsts(const Function &F, + uint64_t NumInsts) { + uint64_t InstCount = 0; + for (const BasicBlock &BB : F) { + for (const Instruction &I : make_range(BB.begin(), std::prev(BB.end()))) { + (void)I; + if (InstCount++ > NumInsts) + return true; + } + } + + return false; +} + +static bool hasOnlyOneCallUse(const Function &F) { + unsigned UseCount = 0; + for (const Use &U : F.uses()) { + const CallBase *CB = dyn_cast<CallBase>(U.getUser()); + if (!CB || !CB->isCallee(&U)) + return false; + if (UseCount++ > 1) + return false; + } + + return UseCount == 1; +} + +// TODO: This could use more thought. +static bool inlineWillReduceComplexity(const Function &Caller, + const Function &Callee) { + // Backdoor to force all possible inlining. + if (CallsiteInlineThreshold < 0) + return true; + + if (!hasOnlyOneCallUse(Callee)) + return false; + + // Only permit inlining when both the caller and the callee are small; + // inlining a big callee into a big caller would grow the test case. + if (!functionHasMoreThanNonTerminatorInsts(Callee, CallsiteInlineThreshold) && + !functionHasMoreThanNonTerminatorInsts(Caller, CallsiteInlineThreshold)) + return true; + + return false; +} + +static void reduceCallSites(Oracle &O, Function &F) { + std::vector<std::pair<CallBase *, InlineFunctionInfo>> CallSitesToInline; + + for (Use &U : F.uses()) { + if (CallBase *CB = dyn_cast<CallBase>(U.getUser())) { + // Ignore callsites with wrong call type. + if (!CB->isCallee(&U)) + continue; + + // We do not consider isInlineViable here. It is overly conservative in + // cases that the inliner should handle correctly (e.g. disallowing inlining + // of functions with indirectbr). Some of the other cases it checks are + // real correctness issues, which we do need to worry about here. + + // TODO: Should we delete the function body? + InlineFunctionInfo IFI; + if (CanInlineCallSite(*CB, IFI).isSuccess() && + inlineWillReduceComplexity(*CB->getFunction(), F) && !O.shouldKeep()) + CallSitesToInline.emplace_back(CB, std::move(IFI)); + } + } + + // TODO: InlineFunctionImpl will implicitly perform some simplifications / + // optimizations which we should be able to opt out of. + for (auto &[CB, IFI] : CallSitesToInline) + InlineFunctionImpl(*CB, IFI); +} + +void llvm::reduceInlineCallSitesDeltaPass(Oracle &O, ReducerWorkItem &Program) { + for (Function &F : Program.getModule()) { + if (!F.isDeclaration()) + reduceCallSites(O, F); + } +} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceInlineCallSites.h b/llvm/tools/llvm-reduce/deltas/ReduceInlineCallSites.h new file mode 100644 index 0000000000000..1df31a1cd22e1 --- /dev/null +++ b/llvm/tools/llvm-reduce/deltas/ReduceInlineCallSites.h @@ -0,0 +1,18 @@ +//===- ReduceInlineCallSites.h ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEINLINECALLSITES_H +#define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEINLINECALLSITES_H + +#include "Delta.h" + +namespace llvm { +void reduceInlineCallSitesDeltaPass(Oracle &O, ReducerWorkItem &Program); +} // namespace llvm + +#endif
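
As a usage note, the new delta pass can be exercised on its own with an invocation shaped like the RUN lines in the tests above (the interestingness script name here is a placeholder):

  llvm-reduce --delta-passes=inline-call-sites -reduce-callsite-inline-threshold=-1 --test ./interesting.sh input.ll -o reduced.ll

With the default threshold of 5, a call site is only inlined when the callee's sole use is a direct call of the correct type and neither the caller nor the callee is large by non-terminator instruction count; passing -1 bypasses the heuristic entirely and inlines every call site that CanInlineCallSite accepts and the oracle does not keep.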