//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/AlwaysInliner.h"
+ #include "llvm/ADT/DenseMap.h"
+ #include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
+ #include "llvm/ADT/SmallPtrSet.h"
+ #include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+ #include "llvm/Analysis/DominanceFrontier.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+ #include "llvm/Analysis/ValueTracking.h"
+ #include "llvm/IR/BasicBlock.h"
+ #include "llvm/IR/Dominators.h"
#include "llvm/IR/Module.h"
+ #include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
+ #include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
+ #include "llvm/Transforms/Utils/PromoteMemToReg.h"
+

using namespace llvm;

#define DEBUG_TYPE "inline"
+ static cl::opt<bool> EnableMem2RegInterleaving(
+     "enable-always-inliner-mem2reg", cl::init(true), cl::Hidden,
+     cl::desc("Enable interleaving always-inlining with alloca promotion"));
+
+ STATISTIC(NumAllocasPromoted,
+           "Number of allocas promoted to registers after inlining");

namespace {

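+ // Returns true if this call site is eligible for always-inlining: the call
+ // itself must carry the always_inline attribute and must not also be marked
+ // noinline.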
+ bool canInlineCallBase(CallBase *CB) {
+   return CB->hasFnAttr(Attribute::AlwaysInline) &&
+          !CB->getAttributes().hasFnAttr(Attribute::NoInline);
+ }
+
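+ // Attempts to inline the call site CB, whose callee is F. Emits an
+ // optimization remark describing the success or failure of the attempt, and
+ // returns true iff the call was inlined.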
+ bool attemptInlineFunction(
+     Function &F, CallBase *CB, bool InsertLifetime,
+     function_ref<AAResults &(Function &)> &GetAAR,
+     function_ref<AssumptionCache &(Function &)> &GetAssumptionCache,
+     ProfileSummaryInfo &PSI) {
+   Function *Caller = CB->getCaller();
+   OptimizationRemarkEmitter ORE(Caller);
+   DebugLoc DLoc = CB->getDebugLoc();
+   BasicBlock *Block = CB->getParent();
+
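+   // MergeAttributes=true folds the callee's function attributes into the
+   // caller after a successful inline.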
+   InlineFunctionInfo IFI(GetAssumptionCache, &PSI, nullptr, nullptr);
+   InlineResult Res = InlineFunction(*CB, IFI, /*MergeAttributes=*/true,
+                                     &GetAAR(F), InsertLifetime);
+   if (!Res.isSuccess()) {
+     ORE.emit([&]() {
+       return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
+              << "'" << ore::NV("Callee", &F) << "' is not inlined into '"
+              << ore::NV("Caller", Caller)
+              << "': " << ore::NV("Reason", Res.getFailureReason());
+     });
+     return false;
+   }
+
+   emitInlinedIntoBasedOnCost(ORE, DLoc, Block, F, *Caller,
+                              InlineCost::getAlways("always inline attribute"),
+                              /*ForProfileContext=*/false, DEBUG_TYPE);
+
+   return true;
+ }
+ /// This function inlines all functions that are marked with the always_inline
+ /// attribute. It also removes the inlined functions if they are dead after the
+ /// inlining process.
bool AlwaysInlineImpl(
    Module &M, bool InsertLifetime, ProfileSummaryInfo &PSI,
    FunctionAnalysisManager *FAM,
@@ -50,36 +105,13 @@ bool AlwaysInlineImpl(

    for (User *U : F.users())
      if (auto *CB = dyn_cast<CallBase>(U))
-         if (CB->getCalledFunction() == &F &&
-             CB->hasFnAttr(Attribute::AlwaysInline) &&
-             !CB->getAttributes().hasFnAttr(Attribute::NoInline))
+         if (CB->getCalledFunction() == &F && canInlineCallBase(CB))
          Calls.insert(CB);

    for (CallBase *CB : Calls) {
      Function *Caller = CB->getCaller();
-       OptimizationRemarkEmitter ORE(Caller);
-       DebugLoc DLoc = CB->getDebugLoc();
-       BasicBlock *Block = CB->getParent();
-
-       InlineFunctionInfo IFI(GetAssumptionCache, &PSI, nullptr, nullptr);
-       InlineResult Res = InlineFunction(*CB, IFI, /*MergeAttributes=*/true,
-                                         &GetAAR(F), InsertLifetime);
-       if (!Res.isSuccess()) {
-         ORE.emit([&]() {
-           return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
-                  << "'" << ore::NV("Callee", &F) << "' is not inlined into '"
-                  << ore::NV("Caller", Caller)
-                  << "': " << ore::NV("Reason", Res.getFailureReason());
-         });
-         continue;
-       }
-
-       emitInlinedIntoBasedOnCost(
-           ORE, DLoc, Block, F, *Caller,
-           InlineCost::getAlways("always inline attribute"),
-           /*ForProfileContext=*/false, DEBUG_TYPE);
-
-       Changed = true;
+       Changed |= attemptInlineFunction(F, CB, InsertLifetime, GetAAR,
+                                        GetAssumptionCache, PSI);
      if (FAM)
        FAM->invalidate(*Caller, PreservedAnalyses::none());
    }
@@ -115,6 +147,245 @@ bool AlwaysInlineImpl(
  return Changed;
}

+ /// Promote allocas to registers if possible.
+ static void promoteAllocas(
+     Function *Caller, SmallPtrSetImpl<AllocaInst *> &AllocasToPromote,
+     function_ref<AssumptionCache &(Function &)> &GetAssumptionCache) {
+   if (AllocasToPromote.empty())
+     return;
+
+   SmallVector<AllocaInst *, 4> PromotableAllocas;
+   llvm::copy_if(AllocasToPromote, std::back_inserter(PromotableAllocas),
+                 isAllocaPromotable);
+   if (PromotableAllocas.empty())
+     return;
+
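+   // PromoteMemToReg needs an up-to-date dominator tree; compute one from
+   // scratch here since inlining has just changed the caller's CFG.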
+   DominatorTree DT(*Caller);
+   AssumptionCache &AC = GetAssumptionCache(*Caller);
+   PromoteMemToReg(PromotableAllocas, DT, &AC);
+   NumAllocasPromoted += PromotableAllocas.size();
+   // Emit a remark for the promotion.
+   OptimizationRemarkEmitter ORE(Caller);
+   DebugLoc DLoc = Caller->getEntryBlock().getTerminator()->getDebugLoc();
+   ORE.emit([&]() {
+     return OptimizationRemark(DEBUG_TYPE, "PromoteAllocas", DLoc,
+                               &Caller->getEntryBlock())
+            << "Promoting " << ore::NV("NumAlloca", PromotableAllocas.size())
+            << " allocas to SSA registers in function '"
+            << ore::NV("Function", Caller) << "'";
+   });
+   LLVM_DEBUG(dbgs() << "Promoted " << PromotableAllocas.size()
+                     << " allocas to registers in function "
+                     << Caller->getName() << "\n");
+ }
+
+ /// We use a different visitation order of functions here to solve a phase
+ /// ordering problem. After inlining, a caller function may have allocas that
+ /// were previously used for passing reference arguments to the callee and
+ /// that are now promotable to registers via SROA/mem2reg. However, if we just
+ /// let the AlwaysInliner continue inlining everything at once, the later SROA
+ /// pass in the pipeline will end up placing phis for these allocas into blocks
+ /// along the dominance frontier, which may extend further than desired (e.g.
+ /// loop headers). This can happen when the caller is then inlined into another
+ /// caller, and the allocas end up hoisted further before SROA is run.
+ ///
+ /// Instead, what we want to do, as best as we can, is inline leaf functions
+ /// into their callers and then run PromoteMemToReg() on the allocas that were
+ /// passed into the callee before it was inlined.
+ ///
+ /// We want to do this *before* the caller is inlined into another caller,
+ /// so that the alloca promotion happens before the alloca's scope is extended
+ /// by further inlining.
+ ///
+ /// Here's a simple pseudo-example:
+ /// outermost_caller() {
+ ///   for (...) {
+ ///     middle_caller();
+ ///   }
+ /// }
+ ///
+ /// middle_caller() {
+ ///   int stack_var;
+ ///   inner_callee(&stack_var);
+ /// }
+ ///
+ /// inner_callee(int *x) {
+ ///   // Do something with x.
+ /// }
+ ///
+ /// In this case, we want to inline inner_callee() into middle_caller() and
+ /// then promote stack_var to a register before we inline middle_caller() into
+ /// outermost_caller(). The regular always_inliner would inline everything at
+ /// once, and then SROA/mem2reg would promote stack_var to a register, but in
+ /// the context of outermost_caller(), which is not what we want.
+ bool AlwaysInlineInterleavedMem2RegImpl(
+     Module &M, bool InsertLifetime, ProfileSummaryInfo &PSI,
+     FunctionAnalysisManager &FAM,
+     function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
+     function_ref<AAResults &(Function &)> GetAAR) {
+
+   bool Changed = false;
+
+   // Use SetVector as we may rely on the deterministic iteration order for
+   // finding candidates later.
+   SetVector<Function *> AlwaysInlineFunctions;
+
+   MapVector<Function *, SmallVector<WeakVH>> CalleeToCallSites;
+   // Incoming always-inline calls for a function.
+   DenseMap<Function *, unsigned> IncomingAICount;
+   // Outgoing always-inline calls for a function.
+   DenseMap<Function *, unsigned> OutgoingAICount;
+   // First collect all always_inline functions.
+   for (Function &F : M) {
+     if (F.isDeclaration() || !F.hasFnAttribute(Attribute::AlwaysInline) ||
+         !isInlineViable(F).isSuccess())
+       continue;
+     if (F.isPresplitCoroutine())
+       continue;
+     AlwaysInlineFunctions.insert(&F);
+   }
+
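+   // Functions we have finished processing, and comdat functions whose
+   // erasure must be deferred until all inlining is complete.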
+   DenseSet<Function *> ProcessedFunctions;
+   SmallVector<Function *> InlinedComdatFns;
+   // Build the call graph of always_inline functions.
+   for (Function *F : AlwaysInlineFunctions) {
+     for (User *U : F->users()) {
+       if (auto *CB = dyn_cast<CallBase>(U)) {
+         if (CB->getCalledFunction() != F || !canInlineCallBase(CB))
+           continue;
+         CalleeToCallSites[F].push_back(WeakVH(CB));
+         // Keep track of the number of incoming calls to this function.
+         // This is used to determine the order in which we inline functions.
+         IncomingAICount[F]++;
+         if (AlwaysInlineFunctions.count(CB->getCaller()))
+           OutgoingAICount[CB->getCaller()]++;
+       }
+     }
+   }
+
+   SmallVector<Function *, 16> Worklist;
+   for (Function *F : AlwaysInlineFunctions) {
+     // If this is an always_inline leaf function, we select it for inlining.
+     if (OutgoingAICount.lookup(F) == 0)
+       Worklist.push_back(F);
+   }
+
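+   // Process callees bottom-up: each function taken off the worklist has no
+   // remaining outgoing always-inline calls, i.e. it is currently a leaf of
+   // the always-inline call graph.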
+   while (!Worklist.empty()) {
+     Function *Callee = Worklist.pop_back_val();
+     auto &Calls = CalleeToCallSites[Callee];
+
+     // Group the calls by their caller. This allows us to collect all allocas
+     // which need to be promoted together.
+     MapVector<Function *, SmallVector<WeakVH>> CallerToCalls;
+
+     for (WeakVH &WH : Calls)
+       if (auto *CB = dyn_cast_or_null<CallBase>(WH))
+         CallerToCalls[CB->getCaller()].push_back(WH);
+
+     // Now collect the allocas.
+     for (auto &CallerAndCalls : CallerToCalls) {
+       Function *Caller = CallerAndCalls.first;
+       SmallVector<WeakVH> &CallerCalls = CallerAndCalls.second;
+       SmallPtrSet<AllocaInst *, 4> AllocasToPromote;
+
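+       // Walk each call argument back to its underlying object; any caller
+       // alloca passed to the callee (possibly through casts or GEPs) becomes
+       // a promotion candidate once the call is inlined.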
+       for (WeakVH &WH : CallerCalls) {
+         if (auto *CB = dyn_cast_or_null<CallBase>(WH)) {
+           for (Value *Arg : CB->args())
+             if (auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Arg)))
+               AllocasToPromote.insert(AI);
+         }
+       }
+
+       // Do the actual inlining.
+       bool InlinedAny = false;
+       SmallVector<WeakVH> SuccessfullyInlinedCalls;
+
+       for (WeakVH &WH : CallerCalls) {
+         if (auto *CB = dyn_cast_or_null<CallBase>(WH)) {
+           if (attemptInlineFunction(*Callee, CB, InsertLifetime, GetAAR,
+                                     GetAssumptionCache, PSI)) {
+             Changed = true;
+             InlinedAny = true;
+             SuccessfullyInlinedCalls.push_back(WH);
+           }
+         }
+       }
+
+       if (!InlinedAny)
+         continue;
+
+       // Promote any allocas that were used by the just-inlined call site.
+       promoteAllocas(Caller, AllocasToPromote, GetAssumptionCache);
+
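+       // Update the call-graph bookkeeping: the caller now has fewer
+       // outgoing always-inline calls.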
+       unsigned InlinedCountForCaller = SuccessfullyInlinedCalls.size();
+       if (!AlwaysInlineFunctions.contains(Caller))
+         continue; // Caller wasn't part of our always-inline call graph.
+       unsigned OldOutgoing = OutgoingAICount[Caller];
+       assert(OldOutgoing >= InlinedCountForCaller &&
+              "Inlined more calls than we had outgoing calls!");
+       OutgoingAICount[Caller] = OldOutgoing - InlinedCountForCaller;
+       // If these were the last outgoing calls in the caller, we can now
+       // consider it a leaf function and add it to the worklist.
+       if (OutgoingAICount[Caller] == 0 && !ProcessedFunctions.count(Caller))
+         Worklist.push_back(Caller);
+     }
+
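+     // This callee is fully processed; remove it from the call graph and
+     // erase its body if it is now dead.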
+     ProcessedFunctions.insert(Callee);
+     AlwaysInlineFunctions.remove(Callee);
+     CalleeToCallSites.erase(Callee);
+
+     Callee->removeDeadConstantUsers();
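+     // Comdat functions cannot safely be erased one by one; gather them and
+     // filter out the dead ones as a group after the worklist is drained.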
+     if (Callee->hasFnAttribute(Attribute::AlwaysInline) &&
+         Callee->isDefTriviallyDead()) {
+       if (Callee->hasComdat()) {
+         InlinedComdatFns.push_back(Callee);
+       } else {
+         M.getFunctionList().erase(Callee);
+         Changed = true;
+       }
+     }
+
+     if (AlwaysInlineFunctions.empty())
+       break;
+
+     // If we have no more leaf functions to inline, we use a greedy heuristic
+     // that selects the function with the most incoming calls. The intuition
+     // is that inlining this function will eliminate the most call sites and
+     // give the highest chance of creating new leaf functions.
+     if (Worklist.empty()) {
+       Function *BestFunc = nullptr;
+       unsigned MaxIncoming = 0;
+       for (Function *F : AlwaysInlineFunctions) {
+         if (ProcessedFunctions.count(F))
+           continue;
+
+         unsigned CurrentIncoming = IncomingAICount.lookup(F);
+         if (!BestFunc || CurrentIncoming > MaxIncoming) {
+           BestFunc = F;
+           MaxIncoming = CurrentIncoming;
+         }
+       }
+       Worklist.push_back(BestFunc);
+     }
+   }
+
+   if (!InlinedComdatFns.empty()) {
+     filterDeadComdatFunctions(InlinedComdatFns);
+     for (Function *F : InlinedComdatFns) {
+       M.getFunctionList().erase(F);
+       Changed = true;
+     }
+   }
+
+   // We may have missed some call sites that were marked as always_inline but
+   // for which the callee function itself wasn't always_inline. Call the
+   // standard handler here to deal with those.
+   Changed |= AlwaysInlineImpl(M, InsertLifetime, PSI, &FAM, GetAssumptionCache,
+                               GetAAR);
+   return Changed;
+ }
+
+
struct AlwaysInlinerLegacyPass : public ModulePass {
  bool InsertLifetime;

@@ -177,8 +448,14 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
  };
  auto &PSI = MAM.getResult<ProfileSummaryAnalysis>(M);

-   bool Changed = AlwaysInlineImpl(M, InsertLifetime, PSI, &FAM,
-                                   GetAssumptionCache, GetAAR);
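+   // Interleaved inlining and alloca promotion is on by default; it can be
+   // disabled with -enable-always-inliner-mem2reg=false.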
+   bool Changed = false;
+   if (EnableMem2RegInterleaving) {
+     Changed = AlwaysInlineInterleavedMem2RegImpl(M, InsertLifetime, PSI, FAM,
+                                                  GetAssumptionCache, GetAAR);
+   } else {
+     Changed = AlwaysInlineImpl(M, InsertLifetime, PSI, &FAM,
+                                GetAssumptionCache, GetAAR);
+   }
  if (!Changed)
    return PreservedAnalyses::all();
