4343#include " llvm/Analysis/CallGraph.h"
4444#include " llvm/Analysis/TargetTransformInfo.h"
4545#include " llvm/IR/Function.h"
46+ #include " llvm/IR/InstIterator.h"
4647#include " llvm/IR/Instruction.h"
4748#include " llvm/IR/Module.h"
4849#include " llvm/IR/User.h"
@@ -103,6 +104,11 @@ static cl::opt<bool> NoExternalizeGlobals(
103104 cl::desc (" disables externalization of global variable with local linkage; "
104105 " may cause globals to be duplicated which increases binary size" ));
105106
// Hidden boolean command-line flag
// (-amdgpu-module-splitting-no-externalize-address-taken).
// When set, splitAMDGPUModule skips the step that externalizes
// local-linkage functions whose address is taken (that step is guarded by
// `if (!NoExternalizeOnAddrTaken)`).
107+ static cl::opt<bool > NoExternalizeOnAddrTaken (
108+ " amdgpu-module-splitting-no-externalize-address-taken" , cl::Hidden,
109+ cl::desc (
110+ " disables externalization of functions whose addresses are taken" ));
111+
106112static cl::opt<std::string>
107113 ModuleDotCfgOutput (" amdgpu-module-splitting-print-module-dotcfg" ,
108114 cl::Hidden,
@@ -482,6 +488,9 @@ void SplitGraph::buildGraph(CallGraph &CG) {
482488 dbgs ()
483489 << " [build graph] constructing graph representation of the input\n " );
484490
491+ // FIXME(?): Is the callgraph really worth using if we have to iterate the
492+ // function again whenever it fails to give us enough information?
493+
485494 // We build the graph by just iterating all functions in the module and
486495 // working on their direct callees. At the end, all nodes should be linked
487496 // together as expected.
@@ -492,29 +501,52 @@ void SplitGraph::buildGraph(CallGraph &CG) {
492501 continue ;
493502
494503 // Look at direct callees and create the necessary edges in the graph.
495- bool HasIndirectCall = false ;
496- Node &N = getNode (Cache, Fn) ;
504+ SetVector< const Function *> DirectCallees ;
505+ bool CallsExternal = false ;
497506 for (auto &CGEntry : *CG[&Fn]) {
498507 auto *CGNode = CGEntry.second ;
499- auto *Callee = CGNode->getFunction ();
500- if (!Callee) {
501- // TODO: Don't consider inline assembly as indirect calls.
502- if (CGNode == CG.getCallsExternalNode ())
503- HasIndirectCall = true ;
504- continue ;
505- }
506-
507- if (!Callee->isDeclaration ())
508- createEdge (N, getNode (Cache, *Callee), EdgeKind::DirectCall);
508+ if (auto *Callee = CGNode->getFunction ()) {
509+ if (!Callee->isDeclaration ())
510+ DirectCallees.insert (Callee);
511+ } else if (CGNode == CG.getCallsExternalNode ())
512+ CallsExternal = true ;
509513 }
510514
511515 // Keep track of this function if it contains an indirect call and/or if it
512516 // can be indirectly called.
513- if (HasIndirectCall) {
514- LLVM_DEBUG (dbgs () << " indirect call found in " << Fn.getName () << " \n " );
515- FnsWithIndirectCalls.push_back (&Fn);
517+ if (CallsExternal) {
518+ LLVM_DEBUG (dbgs () << " [!] callgraph is incomplete for " ;
519+ Fn.printAsOperand (dbgs ());
520+ dbgs () << " - analyzing function\n " );
521+
522+ bool HasIndirectCall = false ;
523+ for (const auto &Inst : instructions (Fn)) {
524+ // look at all calls without a direct callee.
525+ if (const auto *CB = dyn_cast<CallBase>(&Inst);
526+ CB && !CB->getCalledFunction ()) {
527+ // inline assembly can be ignored, unless InlineAsmIsIndirectCall is
528+ // true.
529+ if (CB->isInlineAsm ()) {
530+ LLVM_DEBUG (dbgs () << " found inline assembly\n " );
531+ continue ;
532+ }
533+
534+ // everything else is handled conservatively.
535+ HasIndirectCall = true ;
536+ break ;
537+ }
538+ }
539+
540+ if (HasIndirectCall) {
541+ LLVM_DEBUG (dbgs () << " indirect call found\n " );
542+ FnsWithIndirectCalls.push_back (&Fn);
543+ }
516544 }
517545
546+ Node &N = getNode (Cache, Fn);
547+ for (const auto *Callee : DirectCallees)
548+ createEdge (N, getNode (Cache, *Callee), EdgeKind::DirectCall);
549+
518550 if (canBeIndirectlyCalled (Fn))
519551 IndirectlyCallableFns.push_back (&Fn);
520552 }
@@ -1326,13 +1358,21 @@ static void splitAMDGPUModule(
13261358 //
13271359 // Additionally, it guides partitioning to not duplicate this function if it's
13281360 // called directly at some point.
1329- for (auto &Fn : M) {
1330- if (Fn.hasAddressTaken ()) {
1331- if (Fn.hasLocalLinkage ()) {
1332- LLVM_DEBUG (dbgs () << " [externalize] " << Fn.getName ()
1333- << " because its address is taken\n " );
1361+ //
1362+ // TODO: Could we be smarter about this ? This makes all functions whose
1363+ // addresses are taken non-copyable. We should probably model this type of
1364+ // constraint in the graph and use it to guide splitting, instead of
1365+ // externalizing like this. Maybe non-copyable should really mean "keep one
1366+ // visible copy, then internalize all other copies" for some functions?
1367+ if (!NoExternalizeOnAddrTaken) {
1368+ for (auto &Fn : M) {
1369+ // TODO: Should aliases count? Probably not but they're so rare I'm not
1370+ // sure it's worth fixing.
1371+ if (Fn.hasLocalLinkage () && Fn.hasAddressTaken ()) {
1372+ LLVM_DEBUG (dbgs () << " [externalize] " ; Fn.printAsOperand (dbgs ());
1373+ dbgs () << " because its address is taken\n " );
1374+ externalize (Fn);
13341375 }
1335- externalize (Fn);
13361376 }
13371377 }
13381378
@@ -1368,7 +1408,8 @@ static void splitAMDGPUModule(
13681408 dbgs () << " [graph] nodes:\n " ;
13691409 for (const SplitGraph::Node *N : SG.nodes ()) {
13701410 dbgs () << " - [" << N->getID () << " ]: " << N->getName () << " "
1371- << (N->isGraphEntryPoint () ? " (entry)" : " " ) << " \n " ;
1411+ << (N->isGraphEntryPoint () ? " (entry)" : " " ) << " "
1412+ << (N->isNonCopyable () ? " (noncopyable)" : " " ) << " \n " ;
13721413 }
13731414 });
13741415
0 commit comments