2929#include " llvm/ADT/SmallSet.h"
3030#include " llvm/ADT/SmallVector.h"
3131#include " llvm/ADT/Statistic.h"
32+ #include " llvm/ADT/StringExtras.h"
3233#include " llvm/Analysis/MemoryProfileInfo.h"
3334#include " llvm/Analysis/ModuleSummaryAnalysis.h"
3435#include " llvm/Analysis/OptimizationRemarkEmitter.h"
4041#include " llvm/Support/CommandLine.h"
4142#include " llvm/Support/GraphWriter.h"
4243#include " llvm/Support/InterleavedRange.h"
44+ #include " llvm/Support/SHA1.h"
4345#include " llvm/Support/raw_ostream.h"
4446#include " llvm/Transforms/IPO.h"
4547#include " llvm/Transforms/Utils/CallPromotionUtils.h"
@@ -60,6 +62,9 @@ STATISTIC(FunctionClonesThinBackend,
6062 " Number of function clones created during ThinLTO backend" );
6163STATISTIC (FunctionsClonedThinBackend,
6264 " Number of functions that had clones created during ThinLTO backend" );
65+ STATISTIC (
66+ FunctionCloneDuplicatesThinBackend,
67+ " Number of function clone duplicates detected during ThinLTO backend" );
6368STATISTIC (AllocTypeNotCold, " Number of not cold static allocations (possibly "
6469 " cloned) during whole program analysis" );
6570STATISTIC (AllocTypeCold, " Number of cold static allocations (possibly cloned) "
@@ -5186,19 +5191,129 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
51865191 return Changed;
51875192}
51885193
5194+ // Compute a SHA1 hash of the callsite and alloc version information of clone I
5195+ // in the summary, to use in detection of duplicate clones.
5196+ std::string ComputeHash (StringMap<Function *> &HashToFunc, FunctionSummary *FS,
5197+ unsigned I) {
5198+ SHA1 Hasher;
5199+ // Update hash with any callsites that call non-default (non-zero) callee
5200+ // versions.
5201+ for (auto &SN : FS->callsites ()) {
5202+ // In theory all callsites and allocs in this function should have the same
5203+ // number of clone entries, but handle any discrepancies gracefully below
5204+ // for NDEBUG builds.
5205+ assert (
5206+ SN.Clones .size () > I &&
5207+ " Callsite summary has fewer entries than other summaries in function" );
5208+ if (SN.Clones .size () <= I || !SN.Clones [I])
5209+ continue ;
5210+ uint8_t Data[4 ];
5211+ support::endian::write32le (Data, SN.Clones [I]);
5212+ Hasher.update (Data);
5213+ }
5214+ // Update hash with any allocs that have non-default (non-None) hints.
5215+ for (auto &AN : FS->allocs ()) {
5216+ // In theory all callsites and allocs in this function should have the same
5217+ // number of clone entries, but handle any discrepancies gracefully below
5218+ // for NDEBUG builds.
5219+ assert (AN.Versions .size () > I &&
5220+ " Alloc summary has fewer entries than other summaries in function" );
5221+ if (AN.Versions .size () <= I ||
5222+ (AllocationType)AN.Versions [I] == AllocationType::None)
5223+ continue ;
5224+ Hasher.update (ArrayRef<uint8_t >(&AN.Versions [I], 1 ));
5225+ }
5226+ return toHex (Hasher.result ());
5227+ }
5228+
51895229static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4 > createFunctionClones (
51905230 Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE,
51915231 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1 >>
5192- &FuncToAliasMap) {
5232+ &FuncToAliasMap,
5233+ FunctionSummary *FS) {
5234+ auto TakeDeclNameAndReplace = [](GlobalValue *DeclGV, GlobalValue *NewGV) {
5235+ // We might have created this when adjusting callsite in another
5236+ // function. It should be a declaration.
5237+ assert (DeclGV->isDeclaration ());
5238+ NewGV->takeName (DeclGV);
5239+ DeclGV->replaceAllUsesWith (NewGV);
5240+ DeclGV->eraseFromParent ();
5241+ };
5242+
5243+ // Handle aliases to this function, and create analogous alias clones to the
5244+ // provided clone of this function.
5245+ auto CloneFuncAliases = [&](Function *NewF, unsigned I) {
5246+ if (!FuncToAliasMap.count (&F))
5247+ return ;
5248+ for (auto *A : FuncToAliasMap[&F]) {
5249+ std::string AliasName = getMemProfFuncName (A->getName (), I);
5250+ auto *PrevA = M.getNamedAlias (AliasName);
5251+ auto *NewA = GlobalAlias::create (A->getValueType (),
5252+ A->getType ()->getPointerAddressSpace (),
5253+ A->getLinkage (), AliasName, NewF);
5254+ NewA->copyAttributesFrom (A);
5255+ if (PrevA)
5256+ TakeDeclNameAndReplace (PrevA, NewA);
5257+ }
5258+ };
5259+
51935260 // The first "clone" is the original copy, we should only call this if we
51945261 // needed to create new clones.
51955262 assert (NumClones > 1 );
51965263 SmallVector<std::unique_ptr<ValueToValueMapTy>, 4 > VMaps;
51975264 VMaps.reserve (NumClones - 1 );
51985265 FunctionsClonedThinBackend++;
5266+
5267+ // Map of hash of callsite/alloc versions to the instantiated function clone
5268+ // (possibly the original) implementing those calls. Used to avoid
5269+ // instantiating duplicate function clones.
5270+ // FIXME: Ideally the thin link would not generate such duplicate clones to
5271+ // start with, but right now it happens due to phase ordering in the function
5272+ // assignment and possible new clones that produces. We simply make each
5273+ // duplicate an alias to the matching instantiated clone recorded in the map
5274+ // (except for available_externally which are made declarations as they would
5275+ // be aliases in the prevailing module, and available_externally aliases are
5276+ // not well supported right now).
5277+ StringMap<Function *> HashToFunc;
5278+
5279+ // Save the hash of the original function version.
5280+ auto Hash = ComputeHash (HashToFunc, FS, 0 );
5281+ HashToFunc[Hash] = &F;
5282+
51995283 for (unsigned I = 1 ; I < NumClones; I++) {
52005284 VMaps.emplace_back (std::make_unique<ValueToValueMapTy>());
5285+ std::string Name = getMemProfFuncName (F.getName (), I);
5286+ auto Hash = ComputeHash (HashToFunc, FS, I);
5287+ // If this clone would duplicate a previously seen clone, don't generate the
5288+ // duplicate clone body, just make an alias to satisfy any (potentially
5289+ // cross-module) references.
5290+ if (HashToFunc.contains (Hash)) {
5291+ FunctionCloneDuplicatesThinBackend++;
5292+ auto *Func = HashToFunc[Hash];
5293+ if (Func->hasAvailableExternallyLinkage ()) {
5294+ // Skip these as EliminateAvailableExternallyPass does not handle
5295+ // available_externally aliases correctly and we end up with an
5296+ // available_externally alias to a declaration. Just create a
5297+ // declaration for now as we know we will have a definition in another
5298+ // module.
5299+ auto Decl = M.getOrInsertFunction (Name, Func->getFunctionType ());
5300+ ORE.emit (OptimizationRemark (DEBUG_TYPE, " MemprofClone" , &F)
5301+ << " created clone decl " << ore::NV (" Decl" , Decl.getCallee ()));
5302+ continue ;
5303+ }
5304+ auto *PrevF = M.getFunction (Name);
5305+ auto *Alias = GlobalAlias::create (Name, Func);
5306+ if (PrevF)
5307+ TakeDeclNameAndReplace (PrevF, Alias);
5308+ ORE.emit (OptimizationRemark (DEBUG_TYPE, " MemprofClone" , &F)
5309+ << " created clone alias " << ore::NV (" Alias" , Alias));
5310+
5311+ // Now handle aliases to this function, and clone those as well.
5312+ CloneFuncAliases (Func, I);
5313+ continue ;
5314+ }
52015315 auto *NewF = CloneFunction (&F, *VMaps.back ());
5316+ HashToFunc[Hash] = NewF;
52025317 FunctionClonesThinBackend++;
52035318 // Strip memprof and callsite metadata from clone as they are no longer
52045319 // needed.
@@ -5208,40 +5323,17 @@ static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones(
52085323 Inst.setMetadata (LLVMContext::MD_callsite, nullptr );
52095324 }
52105325 }
5211- std::string Name = getMemProfFuncName (F.getName (), I);
52125326 auto *PrevF = M.getFunction (Name);
5213- if (PrevF) {
5214- // We might have created this when adjusting callsite in another
5215- // function. It should be a declaration.
5216- assert (PrevF->isDeclaration ());
5217- NewF->takeName (PrevF);
5218- PrevF->replaceAllUsesWith (NewF);
5219- PrevF->eraseFromParent ();
5220- } else
5327+ if (PrevF)
5328+ TakeDeclNameAndReplace (PrevF, NewF);
5329+ else
52215330 NewF->setName (Name);
52225331 updateSubprogramLinkageName (NewF, Name);
52235332 ORE.emit (OptimizationRemark (DEBUG_TYPE, " MemprofClone" , &F)
52245333 << " created clone " << ore::NV (" NewFunction" , NewF));
52255334
52265335 // Now handle aliases to this function, and clone those as well.
5227- if (!FuncToAliasMap.count (&F))
5228- continue ;
5229- for (auto *A : FuncToAliasMap[&F]) {
5230- std::string Name = getMemProfFuncName (A->getName (), I);
5231- auto *PrevA = M.getNamedAlias (Name);
5232- auto *NewA = GlobalAlias::create (A->getValueType (),
5233- A->getType ()->getPointerAddressSpace (),
5234- A->getLinkage (), Name, NewF);
5235- NewA->copyAttributesFrom (A);
5236- if (PrevA) {
5237- // We might have created this when adjusting callsite in another
5238- // function. It should be a declaration.
5239- assert (PrevA->isDeclaration ());
5240- NewA->takeName (PrevA);
5241- PrevA->replaceAllUsesWith (NewA);
5242- PrevA->eraseFromParent ();
5243- }
5244- }
5336+ CloneFuncAliases (NewF, I);
52455337 }
52465338 return VMaps;
52475339}
@@ -5401,7 +5493,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
54015493 SmallVector<std::unique_ptr<ValueToValueMapTy>, 4 > VMaps;
54025494 bool ClonesCreated = false ;
54035495 unsigned NumClonesCreated = 0 ;
5404- auto CloneFuncIfNeeded = [&](unsigned NumClones) {
5496+ auto CloneFuncIfNeeded = [&](unsigned NumClones, FunctionSummary *FS ) {
54055497 // We should at least have version 0 which is the original copy.
54065498 assert (NumClones > 0 );
54075499 // If only one copy needed use original.
@@ -5415,7 +5507,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
54155507 assert (NumClonesCreated == NumClones);
54165508 return ;
54175509 }
5418- VMaps = createFunctionClones (F, NumClones, M, ORE, FuncToAliasMap);
5510+ VMaps = createFunctionClones (F, NumClones, M, ORE, FuncToAliasMap, FS );
54195511 // The first "clone" is the original copy, which doesn't have a VMap.
54205512 assert (VMaps.size () == NumClones - 1 );
54215513 Changed = true ;
@@ -5424,9 +5516,9 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
54245516 };
54255517
54265518 auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB,
5427- Function *CalledFunction) {
5519+ Function *CalledFunction, FunctionSummary *FS ) {
54285520 // Perform cloning if not yet done.
5429- CloneFuncIfNeeded (/* NumClones=*/ StackNode.Clones .size ());
5521+ CloneFuncIfNeeded (/* NumClones=*/ StackNode.Clones .size (), FS );
54305522
54315523 assert (!isMemProfClone (*CalledFunction));
54325524
@@ -5448,6 +5540,8 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
54485540 // below.
54495541 auto CalleeOrigName = CalledFunction->getName ();
54505542 for (unsigned J = 0 ; J < StackNode.Clones .size (); J++) {
5543+ if (J > 0 && VMaps[J - 1 ]->empty ())
5544+ continue ;
54515545 // Do nothing if this version calls the original version of its
54525546 // callee.
54535547 if (!StackNode.Clones [J])
@@ -5591,7 +5685,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
55915685#endif
55925686
55935687 // Perform cloning if not yet done.
5594- CloneFuncIfNeeded (/* NumClones=*/ AllocNode.Versions .size ());
5688+ CloneFuncIfNeeded (/* NumClones=*/ AllocNode.Versions .size (), FS );
55955689
55965690 OrigAllocsThinBackend++;
55975691 AllocVersionsThinBackend += AllocNode.Versions .size ();
@@ -5624,6 +5718,8 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
56245718
56255719 // Update the allocation types per the summary info.
56265720 for (unsigned J = 0 ; J < AllocNode.Versions .size (); J++) {
5721+ if (J > 0 && VMaps[J - 1 ]->empty ())
5722+ continue ;
56275723 // Ignore any that didn't get an assigned allocation type.
56285724 if (AllocNode.Versions [J] == (uint8_t )AllocationType::None)
56295725 continue ;
@@ -5671,7 +5767,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
56715767 // we don't need to do ICP, but might need to clone this
56725768 // function as it is the target of other cloned calls.
56735769 if (NumClones)
5674- CloneFuncIfNeeded (NumClones);
5770+ CloneFuncIfNeeded (NumClones, FS );
56755771 }
56765772
56775773 else {
@@ -5691,7 +5787,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
56915787 }
56925788#endif
56935789
5694- CloneCallsite (StackNode, CB, CalledFunction);
5790+ CloneCallsite (StackNode, CB, CalledFunction, FS );
56955791 }
56965792 } else if (CB->isTailCall () && CalledFunction) {
56975793 // Locate the synthesized callsite info for the callee VI, if any was
@@ -5701,7 +5797,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
57015797 if (CalleeVI && MapTailCallCalleeVIToCallsite.count (CalleeVI)) {
57025798 auto Callsite = MapTailCallCalleeVIToCallsite.find (CalleeVI);
57035799 assert (Callsite != MapTailCallCalleeVIToCallsite.end ());
5704- CloneCallsite (Callsite->second , CB, CalledFunction);
5800+ CloneCallsite (Callsite->second , CB, CalledFunction, FS );
57055801 }
57065802 }
57075803 }
@@ -5847,6 +5943,8 @@ void MemProfContextDisambiguation::performICP(
58475943 // check.
58485944 CallBase *CBClone = CB;
58495945 for (unsigned J = 0 ; J < NumClones; J++) {
5946+ if (J > 0 && VMaps[J - 1 ]->empty ())
5947+ continue ;
58505948 // Copy 0 is the original function.
58515949 if (J > 0 )
58525950 CBClone = cast<CallBase>((*VMaps[J - 1 ])[CB]);
@@ -5892,6 +5990,8 @@ void MemProfContextDisambiguation::performICP(
58925990 // TotalCount and the number promoted.
58935991 CallBase *CBClone = CB;
58945992 for (unsigned J = 0 ; J < NumClones; J++) {
5993+ if (J > 0 && VMaps[J - 1 ]->empty ())
5994+ continue ;
58955995 // Copy 0 is the original function.
58965996 if (J > 0 )
58975997 CBClone = cast<CallBase>((*VMaps[J - 1 ])[CB]);
0 commit comments