Skip to content

Commit d322ebd

Browse files
[MemProf] Suppress duplicate clones in the LTO backend (#161551)
In some cases due to phase ordering issues with re-cloning during function assignment, we may end up with duplicate clones in the summaries (calling the same set of callee clones and/or allocation hints). Ideally we would fix this in the thin link, but for now, detect and suppress these in the LTO backend. In order to satisfy possibly cross-module references, make each duplicate an alias to the first identical copy, which gets materialized. This reduces ThinLTO backend compile times.
1 parent 4368616 commit d322ebd

File tree

3 files changed

+285
-41
lines changed

3 files changed

+285
-41
lines changed

llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp

Lines changed: 142 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "llvm/ADT/SmallSet.h"
3030
#include "llvm/ADT/SmallVector.h"
3131
#include "llvm/ADT/Statistic.h"
32+
#include "llvm/ADT/StringExtras.h"
3233
#include "llvm/Analysis/MemoryProfileInfo.h"
3334
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
3435
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -40,6 +41,7 @@
4041
#include "llvm/Support/CommandLine.h"
4142
#include "llvm/Support/GraphWriter.h"
4243
#include "llvm/Support/InterleavedRange.h"
44+
#include "llvm/Support/SHA1.h"
4345
#include "llvm/Support/raw_ostream.h"
4446
#include "llvm/Transforms/IPO.h"
4547
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
@@ -60,6 +62,9 @@ STATISTIC(FunctionClonesThinBackend,
6062
"Number of function clones created during ThinLTO backend");
6163
STATISTIC(FunctionsClonedThinBackend,
6264
"Number of functions that had clones created during ThinLTO backend");
65+
STATISTIC(
66+
FunctionCloneDuplicatesThinBackend,
67+
"Number of function clone duplicates detected during ThinLTO backend");
6368
STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly "
6469
"cloned) during whole program analysis");
6570
STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) "
@@ -5186,19 +5191,127 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
51865191
return Changed;
51875192
}
51885193

5194+
// Compute a SHA1 hash of the callsite and alloc version information of clone I
5195+
// in the summary, to use in detection of duplicate clones.
5196+
uint64_t ComputeHash(const FunctionSummary *FS, unsigned I) {
5197+
SHA1 Hasher;
5198+
// Update hash with any callsites that call non-default (non-zero) callee
5199+
// versions.
5200+
for (auto &SN : FS->callsites()) {
5201+
// In theory all callsites and allocs in this function should have the same
5202+
// number of clone entries, but handle any discrepancies gracefully below
5203+
// for NDEBUG builds.
5204+
assert(
5205+
SN.Clones.size() > I &&
5206+
"Callsite summary has fewer entries than other summaries in function");
5207+
if (SN.Clones.size() <= I || !SN.Clones[I])
5208+
continue;
5209+
uint8_t Data[sizeof(SN.Clones[I])];
5210+
support::endian::write32le(Data, SN.Clones[I]);
5211+
Hasher.update(Data);
5212+
}
5213+
// Update hash with any allocs that have non-default (non-None) hints.
5214+
for (auto &AN : FS->allocs()) {
5215+
// In theory all callsites and allocs in this function should have the same
5216+
// number of clone entries, but handle any discrepancies gracefully below
5217+
// for NDEBUG builds.
5218+
assert(AN.Versions.size() > I &&
5219+
"Alloc summary has fewer entries than other summaries in function");
5220+
if (AN.Versions.size() <= I ||
5221+
(AllocationType)AN.Versions[I] == AllocationType::None)
5222+
continue;
5223+
Hasher.update(ArrayRef<uint8_t>(&AN.Versions[I], 1));
5224+
}
5225+
return support::endian::read64le(Hasher.result().data());
5226+
}
5227+
51895228
static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones(
51905229
Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE,
51915230
std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
5192-
&FuncToAliasMap) {
5231+
&FuncToAliasMap,
5232+
FunctionSummary *FS) {
5233+
auto TakeDeclNameAndReplace = [](GlobalValue *DeclGV, GlobalValue *NewGV) {
5234+
// We might have created this when adjusting callsite in another
5235+
// function. It should be a declaration.
5236+
assert(DeclGV->isDeclaration());
5237+
NewGV->takeName(DeclGV);
5238+
DeclGV->replaceAllUsesWith(NewGV);
5239+
DeclGV->eraseFromParent();
5240+
};
5241+
5242+
// Handle aliases to this function, and create analogous alias clones to the
5243+
// provided clone of this function.
5244+
auto CloneFuncAliases = [&](Function *NewF, unsigned I) {
5245+
if (!FuncToAliasMap.count(&F))
5246+
return;
5247+
for (auto *A : FuncToAliasMap[&F]) {
5248+
std::string AliasName = getMemProfFuncName(A->getName(), I);
5249+
auto *PrevA = M.getNamedAlias(AliasName);
5250+
auto *NewA = GlobalAlias::create(A->getValueType(),
5251+
A->getType()->getPointerAddressSpace(),
5252+
A->getLinkage(), AliasName, NewF);
5253+
NewA->copyAttributesFrom(A);
5254+
if (PrevA)
5255+
TakeDeclNameAndReplace(PrevA, NewA);
5256+
}
5257+
};
5258+
51935259
// The first "clone" is the original copy, we should only call this if we
51945260
// needed to create new clones.
51955261
assert(NumClones > 1);
51965262
SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
51975263
VMaps.reserve(NumClones - 1);
51985264
FunctionsClonedThinBackend++;
5265+
5266+
// Map of hash of callsite/alloc versions to the instantiated function clone
5267+
// (possibly the original) implementing those calls. Used to avoid
5268+
// instantiating duplicate function clones.
5269+
// FIXME: Ideally the thin link would not generate such duplicate clones to
5270+
// start with, but right now it happens due to phase ordering in the function
5271+
// assignment and possible new clones that produces. We simply make each
5272+
// duplicate an alias to the matching instantiated clone recorded in the map
5273+
// (except for available_externally which are made declarations as they would
5274+
// be aliases in the prevailing module, and available_externally aliases are
5275+
// not well supported right now).
5276+
DenseMap<uint64_t, Function *> HashToFunc;
5277+
5278+
// Save the hash of the original function version.
5279+
HashToFunc[ComputeHash(FS, 0)] = &F;
5280+
51995281
for (unsigned I = 1; I < NumClones; I++) {
52005282
VMaps.emplace_back(std::make_unique<ValueToValueMapTy>());
5283+
std::string Name = getMemProfFuncName(F.getName(), I);
5284+
auto Hash = ComputeHash(FS, I);
5285+
// If this clone would duplicate a previously seen clone, don't generate the
5286+
// duplicate clone body, just make an alias to satisfy any (potentially
5287+
// cross-module) references.
5288+
if (HashToFunc.contains(Hash)) {
5289+
FunctionCloneDuplicatesThinBackend++;
5290+
auto *Func = HashToFunc[Hash];
5291+
if (Func->hasAvailableExternallyLinkage()) {
5292+
// Skip these as EliminateAvailableExternallyPass does not handle
5293+
// available_externally aliases correctly and we end up with an
5294+
// available_externally alias to a declaration. Just create a
5295+
// declaration for now as we know we will have a definition in another
5296+
// module.
5297+
auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType());
5298+
ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
5299+
<< "created clone decl " << ore::NV("Decl", Decl.getCallee()));
5300+
continue;
5301+
}
5302+
auto *PrevF = M.getFunction(Name);
5303+
auto *Alias = GlobalAlias::create(Name, Func);
5304+
if (PrevF)
5305+
TakeDeclNameAndReplace(PrevF, Alias);
5306+
ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
5307+
<< "created clone alias " << ore::NV("Alias", Alias));
5308+
5309+
// Now handle aliases to this function, and clone those as well.
5310+
CloneFuncAliases(Func, I);
5311+
continue;
5312+
}
52015313
auto *NewF = CloneFunction(&F, *VMaps.back());
5314+
HashToFunc[Hash] = NewF;
52025315
FunctionClonesThinBackend++;
52035316
// Strip memprof and callsite metadata from clone as they are no longer
52045317
// needed.
@@ -5208,40 +5321,17 @@ static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones(
52085321
Inst.setMetadata(LLVMContext::MD_callsite, nullptr);
52095322
}
52105323
}
5211-
std::string Name = getMemProfFuncName(F.getName(), I);
52125324
auto *PrevF = M.getFunction(Name);
5213-
if (PrevF) {
5214-
// We might have created this when adjusting callsite in another
5215-
// function. It should be a declaration.
5216-
assert(PrevF->isDeclaration());
5217-
NewF->takeName(PrevF);
5218-
PrevF->replaceAllUsesWith(NewF);
5219-
PrevF->eraseFromParent();
5220-
} else
5325+
if (PrevF)
5326+
TakeDeclNameAndReplace(PrevF, NewF);
5327+
else
52215328
NewF->setName(Name);
52225329
updateSubprogramLinkageName(NewF, Name);
52235330
ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
52245331
<< "created clone " << ore::NV("NewFunction", NewF));
52255332

52265333
// Now handle aliases to this function, and clone those as well.
5227-
if (!FuncToAliasMap.count(&F))
5228-
continue;
5229-
for (auto *A : FuncToAliasMap[&F]) {
5230-
std::string Name = getMemProfFuncName(A->getName(), I);
5231-
auto *PrevA = M.getNamedAlias(Name);
5232-
auto *NewA = GlobalAlias::create(A->getValueType(),
5233-
A->getType()->getPointerAddressSpace(),
5234-
A->getLinkage(), Name, NewF);
5235-
NewA->copyAttributesFrom(A);
5236-
if (PrevA) {
5237-
// We might have created this when adjusting callsite in another
5238-
// function. It should be a declaration.
5239-
assert(PrevA->isDeclaration());
5240-
NewA->takeName(PrevA);
5241-
PrevA->replaceAllUsesWith(NewA);
5242-
PrevA->eraseFromParent();
5243-
}
5244-
}
5334+
CloneFuncAliases(NewF, I);
52455335
}
52465336
return VMaps;
52475337
}
@@ -5401,7 +5491,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
54015491
SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
54025492
bool ClonesCreated = false;
54035493
unsigned NumClonesCreated = 0;
5404-
auto CloneFuncIfNeeded = [&](unsigned NumClones) {
5494+
auto CloneFuncIfNeeded = [&](unsigned NumClones, FunctionSummary *FS) {
54055495
// We should at least have version 0 which is the original copy.
54065496
assert(NumClones > 0);
54075497
// If only one copy needed use original.
@@ -5415,7 +5505,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
54155505
assert(NumClonesCreated == NumClones);
54165506
return;
54175507
}
5418-
VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap);
5508+
VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap, FS);
54195509
// The first "clone" is the original copy, which doesn't have a VMap.
54205510
assert(VMaps.size() == NumClones - 1);
54215511
Changed = true;
@@ -5424,9 +5514,9 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
54245514
};
54255515

54265516
auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB,
5427-
Function *CalledFunction) {
5517+
Function *CalledFunction, FunctionSummary *FS) {
54285518
// Perform cloning if not yet done.
5429-
CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size());
5519+
CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size(), FS);
54305520

54315521
assert(!isMemProfClone(*CalledFunction));
54325522

@@ -5448,6 +5538,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
54485538
// below.
54495539
auto CalleeOrigName = CalledFunction->getName();
54505540
for (unsigned J = 0; J < StackNode.Clones.size(); J++) {
5541+
// If the VMap is empty, this clone was a duplicate of another and was
5542+
// created as an alias or a declaration.
5543+
if (J > 0 && VMaps[J - 1]->empty())
5544+
continue;
54515545
// Do nothing if this version calls the original version of its
54525546
// callee.
54535547
if (!StackNode.Clones[J])
@@ -5591,7 +5685,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
55915685
#endif
55925686

55935687
// Perform cloning if not yet done.
5594-
CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size());
5688+
CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size(), FS);
55955689

55965690
OrigAllocsThinBackend++;
55975691
AllocVersionsThinBackend += AllocNode.Versions.size();
@@ -5624,6 +5718,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
56245718

56255719
// Update the allocation types per the summary info.
56265720
for (unsigned J = 0; J < AllocNode.Versions.size(); J++) {
5721+
// If the VMap is empty, this clone was a duplicate of another and
5722+
// was created as an alias or a declaration.
5723+
if (J > 0 && VMaps[J - 1]->empty())
5724+
continue;
56275725
// Ignore any that didn't get an assigned allocation type.
56285726
if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
56295727
continue;
@@ -5670,7 +5768,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
56705768
// we don't need to do ICP, but might need to clone this
56715769
// function as it is the target of other cloned calls.
56725770
if (NumClones)
5673-
CloneFuncIfNeeded(NumClones);
5771+
CloneFuncIfNeeded(NumClones, FS);
56745772
}
56755773

56765774
else {
@@ -5690,7 +5788,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
56905788
}
56915789
#endif
56925790

5693-
CloneCallsite(StackNode, CB, CalledFunction);
5791+
CloneCallsite(StackNode, CB, CalledFunction, FS);
56945792
}
56955793
} else if (CB->isTailCall() && CalledFunction) {
56965794
// Locate the synthesized callsite info for the callee VI, if any was
@@ -5700,7 +5798,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
57005798
if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) {
57015799
auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI);
57025800
assert(Callsite != MapTailCallCalleeVIToCallsite.end());
5703-
CloneCallsite(Callsite->second, CB, CalledFunction);
5801+
CloneCallsite(Callsite->second, CB, CalledFunction, FS);
57045802
}
57055803
}
57065804
}
@@ -5846,6 +5944,10 @@ void MemProfContextDisambiguation::performICP(
58465944
// check.
58475945
CallBase *CBClone = CB;
58485946
for (unsigned J = 0; J < NumClones; J++) {
5947+
// If the VMap is empty, this clone was a duplicate of another and was
5948+
// created as an alias or a declaration.
5949+
if (J > 0 && VMaps[J - 1]->empty())
5950+
continue;
58495951
// Copy 0 is the original function.
58505952
if (J > 0)
58515953
CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
@@ -5891,6 +5993,10 @@ void MemProfContextDisambiguation::performICP(
58915993
// TotalCount and the number promoted.
58925994
CallBase *CBClone = CB;
58935995
for (unsigned J = 0; J < NumClones; J++) {
5996+
// If the VMap is empty, this clone was a duplicate of another and was
5997+
// created as an alias or a declaration.
5998+
if (J > 0 && VMaps[J - 1]->empty())
5999+
continue;
58946000
// Copy 0 is the original function.
58956001
if (J > 0)
58966002
CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);

0 commit comments

Comments
 (0)