29
29
#include " llvm/ADT/SmallSet.h"
30
30
#include " llvm/ADT/SmallVector.h"
31
31
#include " llvm/ADT/Statistic.h"
32
+ #include " llvm/ADT/StringExtras.h"
32
33
#include " llvm/Analysis/MemoryProfileInfo.h"
33
34
#include " llvm/Analysis/ModuleSummaryAnalysis.h"
34
35
#include " llvm/Analysis/OptimizationRemarkEmitter.h"
40
41
#include " llvm/Support/CommandLine.h"
41
42
#include " llvm/Support/GraphWriter.h"
42
43
#include " llvm/Support/InterleavedRange.h"
44
+ #include " llvm/Support/SHA1.h"
43
45
#include " llvm/Support/raw_ostream.h"
44
46
#include " llvm/Transforms/IPO.h"
45
47
#include " llvm/Transforms/Utils/CallPromotionUtils.h"
@@ -60,6 +62,9 @@ STATISTIC(FunctionClonesThinBackend,
60
62
" Number of function clones created during ThinLTO backend" );
61
63
STATISTIC (FunctionsClonedThinBackend,
62
64
" Number of functions that had clones created during ThinLTO backend" );
65
+ STATISTIC (
66
+ FunctionCloneDuplicatesThinBackend,
67
+ " Number of function clone duplicates detected during ThinLTO backend" );
63
68
STATISTIC (AllocTypeNotCold, " Number of not cold static allocations (possibly "
64
69
" cloned) during whole program analysis" );
65
70
STATISTIC (AllocTypeCold, " Number of cold static allocations (possibly cloned) "
@@ -5186,19 +5191,127 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
5186
5191
return Changed;
5187
5192
}
5188
5193
5194
+ // Compute a SHA1 hash of the callsite and alloc version information of clone I
5195
+ // in the summary, to use in detection of duplicate clones.
5196
+ uint64_t ComputeHash (const FunctionSummary *FS, unsigned I) {
5197
+ SHA1 Hasher;
5198
+ // Update hash with any callsites that call non-default (non-zero) callee
5199
+ // versions.
5200
+ for (auto &SN : FS->callsites ()) {
5201
+ // In theory all callsites and allocs in this function should have the same
5202
+ // number of clone entries, but handle any discrepancies gracefully below
5203
+ // for NDEBUG builds.
5204
+ assert (
5205
+ SN.Clones .size () > I &&
5206
+ " Callsite summary has fewer entries than other summaries in function" );
5207
+ if (SN.Clones .size () <= I || !SN.Clones [I])
5208
+ continue ;
5209
+ uint8_t Data[sizeof (SN.Clones [I])];
5210
+ support::endian::write32le (Data, SN.Clones [I]);
5211
+ Hasher.update (Data);
5212
+ }
5213
+ // Update hash with any allocs that have non-default (non-None) hints.
5214
+ for (auto &AN : FS->allocs ()) {
5215
+ // In theory all callsites and allocs in this function should have the same
5216
+ // number of clone entries, but handle any discrepancies gracefully below
5217
+ // for NDEBUG builds.
5218
+ assert (AN.Versions .size () > I &&
5219
+ " Alloc summary has fewer entries than other summaries in function" );
5220
+ if (AN.Versions .size () <= I ||
5221
+ (AllocationType)AN.Versions [I] == AllocationType::None)
5222
+ continue ;
5223
+ Hasher.update (ArrayRef<uint8_t >(&AN.Versions [I], 1 ));
5224
+ }
5225
+ return support::endian::read64le (Hasher.result ().data ());
5226
+ }
5227
+
5189
5228
static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4 > createFunctionClones (
5190
5229
Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE,
5191
5230
std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1 >>
5192
- &FuncToAliasMap) {
5231
+ &FuncToAliasMap,
5232
+ FunctionSummary *FS) {
5233
+ auto TakeDeclNameAndReplace = [](GlobalValue *DeclGV, GlobalValue *NewGV) {
5234
+ // We might have created this when adjusting callsite in another
5235
+ // function. It should be a declaration.
5236
+ assert (DeclGV->isDeclaration ());
5237
+ NewGV->takeName (DeclGV);
5238
+ DeclGV->replaceAllUsesWith (NewGV);
5239
+ DeclGV->eraseFromParent ();
5240
+ };
5241
+
5242
+ // Handle aliases to this function, and create analogous alias clones to the
5243
+ // provided clone of this function.
5244
+ auto CloneFuncAliases = [&](Function *NewF, unsigned I) {
5245
+ if (!FuncToAliasMap.count (&F))
5246
+ return ;
5247
+ for (auto *A : FuncToAliasMap[&F]) {
5248
+ std::string AliasName = getMemProfFuncName (A->getName (), I);
5249
+ auto *PrevA = M.getNamedAlias (AliasName);
5250
+ auto *NewA = GlobalAlias::create (A->getValueType (),
5251
+ A->getType ()->getPointerAddressSpace (),
5252
+ A->getLinkage (), AliasName, NewF);
5253
+ NewA->copyAttributesFrom (A);
5254
+ if (PrevA)
5255
+ TakeDeclNameAndReplace (PrevA, NewA);
5256
+ }
5257
+ };
5258
+
5193
5259
// The first "clone" is the original copy, we should only call this if we
5194
5260
// needed to create new clones.
5195
5261
assert (NumClones > 1 );
5196
5262
SmallVector<std::unique_ptr<ValueToValueMapTy>, 4 > VMaps;
5197
5263
VMaps.reserve (NumClones - 1 );
5198
5264
FunctionsClonedThinBackend++;
5265
+
5266
+ // Map of hash of callsite/alloc versions to the instantiated function clone
5267
+ // (possibly the original) implementing those calls. Used to avoid
5268
+ // instantiating duplicate function clones.
5269
+ // FIXME: Ideally the thin link would not generate such duplicate clones to
5270
+ // start with, but right now it happens due to phase ordering in the function
5271
+ // assignment and possible new clones that produces. We simply make each
5272
+ // duplicate an alias to the matching instantiated clone recorded in the map
5273
+ // (except for available_externally which are made declarations as they would
5274
+ // be aliases in the prevailing module, and available_externally aliases are
5275
+ // not well supported right now).
5276
+ DenseMap<uint64_t , Function *> HashToFunc;
5277
+
5278
+ // Save the hash of the original function version.
5279
+ HashToFunc[ComputeHash (FS, 0 )] = &F;
5280
+
5199
5281
for (unsigned I = 1 ; I < NumClones; I++) {
5200
5282
VMaps.emplace_back (std::make_unique<ValueToValueMapTy>());
5283
+ std::string Name = getMemProfFuncName (F.getName (), I);
5284
+ auto Hash = ComputeHash (FS, I);
5285
+ // If this clone would duplicate a previously seen clone, don't generate the
5286
+ // duplicate clone body, just make an alias to satisfy any (potentially
5287
+ // cross-module) references.
5288
+ if (HashToFunc.contains (Hash)) {
5289
+ FunctionCloneDuplicatesThinBackend++;
5290
+ auto *Func = HashToFunc[Hash];
5291
+ if (Func->hasAvailableExternallyLinkage ()) {
5292
+ // Skip these as EliminateAvailableExternallyPass does not handle
5293
+ // available_externally aliases correctly and we end up with an
5294
+ // available_externally alias to a declaration. Just create a
5295
+ // declaration for now as we know we will have a definition in another
5296
+ // module.
5297
+ auto Decl = M.getOrInsertFunction (Name, Func->getFunctionType ());
5298
+ ORE.emit (OptimizationRemark (DEBUG_TYPE, " MemprofClone" , &F)
5299
+ << " created clone decl " << ore::NV (" Decl" , Decl.getCallee ()));
5300
+ continue ;
5301
+ }
5302
+ auto *PrevF = M.getFunction (Name);
5303
+ auto *Alias = GlobalAlias::create (Name, Func);
5304
+ if (PrevF)
5305
+ TakeDeclNameAndReplace (PrevF, Alias);
5306
+ ORE.emit (OptimizationRemark (DEBUG_TYPE, " MemprofClone" , &F)
5307
+ << " created clone alias " << ore::NV (" Alias" , Alias));
5308
+
5309
+ // Now handle aliases to this function, and clone those as well.
5310
+ CloneFuncAliases (Func, I);
5311
+ continue ;
5312
+ }
5201
5313
auto *NewF = CloneFunction (&F, *VMaps.back ());
5314
+ HashToFunc[Hash] = NewF;
5202
5315
FunctionClonesThinBackend++;
5203
5316
// Strip memprof and callsite metadata from clone as they are no longer
5204
5317
// needed.
@@ -5208,40 +5321,17 @@ static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones(
5208
5321
Inst.setMetadata (LLVMContext::MD_callsite, nullptr );
5209
5322
}
5210
5323
}
5211
- std::string Name = getMemProfFuncName (F.getName (), I);
5212
5324
auto *PrevF = M.getFunction (Name);
5213
- if (PrevF) {
5214
- // We might have created this when adjusting callsite in another
5215
- // function. It should be a declaration.
5216
- assert (PrevF->isDeclaration ());
5217
- NewF->takeName (PrevF);
5218
- PrevF->replaceAllUsesWith (NewF);
5219
- PrevF->eraseFromParent ();
5220
- } else
5325
+ if (PrevF)
5326
+ TakeDeclNameAndReplace (PrevF, NewF);
5327
+ else
5221
5328
NewF->setName (Name);
5222
5329
updateSubprogramLinkageName (NewF, Name);
5223
5330
ORE.emit (OptimizationRemark (DEBUG_TYPE, " MemprofClone" , &F)
5224
5331
<< " created clone " << ore::NV (" NewFunction" , NewF));
5225
5332
5226
5333
// Now handle aliases to this function, and clone those as well.
5227
- if (!FuncToAliasMap.count (&F))
5228
- continue ;
5229
- for (auto *A : FuncToAliasMap[&F]) {
5230
- std::string Name = getMemProfFuncName (A->getName (), I);
5231
- auto *PrevA = M.getNamedAlias (Name);
5232
- auto *NewA = GlobalAlias::create (A->getValueType (),
5233
- A->getType ()->getPointerAddressSpace (),
5234
- A->getLinkage (), Name, NewF);
5235
- NewA->copyAttributesFrom (A);
5236
- if (PrevA) {
5237
- // We might have created this when adjusting callsite in another
5238
- // function. It should be a declaration.
5239
- assert (PrevA->isDeclaration ());
5240
- NewA->takeName (PrevA);
5241
- PrevA->replaceAllUsesWith (NewA);
5242
- PrevA->eraseFromParent ();
5243
- }
5244
- }
5334
+ CloneFuncAliases (NewF, I);
5245
5335
}
5246
5336
return VMaps;
5247
5337
}
@@ -5401,7 +5491,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
5401
5491
SmallVector<std::unique_ptr<ValueToValueMapTy>, 4 > VMaps;
5402
5492
bool ClonesCreated = false ;
5403
5493
unsigned NumClonesCreated = 0 ;
5404
- auto CloneFuncIfNeeded = [&](unsigned NumClones) {
5494
+ auto CloneFuncIfNeeded = [&](unsigned NumClones, FunctionSummary *FS ) {
5405
5495
// We should at least have version 0 which is the original copy.
5406
5496
assert (NumClones > 0 );
5407
5497
// If only one copy needed use original.
@@ -5415,7 +5505,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
5415
5505
assert (NumClonesCreated == NumClones);
5416
5506
return ;
5417
5507
}
5418
- VMaps = createFunctionClones (F, NumClones, M, ORE, FuncToAliasMap);
5508
+ VMaps = createFunctionClones (F, NumClones, M, ORE, FuncToAliasMap, FS );
5419
5509
// The first "clone" is the original copy, which doesn't have a VMap.
5420
5510
assert (VMaps.size () == NumClones - 1 );
5421
5511
Changed = true ;
@@ -5424,9 +5514,9 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
5424
5514
};
5425
5515
5426
5516
auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB,
5427
- Function *CalledFunction) {
5517
+ Function *CalledFunction, FunctionSummary *FS ) {
5428
5518
// Perform cloning if not yet done.
5429
- CloneFuncIfNeeded (/* NumClones=*/ StackNode.Clones .size ());
5519
+ CloneFuncIfNeeded (/* NumClones=*/ StackNode.Clones .size (), FS );
5430
5520
5431
5521
assert (!isMemProfClone (*CalledFunction));
5432
5522
@@ -5448,6 +5538,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
5448
5538
// below.
5449
5539
auto CalleeOrigName = CalledFunction->getName ();
5450
5540
for (unsigned J = 0 ; J < StackNode.Clones .size (); J++) {
5541
+ // If the VMap is empty, this clone was a duplicate of another and was
5542
+ // created as an alias or a declaration.
5543
+ if (J > 0 && VMaps[J - 1 ]->empty ())
5544
+ continue ;
5451
5545
// Do nothing if this version calls the original version of its
5452
5546
// callee.
5453
5547
if (!StackNode.Clones [J])
@@ -5591,7 +5685,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
5591
5685
#endif
5592
5686
5593
5687
// Perform cloning if not yet done.
5594
- CloneFuncIfNeeded (/* NumClones=*/ AllocNode.Versions .size ());
5688
+ CloneFuncIfNeeded (/* NumClones=*/ AllocNode.Versions .size (), FS );
5595
5689
5596
5690
OrigAllocsThinBackend++;
5597
5691
AllocVersionsThinBackend += AllocNode.Versions .size ();
@@ -5624,6 +5718,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
5624
5718
5625
5719
// Update the allocation types per the summary info.
5626
5720
for (unsigned J = 0 ; J < AllocNode.Versions .size (); J++) {
5721
+ // If the VMap is empty, this clone was a duplicate of another and
5722
+ // was created as an alias or a declaration.
5723
+ if (J > 0 && VMaps[J - 1 ]->empty ())
5724
+ continue ;
5627
5725
// Ignore any that didn't get an assigned allocation type.
5628
5726
if (AllocNode.Versions [J] == (uint8_t )AllocationType::None)
5629
5727
continue ;
@@ -5670,7 +5768,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
5670
5768
// we don't need to do ICP, but might need to clone this
5671
5769
// function as it is the target of other cloned calls.
5672
5770
if (NumClones)
5673
- CloneFuncIfNeeded (NumClones);
5771
+ CloneFuncIfNeeded (NumClones, FS );
5674
5772
}
5675
5773
5676
5774
else {
@@ -5690,7 +5788,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
5690
5788
}
5691
5789
#endif
5692
5790
5693
- CloneCallsite (StackNode, CB, CalledFunction);
5791
+ CloneCallsite (StackNode, CB, CalledFunction, FS );
5694
5792
}
5695
5793
} else if (CB->isTailCall () && CalledFunction) {
5696
5794
// Locate the synthesized callsite info for the callee VI, if any was
@@ -5700,7 +5798,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
5700
5798
if (CalleeVI && MapTailCallCalleeVIToCallsite.count (CalleeVI)) {
5701
5799
auto Callsite = MapTailCallCalleeVIToCallsite.find (CalleeVI);
5702
5800
assert (Callsite != MapTailCallCalleeVIToCallsite.end ());
5703
- CloneCallsite (Callsite->second , CB, CalledFunction);
5801
+ CloneCallsite (Callsite->second , CB, CalledFunction, FS );
5704
5802
}
5705
5803
}
5706
5804
}
@@ -5846,6 +5944,10 @@ void MemProfContextDisambiguation::performICP(
5846
5944
// check.
5847
5945
CallBase *CBClone = CB;
5848
5946
for (unsigned J = 0 ; J < NumClones; J++) {
5947
+ // If the VMap is empty, this clone was a duplicate of another and was
5948
+ // created as an alias or a declaration.
5949
+ if (J > 0 && VMaps[J - 1 ]->empty ())
5950
+ continue ;
5849
5951
// Copy 0 is the original function.
5850
5952
if (J > 0 )
5851
5953
CBClone = cast<CallBase>((*VMaps[J - 1 ])[CB]);
@@ -5891,6 +5993,10 @@ void MemProfContextDisambiguation::performICP(
5891
5993
// TotalCount and the number promoted.
5892
5994
CallBase *CBClone = CB;
5893
5995
for (unsigned J = 0 ; J < NumClones; J++) {
5996
+ // If the VMap is empty, this clone was a duplicate of another and was
5997
+ // created as an alias or a declaration.
5998
+ if (J > 0 && VMaps[J - 1 ]->empty ())
5999
+ continue ;
5894
6000
// Copy 0 is the original function.
5895
6001
if (J > 0 )
5896
6002
CBClone = cast<CallBase>((*VMaps[J - 1 ])[CB]);
0 commit comments