Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 136 additions & 36 deletions llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
Expand All @@ -40,6 +41,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/InterleavedRange.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
Expand All @@ -60,6 +62,9 @@ STATISTIC(FunctionClonesThinBackend,
"Number of function clones created during ThinLTO backend");
STATISTIC(FunctionsClonedThinBackend,
"Number of functions that had clones created during ThinLTO backend");
STATISTIC(
FunctionCloneDuplicatesThinBackend,
"Number of function clone duplicates detected during ThinLTO backend");
STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly "
"cloned) during whole program analysis");
STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) "
Expand Down Expand Up @@ -5186,19 +5191,129 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
return Changed;
}

// Compute a SHA1 hash of the callsite and alloc version information of clone I
// in the summary, to use in detection of duplicate clones.
std::string ComputeHash(StringMap<Function *> &HashToFunc, FunctionSummary *FS,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

May I suggest const FunctionSummary *FS?

Suggested change
std::string ComputeHash(StringMap<Function *> &HashToFunc, FunctionSummary *FS,
std::string ComputeHash(StringMap<Function *> &HashToFunc, const FunctionSummary *FS,

Now, for the return value, I'm wondering if uint64_t is sufficient instead of the full 160-bit SHA1. If you are OK with uint64_t, see the comment at the return statement below.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea on return type! done

unsigned I) {
SHA1 Hasher;
// Update hash with any callsites that call non-default (non-zero) callee
// versions.
for (auto &SN : FS->callsites()) {
// In theory all callsites and allocs in this function should have the same
// number of clone entries, but handle any discrepancies gracefully below
// for NDEBUG builds.
assert(
SN.Clones.size() > I &&
"Callsite summary has fewer entries than other summaries in function");
if (SN.Clones.size() <= I || !SN.Clones[I])
continue;
uint8_t Data[4];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

May I suggest this?

Suggested change
uint8_t Data[4];
uint8_t Data[sizeof(SN.Clones[I])];

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

support::endian::write32le(Data, SN.Clones[I]);
Hasher.update(Data);
}
// Update hash with any allocs that have non-default (non-None) hints.
for (auto &AN : FS->allocs()) {
// In theory all callsites and allocs in this function should have the same
// number of clone entries, but handle any discrepancies gracefully below
// for NDEBUG builds.
assert(AN.Versions.size() > I &&
"Alloc summary has fewer entries than other summaries in function");
if (AN.Versions.size() <= I ||
(AllocationType)AN.Versions[I] == AllocationType::None)
continue;
Hasher.update(ArrayRef<uint8_t>(&AN.Versions[I], 1));
}
return toHex(Hasher.result());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you are OK with returning uint64_t instead, may I suggest the following?

Suggested change
return toHex(Hasher.result());
return support::endian::read64le(Hasher.result().data());

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

}

static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones(
Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE,
std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
&FuncToAliasMap) {
&FuncToAliasMap,
FunctionSummary *FS) {
auto TakeDeclNameAndReplace = [](GlobalValue *DeclGV, GlobalValue *NewGV) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This lambda and the one below were refactored out of existing code to enable reuse.

// We might have created this when adjusting callsite in another
// function. It should be a declaration.
assert(DeclGV->isDeclaration());
NewGV->takeName(DeclGV);
DeclGV->replaceAllUsesWith(NewGV);
DeclGV->eraseFromParent();
};

// Handle aliases to this function, and create analogous alias clones to the
// provided clone of this function.
auto CloneFuncAliases = [&](Function *NewF, unsigned I) {
if (!FuncToAliasMap.count(&F))
return;
for (auto *A : FuncToAliasMap[&F]) {
std::string AliasName = getMemProfFuncName(A->getName(), I);
auto *PrevA = M.getNamedAlias(AliasName);
auto *NewA = GlobalAlias::create(A->getValueType(),
A->getType()->getPointerAddressSpace(),
A->getLinkage(), AliasName, NewF);
NewA->copyAttributesFrom(A);
if (PrevA)
TakeDeclNameAndReplace(PrevA, NewA);
}
};

// The first "clone" is the original copy, we should only call this if we
// needed to create new clones.
assert(NumClones > 1);
SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
VMaps.reserve(NumClones - 1);
FunctionsClonedThinBackend++;

// Map of hash of callsite/alloc versions to the instantiated function clone
// (possibly the original) implementing those calls. Used to avoid
// instantiating duplicate function clones.
// FIXME: Ideally the thin link would not generate such duplicate clones to
// start with, but right now it happens due to phase ordering in function
// assignment and the new clones that it may produce. We simply make each
// duplicate an alias to the matching instantiated clone recorded in the map
// (except for available_externally which are made declarations as they would
// be aliases in the prevailing module, and available_externally aliases are
// not well supported right now).
StringMap<Function *> HashToFunc;

// Save the hash of the original function version.
auto Hash = ComputeHash(HashToFunc, FS, 0);
HashToFunc[Hash] = &F;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To avoid confusion with Hash declared in the for loop below, may I suggest the following? I don't think you use this Hash later in this function.

Suggested change
auto Hash = ComputeHash(HashToFunc, FS, 0);
HashToFunc[Hash] = &F;
HashToFunc[ComputeHash(HashToFunc, FS, 0)] = &F;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


for (unsigned I = 1; I < NumClones; I++) {
VMaps.emplace_back(std::make_unique<ValueToValueMapTy>());
std::string Name = getMemProfFuncName(F.getName(), I);
auto Hash = ComputeHash(HashToFunc, FS, I);
// If this clone would duplicate a previously seen clone, don't generate the
// duplicate clone body, just make an alias to satisfy any (potentially
// cross-module) references.
if (HashToFunc.contains(Hash)) {
FunctionCloneDuplicatesThinBackend++;
auto *Func = HashToFunc[Hash];
if (Func->hasAvailableExternallyLinkage()) {
// Skip these as EliminateAvailableExternallyPass does not handle
// available_externally aliases correctly and we end up with an
// available_externally alias to a declaration. Just create a
// declaration for now as we know we will have a definition in another
// module.
auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType());
ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
<< "created clone decl " << ore::NV("Decl", Decl.getCallee()));
continue;
}
auto *PrevF = M.getFunction(Name);
auto *Alias = GlobalAlias::create(Name, Func);
if (PrevF)
TakeDeclNameAndReplace(PrevF, Alias);
ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
<< "created clone alias " << ore::NV("Alias", Alias));

// Now handle aliases to this function, and clone those as well.
CloneFuncAliases(Func, I);
continue;
}
auto *NewF = CloneFunction(&F, *VMaps.back());
HashToFunc[Hash] = NewF;
FunctionClonesThinBackend++;
// Strip memprof and callsite metadata from clone as they are no longer
// needed.
Expand All @@ -5208,40 +5323,17 @@ static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones(
Inst.setMetadata(LLVMContext::MD_callsite, nullptr);
}
}
std::string Name = getMemProfFuncName(F.getName(), I);
auto *PrevF = M.getFunction(Name);
if (PrevF) {
// We might have created this when adjusting callsite in another
// function. It should be a declaration.
assert(PrevF->isDeclaration());
NewF->takeName(PrevF);
PrevF->replaceAllUsesWith(NewF);
PrevF->eraseFromParent();
} else
if (PrevF)
TakeDeclNameAndReplace(PrevF, NewF);
else
NewF->setName(Name);
updateSubprogramLinkageName(NewF, Name);
ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
<< "created clone " << ore::NV("NewFunction", NewF));

// Now handle aliases to this function, and clone those as well.
if (!FuncToAliasMap.count(&F))
continue;
for (auto *A : FuncToAliasMap[&F]) {
std::string Name = getMemProfFuncName(A->getName(), I);
auto *PrevA = M.getNamedAlias(Name);
auto *NewA = GlobalAlias::create(A->getValueType(),
A->getType()->getPointerAddressSpace(),
A->getLinkage(), Name, NewF);
NewA->copyAttributesFrom(A);
if (PrevA) {
// We might have created this when adjusting callsite in another
// function. It should be a declaration.
assert(PrevA->isDeclaration());
NewA->takeName(PrevA);
PrevA->replaceAllUsesWith(NewA);
PrevA->eraseFromParent();
}
}
CloneFuncAliases(NewF, I);
}
return VMaps;
}
Expand Down Expand Up @@ -5401,7 +5493,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
bool ClonesCreated = false;
unsigned NumClonesCreated = 0;
auto CloneFuncIfNeeded = [&](unsigned NumClones) {
auto CloneFuncIfNeeded = [&](unsigned NumClones, FunctionSummary *FS) {
// We should at least have version 0 which is the original copy.
assert(NumClones > 0);
// If only one copy needed use original.
Expand All @@ -5415,7 +5507,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
assert(NumClonesCreated == NumClones);
return;
}
VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap);
VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap, FS);
// The first "clone" is the original copy, which doesn't have a VMap.
assert(VMaps.size() == NumClones - 1);
Changed = true;
Expand All @@ -5424,9 +5516,9 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
};

auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB,
Function *CalledFunction) {
Function *CalledFunction, FunctionSummary *FS) {
// Perform cloning if not yet done.
CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size());
CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size(), FS);

assert(!isMemProfClone(*CalledFunction));

Expand All @@ -5448,6 +5540,8 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
// below.
auto CalleeOrigName = CalledFunction->getName();
for (unsigned J = 0; J < StackNode.Clones.size(); J++) {
if (J > 0 && VMaps[J - 1]->empty())
continue;
Comment on lines +5543 to +5544
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering if this small block of code warrants a comment:

// If the VMap is empty, this clone was a duplicate of another and was created
// as an alias or a declaration.

The same applies to the three other instances of this if statement below.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

// Do nothing if this version calls the original version of its
// callee.
if (!StackNode.Clones[J])
Expand Down Expand Up @@ -5591,7 +5685,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
#endif

// Perform cloning if not yet done.
CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size());
CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size(), FS);

OrigAllocsThinBackend++;
AllocVersionsThinBackend += AllocNode.Versions.size();
Expand Down Expand Up @@ -5624,6 +5718,8 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {

// Update the allocation types per the summary info.
for (unsigned J = 0; J < AllocNode.Versions.size(); J++) {
if (J > 0 && VMaps[J - 1]->empty())
continue;
// Ignore any that didn't get an assigned allocation type.
if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
continue;
Expand Down Expand Up @@ -5671,7 +5767,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
// we don't need to do ICP, but might need to clone this
// function as it is the target of other cloned calls.
if (NumClones)
CloneFuncIfNeeded(NumClones);
CloneFuncIfNeeded(NumClones, FS);
}

else {
Expand All @@ -5691,7 +5787,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
}
#endif

CloneCallsite(StackNode, CB, CalledFunction);
CloneCallsite(StackNode, CB, CalledFunction, FS);
}
} else if (CB->isTailCall() && CalledFunction) {
// Locate the synthesized callsite info for the callee VI, if any was
Expand All @@ -5701,7 +5797,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) {
auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI);
assert(Callsite != MapTailCallCalleeVIToCallsite.end());
CloneCallsite(Callsite->second, CB, CalledFunction);
CloneCallsite(Callsite->second, CB, CalledFunction, FS);
}
}
}
Expand Down Expand Up @@ -5847,6 +5943,8 @@ void MemProfContextDisambiguation::performICP(
// check.
CallBase *CBClone = CB;
for (unsigned J = 0; J < NumClones; J++) {
if (J > 0 && VMaps[J - 1]->empty())
continue;
// Copy 0 is the original function.
if (J > 0)
CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
Expand Down Expand Up @@ -5892,6 +5990,8 @@ void MemProfContextDisambiguation::performICP(
// TotalCount and the number promoted.
CallBase *CBClone = CB;
for (unsigned J = 0; J < NumClones; J++) {
if (J > 0 && VMaps[J - 1]->empty())
continue;
// Copy 0 is the original function.
if (J > 0)
CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
Expand Down
Loading