Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions llvm/include/llvm/IR/ModuleSummaryIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,12 @@ struct ValueInfo {
return getRef()->second.SummaryList;
}

// Even if the index is built with GVs available, we may not have one for
// summary entries synthesized for profiled indirect call targets.
bool hasName() const { return !haveGVs() || getValue(); }

StringRef name() const {
assert(!haveGVs() || getRef()->second.U.GV);
return haveGVs() ? getRef()->second.U.GV->getName()
: getRef()->second.U.Name;
}
Expand Down
45 changes: 44 additions & 1 deletion llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
#ifndef LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H
#define LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H

#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <functional>

namespace llvm {
Expand All @@ -36,15 +38,56 @@ class MemProfContextDisambiguation
/// the IR.
bool applyImport(Module &M);

// Builds the symtab and analysis used for ICP during ThinLTO backends.
bool initializeIndirectCallPromotionInfo(Module &M);

// Data structure for saving indirect call profile info for use in ICP with
// cloning.
struct ICallAnalysisData {
CallBase *CB;
std::vector<InstrProfValueData> CandidateProfileData;
uint32_t NumCandidates;
uint64_t TotalCount;
size_t CallsiteInfoStartIndex;
};

// Record information needed for ICP of an indirect call, depending on its
// profile information and the clone information recorded in the corresponding
// CallsiteInfo records. The SI iterator point to the current iteration point
// through AllCallsites in this function, and will be updated in this method
// as we iterate through profiled targets. The number of clones recorded for
// this indirect call is returned. The necessary information is recorded in
// the ICallAnalysisInfo list for later ICP.
unsigned recordICPInfo(CallBase *CB, ArrayRef<CallsiteInfo> AllCallsites,
ArrayRef<CallsiteInfo>::iterator &SI,
SmallVector<ICallAnalysisData> &ICallAnalysisInfo);

// Actually performs any needed ICP in the function, using the information
// recorded in the ICallAnalysisInfo list.
void performICP(Module &M, ArrayRef<CallsiteInfo> AllCallsites,
SmallVectorImpl<std::unique_ptr<ValueToValueMapTy>> &VMaps,
SmallVector<ICallAnalysisData> &ICallAnalysisInfo,
OptimizationRemarkEmitter &ORE);

/// Import summary containing cloning decisions for the ThinLTO backend.
const ModuleSummaryIndex *ImportSummary;

// Owns the import summary specified by internal options for testing the
// ThinLTO backend via opt (to simulate distributed ThinLTO).
std::unique_ptr<ModuleSummaryIndex> ImportSummaryForTesting;

// Whether we are building with SamplePGO. This is needed for correctly
// updating profile metadata on speculatively promoted calls.
bool isSamplePGO;

// Used when performing indirect call analysis and promotion when cloning in
// the ThinLTO backend during applyImport.
std::unique_ptr<InstrProfSymtab> Symtab;
std::unique_ptr<ICallPromotionAnalysis> ICallAnalysis;

public:
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary = nullptr);
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary = nullptr,
bool isSamplePGO = false);

PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);

Expand Down
59 changes: 36 additions & 23 deletions llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ static cl::opt<std::string> ModuleSummaryDotFile(
"module-summary-dot-file", cl::Hidden, cl::value_desc("filename"),
cl::desc("File to emit dot graph of new summary into"));

static cl::opt<bool> EnableMemProfIndirectCallSupport(
"enable-memprof-indirect-call-support", cl::init(true), cl::Hidden,
cl::desc(
"Enable MemProf support for summarizing and cloning indirect calls"));

extern cl::opt<bool> ScalePartialSampleProfileWorkingSetSize;

extern cl::opt<unsigned> MaxNumVTableAnnotations;
Expand Down Expand Up @@ -404,6 +409,11 @@ static void computeFunctionSummary(
if (HasLocalsInUsedOrAsm && CI && CI->isInlineAsm())
HasInlineAsmMaybeReferencingInternal = true;

// Compute this once per indirect call.
uint32_t NumCandidates = 0;
uint64_t TotalCount = 0;
MutableArrayRef<InstrProfValueData> CandidateProfileData;

auto *CalledValue = CB->getCalledOperand();
auto *CalledFunction = CB->getCalledFunction();
if (CalledValue && !CalledFunction) {
Expand Down Expand Up @@ -481,9 +491,7 @@ static void computeFunctionSummary(
}
}

uint32_t NumCandidates;
uint64_t TotalCount;
auto CandidateProfileData =
CandidateProfileData =
ICallAnalysis.getPromotionCandidatesForInstruction(&I, TotalCount,
NumCandidates);
for (const auto &Candidate : CandidateProfileData)
Expand All @@ -495,16 +503,6 @@ static void computeFunctionSummary(
if (!IsThinLTO)
continue;

// TODO: Skip indirect calls for now. Need to handle these better, likely
// by creating multiple Callsites, one per target, then speculatively
// devirtualize while applying clone info in the ThinLTO backends. This
// will also be important because we will have a different set of clone
// versions per target. This handling needs to match that in the ThinLTO
// backend so we handle things consistently for matching of callsite
// summaries to instructions.
if (!CalledFunction)
continue;

// Ensure we keep this analysis in sync with the handling in the ThinLTO
// backend (see MemProfContextDisambiguation::applyImport). Save this call
// so that we can skip it in checking the reverse case later.
Expand Down Expand Up @@ -555,13 +553,24 @@ static void computeFunctionSummary(
SmallVector<unsigned> StackIdIndices;
for (auto StackId : InstCallsite)
StackIdIndices.push_back(Index.addOrGetStackIdIndex(StackId));
// Use the original CalledValue, in case it was an alias. We want
// to record the call edge to the alias in that case. Eventually
// an alias summary will be created to associate the alias and
// aliasee.
auto CalleeValueInfo =
Index.getOrInsertValueInfo(cast<GlobalValue>(CalledValue));
Callsites.push_back({CalleeValueInfo, StackIdIndices});
if (CalledFunction) {
// Use the original CalledValue, in case it was an alias. We want
// to record the call edge to the alias in that case. Eventually
// an alias summary will be created to associate the alias and
// aliasee.
auto CalleeValueInfo =
Index.getOrInsertValueInfo(cast<GlobalValue>(CalledValue));
Callsites.push_back({CalleeValueInfo, StackIdIndices});
} else if (EnableMemProfIndirectCallSupport) {
// For indirect callsites, create multiple Callsites, one per target.
// This enables having a different set of clone versions per target,
// and we will apply the cloning decisions while speculatively
// devirtualizing in the ThinLTO backends.
for (const auto &Candidate : CandidateProfileData) {
auto CalleeValueInfo = Index.getOrInsertValueInfo(Candidate.Value);
Callsites.push_back({CalleeValueInfo, StackIdIndices});
}
}
}
}
}
Expand Down Expand Up @@ -1214,9 +1223,13 @@ bool llvm::mayHaveMemprofSummary(const CallBase *CB) {
if (CI && CalledFunction->isIntrinsic())
return false;
} else {
// TODO: For now skip indirect calls. See comments in
// computeFunctionSummary for what is needed to handle this.
return false;
// Skip inline assembly calls.
if (CI && CI->isInlineAsm())
return false;
// Skip direct calls via Constant.
if (!CalledValue || isa<Constant>(CalledValue))
return false;
return true;
}
return true;
}
4 changes: 2 additions & 2 deletions llvm/lib/IR/AsmWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3613,7 +3613,7 @@ void AssemblyWriter::printSummary(const GlobalValueSummary &Summary) {

void AssemblyWriter::printSummaryInfo(unsigned Slot, const ValueInfo &VI) {
Out << "^" << Slot << " = gv: (";
if (!VI.name().empty())
if (VI.hasName() && !VI.name().empty())
Out << "name: \"" << VI.name() << "\"";
else
Out << "guid: " << VI.getGUID();
Expand All @@ -3627,7 +3627,7 @@ void AssemblyWriter::printSummaryInfo(unsigned Slot, const ValueInfo &VI) {
Out << ")";
}
Out << ")";
if (!VI.name().empty())
if (VI.hasName() && !VI.name().empty())
Out << " ; guid = " << VI.getGUID();
Out << "\n";
}
Expand Down
7 changes: 5 additions & 2 deletions llvm/lib/Passes/PassBuilderPipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1714,7 +1714,8 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
// For ThinLTO we must apply the context disambiguation decisions early, to
// ensure we can correctly match the callsites to summary data.
if (EnableMemProfContextDisambiguation)
MPM.addPass(MemProfContextDisambiguation(ImportSummary));
MPM.addPass(MemProfContextDisambiguation(
ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));

// These passes import type identifier resolutions for whole-program
// devirtualization and CFI. They must run early because other passes may
Expand Down Expand Up @@ -1927,7 +1928,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// amount of additional cloning required to distinguish the allocation
// contexts.
if (EnableMemProfContextDisambiguation)
MPM.addPass(MemProfContextDisambiguation());
MPM.addPass(MemProfContextDisambiguation(
/*Summary=*/nullptr,
PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));

// Optimize globals again after we ran the inliner.
MPM.addPass(GlobalOptPass());
Expand Down
Loading
Loading