Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 76 additions & 10 deletions llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,12 @@ static cl::opt<bool>
cl::desc("Salvage stale MemProf profile"),
cl::init(false), cl::Hidden);

static cl::opt<bool> ClMemProfAttachCalleeGuids(
"memprof-attach-calleeguids",
cl::desc(
"Attach calleeguids as value profile metadata for indirect calls."),
cl::init(true), cl::Hidden);

extern cl::opt<bool> MemProfReportHintedSizes;
extern cl::opt<unsigned> MinClonedColdBytePercent;
extern cl::opt<unsigned> MinCallsiteColdBytePercent;
Expand Down Expand Up @@ -964,6 +970,41 @@ undriftMemProfRecord(const DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
UndriftCallStack(CS.Frames);
}

// Helper function to process CalleeGuids and create value profile metadata
static void addVPMetadata(Module &M, Instruction &I,
ArrayRef<GlobalValue::GUID> CalleeGuids) {
if (!ClMemProfAttachCalleeGuids || CalleeGuids.empty())
return;

if (I.getMetadata(LLVMContext::MD_prof)) {
uint64_t Unused;
auto ExistingVD =
getValueProfDataFromInst(I, IPVK_IndirectCallTarget, ~0U, Unused);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Document constant parameter

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

// We don't know how to merge value profile data yet.
if (!ExistingVD.empty()) {
return;
}
}

SmallVector<InstrProfValueData, 4> VDs;
uint64_t TotalCount = 0;

for (const GlobalValue::GUID CalleeGUID : CalleeGuids) {
InstrProfValueData VD;
VD.Value = CalleeGUID;
// For MemProf, we don't have actual call counts, so we assign
// a weight of 1 to each potential target.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally this could be used to drive ICP too, but I'm not sure a weight of 1 will be hot enough. Should it either be a configurable value, or add a FIXME or TODO to adjust this to allow for ICP?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added TODO.

VD.Count = 1;
VDs.push_back(VD);
TotalCount += VD.Count;
}

if (!VDs.empty()) {
annotateValueSite(M, I, VDs, TotalCount, IPVK_IndirectCallTarget,
VDs.size());
}
}

static void
readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
const TargetLibraryInfo &TLI,
Expand Down Expand Up @@ -1031,15 +1072,32 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
// Build maps of the location hash to all profile data with that leaf location
// (allocation info and the callsites).
std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
// A hash function for std::unordered_set<ArrayRef<Frame>> to work.
struct CallStackHash {
size_t operator()(ArrayRef<Frame> CS) const {
return computeFullStackId(CS);

// Helper struct for maintaining refs to callsite data. As an alternative we
// could store a pointer to the CallSiteInfo struct but we also need the frame
// index. Using ArrayRefs instead makes it a little easier to read.
struct CallSiteEntry {
// Subset of frames for the corresponding CallSiteInfo.
ArrayRef<Frame> Frames;
// Potential targets for indirect calls.
ArrayRef<GlobalValue::GUID> CalleeGuids;

// Only compare Frame contents.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe comment why you aren't using ArrayRef operator==. I guess that does elementwise equality and here you are looking for the underlying pointer being the same?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, added comment. It's also faster this way.

bool operator==(const CallSiteEntry &Other) const {
return Frames.data() == Other.Frames.data() &&
Frames.size() == Other.Frames.size();
}
};

struct CallSiteEntryHash {
size_t operator()(const CallSiteEntry &Entry) const {
return computeFullStackId(Entry.Frames);
}
};

// For the callsites we need to record slices of the frame array (see comments
// below where the map entries are added).
std::map<uint64_t, std::unordered_set<ArrayRef<Frame>, CallStackHash>>
// below where the map entries are added) along with their CalleeGuids.
std::map<uint64_t, std::unordered_set<CallSiteEntry, CallSiteEntryHash>>
LocHashToCallSites;
for (auto &AI : MemProfRec->AllocSites) {
NumOfMemProfAllocContextProfiles++;
Expand All @@ -1057,8 +1115,10 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
unsigned Idx = 0;
for (auto &StackFrame : CS.Frames) {
uint64_t StackId = computeStackId(StackFrame);
LocHashToCallSites[StackId].insert(
ArrayRef<Frame>(CS.Frames).drop_front(Idx++));
ArrayRef<Frame> FrameSlice = ArrayRef<Frame>(CS.Frames).drop_front(Idx++);
ArrayRef<GlobalValue::GUID> CalleeGuids(CS.CalleeGuids);
LocHashToCallSites[StackId].insert({FrameSlice, CalleeGuids});

ProfileHasColumns |= StackFrame.Column;
// Once we find this function, we can stop recording.
if (StackFrame.Function == FuncGUID)
Expand Down Expand Up @@ -1202,13 +1262,19 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
// Otherwise, add callsite metadata. If we reach here then we found the
// instruction's leaf location in the callsites map and not the allocation
// map.
for (auto CallStackIdx : CallSitesIter->second) {
for (const auto &CallSiteEntry : CallSitesIter->second) {
// If we found and thus matched all frames on the call, create and
// attach call stack metadata.
if (stackFrameIncludesInlinedCallStack(CallStackIdx,
if (stackFrameIncludesInlinedCallStack(CallSiteEntry.Frames,
InlinedCallStack)) {
NumOfMemProfMatchedCallSites++;
addCallsiteMetadata(I, InlinedCallStack, Ctx);

// Try to attach indirect call metadata if possible.
if (!CalledFunction) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: remove braces around single line body

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

addVPMetadata(M, I, CallSiteEntry.CalleeGuids);
}

// Only need to find one with a matching call stack and add a single
// callsite metadata.

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
; RUN: split-file %s %t

; COM: Basic functionality with flag toggle
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why use COM here and below? Just the leading ";" means it should be interpreted as a comment, although I typically use ";;" to set off the comment. Typically COM is only needed to prevent FileCheck from misinterpreting a reference to a directive.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

; RUN: llvm-profdata merge --memprof-version=4 %t/basic.yaml -o %t/basic.memprofdata
; RUN: opt < %t/basic.ll -passes='memprof-use<profile-filename=%t/basic.memprofdata>' -memprof-attach-calleeguids=false -S 2>&1 | FileCheck %s --check-prefix=CHECK-DISABLE
; RUN: opt < %t/basic.ll -passes='memprof-use<profile-filename=%t/basic.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-ENABLE

; COM: FDO conflict handling
; RUN: llvm-profdata merge --memprof-version=4 %t/fdo_conflict.yaml -o %t/fdo_conflict.memprofdata
; RUN: opt < %t/fdo_conflict.ll -passes='memprof-use<profile-filename=%t/fdo_conflict.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-FDO

;--- basic.yaml
---
HeapProfileRecords:
- GUID: _Z3barv
AllocSites: []
CallSites:
- Frames:
- { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: false }
CalleeGuids: [0x123456789abcdef0, 0x23456789abcdef01]
...

;--- basic.ll
define dso_local void @_Z3barv() !dbg !4 {
entry:
%fp = alloca ptr, align 8
%0 = load ptr, ptr %fp, align 8
call void %0(), !dbg !5
; CHECK-ENABLE: call void %0(), {{.*}} !prof !6
; CHECK-DISABLE-NOT: !prof
ret void
}

; CHECK-ENABLE: !6 = !{!"VP", i32 0, i64 2, i64 1311768467463790320, i64 1, i64 2541551405711093505, i64 1}

!llvm.module.flags = !{!2, !3}

!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1)
!1 = !DIFile(filename: "t", directory: "/")
!2 = !{i32 7, !"Dwarf Version", i32 5}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 1, unit: !0)
!5 = !DILocation(line: 4, column: 5, scope: !4)

;--- fdo_conflict.yaml
---
HeapProfileRecords:
- GUID: _Z3foov
AllocSites: []
CallSites:
- Frames:
- { Function: _Z3foov, LineOffset: 3, Column: 5, IsInlineFrame: false }
CalleeGuids: [0x123456789abcdef0, 0x23456789abcdef01]
- Frames:
- { Function: _Z3foov, LineOffset: 5, Column: 5, IsInlineFrame: false }
CalleeGuids: [0x555556789abcdef0, 0x666656789abcdef1]
...

;--- fdo_conflict.ll
define dso_local void @_Z3foov() !dbg !14 {
entry:
%fp = alloca ptr, align 8
%0 = load ptr, ptr %fp, align 8
; This call already has FDO value profile metadata - should NOT be modified
; CHECK-FDO: call void %0(), {{.*}} !prof !6
; CHECK-FDO-NOT: call void %0(), {{.*}} !prof !9
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the NOT check needed, given the preceding check for the call?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed.

call void %0(), !dbg !15, !prof !16

%1 = load ptr, ptr %fp, align 8
; This call does NOT have existing metadata - should get MemProf annotation
; CHECK-FDO: call void %1(), {{.*}} !prof !9
call void %1(), !dbg !17
ret void
}

!16 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20}

; CHECK-FDO: !6 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20}
; CHECK-FDO: !9 = !{!"VP", i32 0, i64 2, i64 6148915942236413680, i64 1, i64 7378680115485269745, i64 1}

!llvm.module.flags = !{!12, !13}

!10 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !11)
!11 = !DIFile(filename: "t", directory: "/")
!12 = !{i32 7, !"Dwarf Version", i32 5}
!13 = !{i32 2, !"Debug Info Version", i32 3}
!14 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !11, file: !11, line: 1, unit: !10)
!15 = !DILocation(line: 4, column: 5, scope: !14)
!17 = !DILocation(line: 6, column: 5, scope: !14)
Loading