Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 41 additions & 42 deletions llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -504,59 +504,59 @@ struct CallSiteEntry {
ArrayRef<Frame> Frames;
// Potential targets for indirect calls.
ArrayRef<GlobalValue::GUID> CalleeGuids;

// Only compare the Frames slice; CalleeGuids are ignored.
// Use pointer-based equality instead of ArrayRef's operator== which does
// element-wise comparison. We want to check if it's the same slice of the
// underlying array, not just equivalent content.
bool operator==(const CallSiteEntry &Other) const {
return Frames.data() == Other.Frames.data() &&
Frames.size() == Other.Frames.size();
}
};

// Hash functor that lets CallSiteEntry be used as the key of an unordered
// container. The hash is derived solely from the entry's Frames via
// computeFullStackId; CalleeGuids do not contribute to it.
struct CallSiteEntryHash {
  size_t operator()(const CallSiteEntry &Entry) const {
    // Hash the frame sequence, then let the result convert to size_t on
    // return.
    const auto FullStackId = computeFullStackId(Entry.Frames);
    return FullStackId;
  }
};

static void handleCallSite(
Instruction &I, const Function *CalledFunction,
ArrayRef<uint64_t> InlinedCallStack,
const std::unordered_set<CallSiteEntry, CallSiteEntryHash> &CallSiteEntries,
Module &M, std::set<std::vector<uint64_t>> &MatchedCallSites,
OptimizationRemarkEmitter &ORE) {
static void handleCallSite(Instruction &I, const Function *CalledFunction,
ArrayRef<uint64_t> InlinedCallStack,
const std::vector<CallSiteEntry> &CallSiteEntries,
Module &M,
std::set<std::vector<uint64_t>> &MatchedCallSites,
OptimizationRemarkEmitter &ORE) {
auto &Ctx = M.getContext();
// Set of Callee GUIDs to attach to indirect calls. We accumulate all of them
// to support cases where the instruction's inlined frames match multiple call
// site entries, which can happen if the profile was collected from a binary
// where this instruction was eventually inlined into multiple callers.
SetVector<GlobalValue::GUID> CalleeGuids;
bool CallsiteMDAdded = false;
for (const auto &CallSiteEntry : CallSiteEntries) {
// If we found and thus matched all frames on the call, create and
// attach call stack metadata.
if (stackFrameIncludesInlinedCallStack(CallSiteEntry.Frames,
InlinedCallStack)) {
NumOfMemProfMatchedCallSites++;
addCallsiteMetadata(I, InlinedCallStack, Ctx);

// Try to attach indirect call metadata if possible.
if (!CalledFunction)
addVPMetadata(M, I, CallSiteEntry.CalleeGuids);

// Only need to find one with a matching call stack and add a single
// callsite metadata.

// Accumulate call site matching information upon request.
if (ClPrintMemProfMatchInfo) {
std::vector<uint64_t> CallStack;
append_range(CallStack, InlinedCallStack);
MatchedCallSites.insert(std::move(CallStack));
if (!CallsiteMDAdded) {
addCallsiteMetadata(I, InlinedCallStack, Ctx);

// Accumulate call site matching information upon request.
if (ClPrintMemProfMatchInfo) {
std::vector<uint64_t> CallStack;
append_range(CallStack, InlinedCallStack);
MatchedCallSites.insert(std::move(CallStack));
}
ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemProfUse", &I)
<< ore::NV("CallSite", &I) << " in function "
<< ore::NV("Caller", I.getFunction())
<< " matched callsite with frame count "
<< ore::NV("Frames", InlinedCallStack.size()));

// If this is a direct call, we're done.
if (CalledFunction)
break;
CallsiteMDAdded = true;
}
ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemProfUse", &I)
<< ore::NV("CallSite", &I) << " in function "
<< ore::NV("Caller", I.getFunction())
<< " matched callsite with frame count "
<< ore::NV("Frames", InlinedCallStack.size()));
break;

assert(!CalledFunction && "Didn't expect direct call");

// Collect Callee GUIDs from all matching CallSiteEntries.
CalleeGuids.insert(CallSiteEntry.CalleeGuids.begin(),
CallSiteEntry.CalleeGuids.end());
}
}
// Try to attach indirect call metadata if possible.
addVPMetadata(M, I, CalleeGuids.getArrayRef());
}

static void readMemprof(Module &M, Function &F,
Expand Down Expand Up @@ -631,8 +631,7 @@ static void readMemprof(Module &M, Function &F,

// For the callsites we need to record slices of the frame array (see comments
// below where the map entries are added) along with their CalleeGuids.
std::map<uint64_t, std::unordered_set<CallSiteEntry, CallSiteEntryHash>>
LocHashToCallSites;
std::map<uint64_t, std::vector<CallSiteEntry>> LocHashToCallSites;
for (auto &AI : MemProfRec->AllocSites) {
NumOfMemProfAllocContextProfiles++;
// Associate the allocation info with the leaf frame. The later matching
Expand All @@ -651,7 +650,7 @@ static void readMemprof(Module &M, Function &F,
uint64_t StackId = computeStackId(StackFrame);
ArrayRef<Frame> FrameSlice = ArrayRef<Frame>(CS.Frames).drop_front(Idx++);
ArrayRef<GlobalValue::GUID> CalleeGuids(CS.CalleeGuids);
LocHashToCallSites[StackId].insert({FrameSlice, CalleeGuids});
LocHashToCallSites[StackId].push_back({FrameSlice, CalleeGuids});

ProfileHasColumns |= StackFrame.Column;
// Once we find this function, we can stop recording.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
;; Basic functionality with flag toggle
; RUN: llvm-profdata merge --memprof-version=4 %t/basic.yaml -o %t/basic.memprofdata
; RUN: opt < %t/basic.ll -passes='memprof-use<profile-filename=%t/basic.memprofdata>' -memprof-attach-calleeguids=false -S 2>&1 | FileCheck %s --check-prefix=CHECK-DISABLE
; RUN: opt < %t/basic.ll -passes='memprof-use<profile-filename=%t/basic.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-ENABLE
; RUN: opt < %t/basic.ll -passes='memprof-use<profile-filename=%t/basic.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-ENABLE --dump-input-filter=all

;; FDO conflict handling
; RUN: llvm-profdata merge --memprof-version=4 %t/fdo_conflict.yaml -o %t/fdo_conflict.memprofdata
Expand All @@ -18,6 +18,18 @@ HeapProfileRecords:
- Frames:
- { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: false }
CalleeGuids: [0x123456789abcdef0, 0x23456789abcdef01]
# The next 2 sets of frames simulate the case where this function was
# eventually inlined into multiple callers. We would have propagated the
# resulting frames and callee guids here for matching with the not-yet-inlined
# bar. We should aggregate all callee guids into the metadata.
- Frames:
- { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: true }
- { Function: _Z3foov, LineOffset: 1, Column: 6, IsInlineFrame: false }
CalleeGuids: [0x1234, 0x2345]
- Frames:
- { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: true }
- { Function: _Z3foov, LineOffset: 10, Column: 7, IsInlineFrame: false }
CalleeGuids: [0x3456, 0x4567]
...

;--- basic.ll
Expand All @@ -31,7 +43,7 @@ entry:
ret void
}

; CHECK-ENABLE: !6 = !{!"VP", i32 0, i64 2, i64 1311768467463790320, i64 1, i64 2541551405711093505, i64 1}
; CHECK-ENABLE: !6 = !{!"VP", i32 0, i64 6, i64 1311768467463790320, i64 1, i64 2541551405711093505, i64 1, i64 4660, i64 1, i64 9029, i64 1, i64 13398, i64 1, i64 17767, i64 1}

!llvm.module.flags = !{!2, !3}

Expand Down
Loading