Skip to content

Commit e57315e

Browse files
[MemProf] Fix discarding of noncold contexts after inlining (#149599)
When we rebuild the call site tries after inlining of an allocation with MD_memprof metadata, we don't want to reapply the discarding of small non-cold contexts (under -memprof-callsite-cold-threshold=) because we have either no context size info (without -memprof-report-hinted-sizes or another option that causes us to keep that as metadata), and even with that information in the metadata, we have imperfect information at that point as we have already discarded some contexts during matching. The first case was even worse because we didn't guard our check by whether the number of cold bytes was 0, leading to very aggressive pruning during post-inline metadata rebuilding without the context size information.
1 parent e1ac57c commit e57315e

File tree

4 files changed

+333
-13
lines changed

4 files changed

+333
-13
lines changed

llvm/include/llvm/Analysis/MemoryProfileInfo.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,12 @@ class CallStackTrie {
102102
// The maximum size of a cold allocation context, from the profile summary.
103103
uint64_t MaxColdSize;
104104

105+
// Tracks whether we have built the Trie from existing MD_memprof metadata. We
106+
// apply different heuristics for determining whether to discard non-cold
107+
// contexts when rebuilding as we have lost information available during the
108+
// original profile match.
109+
bool BuiltFromExistingMetadata = false;
110+
105111
void deleteTrieNode(CallStackTrieNode *Node) {
106112
if (!Node)
107113
return;

llvm/lib/Analysis/MemoryProfileInfo.cpp

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,8 @@ void CallStackTrie::addCallStack(
157157
}
158158

159159
void CallStackTrie::addCallStack(MDNode *MIB) {
160+
// Note that we are building this from existing MD_memprof metadata.
161+
BuiltFromExistingMetadata = true;
160162
MDNode *StackMD = getMIBStackNode(MIB);
161163
assert(StackMD);
162164
std::vector<uint64_t> CallStack;
@@ -187,8 +189,9 @@ void CallStackTrie::addCallStack(MDNode *MIB) {
187189
static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
188190
AllocationType AllocType,
189191
ArrayRef<ContextTotalSize> ContextSizeInfo,
190-
const uint64_t MaxColdSize, uint64_t &TotalBytes,
191-
uint64_t &ColdBytes) {
192+
const uint64_t MaxColdSize,
193+
bool BuiltFromExistingMetadata,
194+
uint64_t &TotalBytes, uint64_t &ColdBytes) {
192195
SmallVector<Metadata *> MIBPayload(
193196
{buildCallstackMetadata(MIBCallStack, Ctx)});
194197
MIBPayload.push_back(
@@ -197,8 +200,9 @@ static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
197200
if (ContextSizeInfo.empty()) {
198201
// The profile matcher should have provided context size info if there was a
199202
// MinCallsiteColdBytePercent < 100. Here we check >=100 to gracefully
200-
// handle a user-provided percent larger than 100.
201-
assert(MinCallsiteColdBytePercent >= 100);
203+
// handle a user-provided percent larger than 100. However, we may not have
204+
// this information if we built the Trie from existing MD_memprof metadata.
205+
assert(BuiltFromExistingMetadata || MinCallsiteColdBytePercent >= 100);
202206
return MDNode::get(Ctx, MIBPayload);
203207
}
204208

@@ -252,9 +256,19 @@ void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) {
252256
static void saveFilteredNewMIBNodes(std::vector<Metadata *> &NewMIBNodes,
253257
std::vector<Metadata *> &SavedMIBNodes,
254258
unsigned CallerContextLength,
255-
uint64_t TotalBytes, uint64_t ColdBytes) {
259+
uint64_t TotalBytes, uint64_t ColdBytes,
260+
bool BuiltFromExistingMetadata) {
256261
const bool MostlyCold =
257-
MinCallsiteColdBytePercent < 100 &&
262+
// If we have built the Trie from existing MD_memprof metadata, we may or
263+
// may not have context size information (in which case ColdBytes and
264+
// TotalBytes are 0, which is not also guarded against below). Even if we
265+
// do have some context size information from the the metadata, we have
266+
// already gone through a round of discarding of small non-cold contexts
267+
// during matching, and it would be overly aggressive to do it again, and
268+
// we also want to maintain the same behavior with and without reporting
269+
// of hinted bytes enabled.
270+
!BuiltFromExistingMetadata && MinCallsiteColdBytePercent < 100 &&
271+
ColdBytes > 0 &&
258272
ColdBytes * 100 >= MinCallsiteColdBytePercent * TotalBytes;
259273

260274
// In the simplest case, with pruning disabled, keep all the new MIB nodes.
@@ -386,9 +400,9 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
386400
if (hasSingleAllocType(Node->AllocTypes)) {
387401
std::vector<ContextTotalSize> ContextSizeInfo;
388402
collectContextSizeInfo(Node, ContextSizeInfo);
389-
MIBNodes.push_back(
390-
createMIBNode(Ctx, MIBCallStack, (AllocationType)Node->AllocTypes,
391-
ContextSizeInfo, MaxColdSize, TotalBytes, ColdBytes));
403+
MIBNodes.push_back(createMIBNode(
404+
Ctx, MIBCallStack, (AllocationType)Node->AllocTypes, ContextSizeInfo,
405+
MaxColdSize, BuiltFromExistingMetadata, TotalBytes, ColdBytes));
392406
return true;
393407
}
394408

@@ -416,7 +430,8 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
416430
// Pass in the stack length of the MIB nodes added for the immediate caller,
417431
// which is the current stack length plus 1.
418432
saveFilteredNewMIBNodes(NewMIBNodes, MIBNodes, MIBCallStack.size() + 1,
419-
CallerTotalBytes, CallerColdBytes);
433+
CallerTotalBytes, CallerColdBytes,
434+
BuiltFromExistingMetadata);
420435
TotalBytes += CallerTotalBytes;
421436
ColdBytes += CallerColdBytes;
422437

@@ -441,9 +456,9 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
441456
return false;
442457
std::vector<ContextTotalSize> ContextSizeInfo;
443458
collectContextSizeInfo(Node, ContextSizeInfo);
444-
MIBNodes.push_back(createMIBNode(Ctx, MIBCallStack, AllocationType::NotCold,
445-
ContextSizeInfo, MaxColdSize, TotalBytes,
446-
ColdBytes));
459+
MIBNodes.push_back(createMIBNode(
460+
Ctx, MIBCallStack, AllocationType::NotCold, ContextSizeInfo, MaxColdSize,
461+
BuiltFromExistingMetadata, TotalBytes, ColdBytes));
447462
return true;
448463
}
449464

llvm/test/Transforms/Inline/memprof_inline2.ll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@
3838
;; }
3939

4040
; RUN: opt -passes=inline %s -S | FileCheck %s
41+
;; We should not perform additional discarding of non-cold contexts when
42+
;; rebuilding the tries after inlining, even with a very low threshold.
43+
; RUN: opt -passes=inline -memprof-callsite-cold-threshold=1 %s -S | FileCheck %s
4144

4245
; ModuleID = 'memprof_inline2.cc'
4346
source_filename = "memprof_inline2.cc"

0 commit comments

Comments
 (0)