Skip to content

Commit c725a95

Browse files
[MemProf] Convert Hot contexts to NotCold early (#124219)
Although hot contexts are already converted to notcold contexts during the cloning step, their existence was greatly limiting the context trimming performed when we add the MemProf profile to the IR. To address this, any hot contexts are now converted to notcold contexts immediately after the first check for unambiguous allocation types, and before that check is repeated and the metadata is added while performing context trimming. Note that hot hints are now disabled by default; however, this change avoids adding unnecessary overhead if they are re-enabled.
1 parent 48657bf commit c725a95

File tree

4 files changed

+62
-63
lines changed

4 files changed

+62
-63
lines changed

llvm/include/llvm/Analysis/MemoryProfileInfo.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,15 @@ class CallStackTrie {
6565
std::map<uint64_t, CallStackTrieNode *> Callers;
6666
CallStackTrieNode(AllocationType Type)
6767
: AllocTypes(static_cast<uint8_t>(Type)) {}
68+
void addAllocType(AllocationType AllocType) {
69+
AllocTypes |= static_cast<uint8_t>(AllocType);
70+
}
71+
void removeAllocType(AllocationType AllocType) {
72+
AllocTypes &= ~static_cast<uint8_t>(AllocType);
73+
}
74+
bool hasAllocType(AllocationType AllocType) const {
75+
return AllocTypes & static_cast<uint8_t>(AllocType);
76+
}
6877
};
6978

7079
// The node for the allocation at the root.
@@ -85,6 +94,11 @@ class CallStackTrie {
8594
void collectContextSizeInfo(CallStackTrieNode *Node,
8695
std::vector<ContextTotalSize> &ContextSizeInfo);
8796

97+
// Recursively convert hot allocation types to notcold, since we don't
98+
// actually do any cloning for hot contexts, to facilitate more aggressive
99+
// pruning of contexts.
100+
void convertHotToNotCold(CallStackTrieNode *Node);
101+
88102
// Recursive helper to trim contexts and create metadata nodes.
89103
bool buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
90104
std::vector<uint64_t> &MIBCallStack,

llvm/lib/Analysis/MemoryProfileInfo.cpp

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ void CallStackTrie::addCallStack(
147147
First = false;
148148
if (Alloc) {
149149
assert(AllocStackId == StackId);
150-
Alloc->AllocTypes |= static_cast<uint8_t>(AllocType);
150+
Alloc->addAllocType(AllocType);
151151
} else {
152152
AllocStackId = StackId;
153153
Alloc = new CallStackTrieNode(AllocType);
@@ -159,7 +159,7 @@ void CallStackTrie::addCallStack(
159159
auto Next = Curr->Callers.find(StackId);
160160
if (Next != Curr->Callers.end()) {
161161
Curr = Next->second;
162-
Curr->AllocTypes |= static_cast<uint8_t>(AllocType);
162+
Curr->addAllocType(AllocType);
163163
continue;
164164
}
165165
// Otherwise add a new caller node.
@@ -228,6 +228,15 @@ void CallStackTrie::collectContextSizeInfo(
228228
collectContextSizeInfo(Caller.second, ContextSizeInfo);
229229
}
230230

231+
void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) {
232+
if (Node->hasAllocType(AllocationType::Hot)) {
233+
Node->removeAllocType(AllocationType::Hot);
234+
Node->addAllocType(AllocationType::NotCold);
235+
}
236+
for (auto &Caller : Node->Callers)
237+
convertHotToNotCold(Caller.second);
238+
}
239+
231240
// Recursive helper to trim contexts and create metadata nodes.
232241
// Caller should have pushed Node's loc to MIBCallStack. Doing this in the
233242
// caller makes it simpler to handle the many early returns in this method.
@@ -307,6 +316,22 @@ bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) {
307316
"single");
308317
return false;
309318
}
319+
// If there were any hot allocation contexts, the Alloc trie node would have
320+
// the Hot type set. If so, because we don't currently support cloning for hot
321+
// contexts, they should be converted to NotCold. This happens in the cloning
322+
// support anyway, however, doing this now enables more aggressive context
323+
// trimming when building the MIB metadata (and possibly may make the
324+
// allocation have a single NotCold allocation type), greatly reducing
325+
// overheads in bitcode, cloning memory and cloning time.
326+
if (Alloc->hasAllocType(AllocationType::Hot)) {
327+
convertHotToNotCold(Alloc);
328+
// Check whether we now have a single alloc type.
329+
if (hasSingleAllocType(Alloc->AllocTypes)) {
330+
addSingleAllocTypeAttribute(CI, (AllocationType)Alloc->AllocTypes,
331+
"single");
332+
return false;
333+
}
334+
}
310335
auto &Ctx = CI->getContext();
311336
std::vector<uint64_t> MIBCallStack;
312337
MIBCallStack.push_back(AllocStackId);

llvm/test/Transforms/PGOProfile/memprof.ll

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,6 @@
8484
; RUN: llvm-profdata merge -memprof-random-hotness -memprof-random-hotness-seed=1730170724 %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdatarand2 2>&1 | FileCheck %s --check-prefix=RAND2
8585
; RAND2: random hotness seed = 1730170724
8686
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdatarand2>' -pgo-warn-missing-function -S -stats 2>&1 | FileCheck %s --check-prefixes=MEMPROFRAND2,ALL,MEMPROFONLY,MEMPROFSTATS
87-
;; Check with hot hints enabled
88-
; RUN: opt < %s -memprof-use-hot-hints -passes='memprof-use<profile-filename=%t.memprofdatarand2>' -pgo-warn-missing-function -S -stats 2>&1 | FileCheck %s --check-prefixes=MEMPROFRAND2HOT
8987

9088
; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched
9189
; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched
@@ -413,13 +411,6 @@ for.end: ; preds = %for.cond
413411
; MEMPROFRAND2: !"notcold"
414412
; MEMPROFRAND2: !"notcold"
415413

416-
;; With hot hints enabled the last 2 should be hot.
417-
; MEMPROFRAND2HOT: !"cold"
418-
; MEMPROFRAND2HOT: !"cold"
419-
; MEMPROFRAND2HOT: !"cold"
420-
; MEMPROFRAND2HOT: !"hot"
421-
; MEMPROFRAND2HOT: !"hot"
422-
423414
; MEMPROFSTATS: 8 memprof - Number of alloc contexts in memory profile.
424415
; MEMPROFSTATS: 10 memprof - Number of callsites in memory profile.
425416
; MEMPROFSTATS: 6 memprof - Number of functions having valid memory profile.

llvm/unittests/Analysis/MemoryProfileInfoTest.cpp

Lines changed: 21 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,8 @@ define i32* @test() {
165165
%1 = bitcast i8* %call2 to i32*
166166
%call3 = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40)
167167
%2 = bitcast i8* %call3 to i32*
168+
%call4 = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40)
169+
%3 = bitcast i8* %call4 to i32*
168170
ret i32* %1
169171
}
170172
declare dso_local noalias noundef i8* @malloc(i64 noundef)
@@ -204,6 +206,18 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef)
204206
EXPECT_FALSE(Call3->hasMetadata(LLVMContext::MD_memprof));
205207
EXPECT_TRUE(Call3->hasFnAttr("memprof"));
206208
EXPECT_EQ(Call3->getFnAttr("memprof").getValueAsString(), "hot");
209+
210+
// Fourth call has hot and non-cold contexts. These should be treated as
211+
// notcold and given a notcold attribute.
212+
CallStackTrie Trie4;
213+
Trie4.addCallStack(AllocationType::Hot, {5, 6});
214+
Trie4.addCallStack(AllocationType::NotCold, {5, 7, 8});
215+
CallBase *Call4 = findCall(*Func, "call4");
216+
Trie4.buildAndAttachMIBMetadata(Call4);
217+
218+
EXPECT_FALSE(Call4->hasMetadata(LLVMContext::MD_memprof));
219+
EXPECT_TRUE(Call4->hasFnAttr("memprof"));
220+
EXPECT_EQ(Call4->getFnAttr("memprof").getValueAsString(), "notcold");
207221
}
208222

209223
// Test CallStackTrie::addCallStack interface taking allocation type and list of
@@ -299,56 +313,8 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef)
299313
EXPECT_EQ(getMIBAllocType(MIB), AllocationType::Cold);
300314
else {
301315
ASSERT_EQ(StackId->getZExtValue(), 3u);
302-
EXPECT_EQ(getMIBAllocType(MIB), AllocationType::Hot);
303-
}
304-
}
305-
}
306-
307-
// Test CallStackTrie::addCallStack interface taking allocation type and list of
308-
// call stack ids.
309-
// Test that an allocation call reached by both non cold and hot call stacks
310-
// gets memprof metadata representing the different allocation type contexts.
311-
TEST_F(MemoryProfileInfoTest, NotColdAndHotMIB) {
312-
LLVMContext C;
313-
std::unique_ptr<Module> M = makeLLVMModule(C,
314-
R"IR(
315-
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
316-
target triple = "x86_64-pc-linux-gnu"
317-
define i32* @test() {
318-
entry:
319-
%call = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40)
320-
%0 = bitcast i8* %call to i32*
321-
ret i32* %0
322-
}
323-
declare dso_local noalias noundef i8* @malloc(i64 noundef)
324-
)IR");
325-
326-
Function *Func = M->getFunction("test");
327-
328-
CallStackTrie Trie;
329-
Trie.addCallStack(AllocationType::NotCold, {1, 2});
330-
Trie.addCallStack(AllocationType::Hot, {1, 3});
331-
332-
CallBase *Call = findCall(*Func, "call");
333-
Trie.buildAndAttachMIBMetadata(Call);
334-
335-
EXPECT_FALSE(Call->hasFnAttr("memprof"));
336-
EXPECT_TRUE(Call->hasMetadata(LLVMContext::MD_memprof));
337-
MDNode *MemProfMD = Call->getMetadata(LLVMContext::MD_memprof);
338-
ASSERT_EQ(MemProfMD->getNumOperands(), 2u);
339-
for (auto &MIBOp : MemProfMD->operands()) {
340-
MDNode *MIB = dyn_cast<MDNode>(MIBOp);
341-
MDNode *StackMD = getMIBStackNode(MIB);
342-
ASSERT_NE(StackMD, nullptr);
343-
ASSERT_EQ(StackMD->getNumOperands(), 2u);
344-
auto *StackId = mdconst::dyn_extract<ConstantInt>(StackMD->getOperand(0));
345-
ASSERT_EQ(StackId->getZExtValue(), 1u);
346-
StackId = mdconst::dyn_extract<ConstantInt>(StackMD->getOperand(1));
347-
if (StackId->getZExtValue() == 2u)
316+
// Hot contexts are converted to NotCold when building the metadata.
348317
EXPECT_EQ(getMIBAllocType(MIB), AllocationType::NotCold);
349-
else {
350-
ASSERT_EQ(StackId->getZExtValue(), 3u);
351-
EXPECT_EQ(getMIBAllocType(MIB), AllocationType::Hot);
352318
}
353319
}
354320
}
@@ -401,7 +367,8 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef)
401367
EXPECT_EQ(getMIBAllocType(MIB), AllocationType::NotCold);
402368
} else {
403369
ASSERT_EQ(StackId->getZExtValue(), 4u);
404-
EXPECT_EQ(getMIBAllocType(MIB), AllocationType::Hot);
370+
// Hot contexts are converted to NotCold when building the metadata.
371+
EXPECT_EQ(getMIBAllocType(MIB), AllocationType::NotCold);
405372
}
406373
}
407374
}
@@ -463,7 +430,8 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef)
463430
EXPECT_EQ(getMIBAllocType(MIB), AllocationType::NotCold);
464431
else {
465432
ASSERT_EQ(StackId->getZExtValue(), 8u);
466-
EXPECT_EQ(getMIBAllocType(MIB), AllocationType::Hot);
433+
// Hot contexts are converted to NotCold when building the metadata.
434+
EXPECT_EQ(getMIBAllocType(MIB), AllocationType::NotCold);
467435
}
468436
}
469437
}
@@ -606,7 +574,8 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef)
606574
EXPECT_EQ(getMIBAllocType(MIB), AllocationType::NotCold);
607575
else {
608576
ASSERT_EQ(StackId->getZExtValue(), 8u);
609-
EXPECT_EQ(getMIBAllocType(MIB), AllocationType::Hot);
577+
// Hot contexts are converted to NotCold when building the new metadata.
578+
EXPECT_EQ(getMIBAllocType(MIB), AllocationType::NotCold);
610579
}
611580
}
612581
}

0 commit comments

Comments
 (0)