diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h index 215139caef696..deb7ab134c161 100644 --- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h +++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h @@ -65,6 +65,15 @@ class CallStackTrie { std::map Callers; CallStackTrieNode(AllocationType Type) : AllocTypes(static_cast(Type)) {} + void addAllocType(AllocationType AllocType) { + AllocTypes |= static_cast(AllocType); + } + void removeAllocType(AllocationType AllocType) { + AllocTypes &= ~static_cast(AllocType); + } + bool hasAllocType(AllocationType AllocType) const { + return AllocTypes & static_cast(AllocType); + } }; // The node for the allocation at the root. @@ -85,6 +94,11 @@ class CallStackTrie { void collectContextSizeInfo(CallStackTrieNode *Node, std::vector &ContextSizeInfo); + // Recursively convert hot allocation types to notcold, since we don't + // actually do any cloning for hot contexts, to facilitate more aggressive + // pruning of contexts. + void convertHotToNotCold(CallStackTrieNode *Node); + // Recursive helper to trim contexts and create metadata nodes. 
bool buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx, std::vector &MIBCallStack, diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp index 52f4adbdb0429..5553a2e2dd24b 100644 --- a/llvm/lib/Analysis/MemoryProfileInfo.cpp +++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp @@ -147,7 +147,7 @@ void CallStackTrie::addCallStack( First = false; if (Alloc) { assert(AllocStackId == StackId); - Alloc->AllocTypes |= static_cast(AllocType); + Alloc->addAllocType(AllocType); } else { AllocStackId = StackId; Alloc = new CallStackTrieNode(AllocType); @@ -159,7 +159,7 @@ void CallStackTrie::addCallStack( auto Next = Curr->Callers.find(StackId); if (Next != Curr->Callers.end()) { Curr = Next->second; - Curr->AllocTypes |= static_cast(AllocType); + Curr->addAllocType(AllocType); continue; } // Otherwise add a new caller node. @@ -228,6 +228,15 @@ void CallStackTrie::collectContextSizeInfo( collectContextSizeInfo(Caller.second, ContextSizeInfo); } +void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) { + if (Node->hasAllocType(AllocationType::Hot)) { + Node->removeAllocType(AllocationType::Hot); + Node->addAllocType(AllocationType::NotCold); + } + for (auto &Caller : Node->Callers) + convertHotToNotCold(Caller.second); +} + // Recursive helper to trim contexts and create metadata nodes. // Caller should have pushed Node's loc to MIBCallStack. Doing this in the // caller makes it simpler to handle the many early returns in this method. @@ -307,6 +316,22 @@ bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) { "single"); return false; } + // If there were any hot allocation contexts, the Alloc trie node would have + // the Hot type set. If so, because we don't currently support cloning for hot + // contexts, they should be converted to NotCold. 
This happens in the cloning + // support anyway; however, doing this now enables more aggressive context + // trimming when building the MIB metadata (and possibly may make the + // allocation have a single NotCold allocation type), greatly reducing + // overheads in bitcode, cloning memory and cloning time. + if (Alloc->hasAllocType(AllocationType::Hot)) { + convertHotToNotCold(Alloc); + // Check whether we now have a single alloc type. + if (hasSingleAllocType(Alloc->AllocTypes)) { + addSingleAllocTypeAttribute(CI, (AllocationType)Alloc->AllocTypes, + "single"); + return false; + } + } auto &Ctx = CI->getContext(); std::vector MIBCallStack; MIBCallStack.push_back(AllocStackId); diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll index 367069e993fe1..6aa2d307a1dc8 100644 --- a/llvm/test/Transforms/PGOProfile/memprof.ll +++ b/llvm/test/Transforms/PGOProfile/memprof.ll @@ -84,8 +84,6 @@ ; RUN: llvm-profdata merge -memprof-random-hotness -memprof-random-hotness-seed=1730170724 %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdatarand2 2>&1 | FileCheck %s --check-prefix=RAND2 ; RAND2: random hotness seed = 1730170724 ; RUN: opt < %s -passes='memprof-use' -pgo-warn-missing-function -S -stats 2>&1 | FileCheck %s --check-prefixes=MEMPROFRAND2,ALL,MEMPROFONLY,MEMPROFSTATS -;; Check with hot hints enabled -; RUN: opt < %s -memprof-use-hot-hints -passes='memprof-use' -pgo-warn-missing-function -S -stats 2>&1 | FileCheck %s --check-prefixes=MEMPROFRAND2HOT ; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched ; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched @@ -413,13 +411,6 @@ for.end: ; preds = %for.cond ; MEMPROFRAND2: !"notcold" ; MEMPROFRAND2: !"notcold" -;; With hot hints enabled the last 2 should be hot. 
-; MEMPROFRAND2HOT: !"cold" -; MEMPROFRAND2HOT: !"cold" -; MEMPROFRAND2HOT: !"cold" -; MEMPROFRAND2HOT: !"hot" -; MEMPROFRAND2HOT: !"hot" - ; MEMPROFSTATS: 8 memprof - Number of alloc contexts in memory profile. ; MEMPROFSTATS: 10 memprof - Number of callsites in memory profile. ; MEMPROFSTATS: 6 memprof - Number of functions having valid memory profile. diff --git a/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp b/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp index 3888faf5453d3..b4e81e69116e8 100644 --- a/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp +++ b/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp @@ -165,6 +165,8 @@ define i32* @test() { %1 = bitcast i8* %call2 to i32* %call3 = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40) %2 = bitcast i8* %call3 to i32* + %call4 = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40) + %3 = bitcast i8* %call4 to i32* ret i32* %1 } declare dso_local noalias noundef i8* @malloc(i64 noundef) @@ -204,6 +206,18 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef) EXPECT_FALSE(Call3->hasMetadata(LLVMContext::MD_memprof)); EXPECT_TRUE(Call3->hasFnAttr("memprof")); EXPECT_EQ(Call3->getFnAttr("memprof").getValueAsString(), "hot"); + + // Fourth call has hot and non-cold contexts. These should be treated as + // notcold and given a notcold attribute. 
+ CallStackTrie Trie4; + Trie4.addCallStack(AllocationType::Hot, {5, 6}); + Trie4.addCallStack(AllocationType::NotCold, {5, 7, 8}); + CallBase *Call4 = findCall(*Func, "call4"); + Trie4.buildAndAttachMIBMetadata(Call4); + + EXPECT_FALSE(Call4->hasMetadata(LLVMContext::MD_memprof)); + EXPECT_TRUE(Call4->hasFnAttr("memprof")); + EXPECT_EQ(Call4->getFnAttr("memprof").getValueAsString(), "notcold"); } // Test CallStackTrie::addCallStack interface taking allocation type and list of @@ -299,56 +313,8 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef) EXPECT_EQ(getMIBAllocType(MIB), AllocationType::Cold); else { ASSERT_EQ(StackId->getZExtValue(), 3u); - EXPECT_EQ(getMIBAllocType(MIB), AllocationType::Hot); - } - } -} - -// Test CallStackTrie::addCallStack interface taking allocation type and list of -// call stack ids. -// Test that an allocation call reached by both non cold and hot call stacks -// gets memprof metadata representing the different allocation type contexts. -TEST_F(MemoryProfileInfoTest, NotColdAndHotMIB) { - LLVMContext C; - std::unique_ptr M = makeLLVMModule(C, - R"IR( -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-pc-linux-gnu" -define i32* @test() { -entry: - %call = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40) - %0 = bitcast i8* %call to i32* - ret i32* %0 -} -declare dso_local noalias noundef i8* @malloc(i64 noundef) -)IR"); - - Function *Func = M->getFunction("test"); - - CallStackTrie Trie; - Trie.addCallStack(AllocationType::NotCold, {1, 2}); - Trie.addCallStack(AllocationType::Hot, {1, 3}); - - CallBase *Call = findCall(*Func, "call"); - Trie.buildAndAttachMIBMetadata(Call); - - EXPECT_FALSE(Call->hasFnAttr("memprof")); - EXPECT_TRUE(Call->hasMetadata(LLVMContext::MD_memprof)); - MDNode *MemProfMD = Call->getMetadata(LLVMContext::MD_memprof); - ASSERT_EQ(MemProfMD->getNumOperands(), 2u); - for (auto &MIBOp : MemProfMD->operands()) { - MDNode *MIB = dyn_cast(MIBOp); - MDNode 
*StackMD = getMIBStackNode(MIB); - ASSERT_NE(StackMD, nullptr); - ASSERT_EQ(StackMD->getNumOperands(), 2u); - auto *StackId = mdconst::dyn_extract(StackMD->getOperand(0)); - ASSERT_EQ(StackId->getZExtValue(), 1u); - StackId = mdconst::dyn_extract(StackMD->getOperand(1)); - if (StackId->getZExtValue() == 2u) + // Hot contexts are converted to NotCold when building the metadata. EXPECT_EQ(getMIBAllocType(MIB), AllocationType::NotCold); - else { - ASSERT_EQ(StackId->getZExtValue(), 3u); - EXPECT_EQ(getMIBAllocType(MIB), AllocationType::Hot); } } } @@ -401,7 +367,8 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef) EXPECT_EQ(getMIBAllocType(MIB), AllocationType::NotCold); } else { ASSERT_EQ(StackId->getZExtValue(), 4u); - EXPECT_EQ(getMIBAllocType(MIB), AllocationType::Hot); + // Hot contexts are converted to NotCold when building the metadata. + EXPECT_EQ(getMIBAllocType(MIB), AllocationType::NotCold); } } } @@ -463,7 +430,8 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef) EXPECT_EQ(getMIBAllocType(MIB), AllocationType::NotCold); else { ASSERT_EQ(StackId->getZExtValue(), 8u); - EXPECT_EQ(getMIBAllocType(MIB), AllocationType::Hot); + // Hot contexts are converted to NotCold when building the metadata. + EXPECT_EQ(getMIBAllocType(MIB), AllocationType::NotCold); } } } @@ -606,7 +574,8 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef) EXPECT_EQ(getMIBAllocType(MIB), AllocationType::NotCold); else { ASSERT_EQ(StackId->getZExtValue(), 8u); - EXPECT_EQ(getMIBAllocType(MIB), AllocationType::Hot); + // Hot contexts are converted to NotCold when building the new metadata. + EXPECT_EQ(getMIBAllocType(MIB), AllocationType::NotCold); } } }