From d5a5cd010a5d965291c1e8cf3a0057aafde10104 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Tue, 17 Dec 2024 10:42:56 -0800 Subject: [PATCH] [MemProf] Add option to hint allocations at a given cold byte percentage Add an option (-memprof-matching-cold-threshold) to hint an allocation as cold without context sensitivity during the matching step when the percentage of its profiled bytes that are cold is at least the given threshold. The default threshold of 100 leaves the existing behavior unchanged. --- .../Instrumentation/MemProfiler.cpp | 19 +++++++++++++++++++ llvm/test/Transforms/PGOProfile/memprof.ll | 15 ++++++++++++--- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index c980869a1c0d8..497fe4b7594ea 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -173,6 +173,10 @@ static cl::opt extern cl::opt MemProfReportHintedSizes; +static cl::opt MinMatchedColdBytePercent( + "memprof-matching-cold-threshold", cl::init(100), cl::Hidden, + cl::desc("Min percent of cold bytes matched to hint allocation cold")); + // Instrumentation statistics STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); @@ -1074,6 +1078,8 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, // contexts. Add them to a Trie specialized for trimming the contexts to // the minimal needed to disambiguate contexts with unique behavior. CallStackTrie AllocTrie; + uint64_t TotalSize = 0; + uint64_t TotalColdSize = 0; for (auto *AllocInfo : AllocInfoIter->second) { // Check the full inlined call stack against this one. 
// If we found and thus matched all frames on the call, include @@ -1085,6 +1091,9 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, if (ClPrintMemProfMatchInfo || MemProfReportHintedSizes) FullStackId = computeFullStackId(AllocInfo->CallStack); auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId); + TotalSize += AllocInfo->Info.getTotalSize(); + if (AllocType == AllocationType::Cold) + TotalColdSize += AllocInfo->Info.getTotalSize(); // Record information about the allocation if match info printing // was requested. if (ClPrintMemProfMatchInfo) { @@ -1094,6 +1103,16 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, } } } + // If the threshold for the percent of cold bytes is less than 100%, + // and not all bytes are cold, see if we should still hint this + // allocation as cold without context sensitivity. + if (TotalColdSize < TotalSize && MinMatchedColdBytePercent < 100 && + TotalColdSize * 100 >= MinMatchedColdBytePercent * TotalSize) { + AllocTrie.addSingleAllocTypeAttribute(CI, AllocationType::Cold, + "dominant"); + continue; + } + // We might not have matched any to the full inlined call stack. // But if we did, create and attach metadata, or a function attribute if // all contexts have identical profiled behavior. diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll index f1b361de0fbba..7e47c8ded4e4a 100644 --- a/llvm/test/Transforms/PGOProfile/memprof.ll +++ b/llvm/test/Transforms/PGOProfile/memprof.ll @@ -64,7 +64,10 @@ ; RUN: opt < %s -passes='pgo-instr-use,memprof-use' -pgo-test-profile-file=%t.pgomemprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,PGO ;; Check that the total sizes are reported if requested. 
-; RUN: opt < %s -passes='memprof-use' -pgo-warn-missing-function -S -memprof-report-hinted-sizes 2>&1 | FileCheck %s --check-prefixes=TOTALSIZES +; RUN: opt < %s -passes='memprof-use' -pgo-warn-missing-function -S -memprof-report-hinted-sizes 2>&1 | FileCheck %s --check-prefixes=TOTALSIZESSINGLE,TOTALSIZES + +;; Check that we hint additional allocations with a threshold < 100% +; RUN: opt < %s -passes='memprof-use' -pgo-warn-missing-function -S -memprof-report-hinted-sizes -memprof-matching-cold-threshold=60 2>&1 | FileCheck %s --check-prefixes=TOTALSIZESSINGLE,TOTALSIZESTHRESH60 ;; Make sure we emit a random hotness seed if requested. ; RUN: llvm-profdata merge -memprof-random-hotness %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdatarand 2>&1 | FileCheck %s --check-prefix=RAND @@ -348,8 +351,14 @@ for.end: ; preds = %for.cond ;; For non-context sensitive allocations that get attributes we emit a message ;; with the full allocation context hash, type, and size in bytes. 
-; TOTALSIZES: Total size for full allocation context hash 6792096022461663180 and single alloc type notcold: 10 -; TOTALSIZES: Total size for full allocation context hash 15737101490731057601 and single alloc type cold: 10 +; TOTALSIZESTHRESH60: Total size for full allocation context hash 8525406123785421946 and dominant alloc type cold: 10 +; TOTALSIZESTHRESH60: Total size for full allocation context hash 11714230664165068698 and dominant alloc type cold: 10 +; TOTALSIZESTHRESH60: Total size for full allocation context hash 5725971306423925017 and dominant alloc type cold: 10 +; TOTALSIZESTHRESH60: Total size for full allocation context hash 16342802530253093571 and dominant alloc type cold: 10 +; TOTALSIZESTHRESH60: Total size for full allocation context hash 18254812774972004394 and dominant alloc type cold: 10 +; TOTALSIZESTHRESH60: Total size for full allocation context hash 1093248920606587996 and dominant alloc type cold: 10 +; TOTALSIZESSINGLE: Total size for full allocation context hash 6792096022461663180 and single alloc type notcold: 10 +; TOTALSIZESSINGLE: Total size for full allocation context hash 15737101490731057601 and single alloc type cold: 10 ;; For context sensitive allocations the full context hash and size in bytes ;; are in separate metadata nodes included on the MIB metadata. ; TOTALSIZES: !"cold", ![[CONTEXT1:[0-9]+]]}