From 5e409013dac2c5b068974c66bea75828b5dfcbe0 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Tue, 7 Jan 2025 10:55:20 -0800 Subject: [PATCH 1/3] [MemProf] Allow cloning of callsites in recursive cycles Optionally (by default) no longer mark callsite nodes as Recursive, which means they would be automatically skipped during cloning. This was too conservative as it prevents cloning of any callsite that showed up in any recursive cycle, even for non-recursive contexts. While this will enable partial cloning of recursive contexts, the recursive calls themselves will not be updated to call the correct clone, possibly leading to some unnecessary but benign cloning and affecting bytes hinted reporting. To prevent this, optional support looks for recursive cycles in contexts during cloning and removes those contexts from cloning. This requires some additional runtime overhead, so is disabled by default for now. Support for correct cloning of recursive cycles is WIP. --- .../IPO/MemProfContextDisambiguation.cpp | 48 +++++- llvm/test/ThinLTO/X86/memprof-recursive.ll | 141 ++++++++++++++++ .../MemProfContextDisambiguation/recursive.ll | 159 ++++++++++++++++++ 3 files changed, 343 insertions(+), 5 deletions(-) create mode 100644 llvm/test/ThinLTO/X86/memprof-recursive.ll create mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 1bf7ff468d782..a16d858a5a876 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -122,6 +122,20 @@ static cl::opt cl::desc("Max depth to recursively search for missing " "frames through tail calls.")); +// By default enable cloning of callsites involved with recursive cycles +static cl::opt SkipRecursiveCallsites( + "memprof-skip-recursive-callsites", cl::init(false), cl::Hidden, + cl::desc("Prevent cloning of callsites involved in recursive cycles")); + +// When enabled, try to detect and prevent cloning of recursive contexts. +// This is only necessary until we support cloning through recursive cycles. +// Leave off by default for now, as it requires a little bit of compile time +// overhead and doesn't affect correctness, it will just inflate the cold hinted +// bytes reporting a bit when -memprof-report-hinted-sizes is enabled. +static cl::opt SkipRecursiveContexts( + "memprof-skip-recursive-contexts", cl::init(false), cl::Hidden, + cl::desc("Prevent cloning of contexts through recursive cycles")); + namespace llvm { cl::opt EnableMemProfContextDisambiguation( "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden, @@ -1236,9 +1250,13 @@ void CallsiteContextGraph::addStackNodesForMIB( StackEntryIdToContextNodeMap[StackId] = StackNode; StackNode->OrigStackOrAllocId = StackId; } - auto Ins = StackIdSet.insert(StackId); - if (!Ins.second) - StackNode->Recursive = true; + // Marking a node recursive will prevent its cloning completely, even for + // non-recursive contexts flowing through it. + if (SkipRecursiveCallsites) { + auto Ins = StackIdSet.insert(StackId); + if (!Ins.second) + StackNode->Recursive = true; + } StackNode->AllocTypes |= (uint8_t)AllocType; PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId); PrevNode = StackNode; @@ -1375,8 +1393,11 @@ static void checkNode(const ContextNode *Node, set_union(CallerEdgeContextIds, Edge->ContextIds); } // Node can have more context ids than callers if some contexts terminate at - // node and some are longer. - assert(NodeContextIds == CallerEdgeContextIds || + // node and some are longer. If we are not skipping recursive callsites but + // haven't enabled skipping of recursive contexts, this will be violated for + // incompletely cloned recursive cycles, so skip the checking in that case. + assert(!(SkipRecursiveCallsites || SkipRecursiveContexts) || + NodeContextIds == CallerEdgeContextIds || set_is_subset(CallerEdgeContextIds, NodeContextIds)); } if (Node->CalleeEdges.size()) { @@ -3370,6 +3391,20 @@ void CallsiteContextGraph::identifyClones( assert(Node->AllocTypes != (uint8_t)AllocationType::None); + DenseSet RecursiveContextIds; + // If we are not skipping recursive callsites, and have also enabled + // skipping of recursive contexts, look for context ids that show up in + // multiple caller edges. + if (!SkipRecursiveCallsites && SkipRecursiveContexts) { + DenseSet AllCallerContextIds; + for (auto &CE : Node->CallerEdges) { + AllCallerContextIds.reserve(CE->getContextIds().size()); + for (auto Id : CE->getContextIds()) + if (!AllCallerContextIds.insert(Id).second) + RecursiveContextIds.insert(Id); + } + } + // Iterate until we find no more opportunities for disambiguating the alloc // types via cloning. In most cases this loop will terminate once the Node // has a single allocation type, in which case no more cloning is needed. @@ -3394,6 +3429,9 @@ void CallsiteContextGraph::identifyClones( // allocation. auto CallerEdgeContextsForAlloc = set_intersection(CallerEdge->getContextIds(), AllocContextIds); + if (!RecursiveContextIds.empty()) + CallerEdgeContextsForAlloc = + set_difference(CallerEdgeContextsForAlloc, RecursiveContextIds); if (CallerEdgeContextsForAlloc.empty()) { ++EI; continue; diff --git a/llvm/test/ThinLTO/X86/memprof-recursive.ll b/llvm/test/ThinLTO/X86/memprof-recursive.ll new file mode 100644 index 0000000000000..38cc107130380 --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-recursive.ll @@ -0,0 +1,141 @@ +;; Test recursion handling during cloning. +;; +;; See llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll for +;; information on how the test was created. + +; RUN: opt -thinlto-bc %s >%t.o + +;; By default we should enable cloning of contexts involved with recursive +;; cycles, but not through the cycle itself. I.e. until full support for +;; recursion is added, the cloned recursive call from C back to B (line 12) will +;; not be updated to call a clone. +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ +; RUN: -r=%t.o,_Z1Dv,plx \ +; RUN: -r=%t.o,_Z1Ci,plx \ +; RUN: -r=%t.o,_Z1Bi,plx \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes \ +; RUN: -pass-remarks=memprof-context-disambiguation \ +; RUN: -o %t.out 2>&1 | FileCheck %s \ +; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ +; RUN: --check-prefix=NOSKIP-RECUR-CALLSITES --check-prefix=NOSKIP-RECUR-CONTEXTS + +;; Skipping recursive callsites should result in no cloning. +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ +; RUN: -r=%t.o,_Z1Dv,plx \ +; RUN: -r=%t.o,_Z1Ci,plx \ +; RUN: -r=%t.o,_Z1Bi,plx \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes \ +; RUN: -pass-remarks=memprof-context-disambiguation \ +; RUN: -memprof-skip-recursive-callsites \ +; RUN: -o %t.out 2>&1 | FileCheck %s --allow-empty \ +; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ +; RUN: --implicit-check-not="created clone" \ +; RUN: --implicit-check-not="marked with memprof allocation attribute cold" + +;; Skipping recursive contexts should prevent spurious call to cloned version of +;; B from the context starting at memprof_recursive.cc:19:13, which is actually +;; recursive (until that support is added). +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ +; RUN: -r=%t.o,_Z1Dv,plx \ +; RUN: -r=%t.o,_Z1Ci,plx \ +; RUN: -r=%t.o,_Z1Bi,plx \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes \ +; RUN: -pass-remarks=memprof-context-disambiguation \ +; RUN: -memprof-skip-recursive-contexts \ +; RUN: -o %t.out 2>&1 | FileCheck %s \ +; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ +; RUN: --check-prefix=NOSKIP-RECUR-CALLSITES --check-prefix=SKIP-RECUR-CONTEXTS + +; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:4:0: created clone _Z1Dv.memprof.1 +; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:5:10: call in clone _Z1Dv marked with memprof allocation attribute notcold +; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:5:10: call in clone _Z1Dv.memprof.1 marked with memprof allocation attribute cold +; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:8:0: created clone _Z1Ci.memprof.1 +; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:10:12: call in clone _Z1Ci.memprof.1 assigned to call function clone _Z1Dv.memprof.1 +; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:14:0: created clone _Z1Bi.memprof.1 +; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:15:10: call in clone _Z1Bi.memprof.1 assigned to call function clone _Z1Ci.memprof.1 +;; We should only call the cold clone for the recursive context if we haven't +;; enabled skipping of recursive contexts via -memprof-skip-recursive-contexts. +; NOSKIP-RECUR-CONTEXTS: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi.memprof.1 +; SKIP-RECUR-CONTEXTS-NOT: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi.memprof.1 +; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:20:13: call in clone main assigned to call function clone _Z1Bi.memprof.1 + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define ptr @_Z1Dv() !dbg !3 { +entry: + %call = tail call ptr @_Znam(i64 10), !dbg !6, !memprof !7, !callsite !14 + ret ptr null +} + +define ptr @_Z1Ci(i32 %n) !dbg !15 { +entry: + %call = tail call ptr @_Z1Dv(), !dbg !16, !callsite !17 + br label %return + +if.end: ; No predecessors! + %call1 = tail call ptr @_Z1Bi(i32 0), !dbg !18, !callsite !19 + br label %return + +return: ; preds = %if.end, %entry + ret ptr null +} + +define ptr @_Z1Bi(i32 %n) !dbg !20 { +entry: + %call = tail call ptr @_Z1Ci(i32 0), !dbg !21, !callsite !22 + ret ptr null +} + +define i32 @main() { +entry: + %call = tail call ptr @_Z1Bi(i32 0), !dbg !23, !callsite !25 + %call1 = tail call ptr @_Z1Bi(i32 0), !dbg !26, !callsite !27 + %call2 = tail call ptr @_Z1Bi(i32 0), !dbg !28, !callsite !29 + ret i32 0 +} + +declare ptr @_Znam(i64) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 20.0.0git (https://github.com/llvm/llvm-project.git 7aec6dc477f8148ed066d10dfc7a012a51b6599c)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!1 = !DIFile(filename: "memprof_recursive.cc", directory: ".", checksumkind: CSK_MD5, checksum: "2f15f63b187a0e0d40e7fdd18b10576a") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = distinct !DISubprogram(name: "D", linkageName: "_Z1Dv", scope: !1, file: !1, line: 4, type: !4, scopeLine: 4, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!4 = !DISubroutineType(types: !5) +!5 = !{} +!6 = !DILocation(line: 5, column: 10, scope: !3) +!7 = !{!8, !10, !12} +!8 = !{!9, !"cold"} +!9 = !{i64 6541423618768552252, i64 -200552803509692312, i64 -2954124005641725917, i64 6307901912192269588} +!10 = !{!11, !"notcold"} +!11 = !{i64 6541423618768552252, i64 -200552803509692312, i64 -2954124005641725917, i64 -7155190423157709404, i64 -2954124005641725917, i64 8632435727821051414} +!12 = !{!13, !"cold"} +!13 = !{i64 6541423618768552252, i64 -200552803509692312, i64 -2954124005641725917, i64 -7155190423157709404, i64 -2954124005641725917, i64 -3421689549917153178} +!14 = !{i64 6541423618768552252} +!15 = distinct !DISubprogram(name: "C", linkageName: "_Z1Ci", scope: !1, file: !1, line: 8, type: !4, scopeLine: 8, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!16 = !DILocation(line: 10, column: 12, scope: !15) +!17 = !{i64 -200552803509692312} +!18 = !DILocation(line: 12, column: 10, scope: !15) +!19 = !{i64 -7155190423157709404} +!20 = distinct !DISubprogram(name: "B", linkageName: "_Z1Bi", scope: !1, file: !1, line: 14, type: !4, scopeLine: 14, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!21 = !DILocation(line: 15, column: 10, scope: !20) +!22 = !{i64 -2954124005641725917} +!23 = !DILocation(line: 18, column: 13, scope: !24) +!24 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 17, type: !4, scopeLine: 17, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!25 = !{i64 8632435727821051414} +!26 = !DILocation(line: 19, column: 13, scope: !24) +!27 = !{i64 -3421689549917153178} +!28 = !DILocation(line: 20, column: 13, scope: !24) +!29 = !{i64 6307901912192269588} diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll b/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll new file mode 100644 index 0000000000000..42ea59be9269f --- /dev/null +++ b/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll @@ -0,0 +1,159 @@ +;; Test recursion handling during cloning. +;; +;; Original code looks like: +;; +;; #include +;; #include +;; #include +;; __attribute((noinline)) char *D() { +;; return new char[10]; +;; } +;; __attribute((noinline)) char *B(int n); +;; __attribute((noinline)) char *C(int n) { +;; if (!n) { +;; return D(); +;; } +;; return B(n-1); +;; } +;; __attribute((noinline)) char *B(int n) { +;; return C(n); +;; } +;; int main(int argc, char **argv) { +;; char *x = B(1); +;; char *y = B(1); +;; char *z = B(0); +;; memset(x, 0, 10); +;; memset(y, 0, 10); +;; memset(z, 0, 10); +;; free(x); +;; sleep(200); +;; free(y); +;; free(z); +;; return 0; +;; } +;; +;; The IR was then reduced using llvm-reduce with the expected FileCheck input. + +;; By default we should enable cloning of contexts involved with recursive +;; cycles, but not through the cycle itself. I.e. until full support for +;; recursion is added, the cloned recursive call from C back to B (line 12) will +;; not be updated to call a clone. +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes \ +; RUN: -pass-remarks=memprof-context-disambiguation \ +; RUN: %s -S 2>&1 | FileCheck %s \ +; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ +; RUN: --check-prefix=ALL --check-prefix=NOSKIP-RECUR-CALLSITES --check-prefix=NOSKIP-RECUR-CONTEXTS + +;; Skipping recursive callsites should result in no cloning. +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes \ +; RUN: -pass-remarks=memprof-context-disambiguation \ +; RUN: -memprof-skip-recursive-callsites \ +; RUN: %s -S 2>&1 | FileCheck %s \ +; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ +; RUN: --implicit-check-not="created clone" \ +; RUN: --implicit-check-not="marked with memprof allocation attribute cold" \ +; RUN: --check-prefix=ALL + +;; Skipping recursive contexts should prevent spurious call to cloned version of +;; B from the context starting at memprof_recursive.cc:19:13, which is actually +;; recursive (until that support is added). +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes \ +; RUN: -pass-remarks=memprof-context-disambiguation \ +; RUN: -memprof-skip-recursive-contexts \ +; RUN: %s -S 2>&1 | FileCheck %s \ +; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ +; RUN: --check-prefix=ALL --check-prefix=NOSKIP-RECUR-CALLSITES --check-prefix=SKIP-RECUR-CONTEXTS + +; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:4:0: created clone _Z1Dv.memprof.1 +; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:8:0: created clone _Z1Ci.memprof.1 +; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:14:0: created clone _Z1Bi.memprof.1 +; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:20:13: call in clone main assigned to call function clone _Z1Bi.memprof.1 +;; We should only call the cold clone for the recursive context if we haven't +;; enabled skipping of recursive contexts via -memprof-skip-recursive-contexts. +; NOSKIP-RECUR-CONTEXTS: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi.memprof.1 +; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:15:10: call in clone _Z1Bi.memprof.1 assigned to call function clone _Z1Ci.memprof.1 +; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:10:12: call in clone _Z1Ci.memprof.1 assigned to call function clone _Z1Dv.memprof.1 +; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:5:10: call in clone _Z1Dv.memprof.1 marked with memprof allocation attribute cold +;; We should call the original B for the recursive context if we have +;; enabled skipping of recursive contexts via -memprof-skip-recursive-contexts. +; SKIP-RECUR-CONTEXTS: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi +; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:12:10: call in clone _Z1Ci assigned to call function clone _Z1Bi +; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:18:13: call in clone main assigned to call function clone _Z1Bi +; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:15:10: call in clone _Z1Bi assigned to call function clone _Z1Ci +; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:10:12: call in clone _Z1Ci assigned to call function clone _Z1Dv +; ALL: memprof_recursive.cc:5:10: call in clone _Z1Dv marked with memprof allocation attribute notcold + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define ptr @_Z1Dv() !dbg !3 { +entry: + %call = tail call ptr @_Znam(i64 10), !dbg !6, !memprof !7, !callsite !14 + ret ptr null +} + +define ptr @_Z1Ci(i32 %n) !dbg !15 { +entry: + %call = tail call ptr @_Z1Dv(), !dbg !16, !callsite !17 + br label %return + +if.end: ; No predecessors! + %call1 = tail call ptr @_Z1Bi(i32 0), !dbg !18, !callsite !19 + br label %return + +return: ; preds = %if.end, %entry + ret ptr null +} + +define ptr @_Z1Bi(i32 %n) !dbg !20 { +entry: + %call = tail call ptr @_Z1Ci(i32 0), !dbg !21, !callsite !22 + ret ptr null +} + +define i32 @main() { +entry: + %call = tail call ptr @_Z1Bi(i32 0), !dbg !23, !callsite !25 + %call1 = tail call ptr @_Z1Bi(i32 0), !dbg !26, !callsite !27 + %call2 = tail call ptr @_Z1Bi(i32 0), !dbg !28, !callsite !29 + ret i32 0 +} + +declare ptr @_Znam(i64) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 20.0.0git (https://github.com/llvm/llvm-project.git 7aec6dc477f8148ed066d10dfc7a012a51b6599c)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!1 = !DIFile(filename: "memprof_recursive.cc", directory: ".", checksumkind: CSK_MD5, checksum: "2f15f63b187a0e0d40e7fdd18b10576a") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = distinct !DISubprogram(name: "D", linkageName: "_Z1Dv", scope: !1, file: !1, line: 4, type: !4, scopeLine: 4, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!4 = !DISubroutineType(types: !5) +!5 = !{} +!6 = !DILocation(line: 5, column: 10, scope: !3) +!7 = !{!8, !10, !12} +!8 = !{!9, !"cold"} +!9 = !{i64 6541423618768552252, i64 -200552803509692312, i64 -2954124005641725917, i64 6307901912192269588} +!10 = !{!11, !"notcold"} +!11 = !{i64 6541423618768552252, i64 -200552803509692312, i64 -2954124005641725917, i64 -7155190423157709404, i64 -2954124005641725917, i64 8632435727821051414} +!12 = !{!13, !"cold"} +!13 = !{i64 6541423618768552252, i64 -200552803509692312, i64 -2954124005641725917, i64 -7155190423157709404, i64 -2954124005641725917, i64 -3421689549917153178} +!14 = !{i64 6541423618768552252} +!15 = distinct !DISubprogram(name: "C", linkageName: "_Z1Ci", scope: !1, file: !1, line: 8, type: !4, scopeLine: 8, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!16 = !DILocation(line: 10, column: 12, scope: !15) +!17 = !{i64 -200552803509692312} +!18 = !DILocation(line: 12, column: 10, scope: !15) +!19 = !{i64 -7155190423157709404} +!20 = distinct !DISubprogram(name: "B", linkageName: "_Z1Bi", scope: !1, file: !1, line: 14, type: !4, scopeLine: 14, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!21 = !DILocation(line: 15, column: 10, scope: !20) +!22 = !{i64 -2954124005641725917} +!23 = !DILocation(line: 18, column: 13, scope: !24) +!24 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 17, type: !4, scopeLine: 17, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!25 = !{i64 8632435727821051414} +!26 = !DILocation(line: 19, column: 13, scope: !24) +!27 = !{i64 -3421689549917153178} +!28 = !DILocation(line: 20, column: 13, scope: !24) +!29 = !{i64 6307901912192269588} From e4afefc14c4344229efb28c95fd823d556747a87 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Tue, 7 Jan 2025 14:53:43 -0800 Subject: [PATCH 2/3] Address comments --- .../IPO/MemProfContextDisambiguation.cpp | 18 ++++++++++-------- llvm/test/ThinLTO/X86/memprof-recursive.ll | 2 +- .../MemProfContextDisambiguation/recursive.ll | 2 +- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index a16d858a5a876..9ccfcecd88d0d 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -123,9 +123,9 @@ static cl::opt "frames through tail calls.")); // By default enable cloning of callsites involved with recursive cycles -static cl::opt SkipRecursiveCallsites( - "memprof-skip-recursive-callsites", cl::init(false), cl::Hidden, - cl::desc("Prevent cloning of callsites involved in recursive cycles")); +static cl::opt AllowRecursiveCallsites( + "memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, + cl::desc("Allow cloning of callsites involved in recursive cycles")); // When enabled, try to detect and prevent cloning of recursive contexts. // This is only necessary until we support cloning through recursive cycles. @@ -1252,7 +1252,7 @@ void CallsiteContextGraph::addStackNodesForMIB( } // Marking a node recursive will prevent its cloning completely, even for // non-recursive contexts flowing through it. - if (SkipRecursiveCallsites) { + if (!AllowRecursiveCallsites) { auto Ins = StackIdSet.insert(StackId); if (!Ins.second) StackNode->Recursive = true; @@ -1393,10 +1393,10 @@ static void checkNode(const ContextNode *Node, set_union(CallerEdgeContextIds, Edge->ContextIds); } // Node can have more context ids than callers if some contexts terminate at - // node and some are longer. If we are not skipping recursive callsites but + // node and some are longer. If we are allowing recursive callsites but // haven't enabled skipping of recursive contexts, this will be violated for // incompletely cloned recursive cycles, so skip the checking in that case. - assert(!(SkipRecursiveCallsites || SkipRecursiveContexts) || + assert((AllowRecursiveCallsites && !SkipRecursiveContexts) || NodeContextIds == CallerEdgeContextIds || set_is_subset(CallerEdgeContextIds, NodeContextIds)); } @@ -3392,12 +3392,14 @@ void CallsiteContextGraph::identifyClones( assert(Node->AllocTypes != (uint8_t)AllocationType::None); DenseSet RecursiveContextIds; - // If we are not skipping recursive callsites, and have also enabled + // If we are allowing recursive callsites, and have also enabled // skipping of recursive contexts, look for context ids that show up in // multiple caller edges. - if (!SkipRecursiveCallsites && SkipRecursiveContexts) { + if (AllowRecursiveCallsites && SkipRecursiveContexts) { DenseSet AllCallerContextIds; for (auto &CE : Node->CallerEdges) { + // Resize to the largest set of caller context ids, since we know the + // final set will be at least that large. AllCallerContextIds.reserve(CE->getContextIds().size()); for (auto Id : CE->getContextIds()) if (!AllCallerContextIds.insert(Id).second) diff --git a/llvm/test/ThinLTO/X86/memprof-recursive.ll b/llvm/test/ThinLTO/X86/memprof-recursive.ll index 38cc107130380..16fc82eaf391a 100644 --- a/llvm/test/ThinLTO/X86/memprof-recursive.ll +++ b/llvm/test/ThinLTO/X86/memprof-recursive.ll @@ -32,7 +32,7 @@ ; RUN: -r=%t.o,_Znam, \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes \ ; RUN: -pass-remarks=memprof-context-disambiguation \ -; RUN: -memprof-skip-recursive-callsites \ +; RUN: -memprof-allow-recursive-callsites=false \ ; RUN: -o %t.out 2>&1 | FileCheck %s --allow-empty \ ; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ ; RUN: --implicit-check-not="created clone" \ diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll b/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll index 42ea59be9269f..63221a723edf2 100644 --- a/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll @@ -49,7 +49,7 @@ ; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes \ ; RUN: -pass-remarks=memprof-context-disambiguation \ -; RUN: -memprof-skip-recursive-callsites \ +; RUN: -memprof-allow-recursive-callsites=false \ ; RUN: %s -S 2>&1 | FileCheck %s \ ; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ ; RUN: --implicit-check-not="created clone" \ From 14e06cc62c63d880c708a2671c8f4d5a743eabce Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Tue, 7 Jan 2025 16:31:10 -0800 Subject: [PATCH 3/3] Change other flag to positive intent too --- .../IPO/MemProfContextDisambiguation.cpp | 25 +++++++------ llvm/test/ThinLTO/X86/memprof-recursive.ll | 28 +++++++-------- .../MemProfContextDisambiguation/recursive.ll | 36 +++++++++---------- 3 files changed, 44 insertions(+), 45 deletions(-) diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 9ccfcecd88d0d..016db55c99c3e 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -127,14 +127,14 @@ static cl::opt AllowRecursiveCallsites( "memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles")); -// When enabled, try to detect and prevent cloning of recursive contexts. +// When disabled, try to detect and prevent cloning of recursive contexts. // This is only necessary until we support cloning through recursive cycles. -// Leave off by default for now, as it requires a little bit of compile time -// overhead and doesn't affect correctness, it will just inflate the cold hinted -// bytes reporting a bit when -memprof-report-hinted-sizes is enabled. -static cl::opt SkipRecursiveContexts( - "memprof-skip-recursive-contexts", cl::init(false), cl::Hidden, - cl::desc("Prevent cloning of contexts through recursive cycles")); +// Leave on by default for now, as disabling requires a little bit of compile +// time overhead and doesn't affect correctness, it will just inflate the cold +// hinted bytes reporting a bit when -memprof-report-hinted-sizes is enabled. +static cl::opt AllowRecursiveContexts( + "memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, + cl::desc("Allow cloning of contexts through recursive cycles")); namespace llvm { cl::opt EnableMemProfContextDisambiguation( @@ -1394,9 +1394,9 @@ static void checkNode(const ContextNode *Node, } // Node can have more context ids than callers if some contexts terminate at // node and some are longer. If we are allowing recursive callsites but - // haven't enabled skipping of recursive contexts, this will be violated for + // haven't disabled recursive contexts, this will be violated for // incompletely cloned recursive cycles, so skip the checking in that case. - assert((AllowRecursiveCallsites && !SkipRecursiveContexts) || + assert((AllowRecursiveCallsites && AllowRecursiveContexts) || NodeContextIds == CallerEdgeContextIds || set_is_subset(CallerEdgeContextIds, NodeContextIds)); } @@ -3392,10 +3392,9 @@ void CallsiteContextGraph::identifyClones( assert(Node->AllocTypes != (uint8_t)AllocationType::None); DenseSet RecursiveContextIds; - // If we are allowing recursive callsites, and have also enabled - // skipping of recursive contexts, look for context ids that show up in - // multiple caller edges. - if (AllowRecursiveCallsites && SkipRecursiveContexts) { + // If we are allowing recursive callsites, but have also disabled recursive + // contexts, look for context ids that show up in multiple caller edges. + if (AllowRecursiveCallsites && !AllowRecursiveContexts) { DenseSet AllCallerContextIds; for (auto &CE : Node->CallerEdges) { // Resize to the largest set of caller context ids, since we know the diff --git a/llvm/test/ThinLTO/X86/memprof-recursive.ll b/llvm/test/ThinLTO/X86/memprof-recursive.ll index 16fc82eaf391a..2b1d7081b7610 100644 --- a/llvm/test/ThinLTO/X86/memprof-recursive.ll +++ b/llvm/test/ThinLTO/X86/memprof-recursive.ll @@ -20,7 +20,7 @@ ; RUN: -pass-remarks=memprof-context-disambiguation \ ; RUN: -o %t.out 2>&1 | FileCheck %s \ ; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ -; RUN: --check-prefix=NOSKIP-RECUR-CALLSITES --check-prefix=NOSKIP-RECUR-CONTEXTS +; RUN: --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=ALLOW-RECUR-CONTEXTS ;; Skipping recursive callsites should result in no cloning. ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ @@ -50,23 +50,23 @@ ; RUN: -r=%t.o,_Znam, \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes \ ; RUN: -pass-remarks=memprof-context-disambiguation \ -; RUN: -memprof-skip-recursive-contexts \ +; RUN: -memprof-allow-recursive-contexts=false \ ; RUN: -o %t.out 2>&1 | FileCheck %s \ ; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ -; RUN: --check-prefix=NOSKIP-RECUR-CALLSITES --check-prefix=SKIP-RECUR-CONTEXTS +; RUN: --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=SKIP-RECUR-CONTEXTS -; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:4:0: created clone _Z1Dv.memprof.1 -; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:5:10: call in clone _Z1Dv marked with memprof allocation attribute notcold -; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:5:10: call in clone _Z1Dv.memprof.1 marked with memprof allocation attribute cold -; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:8:0: created clone _Z1Ci.memprof.1 -; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:10:12: call in clone _Z1Ci.memprof.1 assigned to call function clone _Z1Dv.memprof.1 -; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:14:0: created clone _Z1Bi.memprof.1 -; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:15:10: call in clone _Z1Bi.memprof.1 assigned to call function clone _Z1Ci.memprof.1 -;; We should only call the cold clone for the recursive context if we haven't -;; enabled skipping of recursive contexts via -memprof-skip-recursive-contexts. -; NOSKIP-RECUR-CONTEXTS: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi.memprof.1 +; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:4:0: created clone _Z1Dv.memprof.1 +; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:5:10: call in clone _Z1Dv marked with memprof allocation attribute notcold +; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:5:10: call in clone _Z1Dv.memprof.1 marked with memprof allocation attribute cold +; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:8:0: created clone _Z1Ci.memprof.1 +; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:10:12: call in clone _Z1Ci.memprof.1 assigned to call function clone _Z1Dv.memprof.1 +; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:14:0: created clone _Z1Bi.memprof.1 +; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:15:10: call in clone _Z1Bi.memprof.1 assigned to call function clone _Z1Ci.memprof.1 +;; We should only call the cold clone for the recursive context if we enabled +;; recursive contexts via -memprof-allow-recursive-contexts=true (default). +; ALLOW-RECUR-CONTEXTS: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi.memprof.1 ; SKIP-RECUR-CONTEXTS-NOT: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi.memprof.1 -; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:20:13: call in clone main assigned to call function clone _Z1Bi.memprof.1 +; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:20:13: call in clone main assigned to call function clone _Z1Bi.memprof.1 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll b/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll index 63221a723edf2..759d5115896c1 100644 --- a/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll @@ -43,7 +43,7 @@ ; RUN: -pass-remarks=memprof-context-disambiguation \ ; RUN: %s -S 2>&1 | FileCheck %s \ ; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ -; RUN: --check-prefix=ALL --check-prefix=NOSKIP-RECUR-CALLSITES --check-prefix=NOSKIP-RECUR-CONTEXTS +; RUN: --check-prefix=ALL --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=ALLOW-RECUR-CONTEXTS ;; Skipping recursive callsites should result in no cloning. ; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ @@ -62,28 +62,28 @@ ; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes \ ; RUN: -pass-remarks=memprof-context-disambiguation \ -; RUN: -memprof-skip-recursive-contexts \ +; RUN: -memprof-allow-recursive-contexts=false \ ; RUN: %s -S 2>&1 | FileCheck %s \ ; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ -; RUN: --check-prefix=ALL --check-prefix=NOSKIP-RECUR-CALLSITES --check-prefix=SKIP-RECUR-CONTEXTS +; RUN: --check-prefix=ALL --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=SKIP-RECUR-CONTEXTS -; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:4:0: created clone _Z1Dv.memprof.1 -; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:8:0: created clone _Z1Ci.memprof.1 -; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:14:0: created clone _Z1Bi.memprof.1 -; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:20:13: call in clone main assigned to call function clone _Z1Bi.memprof.1 -;; We should only call the cold clone for the recursive context if we haven't -;; enabled skipping of recursive contexts via -memprof-skip-recursive-contexts. -; NOSKIP-RECUR-CONTEXTS: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi.memprof.1 -; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:15:10: call in clone _Z1Bi.memprof.1 assigned to call function clone _Z1Ci.memprof.1 -; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:10:12: call in clone _Z1Ci.memprof.1 assigned to call function clone _Z1Dv.memprof.1 -; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:5:10: call in clone _Z1Dv.memprof.1 marked with memprof allocation attribute cold +; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:4:0: created clone _Z1Dv.memprof.1 +; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:8:0: created clone _Z1Ci.memprof.1 +; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:14:0: created clone _Z1Bi.memprof.1 +; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:20:13: call in clone main assigned to call function clone _Z1Bi.memprof.1 +;; We should only call the cold clone for the recursive context if we enabled +;; recursive contexts via -memprof-allow-recursive-contexts=true (default). +; ALLOW-RECUR-CONTEXTS: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi.memprof.1 +; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:15:10: call in clone _Z1Bi.memprof.1 assigned to call function clone _Z1Ci.memprof.1 +; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:10:12: call in clone _Z1Ci.memprof.1 assigned to call function clone _Z1Dv.memprof.1 +; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:5:10: call in clone _Z1Dv.memprof.1 marked with memprof allocation attribute cold ;; We should call the original B for the recursive context if we have -;; enabled skipping of recursive contexts via -memprof-skip-recursive-contexts. +;; disabled recursive contexts via -memprof-allow-recursive-contexts=false. ; SKIP-RECUR-CONTEXTS: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi -; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:12:10: call in clone _Z1Ci assigned to call function clone _Z1Bi -; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:18:13: call in clone main assigned to call function clone _Z1Bi -; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:15:10: call in clone _Z1Bi assigned to call function clone _Z1Ci -; NOSKIP-RECUR-CALLSITES: memprof_recursive.cc:10:12: call in clone _Z1Ci assigned to call function clone _Z1Dv +; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:12:10: call in clone _Z1Ci assigned to call function clone _Z1Bi +; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:18:13: call in clone main assigned to call function clone _Z1Bi +; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:15:10: call in clone _Z1Bi assigned to call function clone _Z1Ci +; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:10:12: call in clone _Z1Ci assigned to call function clone _Z1Dv ; ALL: memprof_recursive.cc:5:10: call in clone _Z1Dv marked with memprof allocation attribute notcold target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"