From adc76503d9b1d5a6951b1849874cbe5ab9e9b7bc Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Tue, 22 Oct 2024 09:12:41 -0700 Subject: [PATCH 1/2] [MemProf] Avoid duplicate edges between nodes The recent change to add support for cloning indirect calls inadvertantly caused duplicate edges to be created between the same caller/callee pair. This is due to the new moveCalleeEdgeToNewCaller not properly guarding the addition of a new edge (ironically I was testing for that in an assertion, but failed to handle that case specially otherwise). Now simply move the context ids over to any existing edge. This issue in turn led to some assumptions in cloning being violated, resulting in a later crash. Add a test for this case to checkNode. --- .../IPO/MemProfContextDisambiguation.cpp | 16 ++++++++- llvm/test/ThinLTO/X86/memprof-icp.ll | 34 +++++++++++++++---- 2 files changed, 43 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 4efd683dfca36..8c651fc8a342a 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -1352,6 +1352,12 @@ static void checkNode(const ContextNode *Node, } assert(NodeContextIds == CalleeEdgeContextIds); } + // Make sure we don't end up with duplicate edges between the same caller and + // callee. + DenseSet *> NodeSet; + for (const auto &E : Node->CalleeEdges) + NodeSet.insert(E->Callee); + assert(NodeSet.size() == Node->CalleeEdges.size()); } template @@ -3125,7 +3131,15 @@ void CallsiteContextGraph:: // from the same callers as the old node. That should be true in the current // use case, where we will remove None-type edges after copying over all // caller edges from the callee. - assert(IsNewNode || NewCaller->findEdgeFromCaller(OldCallerEdge->Caller)); + auto *ExistingCallerEdge = + NewCaller->findEdgeFromCaller(OldCallerEdge->Caller); + assert(IsNewNode || ExistingCallerEdge); + if (ExistingCallerEdge) { + ExistingCallerEdge->getContextIds().insert(EdgeContextIdsToMove.begin(), + EdgeContextIdsToMove.end()); + ExistingCallerEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove); + continue; + } auto NewEdge = std::make_shared( NewCaller, OldCallerEdge->Caller, computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove); diff --git a/llvm/test/ThinLTO/X86/memprof-icp.ll b/llvm/test/ThinLTO/X86/memprof-icp.ll index f17e19e1f77ef..99e0718987655 100644 --- a/llvm/test/ThinLTO/X86/memprof-icp.ll +++ b/llvm/test/ThinLTO/X86/memprof-icp.ll @@ -186,9 +186,13 @@ ; REMARKS-MAIN: created clone _ZN2B03barEj.memprof.1 ; REMARKS-MAIN: call in clone _ZN2B03barEj marked with memprof allocation attribute notcold ; REMARKS-MAIN: call in clone _ZN2B03barEj.memprof.1 marked with memprof allocation attribute cold +; REMARKS-MAIN: call in clone _ZN2B03barEj marked with memprof allocation attribute notcold +; REMARKS-MAIN: call in clone _ZN2B03barEj.memprof.1 marked with memprof allocation attribute cold ; REMARKS-MAIN: created clone _ZN1B3barEj.memprof.1 ; REMARKS-MAIN: call in clone _ZN1B3barEj marked with memprof allocation attribute notcold ; REMARKS-MAIN: call in clone _ZN1B3barEj.memprof.1 marked with memprof allocation attribute cold +; REMARKS-MAIN: call in clone _ZN1B3barEj marked with memprof allocation attribute notcold +; REMARKS-MAIN: call in clone _ZN1B3barEj.memprof.1 marked with memprof allocation attribute cold ; REMARKS-FOO: created clone _Z3fooR2B0j.memprof.1 ;; In each version of foo we should have promoted the indirect call to two conditional ;; direct calls, one to B::bar and one to B0::bar. The cloned version of foo should call @@ -208,10 +212,10 @@ ; REMARKS-FOO: call in clone _ZN1B3barEj marked with memprof allocation attribute notcold ; REMARKS-FOO: call in clone _ZN1B3barEj.memprof.1 marked with memprof allocation attribute cold -; STATS: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during whole program analysis -; STATS-BE: 4 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend -; STATS: 2 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during whole program analysis -; STATS-BE: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend +; STATS: 4 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during whole program analysis +; STATS-BE: 8 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend +; STATS: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during whole program analysis +; STATS-BE: 8 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend ; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis ; STATS-BE: 5 memprof-context-disambiguation - Number of function clones created during ThinLTO backend @@ -247,8 +251,8 @@ ; IR: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold" ; IR: attributes #[[COLD]] = {{.*}} "memprof"="cold" -; STATS-BE-DISTRIB: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend -; STATS-BE-DISTRIB: 2 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend +; STATS-BE-DISTRIB: 4 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend +; STATS-BE-DISTRIB: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend ; STATS-BE-DISTRIB: 3 memprof-context-disambiguation - Number of function clones created during ThinLTO backend ;--- foo.ll @@ -298,6 +302,9 @@ declare i32 @_Z3fooR2B0j(ptr, i32) define i32 @_ZN2B03barEj(ptr %this, i32 %s) { entry: %call = tail call ptr @_Znwm(i64 noundef 4) #0, !memprof !33, !callsite !38 + ;; Second allocation in this function, to ensure that indirect edges to the + ;; same callee are partitioned correctly. + %call2 = tail call ptr @_Znwm(i64 noundef 4) #0, !memprof !45, !callsite !50 store volatile i32 0, ptr %call, align 4 ret i32 0 } @@ -311,6 +318,9 @@ declare void @_ZdlPvm() define i32 @_ZN1B3barEj(ptr %this, i32 %s) { entry: %call = tail call ptr @_Znwm(i64 noundef 4) #0, !memprof !39, !callsite !44 + ;; Second allocation in this function, to ensure that indirect edges to the + ;; same callee are partitioned correctly. + %call2 = tail call ptr @_Znwm(i64 noundef 4) #0, !memprof !51, !callsite !56 store volatile i32 0, ptr %call, align 4 ret i32 0 } @@ -367,3 +377,15 @@ attributes #0 = { builtin allocsize(0) } !42 = !{!43, !"cold"} !43 = !{i64 4457553070050523782, i64 -2101080423462424381, i64 -6490791336773930154} !44 = !{i64 4457553070050523782} +!45 = !{!46, !48} +!46 = !{!47, !"notcold"} +!47 = !{i64 456, i64 -2101080423462424381, i64 5188446645037944434} +!48 = !{!49, !"cold"} +!49 = !{i64 456, i64 -2101080423462424381, i64 5583420417449503557} +!50 = !{i64 456} +!51 = !{!52, !54} +!52 = !{!53, !"notcold"} +!53 = !{i64 789, i64 -2101080423462424381, i64 132626519179914298} +!54 = !{!55, !"cold"} +!55 = !{i64 789, i64 -2101080423462424381, i64 -6490791336773930154} +!56 = !{i64 789} From 64ff325cf6660cf05f29802146a13e652f0e1e3a Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Fri, 25 Oct 2024 10:42:08 -0700 Subject: [PATCH 2/2] Address comment --- llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 8c651fc8a342a..905186edcbecc 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -1352,12 +1352,17 @@ static void checkNode(const ContextNode *Node, } assert(NodeContextIds == CalleeEdgeContextIds); } + // FIXME: Since this checking is only invoked under an option, we should + // change the error checking from using assert to something that will trigger + // an error on a release build. +#ifndef NDEBUG // Make sure we don't end up with duplicate edges between the same caller and // callee. DenseSet *> NodeSet; for (const auto &E : Node->CalleeEdges) NodeSet.insert(E->Callee); assert(NodeSet.size() == Node->CalleeEdges.size()); +#endif } template