diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 4efd683dfca36..905186edcbecc 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -1352,6 +1352,17 @@ static void checkNode(const ContextNode *Node, } assert(NodeContextIds == CalleeEdgeContextIds); } + // FIXME: Since this checking is only invoked under an option, we should + // change the error checking from using assert to something that will trigger + // an error on a release build. +#ifndef NDEBUG + // Make sure we don't end up with duplicate edges between the same caller and + // callee. + DenseSet *> NodeSet; + for (const auto &E : Node->CalleeEdges) + NodeSet.insert(E->Callee); + assert(NodeSet.size() == Node->CalleeEdges.size()); +#endif } template @@ -3125,7 +3136,15 @@ void CallsiteContextGraph:: // from the same callers as the old node. That should be true in the current // use case, where we will remove None-type edges after copying over all // caller edges from the callee. - assert(IsNewNode || NewCaller->findEdgeFromCaller(OldCallerEdge->Caller)); + auto *ExistingCallerEdge = + NewCaller->findEdgeFromCaller(OldCallerEdge->Caller); + assert(IsNewNode || ExistingCallerEdge); + if (ExistingCallerEdge) { + ExistingCallerEdge->getContextIds().insert(EdgeContextIdsToMove.begin(), + EdgeContextIdsToMove.end()); + ExistingCallerEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove); + continue; + } auto NewEdge = std::make_shared( NewCaller, OldCallerEdge->Caller, computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove); diff --git a/llvm/test/ThinLTO/X86/memprof-icp.ll b/llvm/test/ThinLTO/X86/memprof-icp.ll index f17e19e1f77ef..99e0718987655 100644 --- a/llvm/test/ThinLTO/X86/memprof-icp.ll +++ b/llvm/test/ThinLTO/X86/memprof-icp.ll @@ -186,9 +186,13 @@ ; REMARKS-MAIN: created clone _ZN2B03barEj.memprof.1 ; REMARKS-MAIN: call in clone _ZN2B03barEj marked with memprof allocation attribute notcold ; REMARKS-MAIN: call in clone _ZN2B03barEj.memprof.1 marked with memprof allocation attribute cold +; REMARKS-MAIN: call in clone _ZN2B03barEj marked with memprof allocation attribute notcold +; REMARKS-MAIN: call in clone _ZN2B03barEj.memprof.1 marked with memprof allocation attribute cold ; REMARKS-MAIN: created clone _ZN1B3barEj.memprof.1 ; REMARKS-MAIN: call in clone _ZN1B3barEj marked with memprof allocation attribute notcold ; REMARKS-MAIN: call in clone _ZN1B3barEj.memprof.1 marked with memprof allocation attribute cold +; REMARKS-MAIN: call in clone _ZN1B3barEj marked with memprof allocation attribute notcold +; REMARKS-MAIN: call in clone _ZN1B3barEj.memprof.1 marked with memprof allocation attribute cold ; REMARKS-FOO: created clone _Z3fooR2B0j.memprof.1 ;; In each version of foo we should have promoted the indirect call to two conditional ;; direct calls, one to B::bar and one to B0::bar. The cloned version of foo should call @@ -208,10 +212,10 @@ ; REMARKS-FOO: call in clone _ZN1B3barEj marked with memprof allocation attribute notcold ; REMARKS-FOO: call in clone _ZN1B3barEj.memprof.1 marked with memprof allocation attribute cold -; STATS: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during whole program analysis -; STATS-BE: 4 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend -; STATS: 2 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during whole program analysis -; STATS-BE: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend +; STATS: 4 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during whole program analysis +; STATS-BE: 8 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend +; STATS: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during whole program analysis +; STATS-BE: 8 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend ; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis ; STATS-BE: 5 memprof-context-disambiguation - Number of function clones created during ThinLTO backend @@ -247,8 +251,8 @@ ; IR: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold" ; IR: attributes #[[COLD]] = {{.*}} "memprof"="cold" -; STATS-BE-DISTRIB: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend -; STATS-BE-DISTRIB: 2 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend +; STATS-BE-DISTRIB: 4 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend +; STATS-BE-DISTRIB: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend ; STATS-BE-DISTRIB: 3 memprof-context-disambiguation - Number of function clones created during ThinLTO backend ;--- foo.ll @@ -298,6 +302,9 @@ declare i32 @_Z3fooR2B0j(ptr, i32) define i32 @_ZN2B03barEj(ptr %this, i32 %s) { entry: %call = tail call ptr @_Znwm(i64 noundef 4) #0, !memprof !33, !callsite !38 + ;; Second allocation in this function, to ensure that indirect edges to the + ;; same callee are partitioned correctly. + %call2 = tail call ptr @_Znwm(i64 noundef 4) #0, !memprof !45, !callsite !50 store volatile i32 0, ptr %call, align 4 ret i32 0 } @@ -311,6 +318,9 @@ declare void @_ZdlPvm() define i32 @_ZN1B3barEj(ptr %this, i32 %s) { entry: %call = tail call ptr @_Znwm(i64 noundef 4) #0, !memprof !39, !callsite !44 + ;; Second allocation in this function, to ensure that indirect edges to the + ;; same callee are partitioned correctly. + %call2 = tail call ptr @_Znwm(i64 noundef 4) #0, !memprof !51, !callsite !56 store volatile i32 0, ptr %call, align 4 ret i32 0 } @@ -367,3 +377,15 @@ attributes #0 = { builtin allocsize(0) } !42 = !{!43, !"cold"} !43 = !{i64 4457553070050523782, i64 -2101080423462424381, i64 -6490791336773930154} !44 = !{i64 4457553070050523782} +!45 = !{!46, !48} +!46 = !{!47, !"notcold"} +!47 = !{i64 456, i64 -2101080423462424381, i64 5188446645037944434} +!48 = !{!49, !"cold"} +!49 = !{i64 456, i64 -2101080423462424381, i64 5583420417449503557} +!50 = !{i64 456} +!51 = !{!52, !54} +!52 = !{!53, !"notcold"} +!53 = !{i64 789, i64 -2101080423462424381, i64 132626519179914298} +!54 = !{!55, !"cold"} +!55 = !{i64 789, i64 -2101080423462424381, i64 -6490791336773930154} +!56 = !{i64 789}