From f3df524e714c212130c43c6371c7229638dc153f Mon Sep 17 00:00:00 2001 From: Snehasish Kumar Date: Wed, 19 Mar 2025 21:41:19 +0000 Subject: [PATCH 1/2] [Metadata] Handle memprof, callsite merging when one is missing. For memprof and callsite metadata we want to pick one deterministically and keep that even if one of them may be missing. --- llvm/lib/Transforms/Utils/Local.cpp | 36 +++++++++--- .../SimplifyCFG/merge-calls-memprof.ll | 55 +++++++++++++++++-- 2 files changed, 78 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 95f0d099aacb5..161c7c875e0eb 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3355,8 +3355,14 @@ static void combineMetadata(Instruction *K, const Instruction *J, case LLVMContext::MD_invariant_group: // Preserve !invariant.group in K. break; + // Keep empty cases for mmra, memprof, and callsite to prevent them from + // being removed as unknown metadata. The actual merging is handled + // separately below. case LLVMContext::MD_mmra: - // Combine MMRAs + [[fallthrough]]; + case LLVMContext::MD_memprof: + [[fallthrough]]; + case LLVMContext::MD_callsite: break; case LLVMContext::MD_align: if (!AAOnly && (DoesKMove || !K->hasMetadata(LLVMContext::MD_noundef))) @@ -3369,14 +3375,6 @@ static void combineMetadata(Instruction *K, const Instruction *J, K->setMetadata(Kind, MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); break; - case LLVMContext::MD_memprof: - if (!AAOnly) - K->setMetadata(Kind, MDNode::getMergedMemProfMetadata(KMD, JMD)); - break; - case LLVMContext::MD_callsite: - if (!AAOnly) - K->setMetadata(Kind, MDNode::getMergedCallsiteMetadata(KMD, JMD)); - break; case LLVMContext::MD_preserve_access_index: // Preserve !preserve.access.index in K. 
break; @@ -3420,6 +3418,26 @@ static void combineMetadata(Instruction *K, const Instruction *J, K->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::combine(K->getContext(), JMMRA, KMMRA)); } + + // Merge memprof metadata. + // Handle separately to support cases where only one instruction has the + // metadata. + auto JMemProf = J->getMetadata(LLVMContext::MD_memprof); + auto KMemProf = K->getMetadata(LLVMContext::MD_memprof); + if (!AAOnly && (JMemProf || KMemProf)) { + K->setMetadata(LLVMContext::MD_memprof, + MDNode::getMergedMemProfMetadata(KMemProf, JMemProf)); + } + + // Merge callsite metadata. + // Handle separately to support cases where only one instruction has the + // metadata. + auto JCallSite = J->getMetadata(LLVMContext::MD_callsite); + auto KCallSite = K->getMetadata(LLVMContext::MD_callsite); + if (!AAOnly && (JCallSite || KCallSite)) { + K->setMetadata(LLVMContext::MD_callsite, + MDNode::getMergedCallsiteMetadata(KCallSite, JCallSite)); + } } void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J, diff --git a/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll b/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll index 10c6aeb26ba76..d15eeb7b69fee 100644 --- a/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll +++ b/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll @@ -1,5 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 - ;; Test to ensure that memprof related metadata is not dropped when ;; instructions are combined. Currently the metadata from the first instruction ;; is kept, which prevents full loss of profile context information. 
@@ -32,6 +30,51 @@ if.end: ; preds = %if.else, %if.then ret ptr %x.0 } +define dso_local noundef nonnull ptr @_Z9test_leftb(i1 noundef zeroext %b) local_unnamed_addr #0 { +; CHECK-LABEL: define dso_local noundef nonnull ptr @_Z9test_leftb( +; CHECK-SAME: i1 noundef zeroext [[B:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CALL:%.*]] = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof [[META0:![0-9]+]], !callsite [[META3:![0-9]+]] +; CHECK-NEXT: ret ptr [[CALL]] +; +entry: + br i1 %b, label %if.then, label %if.else + +if.then: ; preds = %entry + %call = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof !0, !callsite !3 + br label %if.end + +if.else: ; preds = %entry + %call1 = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4) + br label %if.end + +if.end: ; preds = %if.else, %if.then + %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ] + ret ptr %x.0 +} + +define dso_local noundef nonnull ptr @_Z10test_rightb(i1 noundef zeroext %b) local_unnamed_addr #0 { +; CHECK-LABEL: define dso_local noundef nonnull ptr @_Z10test_rightb( +; CHECK-SAME: i1 noundef zeroext [[B:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CALL:%.*]] = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof [[META4:![0-9]+]], !callsite [[META7:![0-9]+]] +; CHECK-NEXT: ret ptr [[CALL]] +; +entry: + br i1 %b, label %if.then, label %if.else + +if.then: ; preds = %entry + %call = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4) + br label %if.end + +if.else: ; preds = %entry + %call1 = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof !4, !callsite !7 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ] + ret ptr %x.0 +} declare ptr @_Znwm(i64) nounwind readonly @@ -43,9 +86,13 @@ declare ptr 
@_Znwm(i64) nounwind readonly !5 = !{!6, !"cold"} !6 = !{i64 123, i64 -2101080423462424381, i64 5188446645037944434} !7 = !{i64 123} -;. + ; CHECK: [[META0]] = !{[[META1:![0-9]+]]} ; CHECK: [[META1]] = !{[[META2:![0-9]+]], !"notcold"} ; CHECK: [[META2]] = !{i64 -852997907418798798, i64 -2101080423462424381, i64 5188446645037944434} ; CHECK: [[META3]] = !{i64 -852997907418798798} -;. +; CHECK: [[META4]] = !{[[META5:![0-9]+]]} +; CHECK: [[META5]] = !{[[META6:![0-9]+]], !"cold"} +; CHECK: [[META6]] = !{i64 123, i64 -2101080423462424381, i64 5188446645037944434} +; CHECK: [[META7]] = !{i64 123} + From 7772c93615475f22052eeb53643d7a7d9fd5961f Mon Sep 17 00:00:00 2001 From: Snehasish Kumar Date: Fri, 21 Mar 2025 17:16:48 +0000 Subject: [PATCH 2/2] Address comments. --- llvm/lib/Transforms/Utils/Local.cpp | 10 ++-- .../SimplifyCFG/merge-calls-memprof-left.ll | 46 ++++++++++++++++ .../SimplifyCFG/merge-calls-memprof-right.ll | 46 ++++++++++++++++ .../SimplifyCFG/merge-calls-memprof.ll | 55 ++----------------- 4 files changed, 100 insertions(+), 57 deletions(-) create mode 100644 llvm/test/Transforms/SimplifyCFG/merge-calls-memprof-left.ll create mode 100644 llvm/test/Transforms/SimplifyCFG/merge-calls-memprof-right.ll diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 161c7c875e0eb..edec0e7a94422 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3359,9 +3359,7 @@ static void combineMetadata(Instruction *K, const Instruction *J, // being removed as unknown metadata. The actual merging is handled // separately below. case LLVMContext::MD_mmra: - [[fallthrough]]; case LLVMContext::MD_memprof: - [[fallthrough]]; case LLVMContext::MD_callsite: break; case LLVMContext::MD_align: @@ -3422,8 +3420,8 @@ static void combineMetadata(Instruction *K, const Instruction *J, // Merge memprof metadata. // Handle separately to support cases where only one instruction has the // metadata. 
- auto JMemProf = J->getMetadata(LLVMContext::MD_memprof); - auto KMemProf = K->getMetadata(LLVMContext::MD_memprof); + auto *JMemProf = J->getMetadata(LLVMContext::MD_memprof); + auto *KMemProf = K->getMetadata(LLVMContext::MD_memprof); if (!AAOnly && (JMemProf || KMemProf)) { K->setMetadata(LLVMContext::MD_memprof, MDNode::getMergedMemProfMetadata(KMemProf, JMemProf)); @@ -3432,8 +3430,8 @@ static void combineMetadata(Instruction *K, const Instruction *J, // Merge callsite metadata. // Handle separately to support cases where only one instruction has the // metadata. - auto JCallSite = J->getMetadata(LLVMContext::MD_callsite); - auto KCallSite = K->getMetadata(LLVMContext::MD_callsite); + auto *JCallSite = J->getMetadata(LLVMContext::MD_callsite); + auto *KCallSite = K->getMetadata(LLVMContext::MD_callsite); if (!AAOnly && (JCallSite || KCallSite)) { K->setMetadata(LLVMContext::MD_callsite, MDNode::getMergedCallsiteMetadata(KCallSite, JCallSite)); diff --git a/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof-left.ll b/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof-left.ll new file mode 100644 index 0000000000000..d0f83c04b0352 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof-left.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +;; Test to ensure that memprof related metadata is not dropped when +;; instructions are combined. Currently the metadata from the first instruction +;; is kept, which prevents full loss of profile context information. 
+ +; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define dso_local noundef nonnull ptr @_Z9test_leftb(i1 noundef zeroext %b) local_unnamed_addr #0 { +; CHECK-LABEL: define dso_local noundef nonnull ptr @_Z9test_leftb( +; CHECK-SAME: i1 noundef zeroext [[B:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CALL:%.*]] = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof [[META0:![0-9]+]], !callsite [[META3:![0-9]+]] +; CHECK-NEXT: ret ptr [[CALL]] +; +entry: + br i1 %b, label %if.then, label %if.else + +if.then: ; preds = %entry + %call = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof !0, !callsite !3 + br label %if.end + +if.else: ; preds = %entry + %call1 = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4) + br label %if.end + +if.end: ; preds = %if.else, %if.then + %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ] + ret ptr %x.0 +} + +declare ptr @_Znwm(i64) nounwind readonly + +!0 = !{!1} +!1 = !{!2, !"notcold"} +!2 = !{i64 -852997907418798798, i64 -2101080423462424381, i64 5188446645037944434} +!3 = !{i64 -852997907418798798} + +;. +; CHECK: [[META0]] = !{[[META1:![0-9]+]]} +; CHECK: [[META1]] = !{[[META2:![0-9]+]], !"notcold"} +; CHECK: [[META2]] = !{i64 -852997907418798798, i64 -2101080423462424381, i64 5188446645037944434} +; CHECK: [[META3]] = !{i64 -852997907418798798} +;. 
diff --git a/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof-right.ll b/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof-right.ll new file mode 100644 index 0000000000000..463ab865d8810 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof-right.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +;; Test to ensure that memprof related metadata is not dropped when +;; instructions are combined. Here only the second call carries the metadata, +;; and it is kept, which prevents full loss of profile context information. + +; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define dso_local noundef nonnull ptr @_Z10test_rightb(i1 noundef zeroext %b) local_unnamed_addr #0 { +; CHECK-LABEL: define dso_local noundef nonnull ptr @_Z10test_rightb( +; CHECK-SAME: i1 noundef zeroext [[B:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CALL:%.*]] = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof [[META0:![0-9]+]], !callsite [[META3:![0-9]+]] +; CHECK-NEXT: ret ptr [[CALL]] +; +entry: + br i1 %b, label %if.then, label %if.else + +if.then: ; preds = %entry + %call = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4) + br label %if.end + +if.else: ; preds = %entry + %call1 = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof !4, !callsite !7 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ] + ret ptr %x.0 +} + +declare ptr @_Znwm(i64) nounwind readonly + +!4 = !{!5} +!5 = !{!6, !"cold"} +!6 = !{i64 123, i64 -2101080423462424381, i64 5188446645037944434} +!7 = !{i64 123} + +;.
+; CHECK: [[META0]] = !{[[META1:![0-9]+]]} +; CHECK: [[META1]] = !{[[META2:![0-9]+]], !"cold"} +; CHECK: [[META2]] = !{i64 123, i64 -2101080423462424381, i64 5188446645037944434} +; CHECK: [[META3]] = !{i64 123} +;. diff --git a/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll b/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll index d15eeb7b69fee..10c6aeb26ba76 100644 --- a/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll +++ b/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll @@ -1,3 +1,5 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 + ;; Test to ensure that memprof related metadata is not dropped when ;; instructions are combined. Currently the metadata from the first instruction ;; is kept, which prevents full loss of profile context information. @@ -30,51 +32,6 @@ if.end: ; preds = %if.else, %if.then ret ptr %x.0 } -define dso_local noundef nonnull ptr @_Z9test_leftb(i1 noundef zeroext %b) local_unnamed_addr #0 { -; CHECK-LABEL: define dso_local noundef nonnull ptr @_Z9test_leftb( -; CHECK-SAME: i1 noundef zeroext [[B:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[CALL:%.*]] = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof [[META0:![0-9]+]], !callsite [[META3:![0-9]+]] -; CHECK-NEXT: ret ptr [[CALL]] -; -entry: - br i1 %b, label %if.then, label %if.else - -if.then: ; preds = %entry - %call = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof !0, !callsite !3 - br label %if.end - -if.else: ; preds = %entry - %call1 = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4) - br label %if.end - -if.end: ; preds = %if.else, %if.then - %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ] - ret ptr %x.0 -} - -define dso_local noundef nonnull ptr @_Z10test_rightb(i1 noundef zeroext %b) local_unnamed_addr #0 { -; CHECK-LABEL: define dso_local noundef nonnull ptr 
@_Z10test_rightb( -; CHECK-SAME: i1 noundef zeroext [[B:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[CALL:%.*]] = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof [[META4:![0-9]+]], !callsite [[META7:![0-9]+]] -; CHECK-NEXT: ret ptr [[CALL]] -; -entry: - br i1 %b, label %if.then, label %if.else - -if.then: ; preds = %entry - %call = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4) - br label %if.end - -if.else: ; preds = %entry - %call1 = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof !4, !callsite !7 - br label %if.end - -if.end: ; preds = %if.else, %if.then - %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ] - ret ptr %x.0 -} declare ptr @_Znwm(i64) nounwind readonly @@ -86,13 +43,9 @@ declare ptr @_Znwm(i64) nounwind readonly !5 = !{!6, !"cold"} !6 = !{i64 123, i64 -2101080423462424381, i64 5188446645037944434} !7 = !{i64 123} - +;. ; CHECK: [[META0]] = !{[[META1:![0-9]+]]} ; CHECK: [[META1]] = !{[[META2:![0-9]+]], !"notcold"} ; CHECK: [[META2]] = !{i64 -852997907418798798, i64 -2101080423462424381, i64 5188446645037944434} ; CHECK: [[META3]] = !{i64 -852997907418798798} -; CHECK: [[META4]] = !{[[META5:![0-9]+]]} -; CHECK: [[META5]] = !{[[META6:![0-9]+]], !"cold"} -; CHECK: [[META6]] = !{i64 123, i64 -2101080423462424381, i64 5188446645037944434} -; CHECK: [[META7]] = !{i64 123} - +;.