diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp index bc722c7ed3e31..8e78cd9cc573a 100644 --- a/llvm/lib/IR/Metadata.cpp +++ b/llvm/lib/IR/Metadata.cpp @@ -1223,6 +1223,26 @@ MDNode *MDNode::mergeDirectCallProfMetadata(MDNode *A, MDNode *B, MDNode *MDNode::getMergedProfMetadata(MDNode *A, MDNode *B, const Instruction *AInstr, const Instruction *BInstr) { + // Check that it is legal to merge prof metadata based on the opcode. + auto IsLegal = [](const Instruction &I) -> bool { + switch (I.getOpcode()) { + case Instruction::Invoke: + case Instruction::Br: + case Instruction::Switch: + case Instruction::Call: + case Instruction::IndirectBr: + case Instruction::Select: + case Instruction::CallBr: + return true; + default: + return false; + } + }; + if (AInstr && !IsLegal(*AInstr)) + return nullptr; + if (BInstr && !IsLegal(*BInstr)) + return nullptr; + if (!(A && B)) { return A ? A : B; } diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 2f3ea2266e07f..b3204da93049b 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3353,9 +3353,10 @@ static void combineMetadata(Instruction *K, const Instruction *J, case LLVMContext::MD_invariant_group: // Preserve !invariant.group in K. break; - // Keep empty cases for mmra, memprof, and callsite to prevent them from - // being removed as unknown metadata. The actual merging is handled + // Keep empty cases for prof, mmra, memprof, and callsite to prevent them + // from being removed as unknown metadata. The actual merging is handled // separately below. + case LLVMContext::MD_prof: case LLVMContext::MD_mmra: case LLVMContext::MD_memprof: case LLVMContext::MD_callsite: @@ -3384,10 +3385,6 @@ static void combineMetadata(Instruction *K, const Instruction *J, if (!AAOnly) K->setMetadata(Kind, JMD); break; - case LLVMContext::MD_prof: - if (!AAOnly && DoesKMove) - K->setMetadata(Kind, MDNode::getMergedProfMetadata(KMD, JMD, K, J)); - break; case LLVMContext::MD_noalias_addrspace: if (DoesKMove) K->setMetadata(Kind, @@ -3434,6 +3431,16 @@ static void combineMetadata(Instruction *K, const Instruction *J, K->setMetadata(LLVMContext::MD_callsite, MDNode::getMergedCallsiteMetadata(KCallSite, JCallSite)); } + + // Merge prof metadata. + // Handle separately to support cases where only one instruction has the + // metadata. + auto *JProf = J->getMetadata(LLVMContext::MD_prof); + auto *KProf = K->getMetadata(LLVMContext::MD_prof); + if (!AAOnly && (JProf || KProf)) { + K->setMetadata(LLVMContext::MD_prof, + MDNode::getMergedProfMetadata(KProf, JProf, K, J)); + } } void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J, diff --git a/llvm/test/Transforms/GVN/pre-invalid-prof-metadata.ll b/llvm/test/Transforms/GVN/pre-invalid-prof-metadata.ll new file mode 100644 index 0000000000000..978bfd424ae52 --- /dev/null +++ b/llvm/test/Transforms/GVN/pre-invalid-prof-metadata.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=gvn -S < %s | FileCheck %s + +; Make sure that performing PRE does not add prof metadata onto newly created +; phi nodes. +define fastcc ptr @foo(i8 %__pred.1.val) { +; CHECK-LABEL: define fastcc ptr @foo( +; CHECK-SAME: i8 [[__PRED_1_VAL:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: switch i64 0, label %[[SW_BB:.*]] [ +; CHECK-NEXT: i64 1, label %[[ENTRY_SW_BB26_CRIT_EDGE:.*]] +; CHECK-NEXT: i64 0, label %[[ENTRY_SW_BB21_CRIT_EDGE:.*]] +; CHECK-NEXT: ] +; CHECK: [[ENTRY_SW_BB21_CRIT_EDGE]]: +; CHECK-NEXT: [[DOTPRE:%.*]] = trunc i8 [[__PRED_1_VAL]] to i1 +; CHECK-NEXT: [[DOTPRE1:%.*]] = select i1 [[DOTPRE]], i64 0, i64 4 +; CHECK-NEXT: br label %[[SW_BB21:.*]] +; CHECK: [[ENTRY_SW_BB26_CRIT_EDGE]]: +; CHECK-NEXT: [[DOTPRE2:%.*]] = trunc i8 [[__PRED_1_VAL]] to i1 +; CHECK-NEXT: [[DOTPRE3:%.*]] = select i1 [[DOTPRE2]], i64 0, i64 4, !prof [[PROF0:![0-9]+]] +; CHECK-NEXT: br label %[[SW_BB26:.*]] +; CHECK: [[SW_BB]]: +; CHECK-NEXT: [[L78:%.*]] = trunc i8 [[__PRED_1_VAL]] to i1 +; CHECK-NEXT: [[C79:%.*]] = select i1 [[L78]], i64 0, i64 4 +; CHECK-NEXT: br label %[[SW_BB21]] +; CHECK: [[SW_BB21]]: +; CHECK-NEXT: [[C84_PRE_PHI:%.*]] = phi i64 [ [[DOTPRE1]], %[[ENTRY_SW_BB21_CRIT_EDGE]] ], [ [[C79]], %[[SW_BB]] ] +; CHECK-NEXT: [[L83_PRE_PHI:%.*]] = phi i1 [ [[DOTPRE]], %[[ENTRY_SW_BB21_CRIT_EDGE]] ], [ [[L78]], %[[SW_BB]] ] +; CHECK-NEXT: br label %[[SW_BB26]] +; CHECK: [[SW_BB26]]: +; CHECK-NEXT: [[C89_PRE_PHI:%.*]] = phi i64 [ [[DOTPRE3]], %[[ENTRY_SW_BB26_CRIT_EDGE]] ], [ [[C84_PRE_PHI]], %[[SW_BB21]] ] +; CHECK-NEXT: [[L88_PRE_PHI:%.*]] = phi i1 [ [[DOTPRE2]], %[[ENTRY_SW_BB26_CRIT_EDGE]] ], [ [[L83_PRE_PHI]], %[[SW_BB21]] ] +; CHECK-NEXT: ret ptr null +; +entry: + switch i64 0, label %sw.bb [ + i64 1, label %sw.bb26 + i64 0, label %sw.bb21 + ] + +sw.bb: ; preds = %entry + %l78 = trunc i8 %__pred.1.val to i1 + %c79 = select i1 %l78, i64 0, i64 4 + br label %sw.bb21 + +sw.bb21: ; preds = %sw.bb, %entry + %l83 = trunc i8 %__pred.1.val to i1 + %c84 = select i1 %l83, i64 0, i64 4 + br label %sw.bb26 + +sw.bb26: ; preds = %sw.bb21, %entry + %l88 = trunc i8 %__pred.1.val to i1 + %c89 = select i1 %l88, i64 0, i64 4, !prof !0 + ret ptr null +} + +!0 = !{!"branch_weights", i32 3994, i32 883} +;. +; CHECK: [[PROF0]] = !{!"branch_weights", i32 3994, i32 883} +;. diff --git a/llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-hoist.ll b/llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-hoist.ll new file mode 100644 index 0000000000000..d6058134f5285 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-hoist.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 2 +; RUN: opt < %s -passes='simplifycfg' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s --check-prefix=HOIST + +; Test case based on C++ code with manualy annotated !prof metadata. +; This is to test that when calls to 'func1' from 'if.then' block +; and 'if.else' block are hoisted, the branch_weights are merged and +; attached to merged call rather than dropped. +; +; int func1(int a, int b) ; +; int func2(int a, int b) ; + +; int func(int a, int b, bool c) { +; int sum= 0; +; if(c) { +; sum += func1(a, b); +; } else { +; sum += func1(a, b); +; sum -= func2(a, b); +; } +; return sum; +; } +define i32 @_Z4funciib(i32 %a, i32 %b, i1 %c) { +; HOIST-LABEL: define i32 @_Z4funciib +; HOIST-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i1 [[C:%.*]]) { +; HOIST-NEXT: entry: +; HOIST-NEXT: [[CALL:%.*]] = tail call i32 @_Z5func1ii(i32 [[A]], i32 [[B]]), !prof [[PROF0:![0-9]+]] +; HOIST-NEXT: br i1 [[C]], label [[IF_END:%.*]], label [[IF_ELSE:%.*]] +; HOIST: if.else: +; HOIST-NEXT: [[CALL3:%.*]] = tail call i32 @_Z5func2ii(i32 [[A]], i32 [[B]]) +; HOIST-NEXT: [[SUB:%.*]] = sub i32 [[CALL]], [[CALL3]] +; HOIST-NEXT: br label [[IF_END]] +; HOIST: if.end: +; HOIST-NEXT: [[SUM_0:%.*]] = phi i32 [ [[SUB]], [[IF_ELSE]] ], [ [[CALL]], [[ENTRY:%.*]] ] +; HOIST-NEXT: ret i32 [[SUM_0]] +; +entry: + br i1 %c, label %if.then, label %if.else + +if.then: ; preds = %entry + %call = tail call i32 @_Z5func1ii(i32 %a, i32 %b) + br label %if.end + +if.else: ; preds = %entry + %call1 = tail call i32 @_Z5func1ii(i32 %a, i32 %b), !prof !0 + %call3 = tail call i32 @_Z5func2ii(i32 %a, i32 %b) + %sub = sub i32 %call1, %call3 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %sum.0 = phi i32 [ %call, %if.then ], [ %sub, %if.else ] + ret i32 %sum.0 +} + +declare i32 @_Z5func1ii(i32, i32) + +declare i32 @_Z5func2ii(i32, i32) + +!0 = !{!"branch_weights", i32 10} + +;. +; HOIST: [[PROF0]] = !{!"branch_weights", i32 10} +;. diff --git a/llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-sink.ll b/llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-sink.ll new file mode 100644 index 0000000000000..c4aed5eb95888 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-sink.ll @@ -0,0 +1,63 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 2 +; RUN: opt < %s -passes='simplifycfg' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s --check-prefix=SINK + + +; Test case based on the following C++ code with manualy annotated !prof metadata. +; This is to test that when calls to 'func1' from 'if.then' and 'if.else' are +; sinked, the branch weights are merged and attached to sinked call. +; +; int func1(int a, int b) ; +; int func2(int a, int b) ; + +; int func(int a, int b, bool c) { +; int sum = 0; +; if (c) { +; sum += func1(a,b); +; } else { +; b -= func2(a,b); +; sum += func1(a,b); +; } +; return sum; +; } + +define i32 @_Z4funciib(i32 %a, i32 %b, i1 %c) { +; SINK-LABEL: define i32 @_Z4funciib +; SINK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i1 [[C:%.*]]) { +; SINK-NEXT: entry: +; SINK-NEXT: br i1 [[C]], label [[IF_END:%.*]], label [[IF_ELSE:%.*]] +; SINK: if.else: +; SINK-NEXT: [[CALL1:%.*]] = tail call i32 @_Z5func2ii(i32 [[A]], i32 [[B]]) +; SINK-NEXT: [[SUB:%.*]] = sub i32 [[B]], [[CALL1]] +; SINK-NEXT: br label [[IF_END]] +; SINK: if.end: +; SINK-NEXT: [[SUB_SINK:%.*]] = phi i32 [ [[SUB]], [[IF_ELSE]] ], [ [[B]], [[ENTRY:%.*]] ] +; SINK-NEXT: [[CALL2:%.*]] = tail call i32 @_Z5func1ii(i32 [[A]], i32 [[SUB_SINK]]), !prof [[PROF0:![0-9]+]] +; SINK-NEXT: ret i32 [[CALL2]] +; +entry: + br i1 %c, label %if.then, label %if.else + +if.then: ; preds = %entry + %call = tail call i32 @_Z5func1ii(i32 %a, i32 %b), !prof !0 + br label %if.end + +if.else: ; preds = %entry + %call1 = tail call i32 @_Z5func2ii(i32 %a, i32 %b) + %sub = sub i32 %b, %call1 + %call2 = tail call i32 @_Z5func1ii(i32 %a, i32 %sub) + br label %if.end + +if.end: ; preds = %if.else, %if.then + %sum.0 = phi i32 [ %call, %if.then ], [ %call2, %if.else ] + ret i32 %sum.0 +} + +declare i32 @_Z5func1ii(i32, i32) + +declare i32 @_Z5func2ii(i32, i32) + +!0 = !{!"branch_weights", i32 10} + +;. +; SINK: [[PROF0]] = !{!"branch_weights", i32 10} +;.