From 029d36943b79218b52d62bc3225498c73de6080d Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Sat, 18 Oct 2025 17:14:08 -0700 Subject: [PATCH 1/2] [SLU][profcheck] Use the original branch weights in `buildPartialInvariantUnswitchConditionalBranch` --- .../Transforms/Scalar/SimpleLoopUnswitch.cpp | 17 ++++- .../SimpleLoopUnswitch/partial-unswitch.ll | 75 +++++++++++-------- 2 files changed, 56 insertions(+), 36 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index bb6c879f4d47e..dd36a63c1564b 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -337,7 +337,7 @@ static void buildPartialUnswitchConditionalBranch( static void buildPartialInvariantUnswitchConditionalBranch( BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, - MemorySSAUpdater *MSSAU) { + MemorySSAUpdater *MSSAU, const BranchInst &OriginalBranch) { ValueToValueMapTy VMap; for (auto *Val : reverse(ToDuplicate)) { Instruction *Inst = cast<Instruction>(Val); @@ -377,8 +377,17 @@ static void buildPartialInvariantUnswitchConditionalBranch( IRBuilder<> IRB(&BB); IRB.SetCurrentDebugLocation(DebugLoc::getCompilerGenerated()); Value *Cond = VMap[ToDuplicate[0]]; - IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc, - Direction ? &NormalSucc : &UnswitchedSucc); + auto *ProfData = + !ProfcheckDisableMetadataFixes && + ToDuplicate[0] == skipTrivialSelect(OriginalBranch.getCondition()) + ? OriginalBranch.getMetadata(LLVMContext::MD_prof) + : nullptr; + auto *BR = + IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc, + Direction ? &NormalSucc : &UnswitchedSucc, ProfData); + if (!ProfData) + setExplicitlyUnknownBranchWeightsIfProfiled( + *BR, *BR->getParent()->getParent(), DEBUG_TYPE); } /// Rewrite the PHI nodes in an unswitched loop exit basic block. 
@@ -2515,7 +2524,7 @@ static void unswitchNontrivialInvariants( // the branch in the split block. if (PartiallyInvariant) buildPartialInvariantUnswitchConditionalBranch( - *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU); + *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU, *BI); else { buildPartialUnswitchConditionalBranch( *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll index 1d8942079ffd8..87161707d9f69 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll @@ -1,14 +1,14 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt -passes='loop-mssa(simple-loop-unswitch),verify' -S < %s | FileCheck %s declare void @clobber() -define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) { +define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) !prof !0 { ; CHECK-LABEL: @partial_unswitch_true_successor( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[PTR:%.*]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 100 -; CHECK-NEXT: br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK-NEXT: br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: entry.split.us: ; CHECK-NEXT: br label [[LOOP_HEADER_US:%.*]] ; CHECK: loop.header.us: @@ -19,7 +19,7 @@ define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) { ; CHECK: loop.latch.us: ; CHECK-NEXT: [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]] ; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1 -; CHECK-NEXT: br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]] +; CHECK-NEXT: br i1 [[C_US]], label 
[[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]], !prof [[PROF2:![0-9]+]] ; CHECK: exit.split.us: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: entry.split: @@ -28,7 +28,7 @@ define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) { ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[PTR]], align 4 ; CHECK-NEXT: [[SC:%.*]] = icmp eq i32 [[LV]], 100 -; CHECK-NEXT: br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]] +; CHECK-NEXT: br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]], !prof [[PROF1]] ; CHECK: noclobber: ; CHECK-NEXT: br label [[LOOP_LATCH]] ; CHECK: clobber: @@ -37,7 +37,7 @@ define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) { ; CHECK: loop.latch: ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !prof [[PROF2]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit.split: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: @@ -50,7 +50,7 @@ loop.header: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] %lv = load i32, ptr %ptr %sc = icmp eq i32 %lv, 100 - br i1 %sc, label %noclobber, label %clobber + br i1 %sc, label %noclobber, label %clobber, !prof !1 noclobber: br label %loop.latch @@ -62,7 +62,7 @@ clobber: loop.latch: %c = icmp ult i32 %iv, %N %iv.next = add i32 %iv, 1 - br i1 %c, label %loop.header, label %exit + br i1 %c, label %loop.header, label %exit, !prof !2 exit: ret i32 10 @@ -102,7 +102,7 @@ define i32 @partial_unswitch_false_successor(ptr %ptr, i32 %N) { ; CHECK: loop.latch: ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 
[[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: exit.split: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: @@ -171,7 +171,7 @@ define i32 @partial_unswtich_gep_load_icmp(ptr %ptr, i32 %N) { ; CHECK: loop.latch: ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: exit.split: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: @@ -246,7 +246,7 @@ define i32 @partial_unswitch_reduction_phi(ptr %ptr, i32 %N) { ; CHECK-NEXT: [[RED_NEXT]] = phi i32 [ [[ADD_5]], [[CLOBBER]] ], [ [[ADD_10]], [[NOCLOBBER]] ] ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: exit.split: ; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], [[LOOP_LATCH]] ] ; CHECK-NEXT: br label [[EXIT]] @@ -325,7 +325,7 @@ define i32 @partial_unswitch_true_successor_noclobber(ptr noalias %ptr.1, ptr no ; CHECK: loop.latch: ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: exit.split: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: @@ -637,7 +637,7 @@ define i32 @partial_unswitch_true_successor_preheader_insertion(ptr %ptr, i32 %N ; CHECK: loop.latch: ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: br 
i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_LOOPEXIT_SPLIT:%.*]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_LOOPEXIT_SPLIT:%.*]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: exit.loopexit.split: ; CHECK-NEXT: br label [[EXIT_LOOPEXIT]] ; CHECK: exit.loopexit: @@ -713,7 +713,7 @@ define i32 @partial_unswitch_true_successor_insert_point(ptr %ptr, i32 %N) { ; CHECK: loop.latch: ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: exit.split: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: @@ -784,7 +784,7 @@ define i32 @partial_unswitch_true_successor_hoist_invariant(ptr %ptr, i32 %N) { ; CHECK: loop.latch: ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: exit.split: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: @@ -1073,7 +1073,7 @@ define i32 @partial_unswitch_true_to_latch(ptr %ptr, i32 %N) { ; CHECK: loop.latch: ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: exit.split: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: @@ -1138,7 +1138,7 @@ define i32 @partial_unswitch_exiting_block_with_multiple_unswitch_candidates(i32 ; CHECK-NEXT: store i32 [[TMP1:%.*]], ptr [[PTR]], align 16 ; CHECK-NEXT: br label [[EXITING]] ; CHECK: 
exiting: -; CHECK-NEXT: br i1 [[EXIT_COND]], label [[LOOP]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[EXIT_COND]], label [[LOOP]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: exit.split: ; CHECK-NEXT: [[RET_VAL:%.*]] = phi i32 [ 1, [[EXITING]] ] ; CHECK-NEXT: br label [[EXIT]] @@ -1249,7 +1249,7 @@ define i32 @partial_unswitch_true_successor_for_cost_calculation(ptr %ptr, i32 % ; CHECK: loop.latch: ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: exit.split: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: @@ -1360,7 +1360,7 @@ define i32 @partial_unswitch_true_successor_trunc(ptr %ptr, i32 %N) { ; CHECK: loop.latch: ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: exit.split: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: @@ -1425,7 +1425,7 @@ define i32 @partial_unswitch_false_successor_trunc(ptr %ptr, i32 %N) { ; CHECK: loop.latch: ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: exit.split: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: @@ -1456,15 +1456,26 @@ exit: ret i32 10 } -; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[UNSWITCH_PARTIAL_DISABLE:![0-9]+]]} -; CHECK: [[UNSWITCH_PARTIAL_DISABLE]] = 
!{!"llvm.loop.unswitch.partial.disable"} -; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[UNSWITCH_PARTIAL_DISABLE]]} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[UNSWITCH_PARTIAL_DISABLE]]} -; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[UNSWITCH_PARTIAL_DISABLE]]} -; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[UNSWITCH_PARTIAL_DISABLE]]} -; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[UNSWITCH_PARTIAL_DISABLE]]} -; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[UNSWITCH_PARTIAL_DISABLE]]} -; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[UNSWITCH_PARTIAL_DISABLE]]} -; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[UNSWITCH_PARTIAL_DISABLE]]} -; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[UNSWITCH_PARTIAL_DISABLE]]} -; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[UNSWITCH_PARTIAL_DISABLE]]} +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 1000, i32 1} +!2 = !{!"branch_weights", i32 100, i32 3} + +;. +; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1000, i32 1} +; CHECK: [[PROF2]] = !{!"branch_weights", i32 100, i32 3} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} +; CHECK: [[META4]] = !{!"llvm.loop.unswitch.partial.disable"} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]} +; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} +; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]} +; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]} +; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META4]]} +; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META4]]} +; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META4]]} +; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META4]]} +; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META4]]} +; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META4]]} +;. 
From 7f94d4afa16c49c665034db7a4b8c40f4ae2bc45 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Fri, 31 Oct 2025 15:48:16 -0700 Subject: [PATCH 2/2] Apply suggestions from code review --- llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index dd36a63c1564b..239526e85e1fd 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -377,6 +377,8 @@ static void buildPartialInvariantUnswitchConditionalBranch( IRBuilder<> IRB(&BB); IRB.SetCurrentDebugLocation(DebugLoc::getCompilerGenerated()); Value *Cond = VMap[ToDuplicate[0]]; + // The expectation is that ToDuplicate[0] is the condition used by the + // OriginalBranch, in which case we can clone the profile metadata from there. auto *ProfData = !ProfcheckDisableMetadataFixes && ToDuplicate[0] == skipTrivialSelect(OriginalBranch.getCondition()) @@ -386,8 +388,8 @@ static void buildPartialInvariantUnswitchConditionalBranch( IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc, Direction ? &NormalSucc : &UnswitchedSucc, ProfData); if (!ProfData) - setExplicitlyUnknownBranchWeightsIfProfiled( - *BR, *BR->getParent()->getParent(), DEBUG_TYPE); + setExplicitlyUnknownBranchWeightsIfProfiled(*BR, *BR->getFunction(), + DEBUG_TYPE); } /// Rewrite the PHI nodes in an unswitched loop exit basic block.