Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 26 additions & 23 deletions llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2144,34 +2144,36 @@ void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName,
bool CurrentLoopValid, bool PartiallyInvariant,
bool InjectedCondition, ArrayRef<Loop *> NewLoops) {
// If we did a non-trivial unswitch, we have added new (cloned) loops.
if (!NewLoops.empty())
auto RecordLoopAsUnswitched = [&](Loop *TargetLoop, StringRef Tag,
StringRef DisableTag) {
auto &Ctx = TargetLoop->getHeader()->getContext();
MDNode *DisableMD = MDNode::get(Ctx, MDString::get(Ctx, DisableTag));
MDNode *NewLoopID = makePostTransformationMetadata(
Ctx, TargetLoop->getLoopID(), {Tag}, {DisableMD});
TargetLoop->setLoopID(NewLoopID);
};

// If we performed a non-trivial unswitch, we have added new cloned loops.
// Mark such newly-created loops as visited.
if (!NewLoops.empty()) {
for (Loop *NL : NewLoops)
RecordLoopAsUnswitched(NL, "llvm.loop.unswitch.nontrivial",
"llvm.loop.unswitch.nontrivial.disable");
U.addSiblingLoops(NewLoops);
}

// If the current loop remains valid, we should revisit it to catch any
// other unswitch opportunities. Otherwise, we need to mark it as deleted.
if (CurrentLoopValid) {
if (PartiallyInvariant) {
// Mark the new loop as partially unswitched, to avoid unswitching on
// the same condition again.
auto &Context = L.getHeader()->getContext();
MDNode *DisableUnswitchMD = MDNode::get(
Context,
MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
MDNode *NewLoopID = makePostTransformationMetadata(
Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
{DisableUnswitchMD});
L.setLoopID(NewLoopID);
RecordLoopAsUnswitched(&L, "llvm.loop.unswitch.partial",
"llvm.loop.unswitch.partial.disable");
} else if (InjectedCondition) {
// Do the same for injection of invariant conditions.
auto &Context = L.getHeader()->getContext();
MDNode *DisableUnswitchMD = MDNode::get(
Context,
MDString::get(Context, "llvm.loop.unswitch.injection.disable"));
MDNode *NewLoopID = makePostTransformationMetadata(
Context, L.getLoopID(), {"llvm.loop.unswitch.injection"},
{DisableUnswitchMD});
L.setLoopID(NewLoopID);
RecordLoopAsUnswitched(&L, "llvm.loop.unswitch.injection",
"llvm.loop.unswitch.injection.disable");
} else
U.revisitCurrentLoop();
} else
Expand Down Expand Up @@ -2809,9 +2811,9 @@ static BranchInst *turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
}

/// Cost multiplier is a way to limit potentially exponential behavior
/// of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch
/// candidates available. Also accounting for the number of "sibling" loops with
/// the idea to account for previous unswitches that already happened on this
/// of loop-unswitch. Cost is multiplied in proportion of 2^number of unswitch
/// candidates available. Also consider the number of "sibling" loops with
/// the idea of accounting for previous unswitches that already happened on this
/// cluster of loops. There was an attempt to keep this formula simple,
/// just enough to limit the worst case behavior. Even if it is not that simple
/// now it is still not an attempt to provide a detailed heuristic size
Expand Down Expand Up @@ -3507,8 +3509,9 @@ static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
SmallVector<NonTrivialUnswitchCandidate, 4> UnswitchCandidates;
IVConditionInfo PartialIVInfo;
Instruction *PartialIVCondBranch = nullptr;
collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
PartialIVCondBranch, L, LI, AA, MSSAU);
if (!findOptionMDForLoop(&L, "llvm.loop.unswitch.nontrivial.disable"))
collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
PartialIVCondBranch, L, LI, AA, MSSAU);
if (!findOptionMDForLoop(&L, "llvm.loop.unswitch.injection.disable"))
collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo,
PartialIVCondBranch, L, DT, LI, AA,
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/LICM/PR116813-memoryssa-outdated.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ define i32 @foo(i1 %arg, ptr %arg1) {
; CHECK: [[BB1]]:
; CHECK-NEXT: [[UNSWITCHED_SELECT_US:%.*]] = phi ptr [ [[ARG1]], %[[BB0]] ]
; CHECK-NEXT: [[I3_US:%.*]] = call i32 [[UNSWITCHED_SELECT_US]]()
; CHECK-NEXT: br i1 true, label %[[LOOP_US]], label %[[RET_SPLIT_US:.*]]
; CHECK-NEXT: br i1 true, label %[[LOOP_US]], label %[[RET_SPLIT_US:.*]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[RET_SPLIT_US]]:
; CHECK-NEXT: [[I3_LCSSA_US:%.*]] = phi i32 [ [[I3_US]], %[[BB1]] ]
; CHECK-NEXT: br label %[[RET:.*]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ define dso_local noundef i32 @_Z33block_scaling_decompr_8bitjPK27compressed_data
; CHECK-NEXT: [[DST_ADDR_1]] = getelementptr inbounds nuw i8, ptr [[DST_ADDR_052]], i64 48
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT58]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP4]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret i32 0
;
Expand Down Expand Up @@ -801,6 +801,8 @@ attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) }
!4 = distinct !{!4, !5}
!5 = !{!"llvm.loop.mustprogress"}
;.
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]]}
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]}
; CHECK: [[META5]] = !{!"llvm.loop.mustprogress"}
; CHECK: [[META6]] = !{!"llvm.loop.unswitch.nontrivial.disable"}
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META5]]}
;.
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
;
; RUN: opt < %s -enable-unswitch-cost-multiplier=false \
; RUN: -passes='loop-mssa(licm,simple-loop-unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | \
; RUN: sort -b -k 1 | FileCheck %s --check-prefixes=LOOP32
; RUN: sort -b -k 1 | FileCheck %s --check-prefixes=LOOP6
;
; Single loop nest, not unswitched
; LOOP1: Loop at depth 1 containing:
Expand All @@ -55,23 +55,23 @@
;
; Half unswitched loop nests, with unscaled4 and div1 it gets fewer depth1 loops unswitched
; since they have more cost.
; LOOP-UNSCALE4-DIV1-COUNT-6: Loop at depth 1 containing:
; LOOP-UNSCALE4-DIV1-COUNT-19: Loop at depth 2 containing:
; LOOP-UNSCALE4-DIV1-COUNT-29: Loop at depth 3 containing:
; LOOP-UNSCALE4-DIV1-COUNT-4: Loop at depth 1 containing:
; LOOP-UNSCALE4-DIV1-COUNT-4: Loop at depth 2 containing:
; LOOP-UNSCALE4-DIV1-COUNT-4: Loop at depth 3 containing:
; LOOP-UNSCALE4-DIV1-NOT: Loop at depth {{[0-9]+}} containing:
;
; Half unswitched loop nests, with unscaled4 and div2 it gets more depth1 loops unswitched
; as div2 kicks in.
; LOOP-UNSCALE4-DIV2-COUNT-11: Loop at depth 1 containing:
; LOOP-UNSCALE4-DIV2-COUNT-22: Loop at depth 2 containing:
; LOOP-UNSCALE4-DIV2-COUNT-29: Loop at depth 3 containing:
; LOOP-UNSCALE4-DIV2-COUNT-4: Loop at depth 1 containing:
; LOOP-UNSCALE4-DIV2-COUNT-4: Loop at depth 2 containing:
; LOOP-UNSCALE4-DIV2-COUNT-4: Loop at depth 3 containing:
; LOOP-UNSCALE4-DIV2-NOT: Loop at depth {{[0-9]+}} containing:
;
; 32 loop nests, fully unswitched
; LOOP32-COUNT-32: Loop at depth 1 containing:
; LOOP32-COUNT-32: Loop at depth 2 containing:
; LOOP32-COUNT-32: Loop at depth 3 containing:
; LOOP32-NOT: Loop at depth {{[0-9]+}} containing:
; 6 loop nests, fully unswitched
; LOOP6-COUNT-6: Loop at depth 1 containing:
; LOOP6-COUNT-6: Loop at depth 2 containing:
; LOOP6-COUNT-6: Loop at depth 3 containing:
; LOOP6-NOT: Loop at depth {{[0-9]+}} containing:

declare void @bar()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
;
; Half unswitched loop nests, with unscaled3 and div1 it gets fewer depth1 loops unswitched
; since they have more cost.
; LOOP-UNSCALE3-DIV1-COUNT-4: Loop at depth 1 containing:
; LOOP-UNSCALE3-DIV1-COUNT-2: Loop at depth 1 containing:
; LOOP-UNSCALE3-DIV1-NOT: Loop at depth 1 containing:
; LOOP-UNSCALE3-DIV1-COUNT-1: Loop at depth 2 containing:
; LOOP-UNSCALE3-DIV1-NOT: Loop at depth 2 containing:
Expand All @@ -69,15 +69,15 @@
;
; Half unswitched loop nests, with unscaled3 and div2 it gets more depth1 loops unswitched
; as div2 kicks in.
; LOOP-UNSCALE3-DIV2-COUNT-6: Loop at depth 1 containing:
; LOOP-UNSCALE3-DIV2-COUNT-2: Loop at depth 1 containing:
; LOOP-UNSCALE3-DIV2-NOT: Loop at depth 1 containing:
; LOOP-UNSCALE3-DIV2-COUNT-1: Loop at depth 2 containing:
; LOOP-UNSCALE3-DIV2-NOT: Loop at depth 2 containing:
; LOOP-UNSCALE3-DIV2-COUNT-1: Loop at depth 3 containing:
; LOOP-UNSCALE3-DIV2-NOT: Loop at depth 3 containing:
;
; Maximally unswitched (copy of the outer loop per each condition)
; LOOP-MAX-COUNT-6: Loop at depth 1 containing:
; LOOP-MAX-COUNT-2: Loop at depth 1 containing:
; LOOP-MAX-NOT: Loop at depth 1 containing:
; LOOP-MAX-COUNT-1: Loop at depth 2 containing:
; LOOP-MAX-NOT: Loop at depth 2 containing:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,46 +25,37 @@
;
; RUN: opt < %s -enable-unswitch-cost-multiplier=true \
; RUN: -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=1 \
; RUN: -passes='loop(simple-loop-unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP5
; RUN: -passes='loop(simple-loop-unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP4
;
; RUN: opt < %s -enable-unswitch-cost-multiplier=true \
; RUN: -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=1 \
; RUN: -passes='loop-mssa(simple-loop-unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP5
;
; With relaxed candidates multiplier (unscaled candidates == 8) and with relaxed
; siblings multiplier for top-level loops (toplevel-div == 8) we should get
; 2^(num conds) == 2^5 == 32
; copies of the loop:
; RUN: -passes='loop-mssa(simple-loop-unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP4
;
; RUN: opt < %s -enable-unswitch-cost-multiplier=true \
; RUN: -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=8 \
; RUN: -passes='loop(simple-loop-unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP32
; RUN: -passes='loop(simple-loop-unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP6
;
; RUN: opt < %s -enable-unswitch-cost-multiplier=true \
; RUN: -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=8 \
; RUN: -passes='loop-mssa(simple-loop-unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP32
;
; Similarly get
; 2^(num conds) == 2^5 == 32
; copies of the loop when cost multiplier is disabled:
; RUN: -passes='loop-mssa(simple-loop-unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP6
;
; RUN: opt < %s -enable-unswitch-cost-multiplier=false \
; RUN: -passes='loop(simple-loop-unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP32
; RUN: -passes='loop(simple-loop-unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP6
;
; RUN: opt < %s -enable-unswitch-cost-multiplier=false \
; RUN: -passes='loop-mssa(simple-loop-unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP32
; RUN: -passes='loop-mssa(simple-loop-unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP6
;
; Single loop, not unswitched
; LOOP1: Loop at depth 1 containing:
; LOOP1-NOT: Loop at depth 1 containing:

; 5 loops, unswitched 4 times
; LOOP5-COUNT-5: Loop at depth 1 containing:
; LOOP5-NOT: Loop at depth 1 containing:
; 4 loops, unswitched 4 times
; LOOP4-COUNT-4: Loop at depth 1 containing:
; LOOP4-NOT: Loop at depth 1 containing:

; 32 loops, fully unswitched
; LOOP32-COUNT-32: Loop at depth 1 containing:
; LOOP32-NOT: Loop at depth 1 containing:
; 6 loops, fully unswitched
; LOOP6-COUNT-6: Loop at depth 1 containing:
; LOOP6-NOT: Loop at depth 1 containing:

define void @loop_simple5(ptr %addr, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5) {
entry:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,19 +61,19 @@
; Somewhat relaxed restrictions on candidates:
; LOOP-RELAX-COUNT-5: Loop at depth 1 containing:
; LOOP-RELAX-NOT: Loop at depth 1 containing:
; LOOP-RELAX-COUNT-32: Loop at depth 2 containing:
; LOOP-RELAX-COUNT-5: Loop at depth 2 containing:
; LOOP-RELAX-NOT: Loop at depth 2 containing:
;
; Even more relaxed restrictions on candidates and siblings.
; LOOP-RELAX2-COUNT-11: Loop at depth 1 containing:
; LOOP-RELAX2-COUNT-5: Loop at depth 1 containing:
; LOOP-RELAX2-NOT: Loop at depth 1 containing:
; LOOP-RELAX2-COUNT-40: Loop at depth 2 containing:
; LOOP-RELAX2-COUNT-5: Loop at depth 2 containing:
; LOOP-RELAX-NOT: Loop at depth 2 containing:
;
; Unswitched as much as it could (with multiplier disabled).
; LOOP-MAX-COUNT-56: Loop at depth 1 containing:
; LOOP-MAX-COUNT-6: Loop at depth 1 containing:
; LOOP-MAX-NOT: Loop at depth 1 containing:
; LOOP-MAX-COUNT-111: Loop at depth 2 containing:
; LOOP-MAX-COUNT-11: Loop at depth 2 containing:
; LOOP-MAX-NOT: Loop at depth 2 containing:

define i32 @loop_switch(ptr %addr, i32 %c1, i32 %c2) {
Expand Down
34 changes: 17 additions & 17 deletions llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
Original file line number Diff line number Diff line change
Expand Up @@ -38,25 +38,25 @@ exit:
}

define void @test_two_guards(i1 %cond1, i1 %cond2, i32 %N) {
; CHECK-LABEL: @test_two_guards(
; CHECK-LABEL: define void @test_two_guards(i1 %cond1, i1 %cond2, i32 %N) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND1:%.*]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split
; CHECK: entry.split.us:
; CHECK-NEXT: br i1 [[COND2:%.*]], label [[ENTRY_SPLIT_US_SPLIT_US:%.*]], label [[ENTRY_SPLIT_US_SPLIT:%.*]]
; CHECK: entry.split.us.split.us:
; CHECK-NEXT: br label [[LOOP_US_US:%.*]]
; CHECK: loop.us.us:
; CHECK-NEXT: [[IV_US_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US_SPLIT_US]] ], [ [[IV_NEXT_US_US:%.*]], [[GUARDED_US2:%.*]] ]
; CHECK-NEXT: br label [[GUARDED_US_US:%.*]]
; CHECK: guarded.us.us:
; CHECK-NEXT: br label [[GUARDED_US2]]
; CHECK: guarded.us2:
; CHECK-NEXT: [[IV_NEXT_US_US]] = add i32 [[IV_US_US]], 1
; CHECK-NEXT: [[LOOP_COND_US_US:%.*]] = icmp slt i32 [[IV_NEXT_US_US]], [[N:%.*]]
; CHECK-NEXT: br i1 [[LOOP_COND_US_US]], label [[LOOP_US_US]], label [[EXIT_SPLIT_US_SPLIT_US:%.*]]
; CHECK: deopt1:
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
; CHECK-NEXT: unreachable
; CHECK-NEXT: br label %loop.us
; CHECK: loop.us:
; CHECK-NEXT: %iv.us = phi i32 [ 0, %entry.split.us ], [ %iv.next.us, %guarded.us ]
; CHECK-NEXT: br label %guarded.us
; CHECK: guarded.us:
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %cond2) [ "deopt"() ]
; CHECK-NEXT: %iv.next.us = add i32 %iv.us, 1
; CHECK-NEXT: %loop.cond.us = icmp slt i32 %iv.next.us, %N
; CHECK-NEXT: br i1 %loop.cond.us, label %loop.us, label %exit.split.us, !llvm.loop !2
; CHECK: exit.split.us:
; CHECK-NEXT: br label %exit
; CHECK: entry.split:
; CHECK-NEXT: br label %loop
; CHECK: loop:
; CHECK-NEXT: br label %deopt
; CHECK: deopt:
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
; CHECK-NEXT: unreachable
Expand Down
Loading
Loading