Skip to content

Commit 5354b0a

Browse files
caojoshua authored and
vchuravy committed
[SimpleLoopUnswitch] unswitch selects
The old LoopUnswitch pass unswitched selects, but that support was never ported to the new SimpleLoopUnswitch.

We unswitch by turning:

```
S = select %cond, %a, %b
```

into:

```
head:
  br %cond, label %then, label %tail
then:
  br label %tail
tail:
  S = phi [ %a, %then ], [ %b, %head ]
```

Unswitching a select is always nontrivial, since the successors do not exit the loop and the loop body always needs to be cloned.

Differential Revision: https://reviews.llvm.org/D138526
1 parent 6783826 commit 5354b0a

File tree

3 files changed

+115
-28
lines changed

3 files changed

+115
-28
lines changed

llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp

Lines changed: 82 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ using namespace llvm::PatternMatch;
7070

7171
STATISTIC(NumBranches, "Number of branches unswitched");
7272
STATISTIC(NumSwitches, "Number of switches unswitched");
73+
STATISTIC(NumSelects, "Number of selects turned into branches for unswitching");
7374
STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
7475
STATISTIC(NumTrivial, "Number of unswitches that are trivial");
7576
STATISTIC(
@@ -2552,6 +2553,59 @@ static InstructionCost computeDomSubtreeCost(
25522553
return Cost;
25532554
}
25542555

2556+
/// Turns a select instruction into an explicit control-flow branch,
2557+
/// making the following replacement:
2558+
///
2559+
/// head:
2560+
/// --code before select--
2561+
/// select %cond, %trueval, %falseval
2562+
/// --code after select--
2563+
///
2564+
/// into
2565+
///
2566+
/// head:
2567+
/// --code before select--
2568+
/// br i1 %cond, label %then, label %tail
2569+
///
2570+
/// then:
2571+
/// br %tail
2572+
///
2573+
/// tail:
2574+
/// phi [ %trueval, %then ], [ %falseval, %head]
2575+
/// --code after select--
2576+
///
2577+
/// It also makes all relevant DT and LI updates, so that all structures are in
2578+
/// valid state after this transform.
///
/// \p MSSAU may be null; MemorySSA is only updated (and optionally verified)
/// when it is provided. \p AC is only used for the undef/poison query on the
/// select condition.
///
/// \returns the terminator of the block that originally held \p SI, as a
/// BranchInst. \p SI itself is erased and all of its uses are rewritten to the
/// new phi.
2579+
static BranchInst *turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT,
2580+
LoopInfo &LI, MemorySSAUpdater *MSSAU,
2581+
AssumptionCache *AC) {
2582+
LLVM_DEBUG(dbgs() << "Turning " << *SI << " into a branch.\n");
2583+
// Remember the select's block now; its terminator is read back after the
// split below.
BasicBlock *HeadBB = SI->getParent();
2584+
2585+
// NOTE(review): Cond may be rebound to a freshly inserted FreezeInst here, but
// the split below is given SI->getCondition() rather than Cond, so the freeze
// appears to have no users within this function. Confirm whether the freeze
// is intentionally left unused or whether Cond was meant to be the branch
// condition.
Value *Cond = SI->getCondition();
2586+
if (!isGuaranteedNotToBeUndefOrPoison(Cond, AC, SI, &DT))
2587+
Cond = new FreezeInst(Cond, Cond->getName() + ".fr", SI);
2588+
// Split at the select and branch on its (original) condition. The select's
// profile metadata is forwarded as the branch weights, and DT/LI are passed
// along so the helper can update them.
// NOTE(review): the `false` argument is presumably the "then block is
// unreachable" flag, i.e. the then block falls through to the tail — confirm
// against SplitBlockAndInsertIfThen's declaration.
SplitBlockAndInsertIfThen(SI->getCondition(), SI, false,
2589+
SI->getMetadata(LLVMContext::MD_prof), &DT, &LI);
2590+
// The cast asserts that the head block now ends in a branch; successor 0 is
// treated as the new "then" block and successor 1 as the tail.
auto *CondBr = cast<BranchInst>(HeadBB->getTerminator());
2591+
BasicBlock *ThenBB = CondBr->getSuccessor(0),
2592+
*TailBB = CondBr->getSuccessor(1);
2593+
// Tell MemorySSA about the block split, using the select as the split point.
if (MSSAU)
2594+
MSSAU->moveAllAfterSpliceBlocks(HeadBB, TailBB, SI);
2595+
2596+
// Replace the select with a two-input phi created immediately before it: the
// true value arrives via the then block, the false value directly from head.
PHINode *Phi = PHINode::Create(SI->getType(), 2, "unswitched.select", SI);
2597+
Phi->addIncoming(SI->getTrueValue(), ThenBB);
2598+
Phi->addIncoming(SI->getFalseValue(), HeadBB);
2599+
SI->replaceAllUsesWith(Phi);
2600+
SI->eraseFromParent();
2601+
2602+
if (MSSAU && VerifyMemorySSA)
2603+
MSSAU->getMemorySSA()->verifyMemorySSA();
2604+
2605+
// Count this conversion in the NumSelects statistic.
++NumSelects;
2606+
return CondBr;
2607+
}
2608+
25552609
/// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
25562610
/// making the following replacement:
25572611
///
@@ -2663,9 +2717,10 @@ static int CalculateUnswitchCostMultiplier(
26632717
BasicBlock *CondBlock = TI.getParent();
26642718
if (DT.dominates(CondBlock, Latch) &&
26652719
(isGuard(&TI) ||
2666-
llvm::count_if(successors(&TI), [&L](BasicBlock *SuccBB) {
2667-
return L.contains(SuccBB);
2668-
}) <= 1)) {
2720+
(TI.isTerminator() &&
2721+
llvm::count_if(successors(&TI), [&L](BasicBlock *SuccBB) {
2722+
return L.contains(SuccBB);
2723+
}) <= 1))) {
26692724
NumCostMultiplierSkipped++;
26702725
return 1;
26712726
}
@@ -2674,12 +2729,17 @@ static int CalculateUnswitchCostMultiplier(
26742729
int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
26752730
: std::distance(LI.begin(), LI.end()));
26762731
// Count amount of clones that all the candidates might cause during
2677-
// unswitching. Branch/guard counts as 1, switch counts as log2 of its cases.
2732+
// unswitching. Branch/guard/select counts as 1, switch counts as log2 of its
2733+
// cases.
26782734
int UnswitchedClones = 0;
26792735
for (auto Candidate : UnswitchCandidates) {
26802736
Instruction *CI = Candidate.first;
26812737
BasicBlock *CondBlock = CI->getParent();
26822738
bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2739+
if (isa<SelectInst>(CI)) {
2740+
UnswitchedClones++;
2741+
continue;
2742+
}
26832743
if (isGuard(CI)) {
26842744
if (!SkipExitingSuccessors)
26852745
UnswitchedClones++;
@@ -2747,14 +2807,19 @@ static bool unswitchBestCondition(
27472807
if (LI.getLoopFor(BB) != &L)
27482808
continue;
27492809

2750-
if (CollectGuards)
2751-
for (auto &I : *BB)
2752-
if (isGuard(&I)) {
2753-
auto *Cond = cast<IntrinsicInst>(&I)->getArgOperand(0);
2754-
// TODO: Support AND, OR conditions and partial unswitching.
2755-
if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2756-
UnswitchCandidates.push_back({&I, {Cond}});
2757-
}
2810+
for (auto &I : *BB) {
2811+
if (auto *SI = dyn_cast<SelectInst>(&I)) {
2812+
auto *Cond = SI->getCondition();
2813+
if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2814+
UnswitchCandidates.push_back({&I, {Cond}});
2815+
} else if (CollectGuards && isGuard(&I)) {
2816+
auto *Cond =
2817+
skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
2818+
// TODO: Support AND, OR conditions and partial unswitching.
2819+
if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2820+
UnswitchCandidates.push_back({&I, {Cond}});
2821+
}
2822+
}
27582823

27592824
if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
27602825
// We can only consider fully loop-invariant switch conditions as we need
@@ -2953,7 +3018,8 @@ static bool unswitchBestCondition(
29533018
// loop. This is computing the new cost of unswitching a condition.
29543019
// Note that guards and selects always have 2 unique successors that are
29553020
// implicit and will be materialized if we decide to unswitch them.
2956-
int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
3021+
int SuccessorsCount =
3022+
isGuard(&TI) || isa<SelectInst>(TI) ? 2 : Visited.size();
29573023
assert(SuccessorsCount > 1 &&
29583024
"Cannot unswitch a condition without multiple distinct successors!");
29593025
return (LoopCost - Cost) * (SuccessorsCount - 1);
@@ -3004,7 +3070,9 @@ static bool unswitchBestCondition(
30043070
PartialIVInfo.InstToDuplicate.clear();
30053071

30063072
// If the best candidate is a select or a guard, turn it into a branch.
3007-
if (isGuard(BestUnswitchTI))
3073+
if (auto *SI = dyn_cast<SelectInst>(BestUnswitchTI))
3074+
BestUnswitchTI = turnSelectIntoBranch(SI, DT, LI, MSSAU, &AC);
3075+
else if (isGuard(BestUnswitchTI))
30083076
BestUnswitchTI = turnGuardIntoBranch(cast<IntrinsicInst>(BestUnswitchTI), L,
30093077
ExitBlocks, DT, LI, MSSAU);
30103078

llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2332,21 +2332,26 @@ exit:
23322332
define i32 @test_partial_unswitch_all_conds_guaranteed_non_poison(i1 noundef %c.1, i1 noundef %c.2) {
23332333
; CHECK-LABEL: @test_partial_unswitch_all_conds_guaranteed_non_poison(
23342334
; CHECK-NEXT: entry:
2335-
; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[C_1:%.*]], [[C_2:%.*]]
2336-
; CHECK-NEXT: br i1 [[TMP0]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_US:%.*]]
2335+
; CHECK-NEXT: br i1 [[C_1:%.*]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
23372336
; CHECK: entry.split.us:
23382337
; CHECK-NEXT: br label [[LOOP_US:%.*]]
23392338
; CHECK: loop.us:
2340-
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @a()
2341-
; CHECK-NEXT: br label [[EXIT_SPLIT_US:%.*]]
2339+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @a()
2340+
; CHECK-NEXT: br label [[TMP1:%.*]]
2341+
; CHECK: 1:
2342+
; CHECK-NEXT: br label [[TMP2:%.*]]
2343+
; CHECK: 2:
2344+
; CHECK-NEXT: [[UNSWITCHED_SELECT_US:%.*]] = phi i1 [ [[C_2:%.*]], [[TMP1]] ]
2345+
; CHECK-NEXT: br i1 [[UNSWITCHED_SELECT_US]], label [[LOOP_US]], label [[EXIT_SPLIT_US:%.*]]
23422346
; CHECK: exit.split.us:
23432347
; CHECK-NEXT: br label [[EXIT:%.*]]
23442348
; CHECK: entry.split:
23452349
; CHECK-NEXT: br label [[LOOP:%.*]]
23462350
; CHECK: loop:
2347-
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @a()
2348-
; CHECK-NEXT: [[SEL:%.*]] = select i1 true, i1 true, i1 false
2349-
; CHECK-NEXT: br i1 [[SEL]], label [[LOOP]], label [[EXIT_SPLIT:%.*]]
2351+
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @a()
2352+
; CHECK-NEXT: br label [[TMP4:%.*]]
2353+
; CHECK: 4:
2354+
; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT_SPLIT:%.*]]
23502355
; CHECK: exit.split:
23512356
; CHECK-NEXT: br label [[EXIT]]
23522357
; CHECK: exit:

llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-trivial-select.ll

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -92,17 +92,31 @@ define i32 @unswitch_trivial_select_cmp_outside(i32 %x) {
9292
; CHECK: entry.split.us:
9393
; CHECK-NEXT: br label [[LOOP_US:%.*]]
9494
; CHECK: loop.us:
95-
; CHECK-NEXT: [[P_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ 35, [[LOOP_US]] ]
96-
; CHECK-NEXT: br label [[LOOP_US]]
95+
; CHECK-NEXT: [[P_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ 35, [[TMP1:%.*]] ]
96+
; CHECK-NEXT: [[C_FR_US:%.*]] = freeze i1 true
97+
; CHECK-NEXT: br label [[TMP0:%.*]]
98+
; CHECK: 0:
99+
; CHECK-NEXT: br label [[TMP1]]
100+
; CHECK: 1:
101+
; CHECK-NEXT: [[UNSWITCHED_SELECT_US:%.*]] = phi i1 [ true, [[TMP0]] ]
102+
; CHECK-NEXT: br i1 [[UNSWITCHED_SELECT_US]], label [[LOOP_US]], label [[EXIT_SPLIT_US:%.*]]
103+
; CHECK: exit.split.us:
104+
; CHECK-NEXT: [[LCSSA_US:%.*]] = phi i32 [ [[P_US]], [[TMP1]] ]
105+
; CHECK-NEXT: br label [[EXIT:%.*]]
97106
; CHECK: entry.split:
98107
; CHECK-NEXT: br label [[LOOP:%.*]]
99108
; CHECK: loop:
100-
; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ]
101-
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 false, i1 true, i1 false
102-
; CHECK-NEXT: br label [[EXIT:%.*]]
109+
; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ 35, [[TMP2:%.*]] ]
110+
; CHECK-NEXT: [[C_FR:%.*]] = freeze i1 false
111+
; CHECK-NEXT: br label [[TMP2]]
112+
; CHECK: 2:
113+
; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT_SPLIT:%.*]]
114+
; CHECK: exit.split:
115+
; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ [[P]], [[TMP2]] ]
116+
; CHECK-NEXT: br label [[EXIT]]
103117
; CHECK: exit:
104-
; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ [[P]], [[LOOP]] ]
105-
; CHECK-NEXT: ret i32 [[LCSSA]]
118+
; CHECK-NEXT: [[DOTUS_PHI:%.*]] = phi i32 [ [[LCSSA]], [[EXIT_SPLIT]] ], [ [[LCSSA_US]], [[EXIT_SPLIT_US]] ]
119+
; CHECK-NEXT: ret i32 [[DOTUS_PHI]]
106120
;
107121
entry:
108122
%c = icmp ult i32 %x, 100

0 commit comments

Comments
 (0)