Skip to content

Commit fe8f098

Browse files
committed
[SimplifyCFG] Speculatively execute empty BB with multiple predecessors
1 parent 003eee9 commit fe8f098

File tree

12 files changed

+248
-233
lines changed

12 files changed

+248
-233
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3506,6 +3506,96 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
35063506
return true;
35073507
}
35083508

3509+
/// Speculate a conditional basic block flattening the CFG.
3510+
/// Compared to speculativelyExecuteBB, it allows \p ThenBB to have multiple
3511+
/// predecessors other than the current BB. An illustration of this transform is
3512+
/// turning this IR:
3513+
/// \code
3514+
/// BB:
3515+
/// %cmp = icmp ult %x, %y
3516+
/// br i1 %cmp, label %EndBB, label %ThenBB
3517+
/// ThenBB:
3518+
/// br label %EndBB
3519+
/// EndBB:
3520+
/// %phi = phi i1 [ true, %ThenBB ], [ false, %BB ], [ false, %OtherBB ]
3521+
/// ...
3522+
/// \endcode
3523+
///
3524+
/// Into this IR:
3525+
/// \code
3526+
/// BB:
3527+
/// %cmp = icmp ult %x, %y
3528+
/// %sel = select i1 %cmp, i1 false, i1 true
3529+
/// br label %EndBB
3530+
/// ThenBB:
3531+
/// br label %EndBB
3532+
/// EndBB:
3533+
/// %phi = phi i1 [ true, %ThenBB ], [ %sel, %BB ], [ false, %OtherBB ]
3534+
/// ...
3535+
/// \endcode
3536+
/// \returns true if the branch edge is removed.
3537+
static bool speculativelyExecuteEmptyBB(BranchInst *BI, bool Invert,
3538+
DomTreeUpdater *DTU,
3539+
const TargetTransformInfo &TTI) {
3540+
BasicBlock *BB = BI->getParent();
3541+
BasicBlock *ThenBB = BI->getSuccessor(Invert);
3542+
BasicBlock *EndBB = BI->getSuccessor(!Invert);
3543+
3544+
BranchInst *SuccBI = dyn_cast<BranchInst>(ThenBB->getTerminator());
3545+
if (!SuccBI || !SuccBI->isUnconditional() || SuccBI->getSuccessor(0) != EndBB)
3546+
return false;
3547+
if (&ThenBB->front() != SuccBI)
3548+
return false;
3549+
if (!isProfitableToSpeculate(BI, Invert, TTI))
3550+
return false;
3551+
3552+
InstructionCost Budget =
3553+
PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
3554+
InstructionCost Cost = 0;
3555+
unsigned SpeculatedInstructions = 0;
3556+
if (!validateAndCostRequiredSelects(BB, ThenBB, EndBB, SpeculatedInstructions,
3557+
Cost, TTI) ||
3558+
Cost > Budget)
3559+
return false;
3560+
3561+
LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3562+
3563+
// Insert selects and rewrite the PHI operands.
3564+
Value *BrCond = BI->getCondition();
3565+
IRBuilder<NoFolder> Builder(BI);
3566+
for (PHINode &PN : EndBB->phis()) {
3567+
unsigned OrigI = PN.getBasicBlockIndex(BB);
3568+
unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3569+
Value *OrigV = PN.getIncomingValue(OrigI);
3570+
Value *ThenV = PN.getIncomingValue(ThenI);
3571+
3572+
// Skip PHIs which are trivial.
3573+
if (OrigV == ThenV)
3574+
continue;
3575+
3576+
// Create a select whose true value is the speculatively executed value and
3577+
// false value is the pre-existing value. Swap them if the branch
3578+
// destinations were inverted.
3579+
Value *TrueV = ThenV, *FalseV = OrigV;
3580+
if (Invert)
3581+
std::swap(TrueV, FalseV);
3582+
Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3583+
PN.setIncomingValue(OrigI, V);
3584+
}
3585+
3586+
// Modify CFG
3587+
ThenBB->removePredecessor(BB);
3588+
BranchInst *NewBI = Builder.CreateBr(EndBB);
3589+
// Transfer the metadata to the new branch instruction.
3590+
NewBI->copyMetadata(*BI, {LLVMContext::MD_loop, LLVMContext::MD_dbg,
3591+
LLVMContext::MD_annotation});
3592+
BI->eraseFromParent();
3593+
if (DTU)
3594+
DTU->applyUpdates({{DominatorTree::Delete, BB, ThenBB}});
3595+
3596+
return true;
3597+
}
3598+
35093599
/// Return true if we can thread a branch across this block.
35103600
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
35113601
int Size = 0;
@@ -8125,6 +8215,13 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
81258215
return requestResimplify();
81268216
}
81278217

8218+
if (Options.SpeculateBlocks) {
8219+
if (speculativelyExecuteEmptyBB(BI, /*Invert=*/false, DTU, TTI))
8220+
return true;
8221+
if (speculativelyExecuteEmptyBB(BI, /*Invert=*/true, DTU, TTI))
8222+
return true;
8223+
}
8224+
81288225
// If this is a branch on something for which we know the constant value in
81298226
// predecessors (e.g. a phi node in the current block), thread control
81308227
// through this block.

llvm/test/CodeGen/AArch64/and-sink.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,14 @@
1111
define dso_local i32 @and_sink1(i32 %a, i1 %c) {
1212
; CHECK-LABEL: and_sink1:
1313
; CHECK: // %bb.0:
14-
; CHECK-NEXT: tbz w1, #0, .LBB0_3
14+
; CHECK-NEXT: tbz w1, #0, .LBB0_2
1515
; CHECK-NEXT: // %bb.1: // %bb0
16+
; CHECK-NEXT: tst w0, #0x4
1617
; CHECK-NEXT: adrp x8, A
18+
; CHECK-NEXT: cset w0, eq
1719
; CHECK-NEXT: str wzr, [x8, :lo12:A]
18-
; CHECK-NEXT: tbnz w0, #2, .LBB0_3
19-
; CHECK-NEXT: // %bb.2:
20-
; CHECK-NEXT: mov w0, #1 // =0x1
2120
; CHECK-NEXT: ret
22-
; CHECK-NEXT: .LBB0_3: // %bb2
21+
; CHECK-NEXT: .LBB0_2:
2322
; CHECK-NEXT: mov w0, wzr
2423
; CHECK-NEXT: ret
2524

llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
2+
; RUN: llc < %s -mtriple=aarch64 -phi-node-folding-threshold=0 | FileCheck %s
33

44
; When consuming profile data we sometimes flip a branch to improve runtime
55
; performance. If we are optimizing for size, we avoid changing the branch to

0 commit comments

Comments
 (0)