Skip to content

Commit 9d5adc7

Browse files
committed
Revert "reland e5581df [SimplifyCFG] accumulate bonus insts cost"
This reverts commit bd7949b. Revert this patch since reviwers have different opinions regarding the approach in post-commit review. Will open RFC for further discussion. Differential Revision: https://reviews.llvm.org/D132408
1 parent b3c9ced commit 9d5adc7

File tree

9 files changed

+40
-105
lines changed

9 files changed

+40
-105
lines changed

llvm/include/llvm/Transforms/Utils/Local.h

Lines changed: 2 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616

1717
#include "llvm/ADT/ArrayRef.h"
1818
#include "llvm/IR/Dominators.h"
19-
#include "llvm/IR/ValueMap.h"
2019
#include "llvm/Support/CommandLine.h"
2120
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
2221
#include <cstdint>
@@ -165,26 +164,6 @@ bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
165164
/// values, but instcombine orders them so it usually won't matter.
166165
bool EliminateDuplicatePHINodes(BasicBlock *BB);
167166

168-
/// Class to track cost of simplify CFG transformations.
169-
class SimplifyCFGCostTracker {
170-
/// Number of bonus instructions due to folding branches into predecessors.
171-
/// E.g. folding
172-
/// if (cond1) return false;
173-
/// if (cond2) return false;
174-
/// return true;
175-
/// into
176-
/// if (cond1 | cond2) return false;
177-
/// return true;
178-
/// In this case cond2 is always executed whereas originally it may be
179-
/// evicted due to early exit of cond1. 'cond2' is called bonus instructions
180-
/// and such bonus instructions could accumulate for unrolled loops, therefore
181-
/// use a value map to accumulate their costs across transformations.
182-
ValueMap<BasicBlock *, unsigned> NumBonusInsts;
183-
184-
public:
185-
void updateNumBonusInsts(BasicBlock *Parent, unsigned InstCount);
186-
unsigned getNumBonusInsts(BasicBlock *Parent);
187-
};
188167
/// This function is used to do simplification of a CFG. For example, it
189168
/// adjusts branches to branches to eliminate the extra hop, it eliminates
190169
/// unreachable basic blocks, and does other peephole optimization of the CFG.
@@ -195,8 +174,7 @@ extern cl::opt<bool> RequireAndPreserveDomTree;
195174
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
196175
DomTreeUpdater *DTU = nullptr,
197176
const SimplifyCFGOptions &Options = {},
198-
ArrayRef<WeakVH> LoopHeaders = {},
199-
SimplifyCFGCostTracker *CostTracker = nullptr);
177+
ArrayRef<WeakVH> LoopHeaders = {});
200178

201179
/// This function is used to flatten a CFG. For example, it uses parallel-and
202180
/// and parallel-or mode to collapse if-conditions and merge if-regions with
@@ -206,8 +184,7 @@ bool FlattenCFG(BasicBlock *BB, AAResults *AA = nullptr);
206184
/// If this basic block is ONLY a setcc and a branch, and if a predecessor
207185
/// branches to us and one of our successors, fold the setcc into the
208186
/// predecessor and use logical operations to pick the right destination.
209-
bool FoldBranchToCommonDest(BranchInst *BI, SimplifyCFGCostTracker &CostTracker,
210-
DomTreeUpdater *DTU = nullptr,
187+
bool FoldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU = nullptr,
211188
MemorySSAUpdater *MSSAU = nullptr,
212189
const TargetTransformInfo *TTI = nullptr,
213190
unsigned BonusInstThreshold = 1);

llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -221,8 +221,7 @@ static bool tailMergeBlocksWithSimilarFunctionTerminators(Function &F,
221221
/// iterating until no more changes are made.
222222
static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
223223
DomTreeUpdater *DTU,
224-
const SimplifyCFGOptions &Options,
225-
SimplifyCFGCostTracker &CostTracker) {
224+
const SimplifyCFGOptions &Options) {
226225
bool Changed = false;
227226
bool LocalChange = true;
228227

@@ -253,7 +252,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
253252
while (BBIt != F.end() && DTU->isBBPendingDeletion(&*BBIt))
254253
++BBIt;
255254
}
256-
if (simplifyCFG(&BB, TTI, DTU, Options, LoopHeaders, &CostTracker)) {
255+
if (simplifyCFG(&BB, TTI, DTU, Options, LoopHeaders)) {
257256
LocalChange = true;
258257
++NumSimpl;
259258
}
@@ -267,13 +266,11 @@ static bool simplifyFunctionCFGImpl(Function &F, const TargetTransformInfo &TTI,
267266
DominatorTree *DT,
268267
const SimplifyCFGOptions &Options) {
269268
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
270-
SimplifyCFGCostTracker CostTracker;
271269

272270
bool EverChanged = removeUnreachableBlocks(F, DT ? &DTU : nullptr);
273271
EverChanged |=
274272
tailMergeBlocksWithSimilarFunctionTerminators(F, DT ? &DTU : nullptr);
275-
EverChanged |=
276-
iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options, CostTracker);
273+
EverChanged |= iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options);
277274

278275
// If neither pass changed anything, we're done.
279276
if (!EverChanged) return false;
@@ -287,8 +284,7 @@ static bool simplifyFunctionCFGImpl(Function &F, const TargetTransformInfo &TTI,
287284
return true;
288285

289286
do {
290-
EverChanged = iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options,
291-
CostTracker);
287+
EverChanged = iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options);
292288
EverChanged |= removeUnreachableBlocks(F, DT ? &DTU : nullptr);
293289
} while (EverChanged);
294290

llvm/lib/Transforms/Utils/LoopSimplify.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -480,7 +480,6 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
480480
DominatorTree *DT, LoopInfo *LI,
481481
ScalarEvolution *SE, AssumptionCache *AC,
482482
MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
483-
SimplifyCFGCostTracker CostTracker;
484483
bool Changed = false;
485484
if (MSSAU && VerifyMemorySSA)
486485
MSSAU->getMemorySSA()->verifyMemorySSA();
@@ -662,7 +661,7 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
662661
// The block has now been cleared of all instructions except for
663662
// a comparison and a conditional branch. SimplifyCFG may be able
664663
// to fold it now.
665-
if (!FoldBranchToCommonDest(BI, CostTracker, /*DTU=*/nullptr, MSSAU))
664+
if (!FoldBranchToCommonDest(BI, /*DTU=*/nullptr, MSSAU))
666665
continue;
667666

668667
// Success. The block is now dead, so remove it from the loop,

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 18 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -207,21 +207,6 @@ STATISTIC(NumInvokes,
207207
STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
208208
STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
209209

210-
namespace llvm {
211-
212-
void SimplifyCFGCostTracker::updateNumBonusInsts(BasicBlock *BB,
213-
unsigned InstCount) {
214-
auto Loc = NumBonusInsts.find(BB);
215-
if (Loc == NumBonusInsts.end())
216-
Loc = NumBonusInsts.insert({BB, 0}).first;
217-
Loc->second = Loc->second + InstCount;
218-
}
219-
unsigned SimplifyCFGCostTracker::getNumBonusInsts(BasicBlock *BB) {
220-
return NumBonusInsts.lookup(BB);
221-
}
222-
223-
} // namespace llvm
224-
225210
namespace {
226211

227212
// The first field contains the value that the switch produces when a certain
@@ -258,10 +243,6 @@ class SimplifyCFGOpt {
258243
ArrayRef<WeakVH> LoopHeaders;
259244
const SimplifyCFGOptions &Options;
260245
bool Resimplify;
261-
// Accumulates number of bonus instructions due to merging basic blocks
262-
// of common destination.
263-
SimplifyCFGCostTracker *CostTracker;
264-
std::unique_ptr<SimplifyCFGCostTracker> LocalCostTracker;
265246

266247
Value *isValueEqualityComparison(Instruction *TI);
267248
BasicBlock *GetValueEqualityComparisonCases(
@@ -305,15 +286,8 @@ class SimplifyCFGOpt {
305286
public:
306287
SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
307288
const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
308-
const SimplifyCFGOptions &Opts,
309-
SimplifyCFGCostTracker *CostTracker_)
289+
const SimplifyCFGOptions &Opts)
310290
: TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
311-
// Cannot do this with member initializer list since LocalCostTracker is not
312-
// initialized there yet.
313-
CostTracker = CostTracker_
314-
? CostTracker_
315-
: (LocalCostTracker.reset(new SimplifyCFGCostTracker()),
316-
LocalCostTracker.get());
317291
assert((!DTU || !DTU->hasPostDomTree()) &&
318292
"SimplifyCFG is not yet capable of maintaining validity of a "
319293
"PostDomTree, so don't ask for it.");
@@ -3650,9 +3624,8 @@ static bool isVectorOp(Instruction &I) {
36503624
/// If this basic block is simple enough, and if a predecessor branches to us
36513625
/// and one of our successors, fold the block into the predecessor and use
36523626
/// logical operations to pick the right destination.
3653-
bool llvm::FoldBranchToCommonDest(BranchInst *BI,
3654-
SimplifyCFGCostTracker &CostTracker,
3655-
DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU,
3627+
bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
3628+
MemorySSAUpdater *MSSAU,
36563629
const TargetTransformInfo *TTI,
36573630
unsigned BonusInstThreshold) {
36583631
// If this block ends with an unconditional branch,
@@ -3724,6 +3697,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI,
37243697
// as "bonus instructions", and only allow this transformation when the
37253698
// number of the bonus instructions we'll need to create when cloning into
37263699
// each predecessor does not exceed a certain threshold.
3700+
unsigned NumBonusInsts = 0;
37273701
bool SawVectorOp = false;
37283702
const unsigned PredCount = Preds.size();
37293703
for (Instruction &I : *BB) {
@@ -3742,13 +3716,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI,
37423716
// predecessor. Ignore free instructions.
37433717
if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
37443718
TargetTransformInfo::TCC_Free) {
3745-
for (auto PredBB : Preds) {
3746-
CostTracker.updateNumBonusInsts(PredBB, PredCount);
3747-
// Early exits once we reach the limit.
3748-
if (CostTracker.getNumBonusInsts(PredBB) >
3749-
BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
3750-
return false;
3751-
}
3719+
NumBonusInsts += PredCount;
3720+
3721+
// Early exits once we reach the limit.
3722+
if (NumBonusInsts >
3723+
BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
3724+
return false;
37523725
}
37533726

37543727
auto IsBCSSAUse = [BB, &I](Use &U) {
@@ -3762,12 +3735,10 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI,
37623735
if (!all_of(I.uses(), IsBCSSAUse))
37633736
return false;
37643737
}
3765-
for (auto PredBB : Preds) {
3766-
if (CostTracker.getNumBonusInsts(PredBB) >
3767-
BonusInstThreshold *
3768-
(SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
3769-
return false;
3770-
}
3738+
if (NumBonusInsts >
3739+
BonusInstThreshold *
3740+
(SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
3741+
return false;
37713742

37723743
// Ok, we have the budget. Perform the transformation.
37733744
for (BasicBlock *PredBlock : Preds) {
@@ -6918,7 +6889,7 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
69186889
// branches to us and our successor, fold the comparison into the
69196890
// predecessor and use logical operations to update the incoming value
69206891
// for PHI nodes in common successor.
6921-
if (FoldBranchToCommonDest(BI, *CostTracker, DTU, /*MSSAU=*/nullptr, &TTI,
6892+
if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
69226893
Options.BonusInstThreshold))
69236894
return requestResimplify();
69246895
return false;
@@ -6987,7 +6958,7 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
69876958
// If this basic block is ONLY a compare and a branch, and if a predecessor
69886959
// branches to us and one of our successors, fold the comparison into the
69896960
// predecessor and use logical operations to pick the right destination.
6990-
if (FoldBranchToCommonDest(BI, *CostTracker, DTU, /*MSSAU=*/nullptr, &TTI,
6961+
if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
69916962
Options.BonusInstThreshold))
69926963
return requestResimplify();
69936964

@@ -7286,9 +7257,8 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
72867257

72877258
bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
72887259
DomTreeUpdater *DTU, const SimplifyCFGOptions &Options,
7289-
ArrayRef<WeakVH> LoopHeaders,
7290-
SimplifyCFGCostTracker *CostTracker) {
7260+
ArrayRef<WeakVH> LoopHeaders) {
72917261
return SimplifyCFGOpt(TTI, DTU, BB->getModule()->getDataLayout(), LoopHeaders,
7292-
Options, CostTracker)
7262+
Options)
72937263
.run(BB);
72947264
}

llvm/test/Transforms/LoopUnroll/peel-loop-inner.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop-unroll<peeling;no-runtime>,simplifycfg<bonus-inst-threshold=3>,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s
2+
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop-unroll<peeling;no-runtime>,simplifycfg,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s
33

44
define void @basic(i32 %K, i32 %N) {
55
; CHECK-LABEL: @basic(

llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -bonus-inst-threshold=4 -O2 -S < %s | FileCheck %s
2+
; RUN: opt -O2 -S < %s | FileCheck %s
33

44
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
55
target triple = "x86_64--"

llvm/test/Transforms/SimplifyCFG/branch-fold-multiple.ll

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,9 @@
33

44
%struct.S = type { [4 x i32] }
55

6-
; Check the second basic block is folded into the first basic block
7-
; since it has one bonus intruction. The third basic block is not
8-
; folded into the first basic block since the accumulated bonus
9-
; instructions will exceed the default threshold of 1. The fourth basic
10-
; block is foled into the third basic block since the accumulated
11-
; bonus instruction cost is 1.
6+
; Check the second, third, and fourth basic blocks are folded into
7+
; the first basic block since each has one bonus intruction, which
8+
; does not exceed the default bouns instruction threshold of 1.
129

1310
define i1 @test1(i32 %0, i32 %1, i32 %2, i32 %3) {
1411
; CHECK-LABEL: @test1(
@@ -18,18 +15,14 @@ define i1 @test1(i32 %0, i32 %1, i32 %2, i32 %3) {
1815
; CHECK-NEXT: [[MUL1:%.*]] = mul i32 [[TMP1:%.*]], [[TMP1]]
1916
; CHECK-NEXT: [[CMP2_1:%.*]] = icmp sgt i32 [[MUL1]], 0
2017
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP2]], i1 true, i1 [[CMP2_1]]
21-
; CHECK-NEXT: br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[FOR_COND_1:%.*]]
22-
; CHECK: for.cond.1:
2318
; CHECK-NEXT: [[MUL2:%.*]] = mul i32 [[TMP2:%.*]], [[TMP2]]
2419
; CHECK-NEXT: [[CMP2_2:%.*]] = icmp sgt i32 [[MUL2]], 0
20+
; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 true, i1 [[CMP2_2]]
2521
; CHECK-NEXT: [[MUL3:%.*]] = mul i32 [[TMP3:%.*]], [[TMP3]]
2622
; CHECK-NEXT: [[CMP2_3:%.*]] = icmp sgt i32 [[MUL3]], 0
27-
; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[CMP2_2]], i1 true, i1 [[CMP2_3]]
28-
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[OR_COND1]], i1 false, i1 true
29-
; CHECK-NEXT: br label [[CLEANUP]]
30-
; CHECK: cleanup:
31-
; CHECK-NEXT: [[CMP:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[FOR_COND_1]] ]
32-
; CHECK-NEXT: ret i1 [[CMP]]
23+
; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 true, i1 [[CMP2_3]]
24+
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[OR_COND2]], i1 false, i1 true
25+
; CHECK-NEXT: ret i1 [[SPEC_SELECT]]
3326
;
3427
entry:
3528
%mul0 = mul i32 %0, %0

llvm/test/Transforms/SimplifyCFG/branch-fold-threshold.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s --check-prefix=NORMAL
2-
; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=3 | FileCheck %s --check-prefix=AGGRESSIVE
3-
; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=6 | FileCheck %s --check-prefix=WAYAGGRESSIVE
2+
; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=2 | FileCheck %s --check-prefix=AGGRESSIVE
3+
; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=4 | FileCheck %s --check-prefix=WAYAGGRESSIVE
44
; RUN: opt %s -passes=simplifycfg -S | FileCheck %s --check-prefix=NORMAL
5-
; RUN: opt %s -passes='simplifycfg<bonus-inst-threshold=3>' -S | FileCheck %s --check-prefix=AGGRESSIVE
6-
; RUN: opt %s -passes='simplifycfg<bonus-inst-threshold=6>' -S | FileCheck %s --check-prefix=WAYAGGRESSIVE
5+
; RUN: opt %s -passes='simplifycfg<bonus-inst-threshold=2>' -S | FileCheck %s --check-prefix=AGGRESSIVE
6+
; RUN: opt %s -passes='simplifycfg<bonus-inst-threshold=4>' -S | FileCheck %s --check-prefix=WAYAGGRESSIVE
77

88
define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) {
99
; NORMAL-LABEL: @foo(

llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-two-preds-cost.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
22
; RUN: opt < %s -S -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -bonus-inst-threshold=1 | FileCheck --check-prefixes=ALL,THR1 %s
3-
; RUN: opt < %s -S -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -bonus-inst-threshold=3 | FileCheck --check-prefixes=ALL,THR2 %s
3+
; RUN: opt < %s -S -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -bonus-inst-threshold=2 | FileCheck --check-prefixes=ALL,THR2 %s
44

55
declare void @sideeffect0()
66
declare void @sideeffect1()
@@ -10,7 +10,7 @@ declare i1 @gen1()
1010

1111
; Here we'd want to duplicate %v3_adj into two predecessors,
1212
; but -bonus-inst-threshold=1 says that we can only clone it into one.
13-
; With -bonus-inst-threshold=3 we can clone it into both though.
13+
; With -bonus-inst-threshold=2 we can clone it into both though.
1414
define void @two_preds_with_extra_op(i8 %v0, i8 %v1, i8 %v2, i8 %v3) {
1515
; THR1-LABEL: @two_preds_with_extra_op(
1616
; THR1-NEXT: entry:

0 commit comments

Comments
 (0)