Skip to content

Commit 1de19a1

Browse files
committed
SimplifyCFG: fix a compile time problem with block merging
When merging many blocks to a single block (in the wrong order), instructions are getting moved over and over again. This is quadratic and can result in very long compile times for large functions. To fix this, always move the instruction to smaller block to the larger block. rdar://problem/56268570
1 parent 242bf15 commit 1de19a1

File tree

11 files changed

+154
-103
lines changed

11 files changed

+154
-103
lines changed

include/swift/SIL/SILBasicBlock.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,10 @@ public llvm::ilist_node<SILBasicBlock>, public SILAllocated<SILBasicBlock> {
110110
InstList.splice(end(), Other->InstList);
111111
}
112112

113+
void spliceAtBegin(SILBasicBlock *Other) {
114+
InstList.splice(begin(), Other->InstList);
115+
}
116+
113117
bool empty() const { return InstList.empty(); }
114118
iterator begin() { return InstList.begin(); }
115119
iterator end() { return InstList.end(); }
@@ -202,6 +206,8 @@ public llvm::ilist_node<SILBasicBlock>, public SILAllocated<SILBasicBlock> {
202206

203207
void cloneArgumentList(SILBasicBlock *Other);
204208

209+
void moveArgumentList(SILBasicBlock *from);
210+
205211
/// Erase a specific argument from the arg list.
206212
void eraseArgument(int Index);
207213

lib/SIL/IR/SILBasicBlock.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,13 @@ void SILBasicBlock::cloneArgumentList(SILBasicBlock *Other) {
142142
}
143143
}
144144

145+
void SILBasicBlock::moveArgumentList(SILBasicBlock *from) {
146+
ArgumentList = std::move(from->ArgumentList);
147+
for (SILArgument *arg : getArguments()) {
148+
arg->parentBlock = this;
149+
}
150+
}
151+
145152
SILFunctionArgument *
146153
SILBasicBlock::createFunctionArgument(SILType Ty, const ValueDecl *D,
147154
bool disableEntryBlockVerification) {

lib/SILOptimizer/Transforms/SimplifyCFG.cpp

Lines changed: 50 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1153,6 +1153,22 @@ static bool isReachable(SILBasicBlock *Block) {
11531153
static llvm::cl::opt<bool> SimplifyUnconditionalBranches(
11541154
"simplify-cfg-simplify-unconditional-branches", llvm::cl::init(true));
11551155

1156+
/// Returns true if \p block has less instructions than \p other.
1157+
static bool hasLessInstructions(SILBasicBlock *block, SILBasicBlock *other) {
1158+
auto blockIter = block->begin();
1159+
auto blockEnd = block->end();
1160+
auto otherIter = other->begin();
1161+
auto otherEnd = other->end();
1162+
while (true) {
1163+
if (otherIter == otherEnd)
1164+
return false;
1165+
if (blockIter == blockEnd)
1166+
return true;
1167+
++blockIter;
1168+
++otherIter;
1169+
}
1170+
}
1171+
11561172
/// simplifyBranchBlock - Simplify a basic block that ends with an unconditional
11571173
/// branch.
11581174
bool SimplifyCFG::simplifyBranchBlock(BranchInst *BI) {
@@ -1201,29 +1217,48 @@ bool SimplifyCFG::simplifyBranchBlock(BranchInst *BI) {
12011217
}
12021218
}
12031219

1204-
// Zap BI and move all of the instructions from DestBB into this one.
12051220
BI->eraseFromParent();
1206-
BB->spliceAtEnd(DestBB);
12071221

1208-
// Revisit this block now that we've changed it and remove the DestBB.
1209-
addToWorklist(BB);
1222+
// Move instruction from the smaller block to the larger block.
1223+
// The order is essential because if many blocks are merged and this is done
1224+
// in the wrong order, we end up with quadratic complexity.
1225+
//
1226+
SILBasicBlock *remainingBlock = nullptr, *deletedBlock = nullptr;
1227+
if (BB != Fn.getEntryBlock() && hasLessInstructions(BB, DestBB)) {
1228+
while (!BB->pred_empty()) {
1229+
SILBasicBlock *pred = *BB->pred_begin();
1230+
replaceBranchTarget(pred->getTerminator(), BB, DestBB, true);
1231+
}
1232+
DestBB->spliceAtBegin(BB);
1233+
DestBB->dropAllArguments();
1234+
DestBB->moveArgumentList(BB);
1235+
remainingBlock = DestBB;
1236+
deletedBlock = BB;
1237+
} else {
1238+
BB->spliceAtEnd(DestBB);
1239+
remainingBlock = BB;
1240+
deletedBlock = DestBB;
1241+
}
1242+
1243+
// Revisit this block now that we've changed it.
1244+
addToWorklist(remainingBlock);
12101245

12111246
// This can also expose opportunities in the successors of
12121247
// the merged block.
1213-
for (auto &Succ : BB->getSuccessors())
1248+
for (auto &Succ : remainingBlock->getSuccessors())
12141249
addToWorklist(Succ);
12151250

1216-
if (LoopHeaders.count(DestBB))
1217-
LoopHeaders.insert(BB);
1251+
if (LoopHeaders.count(deletedBlock))
1252+
LoopHeaders.insert(remainingBlock);
12181253

1219-
auto Iter = JumpThreadingCost.find(DestBB);
1254+
auto Iter = JumpThreadingCost.find(deletedBlock);
12201255
if (Iter != JumpThreadingCost.end()) {
12211256
int costs = Iter->second;
1222-
JumpThreadingCost[BB] += costs;
1257+
JumpThreadingCost[remainingBlock] += costs;
12231258
}
12241259

1225-
removeFromWorklist(DestBB);
1226-
DestBB->eraseFromParent();
1260+
removeFromWorklist(deletedBlock);
1261+
deletedBlock->eraseFromParent();
12271262
++NumBlocksMerged;
12281263
return true;
12291264
}
@@ -2544,7 +2579,10 @@ bool SimplifyCFG::simplifyBlocks() {
25442579

25452580
switch (TI->getTermKind()) {
25462581
case TermKind::BranchInst:
2547-
Changed |= simplifyBranchBlock(cast<BranchInst>(TI));
2582+
if (simplifyBranchBlock(cast<BranchInst>(TI))) {
2583+
Changed = true;
2584+
continue;
2585+
}
25482586
break;
25492587
case TermKind::CondBranchInst:
25502588
Changed |= simplifyCondBrBlock(cast<CondBranchInst>(TI));

test/SILOptimizer/OSLogFullOptTest.swift

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ func testSimpleInterpolation() {
2121
// CHECK: tail call swiftcc i1 @"${{.*}}isLoggingEnabled{{.*}}"()
2222
// CHECK-NEXT: br i1 {{%.*}}, label %[[ENABLED:[0-9]+]], label %[[NOT_ENABLED:[0-9]+]]
2323

24+
// CHECK: [[NOT_ENABLED]]:
25+
// CHECK-NEXT: tail call void @swift_release
26+
// CHECK-NEXT: ret void
27+
2428
// CHECK: [[ENABLED]]:
2529
//
2630
// Header bytes.
@@ -47,10 +51,6 @@ func testSimpleInterpolation() {
4751
// CHECK-32-NEXT: tail call swiftcc void @"${{.*}}_os_log_impl_test{{.*}}"({{.*}}, {{.*}}, {{.*}}, {{.*}}, i8* getelementptr inbounds ([27 x i8], [27 x i8]* @{{.*}}, i32 0, i32 0), i8* {{(nonnull )?}}[[BUFFER]], i32 8)
4852
// CHECK-NEXT: tail call void @swift_slowDealloc(i8* {{(nonnull )?}}[[BUFFER]]
4953
// CHECK-NEXT: br label %[[NOT_ENABLED]]
50-
51-
// CHECK: [[NOT_ENABLED]]:
52-
// CHECK-NEXT: tail call void @swift_release
53-
// CHECK-NEXT: ret void
5454
}
5555

5656
// CHECK-LABEL: define hidden swiftcc void @"${{.*}}testInterpolationWithMultipleArguments
@@ -68,6 +68,10 @@ func testInterpolationWithMultipleArguments() {
6868
// CHECK: tail call swiftcc i1 @"${{.*}}isLoggingEnabled{{.*}}"()
6969
// CHECK-NEXT: br i1 {{%.*}}, label %[[ENABLED:[0-9]+]], label %[[NOT_ENABLED:[0-9]+]]
7070

71+
// CHECK: [[NOT_ENABLED]]:
72+
// CHECK-NEXT: tail call void @swift_release
73+
// CHECK-NEXT: ret void
74+
7175
// CHECK: [[ENABLED]]:
7276
//
7377
// Header bytes.
@@ -112,10 +116,6 @@ func testInterpolationWithMultipleArguments() {
112116
// CHECK-NEXT: tail call swiftcc void @"${{.*}}_os_log_impl_test{{.*}}"({{.*}}, {{.*}}, {{.*}}, {{.*}}, i8* getelementptr inbounds ([106 x i8], [106 x i8]* @{{.*}}, i{{.*}} 0, i{{.*}} 0), i8* {{(nonnull )?}}[[BUFFER]], i32 20)
113117
// CHECK-NEXT: tail call void @swift_slowDealloc(i8* {{(nonnull )?}}[[BUFFER]]
114118
// CHECK-NEXT: br label %[[NOT_ENABLED]]
115-
116-
// CHECK: [[NOT_ENABLED]]:
117-
// CHECK-NEXT: tail call void @swift_release
118-
// CHECK-NEXT: ret void
119119
}
120120

121121
// CHECK-LABEL: define hidden swiftcc void @"${{.*}}testNSObjectInterpolation
@@ -134,6 +134,10 @@ func testNSObjectInterpolation(nsArray: NSArray) {
134134
// CHECK-NEXT: tail call void @llvm.objc.release
135135
// CHECK-NEXT: br label %[[EXIT:[0-9]+]]
136136

137+
// CHECK: [[EXIT]]:
138+
// CHECK-NEXT: tail call void @llvm.objc.release(i8* [[NSARRAY_ARG]])
139+
// CHECK-NEXT: ret void
140+
137141
// CHECK: [[ENABLED]]:
138142
//
139143
// Header bytes.
@@ -163,10 +167,6 @@ func testNSObjectInterpolation(nsArray: NSArray) {
163167
// CHECK-NEXT: tail call void @swift_slowDealloc(i8* {{(nonnull )?}}[[BUFFER]]
164168
// CHECK-NEXT: tail call void @swift_release
165169
// CHECK-NEXT: br label %[[EXIT]]
166-
167-
// CHECK: [[EXIT]]:
168-
// CHECK-NEXT: tail call void @llvm.objc.release(i8* [[NSARRAY_ARG]])
169-
// CHECK-NEXT: ret void
170170
}
171171

172172
// CHECK-LABEL: define hidden swiftcc void @"${{.*}}testFloatInterpolation
@@ -176,6 +176,10 @@ func testFloatInterpolation(doubleValue: Double) {
176176
// CHECK: tail call swiftcc i1 @"${{.*}}isLoggingEnabled{{.*}}"()
177177
// CHECK-NEXT: br i1 {{%.*}}, label %[[ENABLED:[0-9]+]], label %[[NOT_ENABLED:[0-9]+]]
178178

179+
// CHECK: [[NOT_ENABLED]]:
180+
// CHECK-NEXT: tail call void @swift_release
181+
// CHECK-NEXT: ret void
182+
179183
// CHECK: [[ENABLED]]:
180184
//
181185
// Header bytes.
@@ -198,10 +202,6 @@ func testFloatInterpolation(doubleValue: Double) {
198202
// CHECK-NEXT: tail call swiftcc void @"${{.*}}_os_log_impl_test{{.*}}"({{.*}}, {{.*}}, {{.*}}, {{.*}}, i8* getelementptr inbounds ([17 x i8], [17 x i8]* @{{.*}}, i{{.*}} 0, i{{.*}} 0), i8* {{(nonnull )?}}[[BUFFER]], i32 12)
199203
// CHECK-NEXT: tail call void @swift_slowDealloc(i8* {{(nonnull )?}}[[BUFFER]]
200204
// CHECK-NEXT: br label %[[NOT_ENABLED]]
201-
202-
// CHECK: [[NOT_ENABLED]]:
203-
// CHECK-NEXT: tail call void @swift_release
204-
// CHECK-NEXT: ret void
205205
}
206206

207207
// This test checks that the precision and alignment are optimally "stored" into the
@@ -217,6 +217,10 @@ func testDynamicPrecisionAndAlignment() {
217217
// CHECK: tail call swiftcc i1 @"${{.*}}isLoggingEnabled{{.*}}"()
218218
// CHECK-NEXT: br i1 {{%.*}}, label %[[ENABLED:[0-9]+]], label %[[NOT_ENABLED:[0-9]+]]
219219

220+
// CHECK: [[NOT_ENABLED]]:
221+
// CHECK-NEXT: tail call void @swift_release
222+
// CHECK-NEXT: ret void
223+
220224
// CHECK: [[ENABLED]]:
221225
//
222226
// Header bytes.
@@ -261,10 +265,6 @@ func testDynamicPrecisionAndAlignment() {
261265
// CHECK-NEXT: tail call swiftcc void @"${{.*}}_os_log_impl_test{{.*}}"({{.*}}, {{.*}}, {{.*}}, {{.*}}, i8* getelementptr inbounds ([28 x i8], [28 x i8]* @{{.*}}, i{{.*}} 0, i{{.*}} 0), i8* {{(nonnull )?}}[[BUFFER]], i32 20)
262266
// CHECK-NEXT: tail call void @swift_slowDealloc(i8* {{(nonnull )?}}[[BUFFER]]
263267
// CHECK-NEXT: br label %[[NOT_ENABLED]]
264-
265-
// CHECK: [[NOT_ENABLED]]:
266-
// CHECK-NEXT: tail call void @swift_release
267-
// CHECK-NEXT: ret void
268268
}
269269

270270
// TODO: add test for String. It is more complicated due to more complex logic

test/SILOptimizer/devirt_jump_thread.sil

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,17 +54,17 @@ bb0(%0 : $Int32, %1 : $FooClass):
5454
//
5555
// CHECK-LABEL: sil @_TF18devirt_jump_thread26jumpthread_checked_cast_brFCS_8FooClassSi
5656
// CHECK: checked_cast_br
57-
// CHECK: bb2(%{{.*}} : $FooClass):
57+
// CHECK: bb1(%{{.*}} : $FooClass):
5858
// CHECK: function_ref @_TTOS_no2g__TFC18devirt_jump_thread8FooClass3foofS0_FSiSi
5959
// CHECK-NOT: function_ref @_TTOS_no2g__TFC18devirt_jump_thread8FooClass3foofS0_FSiSi
6060
// CHECK-NOT: class_method
61-
// CHECK: br bb1
61+
// CHECK: br bb2
6262
// CHECK-NOT: checked_cast_br
6363
// CHECK: bb3:
6464
// CHECK-NOT: function_ref
6565
// CHECK: class_method
6666
// CHECK-NOT: function_ref
67-
// CHECK: br bb1
67+
// CHECK: br bb2
6868
// CHECK: }
6969
// devirt_jump_thread.jumpthread_checked_cast_br (devirt_jump_thread.FooClass) -> Swift.Int32
7070
sil @_TF18devirt_jump_thread26jumpthread_checked_cast_brFCS_8FooClassSi : $@convention(thin) (@guaranteed FooClass) -> Int32 {
@@ -169,13 +169,13 @@ bb12: // Preds: bb3
169169
// Check that checked_cast_br gets jump threaded
170170
// CHECK-LABEL: sil @_TF18devirt_jump_thread6doubleFCS_8FooClassSi
171171
// CHECK: checked_cast_br
172-
// CHECK: bb2(%{{.*}} : $FooClass):
172+
// CHECK: bb2
173+
// CHECK: class_method
174+
// CHECK: br bb4
175+
// CHECK: bb3(%{{.*}} : $FooClass):
173176
// CHECK-NOT: class_method
174177
// CHECK: br bb1
175178
// CHECK-NOT: checked_cast_br
176-
// CHECK: bb3
177-
// CHECK: class_method
178-
// CHECK: br bb1
179179
// CHECK: }
180180
// devirt_jump_thread.double (devirt_jump_thread.FooClass) -> Swift.Int32
181181
sil @_TF18devirt_jump_thread6doubleFCS_8FooClassSi : $@convention(thin) (@guaranteed FooClass) -> Int32 {

test/SILOptimizer/global_init_opt.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ public func cse() -> Int {
2020
// CHECK-LABEL: sil @$s4test4licmSiyF
2121
// CHECK: bb0:
2222
// CHECK: builtin "once"
23-
// CHECK: bb1:
23+
// CHECK: bb1{{.*}}:
2424
// CHECK-NOT: builtin "once"
2525
// CHECK: } // end sil function '$s4test4licmSiyF'
2626
public func licm() -> Int {

test/SILOptimizer/merge_exclusivity.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,21 @@ func sum(_ x: UInt64, _ y: UInt64) -> UInt64 {
1818
// TESTSIL: [[GLOBALVAR:%.*]] = global_addr @$s17merge_exclusivity5checks6UInt64Vvp
1919
// TESTSIL: [[B1:%.*]] = begin_access [modify] [dynamic] [no_nested_conflict] [[GLOBALVAR]]
2020
// TESTSIL: end_access [[B1]]
21-
// TESTSIL: bb5
21+
// TESTSIL: bb4{{.*}}:
2222
// TESTSIL: [[B2a:%.*]] = begin_access [read] [static] [no_nested_conflict] [[GLOBALVAR]]
2323
// TESTSIL-NEXT: load [[B2a]]
2424
// TESTSIL: end_access [[B2a]]
2525
// TESTSIL: [[B2b:%.*]] = begin_access [modify] [static] [no_nested_conflict] [[GLOBALVAR]]
2626
// TESTSIL: store {{.*}} to [[B2b]]
2727
// TESTSIL: end_access [[B2b]]
28-
// TESTSIL: bb6
28+
// TESTSIL: bb5:
2929
// TESTSIL: [[B3a:%.*]] = begin_access [read] [static] [no_nested_conflict] [[GLOBALVAR]]
3030
// TESTSIL-NEXT: load [[B3a]]
3131
// TESTSIL: end_access [[B3a]]
3232
// TESTSIL: [[B3b:%.*]] = begin_access [modify] [static] [no_nested_conflict] [[GLOBALVAR]]
3333
// TESTSIL: store {{.*}} to [[B3b]]
3434
// TESTSIL: end_access [[B3b]]
35-
// TESTSIL: bb7
35+
// TESTSIL: bb6:
3636
// TESTSIL: [[B4a:%.*]] = begin_access [read] [static] [no_nested_conflict] [[GLOBALVAR]]
3737
// TESTSIL-NEXT: load [[B4a]]
3838
// TESTSIL: end_access [[B4a]]

0 commit comments

Comments
 (0)