Skip to content

Commit b8e348c

Browse files
committed
SIL: fix some problems in findJointPostDominatingSet
1. dead-end blocks (= blocks which eventually end up in an unreachable): We cannot just ignore all dead-end blocks. This causes crashes for some corner cases (see the "infinite_loop_and_unreachable" test case). Instead just handle the common case of a simple single dead-end block - like we do in DestroyHoisting. For other (more complex) dead-end control flows, the analysis is not incorrect. In worst case we end up inserting a not-needed destroy instruction. 2. sortUnique I restructured the code a bit so that sortUnique is not needed anymore. sortUnique on pointer arrays can result in non-deterministic behavior. 3. lower_bound Also, using lower_bound on a vector is not good in this function, because it can result in quadratic behavior. Though, in practice, there are only very few elements in the vector. So it's more a theoretical thing. The restructuring made the code a bit simpler, e.g. beside the worklist, no other vectors are needed anymore.
1 parent 72def19 commit b8e348c

File tree

2 files changed

+184
-75
lines changed

2 files changed

+184
-75
lines changed

lib/SIL/Utils/BasicBlockUtils.cpp

Lines changed: 67 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,14 @@ void DeadEndBlocks::compute() {
390390
// Post Dominance Set Completion Utilities
391391
//===----------------------------------------------------------------------===//
392392

393+
static bool endsInUnreachable(SILBasicBlock *block) {
394+
// Handle the case where a single "unreachable" block (e.g. containing a call
395+
// to fatalError()), is jumped to from multiple source blocks.
396+
if (SILBasicBlock *singleSucc = block->getSingleSuccessorBlock())
397+
block = singleSucc;
398+
return isa<UnreachableInst>(block->getTerminator());
399+
}
400+
393401
void JointPostDominanceSetComputer::findJointPostDominatingSet(
394402
SILBasicBlock *dominatingBlock, ArrayRef<SILBasicBlock *> dominatedBlockSet,
395403
function_ref<void(SILBasicBlock *)> inputBlocksFoundDuringWalk,
@@ -403,47 +411,31 @@ void JointPostDominanceSetComputer::findJointPostDominatingSet(
403411
// dominatingBlock, then we return success since a block post-doms its self so
404412
// it is already complete.
405413
//
406-
// NOTE: We do not consider this a visiteed
407-
if (dominatedBlockSet.size() == 1) {
408-
if (dominatingBlock == dominatedBlockSet[0]) {
409-
if (inputBlocksInJointPostDomSet)
410-
inputBlocksInJointPostDomSet(dominatingBlock);
411-
return;
412-
}
414+
// NOTE: We do not consider this a visited
415+
if (dominatedBlockSet.size() == 1 && dominatingBlock == dominatedBlockSet[0]) {
416+
if (inputBlocksInJointPostDomSet)
417+
inputBlocksInJointPostDomSet(dominatingBlock);
418+
return;
413419
}
414420

415421
// At the top of where we for sure are going to use state... make sure we
416422
// always clean up any resources that we use!
417423
SWIFT_DEFER { clear(); };
418424

419-
/// A set that guards our worklist. Any block before it is added to worklist
420-
/// should be checked against visitedBlocks.
421-
SILFunction *function = dominatingBlock->getParent();
422-
BasicBlockSet visitedBlocks(function);
425+
/// All blocks visited during the backwards walk of the CFG, but not including
426+
/// the initial blocks in `dominatedBlockSet`.
427+
BasicBlockSet visitedBlocks(dominatingBlock->getParent());
423428

424-
/// The set of blocks where we begin our walk.
425-
BasicBlockSet initialBlocks(function);
429+
/// All blocks in `dominatedBlockSet` (= blocks where we begin our walk).
430+
BasicBlockSet initialBlocks(visitedBlocks.getFunction());
426431

427-
/// True for blocks which are in blocksThatLeakIfNeverVisited.
428-
BasicBlockFlag isLeakingBlock(function);
429-
430-
// Otherwise, we need to compute our joint post dominating set. We do this by
431-
// performing a backwards walk up the CFG tracking back liveness until we find
432-
// our dominating block. As we walk up, we keep track of any successor blocks
433-
// that we need to visit before the walk completes lest we leak. After we
434-
// finish the walk, these leaking blocks are a valid (albeit not unique)
435-
// completion of the post dom set.
432+
// Compute our joint post dominating set. We do this by performing a backwards
433+
// walk up the CFG tracking back liveness until we find our dominating block.
436434
for (auto *block : dominatedBlockSet) {
437-
// Skip dead end blocks.
438-
if (deadEndBlocks.isDeadEnd(block))
439-
continue;
440-
441435
// We require dominatedBlockSet to be a set and thus assert if we hit it to
442436
// flag user error to our caller.
443-
bool succeededInserting = visitedBlocks.insert(block);
444-
(void)succeededInserting;
445-
assert(succeededInserting &&
446-
"Repeat Elt: dominatedBlockSet should be a set?!");
437+
assert(!initialBlocks.contains(block) &&
438+
"dominatedBlockSet must not contain duplicate elements");
447439
initialBlocks.insert(block);
448440
worklist.push_back(block);
449441
}
@@ -452,64 +444,64 @@ void JointPostDominanceSetComputer::findJointPostDominatingSet(
452444
while (!worklist.empty()) {
453445
auto *block = worklist.pop_back_val();
454446

455-
// Then if our block is not one of our initial blocks, add the block's
456-
// successors to blocksThatLeakIfNeverVisited.
457-
if (!initialBlocks.contains(block)) {
458-
for (auto *succBlock : block->getSuccessorBlocks()) {
459-
if (visitedBlocks.contains(succBlock))
460-
continue;
461-
if (deadEndBlocks.isDeadEnd(succBlock))
462-
continue;
463-
blocksThatLeakIfNeverVisited.push_back(succBlock);
464-
isLeakingBlock.set(succBlock);
465-
}
466-
}
467-
468447
// If we are the dominating block, we are done.
469448
if (dominatingBlock == block)
470449
continue;
471450

472-
// Otherwise for each predecessor that we have, first check if it was one of
473-
// our initial blocks (signaling a loop) and then add it to the worklist if
474-
// we haven't visited it already.
475451
for (auto *predBlock : block->getPredecessorBlocks()) {
476-
if (initialBlocks.contains(predBlock)) {
477-
reachableInputBlocks.push_back(predBlock);
478-
for (auto *succBlock : predBlock->getSuccessorBlocks()) {
479-
if (visitedBlocks.contains(succBlock))
480-
continue;
481-
if (deadEndBlocks.isDeadEnd(succBlock))
482-
continue;
483-
if (!isLeakingBlock.testAndSet(succBlock))
484-
blocksThatLeakIfNeverVisited.push_back(succBlock);
485-
}
486-
}
487452
if (visitedBlocks.insert(predBlock))
488453
worklist.push_back(predBlock);
489454
}
490455
}
491456

492-
// After our worklist has emptied, any not visited blocks in
493-
// blocksThatLeakIfNeverVisited are "leaking blocks".
494-
for (auto *leakingBlock : blocksThatLeakIfNeverVisited) {
495-
if (!visitedBlocks.contains(leakingBlock))
496-
foundJointPostDomSetCompletionBlocks(leakingBlock);
497-
}
498-
499-
// Then unique our list of reachable input blocks and pass them to our
500-
// callback.
501-
sortUnique(reachableInputBlocks);
502-
for (auto *block : reachableInputBlocks)
503-
inputBlocksFoundDuringWalk(block);
457+
// Do the same walk over all visited blocks again to find the "leaking"
458+
// blocks. These leaking blocks are the completion of the post dom set.
459+
//
460+
// Note that we could also keep all visited blocks in a SmallVector in the
461+
// first run. But the worklist algorithm is fast and we don't want
462+
// to risk that the small vector overflows (the set of visited blocks can be
463+
// much larger than the maximum worklist size).
464+
BasicBlockSet visitedBlocksInSecondRun(visitedBlocks.getFunction());
465+
assert(worklist.empty());
466+
worklist.append(dominatedBlockSet.begin(), dominatedBlockSet.end());
467+
while (!worklist.empty()) {
468+
auto *block = worklist.pop_back_val();
469+
if (dominatingBlock == block)
470+
continue;
504471

505-
// Then if were asked to find the subset of our input blocks that are in the
506-
// joint-postdominance set, compute that.
507-
if (!inputBlocksInJointPostDomSet)
508-
return;
472+
for (auto *predBlock : block->getPredecessorBlocks()) {
473+
assert(visitedBlocks.contains(predBlock));
474+
if (visitedBlocksInSecondRun.insert(predBlock)) {
475+
worklist.push_back(predBlock);
476+
477+
for (auto *succBlock : predBlock->getSuccessorBlocks()) {
478+
// All not-visited successors of a visited block are "leaking" blocks.
479+
if (!visitedBlocks.contains(succBlock) &&
480+
// For this purpose also the initial blocks count as "visited",
481+
// although they are not added to the visitedBlocks set.
482+
!initialBlocks.contains(succBlock) &&
483+
// Ignore blocks which end in an unreachable. This is a very
484+
// simple check, but covers most of the cases, e.g. block which
485+
// calls fatalError().
486+
!endsInUnreachable(succBlock)) {
487+
assert(succBlock->getSinglePredecessorBlock() == predBlock &&
488+
"CFG must not contain critical edge");
489+
// Note that since there are no critical edges in the CFG, we are
490+
// not calling the closure for a leaking successor block twice.
491+
foundJointPostDomSetCompletionBlocks(succBlock);
492+
}
493+
}
494+
}
495+
}
496+
}
509497

510498
// Pass back the reachable input blocks that were not reachable from other
511499
// input blocks to.
512-
for (auto *block : dominatedBlockSet)
513-
if (lower_bound(reachableInputBlocks, block) == reachableInputBlocks.end())
500+
for (auto *block : dominatedBlockSet) {
501+
if (visitedBlocks.contains(block)) {
502+
inputBlocksFoundDuringWalk(block);
503+
} else if (inputBlocksInJointPostDomSet) {
514504
inputBlocksInJointPostDomSet(block);
505+
}
506+
}
515507
}

test/SILOptimizer/redundant_load_elim_nontrivial_ossa.sil

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -961,3 +961,120 @@ bb8:
961961
return %res : $()
962962
}
963963

964+
// CHECK-LABEL: @ignore_unreachable_simple :
965+
// CHECK: [[V:%[0-9]+]] = load [copy]
966+
// CHECK: [[C:%[0-9]+]] = copy_value [[V]]
967+
// CHECK: bb1:
968+
// CHECK-NEXT: destroy_value [[C]]
969+
// CHECK: bb2:
970+
// CHECK-NEXT: unreachable
971+
// CHECK: } // end sil function 'ignore_unreachable_simple'
972+
sil [ossa] @ignore_unreachable_simple : $@convention(thin) (@in_guaranteed NonTrivialStruct) -> () {
973+
bb0(%0 : $*NonTrivialStruct):
974+
%ele = struct_element_addr %0 : $*NonTrivialStruct, #NonTrivialStruct.val
975+
%val1 = load [copy] %ele : $*Klass
976+
destroy_value %val1 : $Klass
977+
cond_br undef, bb1, bb2
978+
979+
bb1:
980+
%val2 = load [copy] %ele : $*Klass
981+
destroy_value %val2 : $Klass
982+
br bb3
983+
984+
bb2:
985+
unreachable
986+
987+
bb3:
988+
%res = tuple ()
989+
return %res : $()
990+
}
991+
992+
// CHECK-LABEL: @ignore_unreachable_complex :
993+
// CHECK: [[V:%[0-9]+]] = load [copy]
994+
// CHECK: copy_value [[V]]
995+
// CHECK: copy_value [[V]]
996+
// CHECK: bb1:
997+
// CHECK-NEXT: destroy_value
998+
// CHECK-NEXT: cond_br
999+
// CHECK: bb2:
1000+
// CHECK-NEXT: destroy_value
1001+
// CHECK-NEXT: cond_br
1002+
// CHECK: bb3:
1003+
// CHECK-NEXT: destroy_value
1004+
// CHECK-NEXT: br bb5
1005+
// CHECK: bb4:
1006+
// CHECK-NEXT: destroy_value
1007+
// CHECK-NEXT: br bb5
1008+
// CHECK: bb5:
1009+
// CHECK-NEXT: tuple
1010+
// CHECK: bb6:
1011+
// CHECK-NEXT: br bb8
1012+
// CHECK: bb7:
1013+
// CHECK-NEXT: br bb8
1014+
// CHECK: bb8:
1015+
// CHECK-NEXT: unreachable
1016+
// CHECK: } // end sil function 'ignore_unreachable_complex'
1017+
sil [ossa] @ignore_unreachable_complex : $@convention(thin) (@in_guaranteed NonTrivialStruct) -> () {
1018+
bb0(%0 : $*NonTrivialStruct):
1019+
%ele = struct_element_addr %0 : $*NonTrivialStruct, #NonTrivialStruct.val
1020+
%val1 = load [copy] %ele : $*Klass
1021+
destroy_value %val1 : $Klass
1022+
cond_br undef, bb1, bb2
1023+
1024+
bb1:
1025+
cond_br undef, bb3, bb6
1026+
1027+
bb2:
1028+
cond_br undef, bb4, bb7
1029+
1030+
bb3:
1031+
%val2 = load [copy] %ele : $*Klass
1032+
destroy_value %val2 : $Klass
1033+
br bb5
1034+
1035+
bb4:
1036+
%val3 = load [copy] %ele : $*Klass
1037+
destroy_value %val3 : $Klass
1038+
br bb5
1039+
1040+
bb5:
1041+
%res = tuple ()
1042+
return %res : $()
1043+
1044+
bb6:
1045+
br bb8
1046+
1047+
bb7:
1048+
br bb8
1049+
1050+
bb8:
1051+
unreachable
1052+
}
1053+
1054+
1055+
// CHECK-LABEL: @infinite_loop_and_unreachable :
1056+
// CHECK: [[V:%[0-9]+]] = load [copy]
1057+
// CHECK: [[C1:%[0-9]+]] = copy_value [[V]]
1058+
// CHECK: bb1:
1059+
// CHECK: [[C2:%[0-9]+]] = copy_value [[C1]]
1060+
// CHECK: destroy_value [[C2]]
1061+
// CHECK: } // end sil function 'infinite_loop_and_unreachable'
1062+
sil [ossa] @infinite_loop_and_unreachable : $@convention(thin) (@in_guaranteed NonTrivialStruct) -> () {
1063+
bb0(%0 : $*NonTrivialStruct):
1064+
%ele = struct_element_addr %0 : $*NonTrivialStruct, #NonTrivialStruct.val
1065+
%val1 = load [copy] %ele : $*Klass
1066+
destroy_value %val1 : $Klass
1067+
br bb1
1068+
1069+
bb1:
1070+
%val2 = load [copy] %ele : $*Klass
1071+
destroy_value %val2 : $Klass
1072+
cond_br undef, bb2, bb3
1073+
1074+
bb2:
1075+
br bb1
1076+
1077+
bb3:
1078+
unreachable
1079+
}
1080+

0 commit comments

Comments
 (0)