Skip to content

Commit 3ad7d54

Browse files
committed
LICM: hoist calls to global_init functions
Global initializers are executed only once. Therefore it's possible to hoist a call to such an initializer into the loop pre-header, provided that there are no conflicting side-effects in the loop before the call. In addition, the call must post-dominate the loop pre-header; otherwise it would be executed speculatively.
1 parent e0c4fa2 commit 3ad7d54

File tree

5 files changed

+324
-11
lines changed

5 files changed

+324
-11
lines changed

lib/SILOptimizer/LoopTransforms/LICM.cpp

Lines changed: 95 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,57 @@ static bool mayWriteTo(AliasAnalysis *AA, SideEffectAnalysis *SEA,
146146
return false;
147147
}
148148

149+
/// Returns true if \p sideEffectInst cannot be reordered with a call to a
150+
/// global initializer.
151+
static bool mayConflictWithGlobalInit(AliasAnalysis *AA,
152+
SILInstruction *sideEffectInst, ApplyInst *globalInitCall) {
153+
if (auto *SI = dyn_cast<StoreInst>(sideEffectInst)) {
154+
return AA->mayReadOrWriteMemory(globalInitCall, SI->getDest());
155+
}
156+
if (auto *LI = dyn_cast<LoadInst>(sideEffectInst)) {
157+
return AA->mayWriteToMemory(globalInitCall, LI->getOperand());
158+
}
159+
return true;
160+
}
161+
162+
/// Returns true if any of the instructions in \p sideEffectInsts which are
163+
/// post-dominated by a call to a global initializer cannot be reordered with
164+
/// the call.
165+
static bool mayConflictWithGlobalInit(AliasAnalysis *AA,
166+
InstSet &sideEffectInsts,
167+
ApplyInst *globalInitCall,
168+
SILBasicBlock *preHeader, PostDominanceInfo *PD) {
169+
if (!PD->dominates(globalInitCall->getParent(), preHeader))
170+
return true;
171+
172+
SILBasicBlock *globalInitBlock = globalInitCall->getParent();
173+
for (auto *seInst : sideEffectInsts) {
174+
// Only check instructions in blocks which are "before" (i.e. post-dominated
175+
// by) the block which contains the init-call.
176+
// Instructions which are before the call in the same block have already
177+
// been checked.
178+
if (PD->properlyDominates(globalInitBlock, seInst->getParent())) {
179+
if (mayConflictWithGlobalInit(AA, seInst, globalInitCall))
180+
return true;
181+
}
182+
}
183+
return false;
184+
}
185+
186+
/// Returns true if any of the instructions in \p sideEffectInsts cannot be
187+
/// reordered with a call to a global initializer (which is in the same basic
188+
/// block).
189+
static bool mayConflictWithGlobalInit(AliasAnalysis *AA,
190+
ArrayRef<SILInstruction *> sideEffectInsts,
191+
ApplyInst *globalInitCall) {
192+
for (auto *seInst : sideEffectInsts) {
193+
assert(seInst->getParent() == globalInitCall->getParent());
194+
if (mayConflictWithGlobalInit(AA, seInst, globalInitCall))
195+
return true;
196+
}
197+
return false;
198+
}
199+
149200
// When Hoisting / Sinking,
150201
// Don't descend into control-dependent code.
151202
// Only traverse into basic blocks that dominate all exits.
@@ -409,6 +460,8 @@ class LoopTreeOptimization {
409460
AliasAnalysis *AA;
410461
SideEffectAnalysis *SEA;
411462
DominanceInfo *DomTree;
463+
PostDominanceAnalysis *PDA;
464+
PostDominanceInfo *postDomTree = nullptr;
412465
AccessedStorageAnalysis *ASA;
413466
bool Changed;
414467

@@ -435,10 +488,11 @@ class LoopTreeOptimization {
435488
public:
436489
LoopTreeOptimization(SILLoop *TopLevelLoop, SILLoopInfo *LI,
437490
AliasAnalysis *AA, SideEffectAnalysis *SEA,
438-
DominanceInfo *DT, AccessedStorageAnalysis *ASA,
491+
DominanceInfo *DT, PostDominanceAnalysis *PDA,
492+
AccessedStorageAnalysis *ASA,
439493
bool RunsOnHighLevelSil)
440-
: LoopInfo(LI), AA(AA), SEA(SEA), DomTree(DT), ASA(ASA), Changed(false),
441-
RunsOnHighLevelSIL(RunsOnHighLevelSil) {
494+
: LoopInfo(LI), AA(AA), SEA(SEA), DomTree(DT), PDA(PDA), ASA(ASA),
495+
Changed(false), RunsOnHighLevelSIL(RunsOnHighLevelSil) {
442496
// Collect loops for a recursive bottom-up traversal in the loop tree.
443497
BotUpWorkList.push_back(TopLevelLoop);
444498
for (unsigned i = 0; i < BotUpWorkList.size(); ++i) {
@@ -556,9 +610,11 @@ static bool isSafeReadOnlyApply(SideEffectAnalysis *SEA, ApplyInst *AI) {
556610
}
557611

558612
static void checkSideEffects(swift::SILInstruction &Inst,
559-
InstSet &SideEffectInsts) {
613+
InstSet &SideEffectInsts,
614+
SmallVectorImpl<SILInstruction *> &sideEffectsInBlock) {
560615
if (Inst.mayHaveSideEffects()) {
561616
SideEffectInsts.insert(&Inst);
617+
sideEffectsInBlock.push_back(&Inst);
562618
}
563619
}
564620

@@ -708,13 +764,15 @@ void LoopTreeOptimization::analyzeCurrentLoop(
708764

709765
// Interesting instructions in the loop:
710766
SmallVector<ApplyInst *, 8> ReadOnlyApplies;
767+
SmallVector<ApplyInst *, 8> globalInitCalls;
711768
SmallVector<LoadInst *, 8> Loads;
712769
SmallVector<StoreInst *, 8> Stores;
713770
SmallVector<FixLifetimeInst *, 8> FixLifetimes;
714771
SmallVector<BeginAccessInst *, 8> BeginAccesses;
715772
SmallVector<FullApplySite, 8> fullApplies;
716773

717774
for (auto *BB : Loop->getBlocks()) {
775+
SmallVector<SILInstruction *, 8> sideEffectsInBlock;
718776
for (auto &Inst : *BB) {
719777
switch (Inst.getKind()) {
720778
case SILInstructionKind::FixLifetimeInst: {
@@ -731,12 +789,12 @@ void LoopTreeOptimization::analyzeCurrentLoop(
731789
case SILInstructionKind::StoreInst: {
732790
Stores.push_back(cast<StoreInst>(&Inst));
733791
LoadsAndStores.push_back(&Inst);
734-
checkSideEffects(Inst, sideEffects);
792+
checkSideEffects(Inst, sideEffects, sideEffectsInBlock);
735793
break;
736794
}
737795
case SILInstructionKind::BeginAccessInst:
738796
BeginAccesses.push_back(cast<BeginAccessInst>(&Inst));
739-
checkSideEffects(Inst, sideEffects);
797+
checkSideEffects(Inst, sideEffects, sideEffectsInBlock);
740798
break;
741799
case SILInstructionKind::RefElementAddrInst:
742800
SpecialHoist.push_back(cast<RefElementAddrInst>(&Inst));
@@ -747,12 +805,21 @@ void LoopTreeOptimization::analyzeCurrentLoop(
747805
// cond_fail that would have protected (executed before) a memory access
748806
// must - after hoisting - also be executed before said access.
749807
HoistUp.insert(&Inst);
750-
checkSideEffects(Inst, sideEffects);
808+
checkSideEffects(Inst, sideEffects, sideEffectsInBlock);
751809
break;
752810
case SILInstructionKind::ApplyInst: {
753811
auto *AI = cast<ApplyInst>(&Inst);
754812
if (isSafeReadOnlyApply(SEA, AI)) {
755813
ReadOnlyApplies.push_back(AI);
814+
} else if (SILFunction *callee = AI->getReferencedFunctionOrNull()) {
815+
// Calls to global inits are different because we don't care about
816+
// side effects which are "after" the call in the loop.
817+
if (callee->isGlobalInit() &&
818+
// Check against side-effects within the same block.
819+
// Side-effects in other blocks are checked later (after we
820+
// scanned all blocks of the loop).
821+
!mayConflictWithGlobalInit(AA, sideEffectsInBlock, AI))
822+
globalInitCalls.push_back(AI);
756823
}
757824
// check for array semantics and side effects - same as default
758825
LLVM_FALLTHROUGH;
@@ -761,7 +828,7 @@ void LoopTreeOptimization::analyzeCurrentLoop(
761828
if (auto fullApply = FullApplySite::isa(&Inst)) {
762829
fullApplies.push_back(fullApply);
763830
}
764-
checkSideEffects(Inst, sideEffects);
831+
checkSideEffects(Inst, sideEffects, sideEffectsInBlock);
765832
if (canHoistUpDefault(&Inst, Loop, DomTree, RunsOnHighLevelSIL)) {
766833
HoistUp.insert(&Inst);
767834
}
@@ -780,6 +847,23 @@ void LoopTreeOptimization::analyzeCurrentLoop(
780847
HoistUp.insert(LI);
781848
}
782849
}
850+
851+
if (!globalInitCalls.empty()) {
852+
if (!postDomTree) {
853+
postDomTree = PDA->get(Preheader->getParent());
854+
}
855+
if (postDomTree->getRootNode()) {
856+
for (ApplyInst *ginitCall : globalInitCalls) {
857+
// Check against side effects which are "before" (i.e. post-dominated
858+
// by) the global initializer call.
859+
if (!mayConflictWithGlobalInit(AA, sideEffects, ginitCall, Preheader,
860+
postDomTree)) {
861+
HoistUp.insert(ginitCall);
862+
}
863+
}
864+
}
865+
}
866+
783867
// Collect memory locations for which we can move all loads and stores out
784868
// of the loop.
785869
for (StoreInst *SI : Stores) {
@@ -1041,6 +1125,7 @@ class LICM : public SILFunctionTransform {
10411125
}
10421126

10431127
DominanceAnalysis *DA = PM->getAnalysis<DominanceAnalysis>();
1128+
PostDominanceAnalysis *PDA = PM->getAnalysis<PostDominanceAnalysis>();
10441129
AliasAnalysis *AA = PM->getAnalysis<AliasAnalysis>();
10451130
SideEffectAnalysis *SEA = PM->getAnalysis<SideEffectAnalysis>();
10461131
AccessedStorageAnalysis *ASA = getAnalysis<AccessedStorageAnalysis>();
@@ -1051,8 +1136,8 @@ class LICM : public SILFunctionTransform {
10511136

10521137
for (auto *TopLevelLoop : *LoopInfo) {
10531138
if (!DomTree) DomTree = DA->get(F);
1054-
LoopTreeOptimization Opt(TopLevelLoop, LoopInfo, AA, SEA, DomTree, ASA,
1055-
RunsOnHighLevelSil);
1139+
LoopTreeOptimization Opt(TopLevelLoop, LoopInfo, AA, SEA, DomTree, PDA,
1140+
ASA, RunsOnHighLevelSil);
10561141
Changed |= Opt.optimize();
10571142
}
10581143

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// RUN: %empty-directory(%t)
2+
// RUN: %target-build-swift -O %s -o %t/a.out
3+
// RUN: %target-run %t/a.out | %FileCheck %s
4+
5+
// REQUIRES: executable_test
6+
7+
struct Teststruct {
8+
static let s = Teststruct()
9+
10+
@inline(never)
11+
init() {
12+
let set = Set<String>()
13+
for _ in set {
14+
// Check that the global initializer is not hoisted out of this loop,
15+
// resulting in a dispatch_once reentrancy crash.
16+
_ = Teststruct.s
17+
}
18+
}
19+
}
20+
21+
// CHECK: Teststruct
22+
print(Teststruct.s)
23+
24+

test/SILOptimizer/global_init_opt.swift

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,18 @@ var gg: Int = {
1616
public func cse() -> Int {
1717
return gg + gg
1818
}
19+
20+
// CHECK-LABEL: sil @$s4test4licmSiyF
21+
// CHECK: bb0:
22+
// CHECK: builtin "once"
23+
// CHECK: bb1:
24+
// CHECK-NOT: builtin "once"
25+
// CHECK: } // end sil function '$s4test4licmSiyF'
26+
public func licm() -> Int {
27+
var s = 0
28+
for _ in 0..<100 {
29+
s += gg
30+
}
31+
return s
32+
}
33+

0 commit comments

Comments
 (0)