Skip to content

Commit c4eda1a

Browse files
committed
Fix a bug in dead store elimination. Make sure we consider SILargument when we
invalidate base. Also small change on how BlockState is allocated.
1 parent 42d97ed commit c4eda1a

File tree

2 files changed

+98
-73
lines changed

2 files changed

+98
-73
lines changed

lib/SILOptimizer/Transforms/DeadStoreElimination.cpp

Lines changed: 71 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,8 @@ enum class DSEKind : unsigned {
9898

9999
/// Return the deallocate stack instructions corresponding to the given
100100
/// AllocStackInst.
101-
static llvm::SmallVector<SILInstruction *, 1> findDeallocStackInst(
102-
AllocStackInst *ASI) {
101+
static llvm::SmallVector<SILInstruction *, 1>
102+
findDeallocStackInst(AllocStackInst *ASI) {
103103
llvm::SmallVector<SILInstruction *, 1> DSIs;
104104
for (auto UI = ASI->use_begin(), E = ASI->use_end(); UI != E; ++UI) {
105105
if (DeallocStackInst *D = dyn_cast<DeallocStackInst>(UI->getUser())) {
@@ -249,13 +249,16 @@ class BlockState {
249249
llvm::DenseMap<SILValue, SILValue> LiveStores;
250250

251251
/// Constructors.
252-
BlockState(SILBasicBlock *B) : BB(B) {}
252+
BlockState(SILBasicBlock *B, unsigned LocationNum, bool Optimistic)
253+
: BB(B), LocationNum(LocationNum) {
254+
init(LocationNum, Optimistic);
255+
}
253256

254257
/// Return the current basic block.
255258
SILBasicBlock *getBB() const { return BB; }
256259

257260
/// Initialize the bitvectors for the current basic block.
258-
void init(DSEContext &Ctx, bool Optimistic);
261+
void init(unsigned LocationNum, bool Optimistic);
259262

260263
/// Check whether the BBWriteSetIn has changed. If it does, we need to rerun
261264
/// the data flow on this block's predecessors to reach fixed point.
@@ -298,11 +301,13 @@ bool BlockState::isTrackingLocation(llvm::SmallBitVector &BV, unsigned i) {
298301
namespace {
299302

300303
class DSEContext {
301-
enum class ProcessKind {
302-
ProcessOptimistic = 0,
303-
ProcessPessimistic = 1,
304-
ProcessNone = 2,
305-
};
304+
/// How to process the current function.
305+
enum class ProcessKind {
306+
ProcessOptimistic = 0,
307+
ProcessPessimistic = 1,
308+
ProcessNone = 2,
309+
};
310+
306311
private:
307312
/// The module we are currently processing.
308313
SILModule *Mod;
@@ -322,17 +327,14 @@ class DSEContext {
322327
/// Type Expansion Analysis.
323328
TypeExpansionAnalysis *TE;
324329

330+
/// The allocator we are using.
331+
llvm::BumpPtrAllocator &BPA;
332+
325333
/// The epilogue release matcher we are using.
326334
ConsumedArgToEpilogueReleaseMatcher& ERM;
327335

328-
/// Allocator.
329-
llvm::BumpPtrAllocator BPA;
330-
331336
/// Map every basic block to its location state.
332-
llvm::DenseMap<SILBasicBlock *, BlockState *> BBToLocState;
333-
334-
/// Keeps the actual BlockStates.
335-
std::vector<BlockState> BlockStates;
337+
llvm::SmallDenseMap<SILBasicBlock *, BlockState *> BBToLocState;
336338

337339
/// Keeps all the locations for the current function. The BitVector in each
338340
/// BlockState is then laid on top of it to keep track of which LSLocation
@@ -371,14 +373,13 @@ class DSEContext {
371373

372374
/// There is a read to a location, expand the location into individual fields
373375
/// before processing them.
374-
void processRead(SILInstruction *Inst, BlockState *S, SILValue M, DSEKind K);
376+
void processRead(SILInstruction *Inst, SILValue M, DSEKind K);
375377
void processReadForGenKillSet(BlockState *S, unsigned bit);
376378
void processReadForDSE(BlockState *S, unsigned Bit);
377379

378380
/// There is a write to a location, expand the location into individual fields
379381
/// before processing them.
380-
void processWrite(SILInstruction *Inst, BlockState *S, SILValue V, SILValue M,
381-
DSEKind K);
382+
void processWrite(SILInstruction *Inst, SILValue V, SILValue M, DSEKind K);
382383
void processWriteForGenKillSet(BlockState *S, unsigned bit);
383384
bool processWriteForDSE(BlockState *S, unsigned bit);
384385

@@ -395,7 +396,8 @@ class DSEContext {
395396
void processDebugValueAddrInstForGenKillSet(SILInstruction *I);
396397
void processDebugValueAddrInstForDSE(SILInstruction *I);
397398

398-
/// Process instructions. Extract locations from unknown memory inst.
399+
/// Process unknown read instructions. Extract locations from unknown memory
400+
/// inst.
399401
void processUnknownReadInst(SILInstruction *Inst, DSEKind Kind);
400402
void processUnknownReadInstForGenKillSet(SILInstruction *Inst);
401403
void processUnknownReadInstForDSE(SILInstruction *Inst);
@@ -421,9 +423,9 @@ class DSEContext {
421423
/// If not, on the second iteration, the intersection of the successors of
422424
/// the loop basic block will have store to x.a and therefore x.a = 13 can now
423425
/// be considered dead.
424-
void invalidateLSLocationBase(SILInstruction *Inst, DSEKind Kind);
425-
void invalidateLSLocationBaseForGenKillSet(SILInstruction *Inst);
426-
void invalidateLSLocationBaseForDSE(SILInstruction *Inst);
426+
void invalidateBase(SILValue B, BlockState *S, DSEKind Kind);
427+
void invalidateBaseForGenKillSet(SILValue B, BlockState *S);
428+
void invalidateBaseForDSE(SILValue B, BlockState *S);
427429

428430
/// Get the bit representing the location in the LocationVault.
429431
unsigned getLocationBit(const LSLocation &L);
@@ -432,8 +434,9 @@ class DSEContext {
432434
/// Constructor.
433435
DSEContext(SILFunction *F, SILModule *M, SILPassManager *PM,
434436
AliasAnalysis *AA, EscapeAnalysis *EA, TypeExpansionAnalysis *TE,
437+
llvm::BumpPtrAllocator &BPA,
435438
ConsumedArgToEpilogueReleaseMatcher &ERM)
436-
: Mod(M), F(F), PM(PM), AA(AA), EA(EA), TE(TE), ERM(ERM) {}
439+
: Mod(M), F(F), PM(PM), AA(AA), EA(EA), TE(TE), BPA(BPA), ERM(ERM) {}
437440

438441
/// Entry point for dead store elimination.
439442
bool run();
@@ -444,12 +447,10 @@ class DSEContext {
444447
/// Returns the escape analysis we use.
445448
EscapeAnalysis *getEA() { return EA; }
446449

447-
/// Returns the function currently being processing.
448-
SILFunction *getFn() { return F; }
449-
450450
/// Returns the location vault of the current function.
451451
std::vector<LSLocation> &getLocationVault() { return LocationVault; }
452452

453+
/// Returns the epilogue release matcher we are using.
453454
ConsumedArgToEpilogueReleaseMatcher &getERM() const { return ERM; };
454455

455456
/// Use a set of ad hoc rules to tell whether we should run a pessimistic
@@ -476,9 +477,7 @@ class DSEContext {
476477

477478
} // end anonymous namespace
478479

479-
void BlockState::init(DSEContext &Ctx, bool Optimistic) {
480-
std::vector<LSLocation> &LV = Ctx.getLocationVault();
481-
LocationNum = LV.size();
480+
void BlockState::init(unsigned LocationNum, bool Optimistic) {
482481
// For function that requires just 1 iteration of the data flow to converge
483482
// we set the initial state of BBWriteSetIn to 0.
484483
//
@@ -506,7 +505,6 @@ void BlockState::init(DSEContext &Ctx, bool Optimistic) {
506505
// MaxStoreSet is optimistically set to true initially.
507506
BBMaxStoreSet.resize(LocationNum, true);
508507

509-
510508
// DeallocateLocation initially empty.
511509
BBDeallocateLocation.resize(LocationNum, false);
512510
}
@@ -604,6 +602,12 @@ void DSEContext::processBasicBlockForGenKillSet(SILBasicBlock *BB) {
604602
// Compute the genset and killset for this instruction.
605603
processInstruction(&(*I), DSEKind::BuildGenKillSet);
606604
}
605+
606+
// Handle SILArgument for base invalidation.
607+
ArrayRef<SILArgument *> Args = BB->getBBArgs();
608+
for(auto &X : Args) {
609+
invalidateBase(X, BBState, DSEKind::BuildGenKillSet);
610+
}
607611
}
608612

609613
bool DSEContext::processBasicBlockWithGenKillSet(SILBasicBlock *BB) {
@@ -620,8 +624,7 @@ bool DSEContext::processBasicBlockWithGenKillSet(SILBasicBlock *BB) {
620624
return S->updateBBWriteSetIn(S->BBWriteSetMid);
621625
}
622626

623-
void DSEContext::processBasicBlockForDSE(SILBasicBlock *BB,
624-
bool Optimistic) {
627+
void DSEContext::processBasicBlockForDSE(SILBasicBlock *BB, bool Optimistic) {
625628
// If we know this is not a one iteration function which means its
626629
// its BBWriteSetIn and BBWriteSetOut have been computed and converged,
627630
// and this basic block does not even have StoreInsts, there is no point
@@ -643,6 +646,12 @@ void DSEContext::processBasicBlockForDSE(SILBasicBlock *BB,
643646
processInstruction(&(*I), DSEKind::PerformDSE);
644647
}
645648

649+
// Handle SILArgument for base invalidation.
650+
ArrayRef<SILArgument *> Args = BB->getBBArgs();
651+
for(auto &X : Args) {
652+
invalidateBase(X, S, DSEKind::BuildGenKillSet);
653+
}
654+
646655
S->BBWriteSetIn = S->BBWriteSetMid;
647656
}
648657

@@ -697,40 +706,38 @@ void DSEContext::mergeSuccessorLiveIns(SILBasicBlock *BB) {
697706
C->BBWriteSetOut |= C->BBDeallocateLocation;
698707
}
699708

700-
void DSEContext::invalidateLSLocationBaseForGenKillSet(SILInstruction *I) {
701-
BlockState *S = getBlockState(I);
709+
void DSEContext::invalidateBaseForGenKillSet(SILValue B, BlockState *S) {
702710
for (unsigned i = 0; i < S->LocationNum; ++i) {
703-
if (LocationVault[i].getBase() != I)
711+
if (LocationVault[i].getBase() != B)
704712
continue;
705713
S->startTrackingLocation(S->BBKillSet, i);
706714
S->stopTrackingLocation(S->BBGenSet, i);
707715
}
708716
}
709717

710-
void DSEContext::invalidateLSLocationBaseForDSE(SILInstruction *I) {
711-
BlockState *S = getBlockState(I);
718+
void DSEContext::invalidateBaseForDSE(SILValue B, BlockState *S) {
712719
for (unsigned i = 0; i < S->LocationNum; ++i) {
713720
if (!S->BBWriteSetMid.test(i))
714721
continue;
715-
if (LocationVault[i].getBase() != I)
722+
if (LocationVault[i].getBase() != B)
716723
continue;
717724
S->stopTrackingLocation(S->BBWriteSetMid, i);
718725
}
719726
}
720727

721-
void DSEContext::invalidateLSLocationBase(SILInstruction *I, DSEKind Kind) {
728+
void DSEContext::invalidateBase(SILValue B, BlockState *S, DSEKind Kind) {
722729
// If this instruction defines the base of a location, then we need to
723730
// invalidate any locations with the same base.
724731
//
725732
// Are we building genset and killset.
726733
if (isBuildingGenKillSet(Kind)) {
727-
invalidateLSLocationBaseForGenKillSet(I);
734+
invalidateBaseForGenKillSet(B, S);
728735
return;
729736
}
730737

731738
// Are we performing dead store elimination.
732739
if (isPerformingDSE(Kind)) {
733-
invalidateLSLocationBaseForDSE(I);
740+
invalidateBaseForDSE(B, S);
734741
return;
735742
}
736743

@@ -771,8 +778,8 @@ void DSEContext::processReadForGenKillSet(BlockState *S, unsigned bit) {
771778
}
772779
}
773780

774-
void DSEContext::processRead(SILInstruction *I, BlockState *S, SILValue Mem,
775-
DSEKind Kind) {
781+
void DSEContext::processRead(SILInstruction *I, SILValue Mem, DSEKind Kind) {
782+
auto *S = getBlockState(I);
776783
// Construct a LSLocation to represent the memory read by this instruction.
777784
// NOTE: The base will point to the actual object this inst is accessing,
778785
// not this particular field.
@@ -856,8 +863,9 @@ void DSEContext::processWriteForMaxStoreSet(BlockState *S, unsigned bit) {
856863
S->startTrackingLocation(S->BBMaxStoreSet, bit);
857864
}
858865

859-
void DSEContext::processWrite(SILInstruction *I, BlockState *S, SILValue Val,
860-
SILValue Mem, DSEKind Kind) {
866+
void DSEContext::processWrite(SILInstruction *I, SILValue Val, SILValue Mem,
867+
DSEKind Kind) {
868+
auto *S = getBlockState(I);
861869
// Construct a LSLocation to represent the memory read by this instruction.
862870
// NOTE: The base will point to the actual object this inst is accessing,
863871
// not this particular field.
@@ -979,12 +987,12 @@ void DSEContext::processWrite(SILInstruction *I, BlockState *S, SILValue Val,
979987
}
980988

981989
void DSEContext::processLoadInst(SILInstruction *I, DSEKind Kind) {
982-
processRead(I, getBlockState(I), cast<LoadInst>(I)->getOperand(), Kind);
990+
processRead(I, cast<LoadInst>(I)->getOperand(), Kind);
983991
}
984992

985993
void DSEContext::processStoreInst(SILInstruction *I, DSEKind Kind) {
986994
auto *SI = cast<StoreInst>(I);
987-
processWrite(I, getBlockState(I), SI->getSrc(), SI->getDest(), Kind);
995+
processWrite(I, SI->getSrc(), SI->getDest(), Kind);
988996
}
989997

990998
void DSEContext::processDebugValueAddrInstForGenKillSet(SILInstruction *I) {
@@ -1091,7 +1099,7 @@ void DSEContext::processInstruction(SILInstruction *I, DSEKind Kind) {
10911099
}
10921100

10931101
// Check whether this instruction will invalidate any other locations.
1094-
invalidateLSLocationBase(I, Kind);
1102+
invalidateBase(I, getBlockState(I), Kind);
10951103
}
10961104

10971105
void DSEContext::runIterativeDSE() {
@@ -1132,46 +1140,30 @@ void DSEContext::runIterativeDSE() {
11321140
}
11331141

11341142
bool DSEContext::run() {
1135-
// Is this a one iteration function.
1136-
auto *PO = PM->getAnalysis<PostOrderAnalysis>()->get(F);
1137-
11381143
std::pair<int, int> LSCount = std::make_pair(0, 0);
11391144
// Walk over the function and find all the locations accessed by
11401145
// this function.
1141-
LSLocation::enumerateLSLocations(*F, LocationVault,
1142-
LocToBitIndex,
1146+
LSLocation::enumerateLSLocations(*F, LocationVault, LocToBitIndex,
11431147
BaseToLocIndex, TE, LSCount);
11441148

11451149
// Check how to optimize this function.
11461150
ProcessKind Kind = getProcessFunctionKind(LSCount.second);
11471151

11481152
// We do not optimize this function at all.
11491153
if (Kind == ProcessKind::ProcessNone)
1150-
return false;
1154+
return false;
11511155

11521156
// Do we run a pessimistic data flow ?
11531157
bool Optimistic = Kind == ProcessKind::ProcessOptimistic ? true : false;
11541158

11551159
// For all basic blocks in the function, initialize a BB state.
11561160
//
1157-
// DenseMap has a minimum size of 64, while many functions do not have more
1158-
// than 64 basic blocks. Therefore, allocate the BlockState in a vector and
1159-
// use pointer in BBToLocState to access them.
1160-
for (auto &B : *F) {
1161-
BlockStates.push_back(BlockState(&B));
1162-
// Since we know all the locations accessed in this function, we can resize
1163-
// the bit vector to the appropriate size.
1164-
BlockStates.back().init(*this, Optimistic);
1165-
}
1166-
11671161
// Initialize the BBToLocState mapping.
1168-
for (auto &S : BlockStates) {
1169-
BBToLocState[S.getBB()] = &S;
1170-
}
1171-
1172-
// Initialize the store bit state at the end of each basic block.
1173-
for (auto &X : BlockStates) {
1174-
X.initStoreSetAtEndOfBlock(*this);
1162+
unsigned LocationNum = this->getLocationVault().size();
1163+
for (auto &B : *F) {
1164+
auto *State = new (BPA) BlockState(&B, LocationNum, Optimistic);
1165+
BBToLocState[&B] = State;
1166+
State->initStoreSetAtEndOfBlock(*this);
11751167
}
11761168

11771169
// We perform dead store elimination in the following phases.
@@ -1198,6 +1190,8 @@ bool DSEContext::run() {
11981190

11991191
// The data flow has stabilized, run one last iteration over all the basic
12001192
// blocks and try to remove dead stores.
1193+
// Is this a one iteration function.
1194+
auto *PO = PM->getAnalysis<PostOrderAnalysis>()->get(F);
12011195
for (SILBasicBlock *B : PO->getPostOrder()) {
12021196
processBasicBlockForDSE(B, Optimistic);
12031197
}
@@ -1245,9 +1239,13 @@ class DeadStoreElimination : public SILFunctionTransform {
12451239
auto *TE = PM->getAnalysis<TypeExpansionAnalysis>();
12461240
auto *RCFI = PM->getAnalysis<RCIdentityAnalysis>()->get(F);
12471241

1242+
// The allocator we are using.
1243+
llvm::BumpPtrAllocator BPA;
1244+
1245+
// The epilogue release matcher we are using.
12481246
ConsumedArgToEpilogueReleaseMatcher ERM(RCFI, F);
12491247

1250-
DSEContext DSE(F, &F->getModule(), PM, AA, EA, TE, ERM);
1248+
DSEContext DSE(F, &F->getModule(), PM, AA, EA, TE, BPA, ERM);
12511249
if (DSE.run()) {
12521250
invalidateAnalysis(SILAnalysis::InvalidationKind::Instructions);
12531251
}

test/SILOptimizer/dead_store_elim.sil

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1340,3 +1340,30 @@ bb0(%0 : $*Int):
13401340

13411341
sil @closure : $@convention(thin) (Bool, @in Int) -> Int
13421342

1343+
// Make sure we invalidate the store in bb1 when we process the SILargument. If not
1344+
// we could remove the store in bb1 which is incorrect.
1345+
//
1346+
// CHECK-LABEL: invalidate_base_for_silargument
1347+
// CHECK: bb1([[A0:%.+]] : $foo):
1348+
// CHECK: {{ store}}
1349+
// CHECK: bb2:
1350+
// CHECK: {{ store}}
1351+
sil hidden @invalidate_base_for_silargument : $@convention(thin) () -> () {
1352+
bb0:
1353+
%1 = alloc_ref $foo
1354+
%4 = integer_literal $Builtin.Int64, 12
1355+
%5 = struct $Int (%4 : $Builtin.Int64)
1356+
br bb1(%1 : $foo)
1357+
1358+
bb1(%2 : $foo):
1359+
%6 = ref_element_addr %2 : $foo, #foo.a
1360+
store %5 to %6 : $*Int
1361+
%9 = alloc_ref $foo
1362+
cond_br undef, bb1(%9: $foo), bb2
1363+
1364+
bb2:
1365+
%7 = ref_element_addr %2 : $foo, #foo.a
1366+
store %5 to %7 : $*Int
1367+
%14 = tuple ()
1368+
return %14 : $()
1369+
}

0 commit comments

Comments
 (0)