Skip to content

Commit b17935a

Browse files
committed
[pred-memopt] Rather than extracting values at the load site, extract at the store site.
Previously in PredMemOpts, we would insert any extracts at the load site, i.e.:

    store %foo to %0
    ...
    %1 = struct_element_addr %0, $Type, $Type.field
    %2 = load %1
    ...
    apply %use(%2)

would transform to:

    store %foo to %0
    ...
    %2 = struct_extract %foo
    apply %use(%2)

This framework will not work with Ownership enabled since the value stored is considered consumed by the store. This change fixes the issue by moving such non-destructive extracts to occur while %foo is considered live, i.e. before the store:

    %2 = struct_extract %foo
    store %foo to %0
    ...
    apply %use(%2)

This means that we have to store insertion points for each store that provides us with available values and insert the extracts at those points. This creates some complications in the case where we have multiple stores since we need to deal with phi nodes. Rather than dealing with it by hand, we just insert the extracts at each point and then use the SSA updater to insert the relevant phi nodes.

rdar://31521023
1 parent 49f5c76 commit b17935a

File tree

3 files changed

+493
-67
lines changed

3 files changed

+493
-67
lines changed

include/swift/SIL/SILBuilder.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2049,6 +2049,35 @@ class SILBuilderWithScope : public SILBuilder {
20492049
}
20502050
};
20512051

2052+
class SavedInsertionPointRAII {
2053+
SILBuilder &Builder;
2054+
PointerUnion<SILInstruction *, SILBasicBlock *> SavedIP;
2055+
2056+
public:
2057+
SavedInsertionPointRAII(SILBuilder &B, SILInstruction *NewIP)
2058+
: Builder(B), SavedIP(&*B.getInsertionPoint()) {
2059+
Builder.setInsertionPoint(NewIP);
2060+
}
2061+
2062+
SavedInsertionPointRAII(SILBuilder &B, SILBasicBlock *NewIP)
2063+
: Builder(B), SavedIP(B.getInsertionBB()) {
2064+
Builder.setInsertionPoint(NewIP);
2065+
}
2066+
2067+
SavedInsertionPointRAII(const SavedInsertionPointRAII &) = delete;
2068+
SavedInsertionPointRAII &operator=(const SavedInsertionPointRAII &) = delete;
2069+
SavedInsertionPointRAII(SavedInsertionPointRAII &&) = delete;
2070+
SavedInsertionPointRAII &operator=(SavedInsertionPointRAII &&) = delete;
2071+
2072+
~SavedInsertionPointRAII() {
2073+
if (SavedIP.is<SILInstruction *>()) {
2074+
Builder.setInsertionPoint(SavedIP.get<SILInstruction *>());
2075+
} else {
2076+
Builder.setInsertionPoint(SavedIP.get<SILBasicBlock *>());
2077+
}
2078+
}
2079+
};
2080+
20522081
} // end swift namespace
20532082

20542083
#endif

lib/SILOptimizer/Mandatory/PredictableMemOpt.cpp

Lines changed: 116 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,12 @@
1212

1313
#define DEBUG_TYPE "predictable-memopt"
1414

15-
#include "swift/SILOptimizer/PassManager/Passes.h"
1615
#include "DIMemoryUseCollector.h"
1716
#include "swift/SIL/SILBuilder.h"
18-
#include "swift/SILOptimizer/Utils/Local.h"
17+
#include "swift/SILOptimizer/PassManager/Passes.h"
1918
#include "swift/SILOptimizer/PassManager/Transforms.h"
19+
#include "swift/SILOptimizer/Utils/Local.h"
20+
#include "swift/SILOptimizer/Utils/SILSSAUpdater.h"
2021
#include "llvm/ADT/SmallBitVector.h"
2122
#include "llvm/ADT/Statistic.h"
2223
#include "llvm/Support/Compiler.h"
@@ -91,14 +92,14 @@ static SILValue getAccessPathRoot(SILValue Pointer) {
9192
/// If this pointer is to within an existential projection, it returns ~0U.
9293
static unsigned computeSubelement(SILValue Pointer,
9394
SingleValueInstruction *RootInst) {
94-
unsigned SubEltNumber = 0;
95+
unsigned SubElementNumber = 0;
9596
SILModule &M = RootInst->getModule();
9697

9798
while (1) {
9899
// If we got to the root, we're done.
99100
if (RootInst == Pointer)
100-
return SubEltNumber;
101-
101+
return SubElementNumber;
102+
102103
if (auto *PBI = dyn_cast<ProjectBoxInst>(Pointer)) {
103104
Pointer = PBI->getOperand();
104105
continue;
@@ -114,7 +115,7 @@ static unsigned computeSubelement(SILValue Pointer,
114115

115116
// Keep track of what subelement is being referenced.
116117
for (unsigned i = 0, e = TEAI->getFieldNo(); i != e; ++i) {
117-
SubEltNumber += getNumSubElements(TT.getTupleElementType(i), M);
118+
SubElementNumber += getNumSubElements(TT.getTupleElementType(i), M);
118119
}
119120
Pointer = TEAI->getOperand();
120121
continue;
@@ -127,7 +128,7 @@ static unsigned computeSubelement(SILValue Pointer,
127128
StructDecl *SD = SEAI->getStructDecl();
128129
for (auto *D : SD->getStoredProperties()) {
129130
if (D == SEAI->getField()) break;
130-
SubEltNumber += getNumSubElements(ST.getFieldType(D, M), M);
131+
SubElementNumber += getNumSubElements(ST.getFieldType(D, M), M);
131132
}
132133

133134
Pointer = SEAI->getOperand();
@@ -149,14 +150,26 @@ static unsigned computeSubelement(SILValue Pointer,
149150
namespace {
150151

151152
struct AvailableValue {
153+
/// If this gets too expensive in terms of copying, we can use an arena and a
154+
/// FrozenPtrSet like we do in ARC.
155+
using SetVector = llvm::SmallSetVector<SILInstruction *, 1>;
156+
152157
SILValue Value;
153158
unsigned SubElementNumber;
159+
SetVector InsertionPoints;
154160

155161
public:
156162
AvailableValue() = default;
157163

158-
AvailableValue(SILValue Value, unsigned SubElementNumber)
159-
: Value(Value), SubElementNumber(SubElementNumber) {}
164+
/// Main initializer for available values.
165+
///
166+
/// *NOTE* We assume that all available values start with a singular insertion
167+
/// point and insertion points are added by merging.
168+
AvailableValue(SILValue Value, unsigned SubElementNumber,
169+
SILInstruction *InsertPoint)
170+
: Value(Value), SubElementNumber(SubElementNumber), InsertionPoints() {
171+
InsertionPoints.insert(InsertPoint);
172+
}
160173

161174
/// Deleted copy constructor. This is a move only type.
162175
AvailableValue(const AvailableValue &) = delete;
@@ -166,15 +179,17 @@ struct AvailableValue {
166179

167180
/// Move constructor.
168181
AvailableValue(AvailableValue &&Other)
169-
: Value(nullptr), SubElementNumber(~0) {
182+
: Value(nullptr), SubElementNumber(~0), InsertionPoints() {
170183
std::swap(Value, Other.Value);
171184
std::swap(SubElementNumber, Other.SubElementNumber);
185+
std::swap(InsertionPoints, Other.InsertionPoints);
172186
}
173187

174188
/// Move operator.
175189
AvailableValue &operator=(AvailableValue &&Other) {
176190
std::swap(Value, Other.Value);
177191
std::swap(SubElementNumber, Other.SubElementNumber);
192+
std::swap(InsertionPoints, Other.InsertionPoints);
178193
return *this;
179194
}
180195

@@ -191,24 +206,70 @@ struct AvailableValue {
191206
SILValue getValue() const { return Value; }
192207
SILType getType() const { return Value->getType(); }
193208
unsigned getSubElementNumber() const { return SubElementNumber; }
209+
ArrayRef<SILInstruction *> getInsertionPoints() const {
210+
return InsertionPoints.getArrayRef();
211+
}
212+
213+
void mergeInsertionPoints(const AvailableValue &Other) & {
214+
assert(Value == Other.Value && SubElementNumber == Other.SubElementNumber);
215+
InsertionPoints.set_union(Other.InsertionPoints);
216+
}
217+
218+
void addInsertionPoint(SILInstruction *I) & { InsertionPoints.insert(I); }
194219

195220
/// TODO: This needs a better name.
196221
AvailableValue emitStructExtract(SILBuilder &B, SILLocation Loc, VarDecl *D,
197-
unsigned SubEltNumber) const {
222+
unsigned SubElementNumber) const {
198223
SILValue NewValue = B.emitStructExtract(Loc, Value, D);
199-
return {NewValue, SubEltNumber};
224+
return {NewValue, SubElementNumber, InsertionPoints};
200225
}
201226

202227
/// TODO: This needs a better name.
203228
AvailableValue emitTupleExtract(SILBuilder &B, SILLocation Loc,
204-
unsigned EltNo, unsigned SubEltNumber) const {
229+
unsigned EltNo,
230+
unsigned SubElementNumber) const {
205231
SILValue NewValue = B.emitTupleExtract(Loc, Value, EltNo);
206-
return {NewValue, SubEltNumber};
232+
return {NewValue, SubElementNumber, InsertionPoints};
207233
}
234+
235+
void dump() const __attribute__((used));
236+
void print(llvm::raw_ostream &os) const;
237+
238+
private:
239+
/// Private constructor for use by emitStructExtract and emitTupleExtract.
240+
AvailableValue(SILValue Value, unsigned SubElementNumber,
241+
const SetVector &InsertPoints)
242+
: Value(Value), SubElementNumber(SubElementNumber),
243+
InsertionPoints(InsertPoints) {}
208244
};
209245

210246
} // end anonymous namespace
211247

248+
/// Debugging convenience: print this available value to llvm::dbgs().
void AvailableValue::dump() const {
  print(llvm::dbgs());
}
249+
250+
/// Write a textual description of this available value to \p os.
///
/// The output consists of the underlying value (or the "NoValue;" placeholder
/// when no value is set), the sub-element number, and then every recorded
/// insertion point instruction.
void AvailableValue::print(llvm::raw_ostream &os) const {
  os << "Available Value Dump. Value: ";
  if (SILValue V = getValue())
    os << V;
  else
    os << "NoValue;\n";

  os << "SubElementNumber: " << getSubElementNumber() << "\n"
     << "Insertion Points:\n";
  for (SILInstruction *InsertPt : getInsertionPoints())
    os << *InsertPt;
}
263+
264+
namespace llvm {
265+
266+
llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const AvailableValue &V) {
267+
V.print(os);
268+
return os;
269+
}
270+
271+
} // end llvm namespace
272+
212273
//===----------------------------------------------------------------------===//
213274
// Subelement Extraction
214275
//===----------------------------------------------------------------------===//
@@ -301,7 +362,7 @@ class AvailableValueAggregator {
301362
SILValue aggregateValues(SILType LoadTy, SILValue Address, unsigned FirstElt);
302363

303364
void print(llvm::raw_ostream &os) const;
304-
void dump() const;
365+
void dump() const __attribute__((used));
305366

306367
private:
307368
SILValue aggregateFullyAvailableValue(SILType LoadTy, unsigned FirstElt);
@@ -321,8 +382,7 @@ void AvailableValueAggregator::print(llvm::raw_ostream &os) const {
321382
os << "Available Value List, N = " << AvailableValueList.size()
322383
<< ". Elts:\n";
323384
for (auto &V : AvailableValueList) {
324-
os << "Value: " << V.getValue()
325-
<< "SubElementNumber: " << V.getSubElementNumber() << "\n";
385+
os << V;
326386
}
327387
}
328388

@@ -440,14 +500,30 @@ SILValue AvailableValueAggregator::handlePrimitiveValue(SILType LoadTy,
440500
return B.createLoad(Loc, Address, LoadOwnershipQualifier::Unqualified);
441501
}
442502

443-
// If we have an available value, we know that we know that the available
444-
// value is already being consumed by the store. This means that we must
445-
// insert a copy of EltVal after we extract it if we do not have a trivial
446-
// value. We use SILBuilder::emit*Operation to handle both trivial/non-trivial
447-
// cases without needing to introduce control flow here.
503+
// If we have 1 insertion point, just extract the value and return.
504+
//
505+
// This saves us from having to spend compile time in the SSA updater in this
506+
// case.
507+
ArrayRef<SILInstruction *> InsertPts = Val.getInsertionPoints();
508+
if (InsertPts.size() == 1) {
509+
SavedInsertionPointRAII SavedInsertPt(B, InsertPts[0]);
510+
SILValue EltVal = nonDestructivelyExtractSubElement(Val, B, Loc);
511+
assert(EltVal->getType() == LoadTy && "Subelement types mismatch");
512+
return EltVal;
513+
}
514+
515+
// If we have an available value, then we want to extract the subelement from
516+
// the borrowed aggregate before each insertion point.
517+
SILSSAUpdater Updater;
518+
Updater.Initialize(LoadTy);
519+
for (auto *I : Val.getInsertionPoints()) {
520+
SavedInsertionPointRAII SavedInsertPt(B, I);
521+
SILValue EltVal = nonDestructivelyExtractSubElement(Val, B, Loc);
522+
Updater.AddAvailableValue(I->getParent(), EltVal);
523+
}
448524

449-
// Then extract the subelement from the borrowed aggregate.
450-
SILValue EltVal = nonDestructivelyExtractSubElement(Val, B, Loc);
525+
// Finally, grab the value from the SSA updater.
526+
SILValue EltVal = Updater.GetValueInMiddleOfBlock(B.getInsertionBB());
451527
assert(EltVal->getType() == LoadTy && "Subelement types mismatch");
452528
return EltVal;
453529
}
@@ -587,6 +663,7 @@ void AllocOptimize::updateAvailableValues(
587663
SILInstruction *Inst, llvm::SmallBitVector &RequiredElts,
588664
SmallVectorImpl<AvailableValue> &Result,
589665
llvm::SmallBitVector &ConflictingValues) {
666+
590667
// Handle store and assign.
591668
if (auto *SI = dyn_cast<StoreInst>(Inst)) {
592669
unsigned StartSubElt = computeSubelement(SI->getDest(), TheMemory);
@@ -601,12 +678,19 @@ void AllocOptimize::updateAvailableValues(
601678
// there already is a result, check it for conflict. If there is no
602679
// conflict, then we're ok.
603680
auto &Entry = Result[StartSubElt+i];
604-
if (!Entry)
605-
Entry = {SI->getSrc(), i};
606-
else if (Entry.getValue() != SI->getSrc() ||
607-
Entry.getSubElementNumber() != i)
608-
ConflictingValues[StartSubElt+i] = true;
609-
681+
if (!Entry) {
682+
Entry = {SI->getSrc(), i, Inst};
683+
} else {
684+
// TODO: This is /really/, /really/, conservative. This basically means
685+
// that if we do not have an identical store, we will not promote.
686+
if (Entry.getValue() != SI->getSrc() ||
687+
Entry.getSubElementNumber() != i) {
688+
ConflictingValues[StartSubElt + i] = true;
689+
} else {
690+
Entry.addInsertionPoint(Inst);
691+
}
692+
}
693+
610694
// This element is now provided.
611695
RequiredElts[StartSubElt+i] = false;
612696
}
@@ -682,8 +766,8 @@ void AllocOptimize::computeAvailableValues(
682766
if (!ConflictingValues.none())
683767
for (unsigned i = 0, e = Result.size(); i != e; ++i)
684768
if (ConflictingValues[i])
685-
Result[i] = { SILValue(), 0U };
686-
769+
Result[i] = {};
770+
687771
return;
688772
}
689773

0 commit comments

Comments
 (0)