Skip to content

Commit 2372862

Browse files
authored
Redundant Set Elimination pass (#1344)
This optimizes #1343. It looks for stores of a value that is already present in the local, which in particular can remove the initial set to 0 of loops starting at zero, since all locals are initialized to that already. This helps in real-world code, but is not super-common since coalescing means we tend to have assigned something else to it anyhow before we need it to be zero, so this mainly helps in small functions (and running this before coalescing would extend live ranges in potentially bad ways).
1 parent 4c55e49 commit 2372862

34 files changed

+1223
-497
lines changed

build-js.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ echo "building shared bitcode"
9797
src/passes/Precompute.cpp \
9898
src/passes/Print.cpp \
9999
src/passes/PrintCallGraph.cpp \
100+
src/passes/RedundantSetElimination.cpp \
100101
src/passes/RelooperJumpThreading.cpp \
101102
src/passes/RemoveImports.cpp \
102103
src/passes/RemoveMemory.cpp \

src/cfg/liveness-traversal.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
#include <wasm-printing.h>
21
/*
32
* Copyright 2017 WebAssembly Community Group participants
43
*

src/passes/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ SET(passes_SOURCES
2626
Precompute.cpp
2727
Print.cpp
2828
PrintCallGraph.cpp
29+
RedundantSetElimination.cpp
2930
RelooperJumpThreading.cpp
3031
ReReloop.cpp
3132
RemoveImports.cpp
Lines changed: 374 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,374 @@
1+
/*
2+
* Copyright 2017 WebAssembly Community Group participants
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
//
18+
// Eliminate redundant set_locals: if a local already has a particular
19+
// value, we don't need to set it again. A common case here is loops
20+
// that start at zero, since the default value is initialized to
21+
// zero anyhow.
22+
//
23+
// A risk here is that we extend live ranges, e.g. we may use the default
24+
// value at the very end of a function, keeping that local alive throughout.
25+
// For that reason it is probably better to run this near the end of
26+
// optimization, and especially after coalesce-locals. A final vacuum
27+
// should be done after it, as this pass can leave around drop()s of
28+
// values no longer necessary.
29+
//
30+
// So far this tracks constant values, and for everything else it considers
31+
// them unique (so each set_local of a non-constant is a unique value, each
32+
// merge is a unique value, etc.; there is no sophisticated value numbering
33+
// here).
34+
//
35+
36+
#include <wasm.h>
37+
#include <pass.h>
38+
#include <wasm-builder.h>
39+
#include <cfg/cfg-traversal.h>
40+
#include <ir/literal-utils.h>
41+
#include <ir/utils.h>
42+
#include <support/unique_deferring_queue.h>
43+
44+
namespace wasm {
45+
46+
// We do a very simple numbering of local values, just a unique
47+
// number for constants so far, enough to see
48+
// trivial duplication. LocalValues maps each local index to
49+
// its current value
50+
typedef std::vector<Index> LocalValues;
51+
52+
// information in a basic block
53+
struct Info {
54+
LocalValues start, end; // the local values at the start and end of the block
55+
std::vector<Expression**> setps;
56+
};
57+
58+
struct RedundantSetElimination : public WalkerPass<CFGWalker<RedundantSetElimination, Visitor<RedundantSetElimination>, Info>> {
59+
bool isFunctionParallel() override { return true; }
60+
61+
Pass* create() override { return new RedundantSetElimination(); }
62+
63+
Index numLocals;
64+
65+
// cfg traversal work
66+
67+
static void doVisitSetLocal(RedundantSetElimination* self, Expression** currp) {
68+
if (self->currBasicBlock) {
69+
self->currBasicBlock->contents.setps.push_back(currp);
70+
}
71+
}
72+
73+
// main entry point
74+
75+
void doWalkFunction(Function* func) {
76+
numLocals = func->getNumLocals();
77+
// create the CFG by walking the IR
78+
CFGWalker<RedundantSetElimination, Visitor<RedundantSetElimination>, Info>::doWalkFunction(func);
79+
// flow values across blocks
80+
flowValues(func);
81+
// remove redundant sets
82+
optimize();
83+
}
84+
85+
// numbering
86+
87+
Index nextValue = 1; // 0 is reserved for the "unseen value"
88+
std::unordered_map<Literal, Index> literalValues; // each constant has a value
89+
std::unordered_map<Expression*, Index> expressionValues; // each value can have a value
90+
std::unordered_map<BasicBlock*, std::unordered_map<Index, Index>> blockMergeValues; // each block has values for each merge
91+
92+
Index getUnseenValue() { // we haven't seen this location yet
93+
return 0;
94+
}
95+
Index getUniqueValue() {
96+
#ifdef RSE_DEBUG
97+
std::cout << "new unique value " << nextValue << '\n';
98+
#endif
99+
return nextValue++;
100+
}
101+
102+
Index getLiteralValue(Literal lit) {
103+
auto iter = literalValues.find(lit);
104+
if (iter != literalValues.end()) {
105+
return iter->second;
106+
}
107+
#ifdef RSE_DEBUG
108+
std::cout << "new literal value for " << lit << '\n';
109+
#endif
110+
return literalValues[lit] = getUniqueValue();
111+
}
112+
113+
Index getExpressionValue(Expression* expr) {
114+
auto iter = expressionValues.find(expr);
115+
if (iter != expressionValues.end()) {
116+
return iter->second;
117+
}
118+
#ifdef RSE_DEBUG
119+
std::cout << "new expr value for " << expr << '\n';
120+
#endif
121+
return expressionValues[expr] = getUniqueValue();
122+
}
123+
124+
Index getBlockMergeValue(BasicBlock* block, Index index) {
125+
auto& mergeValues = blockMergeValues[block];
126+
auto iter = mergeValues.find(index);
127+
if (iter != mergeValues.end()) {
128+
return iter->second;
129+
}
130+
#ifdef RSE_DEBUG
131+
std::cout << "new block-merge value for " << block << " : " << index << '\n';
132+
#endif
133+
return mergeValues[index] = getUniqueValue();
134+
}
135+
136+
bool isBlockMergeValue(BasicBlock* block, Index index, Index value) {
137+
auto iter = blockMergeValues.find(block);
138+
if (iter == blockMergeValues.end()) return false;
139+
auto& mergeValues = iter->second;
140+
auto iter2 = mergeValues.find(index);
141+
if (iter2 == mergeValues.end()) return false;
142+
return value == iter2->second;
143+
}
144+
145+
Index getValue(Expression* value, LocalValues& currValues) {
146+
if (auto* c = value->dynCast<Const>()) {
147+
// a constant
148+
return getLiteralValue(c->value);
149+
} else if (auto* get = value->dynCast<GetLocal>()) {
150+
// a copy of whatever that was
151+
return currValues[get->index];
152+
} else {
153+
// get the value's own unique value
154+
return getExpressionValue(value);
155+
}
156+
}
157+
158+
// flowing
159+
160+
void flowValues(Function* func) {
161+
for (auto& block : basicBlocks) {
162+
LocalValues& start = block->contents.start;
163+
start.resize(numLocals);
164+
if (block.get() == entry) {
165+
// params are complex values we can't optimize; vars are zeros
166+
for (Index i = 0; i < numLocals; i++) {
167+
if (func->isParam(i)) {
168+
#ifdef RSE_DEBUG
169+
std::cout << "new param value for " << i << '\n';
170+
#endif
171+
start[i] = getUniqueValue();
172+
} else {
173+
start[i] = getLiteralValue(LiteralUtils::makeLiteralZero(func->getLocalType(i)));
174+
}
175+
}
176+
} else {
177+
// other blocks have all unseen values to begin with
178+
for (Index i = 0; i < numLocals; i++) {
179+
start[i] = getUnseenValue();
180+
}
181+
}
182+
// the ends all begin unseen
183+
LocalValues& end = block->contents.end;
184+
end.resize(numLocals);
185+
for (Index i = 0; i < numLocals; i++) {
186+
end[i] = getUnseenValue();
187+
}
188+
}
189+
// keep working while stuff is flowing. we use a unique deferred queue
190+
// which ensures both FIFO and that we don't do needless work - if
191+
// A and B reach C, and both queue C, we only want to do C at the latest
192+
// time, when we have information from all those reaching it.
193+
UniqueDeferredQueue<BasicBlock*> work;
194+
work.push(entry);
195+
while (!work.empty()) {
196+
auto* curr = work.pop();
197+
#ifdef RSE_DEBUG
198+
std::cout << "flow block " << curr << '\n';
199+
#endif
200+
// process a block: first, update its start based on those reaching it
201+
if (!curr->in.empty()) {
202+
if (curr->in.size() == 1) {
203+
// just copy the pred, nothing to merge
204+
curr->contents.start = (*curr->in.begin())->contents.end;
205+
} else {
206+
// perform a merge
207+
auto in = curr->in;
208+
for (Index i = 0; i < numLocals; i++) {
209+
auto old = curr->contents.start[i];
210+
// If we already had a merge value here, keep it.
211+
// TODO This may have some false positives, as we may e.g. have
212+
// a single pred that first gives us x, then later y after
213+
// flow led to a merge, and we may see x and y at the same
214+
// time due to flow from a successor, and then it looks like
215+
// we need a merge but we don't. avoiding that would require
216+
// more memory and is probably not worth it, but might be
217+
// worth investigating
218+
// NB While suboptimal, this simplification provides a simple proof
219+
// of convergence. We prove that, in each fixed block+local,
220+
// the value number at the end is nondecreasing across
221+
// iterations, by induction on the iteration:
222+
// * The first iteration is on the entry block. It increases
223+
// the value number at the end from 0 (unseen) to something
224+
// else (a value number for 0 for locals, a unique value
225+
// for params; all >0).
226+
// * Induction step: assuming the property holds for all past
227+
// iterations, consider the current iteration. Of our
228+
// predecessors, those that we iterated on have the property;
229+
// those that we haven't will have 0 (unseen).
230+
// * If we assign to that local in this block, that will be
231+
// the value in the output, forever, and it is greater
232+
// than the initial value of 0.
233+
// * If we see different values coming in, we create a merge
234+
// value number. Its number is higher than everything
235+
// else since we give it the next available number, so we
236+
// do not decrease in this iteration, and we will output
237+
// the same value in the future too (here is where we use
238+
// the simplification property).
239+
// * Otherwise, we will flow the incoming value through,
240+
// and it did not decrease (by induction), so neither do
241+
// we.
242+
// Finally, given value numbers are nondecreasing, we must
243+
// converge since we only keep working as long as we see new
244+
// values at the end of a block.
245+
//
246+
// Not that we don't trust this proof, but the convergence
247+
// property (value numbers at block ends do not decrease) is
248+
// verified later down.
249+
if (isBlockMergeValue(curr, i, old)) {
250+
continue;
251+
}
252+
auto iter = in.begin();
253+
auto value = (*iter)->contents.end[i];
254+
iter++;
255+
while (iter != in.end()) {
256+
auto otherValue = (*iter)->contents.end[i];
257+
if (value == getUnseenValue()) {
258+
value = otherValue;
259+
} else if (otherValue == getUnseenValue()) {
260+
// nothing to do, other has no information
261+
} else if (value != otherValue) {
262+
// 2 different values, this is a merged value
263+
value = getBlockMergeValue(curr, i);
264+
break; // no more work once we see a merge
265+
}
266+
iter++;
267+
}
268+
curr->contents.start[i] = value;
269+
}
270+
}
271+
}
272+
#ifdef RSE_DEBUG
273+
dump("start", curr->contents.start);
274+
#endif
275+
// flow values through it, then add those we can reach if they need an update.
276+
auto currValues = curr->contents.start; // we'll modify this as we go
277+
auto& setps = curr->contents.setps;
278+
for (auto** setp : setps) {
279+
auto* set = (*setp)->cast<SetLocal>();
280+
currValues[set->index] = getValue(set->value, currValues);
281+
}
282+
if (currValues == curr->contents.end) {
283+
// nothing changed, so no more work to do
284+
// note that the first iteration this is always not the case,
285+
// since end contains unseen (and then the comparison ends on
286+
// the first element)
287+
continue;
288+
}
289+
// update the end state and update children
290+
#ifndef NDEBUG
291+
// verify the convergence property mentioned in the NB comment
292+
// above: the value numbers at the end must be nondecreasing
293+
for (Index i = 0; i < numLocals; i++) {
294+
assert(currValues[i] >= curr->contents.end[i]);
295+
}
296+
#endif
297+
curr->contents.end.swap(currValues);
298+
#ifdef RSE_DEBUG
299+
dump("end ", curr->contents.end);
300+
#endif
301+
for (auto* next : curr->out) {
302+
work.push(next);
303+
}
304+
}
305+
}
306+
307+
// optimizing
308+
void optimize() {
309+
// in each block, run the values through the sets,
310+
// and remove redundant sets when we see them
311+
for (auto& block : basicBlocks) {
312+
auto currValues = block->contents.start; // we'll modify this as we go
313+
auto& setps = block->contents.setps;
314+
for (auto** setp : setps) {
315+
auto* set = (*setp)->cast<SetLocal>();
316+
auto oldValue = currValues[set->index];
317+
auto newValue = getValue(set->value, currValues);
318+
auto index = set->index;
319+
if (newValue == oldValue) {
320+
remove(setp);
321+
continue; // no more work to do
322+
}
323+
// update for later steps
324+
currValues[index] = newValue;
325+
}
326+
}
327+
}
328+
329+
void remove(Expression** setp) {
330+
auto* set = (*setp)->cast<SetLocal>();
331+
auto* value = set->value;
332+
if (!set->isTee()) {
333+
auto* drop = ExpressionManipulator::convert<SetLocal, Drop>(set);
334+
drop->value = value;
335+
drop->finalize();
336+
} else {
337+
*setp = value;
338+
}
339+
}
340+
341+
// debugging
342+
343+
void dump(BasicBlock* block) {
344+
std::cout << "====\n";
345+
if (block) {
346+
std::cout << "block: " << block << '\n';
347+
for (auto* out : block->out) {
348+
std::cout << " goes to " << out << '\n';
349+
}
350+
}
351+
for (Index i = 0; i < block->contents.start.size(); i++) {
352+
std::cout << " start[" << i << "] = " << block->contents.start[i] << '\n';
353+
}
354+
for (auto** setp : block->contents.setps) {
355+
std::cout << " " << *setp << '\n';
356+
}
357+
std::cout << "====\n";
358+
}
359+
360+
void dump(const char* desc, LocalValues& values) {
361+
std::cout << desc << ": ";
362+
for (auto x : values) {
363+
std::cout << x << ' ';
364+
}
365+
std::cout << '\n';
366+
}
367+
};
368+
369+
Pass *createRedundantSetEliminationPass() {
370+
return new RedundantSetElimination();
371+
}
372+
373+
} // namespace wasm
374+

0 commit comments

Comments
 (0)