Skip to content

Commit 689fe40

Browse files
authored
Consistently optimize small added constants into load/store offsets (#1924)
See #1919 - we did not do this consistently before. This adds a lowMemoryUnused option to PassOptions, which can be passed on the command line as --low-memory-unused. If enabled, we run the new optimize-added-constants pass, which does the real work here, replacing older code in post-emscripten. Aside from running at the proper time (unlike the old pass, see #1919), the new pass also has a -propagate mode, which can perform transformations like this: "y = x + 10 [..] load(y) [..] load(y)" => "y = x + 10 [..] load(x, offset=10) [..] load(x, offset=10)". That is, it can propagate such offsets to the loads/stores. This pattern is common in big interpreter loops, where the pointers are offsets into a big struct of state. The pass does this propagation by using a new feature of LocalGraph, which can verify which locals are in SSA mode. Binaryen IR is not SSA (intentionally, since it's a later IR), but if a local only has a single set for all of its gets, then that local is effectively in SSA form and can be optimized. The tricky thing is that all locals are initialized to zero, so there are at minimum two sets. But if we verify that the real set dominates all the gets, then the zero initialization cannot reach them, and we are safe. This PR also makes safe-heap aware of lowMemoryUnused. When that option is set, safe-heap checks for not just an access of address 0, but of anything in the range 0-1023. This makes zlib 5% faster, with either the wasm backend or asm2wasm. It also makes it 0.5% smaller. It also helps sqlite (1.5% faster) and lua (1% faster).
1 parent f59c303 commit 689fe40

36 files changed

+26550
-15348
lines changed

build-js.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ echo "building shared bitcode"
117117
$BINARYEN_SRC/passes/MinifyImportsAndExports.cpp \
118118
$BINARYEN_SRC/passes/NameList.cpp \
119119
$BINARYEN_SRC/passes/NoExitRuntime.cpp \
120+
$BINARYEN_SRC/passes/OptimizeAddedConstants.cpp \
120121
$BINARYEN_SRC/passes/OptimizeInstructions.cpp \
121122
$BINARYEN_SRC/passes/PickLoadSigns.cpp \
122123
$BINARYEN_SRC/passes/PostEmscripten.cpp \

src/asm2wasm.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -957,6 +957,7 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
957957
}
958958
optimizingBuilder = make_unique<OptimizingIncrementalModuleBuilder>(&wasm, numFunctions, passOptions, [&](PassRunner& passRunner) {
959959
// addPrePasses
960+
passRunner.options.lowMemoryUnused = true;
960961
if (debug) {
961962
passRunner.setDebug(true);
962963
passRunner.setValidateGlobally(false);
@@ -1189,6 +1190,7 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
11891190
// functions). Optimize those now. Typically there are very few, just do it
11901191
// sequentially.
11911192
PassRunner passRunner(&wasm, passOptions);
1193+
passRunner.options.lowMemoryUnused = true;
11921194
passRunner.addDefaultFunctionOptimizationPasses();
11931195
for (auto& pair : trappingFunctions.getFunctions()) {
11941196
auto* func = pair.second;
@@ -1447,6 +1449,7 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
14471449
};
14481450

14491451
PassRunner passRunner(&wasm, passOptions);
1452+
passRunner.options.lowMemoryUnused = true;
14501453
passRunner.setFeatures(passOptions.features);
14511454
if (debug) {
14521455
passRunner.setDebug(true);

src/ir/LocalGraph.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,5 +242,38 @@ void LocalGraph::computeInfluences() {
242242
}
243243
}
244244

245+
void LocalGraph::computeSSAIndexes() {
246+
std::unordered_map<Index, std::set<SetLocal*>> indexSets;
247+
for (auto& pair : getSetses) {
248+
auto* get = pair.first;
249+
auto& sets = pair.second;
250+
for (auto* set : sets) {
251+
indexSets[get->index].insert(set);
252+
}
253+
}
254+
for (auto& pair : locations) {
255+
auto* curr = pair.first;
256+
if (auto* set = curr->dynCast<SetLocal>()) {
257+
auto& sets = indexSets[set->index];
258+
if (sets.size() == 1 && *sets.begin() != curr) {
259+
// While it has just one set, it is not the right one (us),
260+
// so mark it invalid.
261+
sets.clear();
262+
}
263+
}
264+
}
265+
for (auto& pair : indexSets) {
266+
auto index = pair.first;
267+
auto& sets = pair.second;
268+
if (sets.size() == 1) {
269+
SSAIndexes.insert(index);
270+
}
271+
}
272+
}
273+
274+
// Reports whether local index |x| is among the indexes recorded in SSAIndexes
// (which is populated by computeSSAIndexes()).
bool LocalGraph::isSSA(Index x) {
  return SSAIndexes.find(x) != SSAIndexes.end();
}
277+
245278
} // namespace wasm
246279

src/ir/local-graph.h

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,40 @@ struct LocalGraph {
4545
// value (0 for a var, the received value for a param)
4646
Locations locations; // where each get and set is (for easy replacing)
4747

48-
// optional computation: compute the influence graphs between sets and gets
49-
// (useful for algorithms that propagate changes)
48+
// Optional: compute the influence graphs between sets and gets
49+
// (useful for algorithms that propagate changes).
50+
51+
void computeInfluences();
5052

5153
std::unordered_map<GetLocal*, std::unordered_set<SetLocal*>> getInfluences; // for each get, the sets whose values are influenced by that get
5254
std::unordered_map<SetLocal*, std::unordered_set<GetLocal*>> setInfluences; // for each set, the gets whose values are influenced by that set
5355

54-
void computeInfluences();
56+
// Optional: Compute the local indexes that are SSA, in the sense of
57+
// * a single set for all the gets for that local index
58+
// * the set dominates all the gets (logically implied by the former property)
59+
// * no other set (aside from the zero-init)
60+
// The third property is not exactly standard SSA, but is useful since we are not in
61+
// SSA form in our IR. To see why it matters, consider these:
62+
//
63+
// x = 0 // zero init
64+
// [..]
65+
// x = 10
66+
// y = x + 20
67+
// x = 30 // !!!
68+
// f(y)
69+
//
70+
// The !!! line violates that property - it is another set for x, and it may interfere
71+
// say, with replacing f(y) with f(x + 20). Instead, if we know the only other possible set for x
72+
// is the zero init, then things like the !!! line cannot exist, and it is valid to replace
73+
// f(y) with f(x + 20).
74+
// (This could be simpler, but in wasm the zero init always exists.)
75+
76+
void computeSSAIndexes();
77+
78+
bool isSSA(Index x);
79+
80+
private:
81+
std::set<Index> SSAIndexes;
5582
};
5683

5784
} // namespace wasm

src/pass.h

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -56,14 +56,26 @@ struct PassRegistry {
5656
};
5757

5858
struct PassOptions {
59-
bool debug = false; // run passes in debug mode, doing extra validation and timing checks
60-
bool validate = true; // whether to run the validator to check for errors
61-
bool validateGlobally = false; // when validating validate globally and not just locally
62-
int optimizeLevel = 0; // 0, 1, 2 correspond to -O0, -O1, -O2, etc.
63-
int shrinkLevel = 0; // 0, 1, 2 correspond to -O0, -Os, -Oz
64-
bool ignoreImplicitTraps = false; // optimize assuming things like div by 0, bad load/store, will not trap
65-
bool debugInfo = false; // whether to try to preserve debug info through, which are special calls
66-
FeatureSet features = FeatureSet::All; // Which wasm features to accept, and be allowed to use
59+
// Run passes in debug mode, doing extra validation and timing checks.
60+
bool debug = false;
61+
// Whether to run the validator to check for errors.
62+
bool validate = true;
63+
// When validating, validate globally and not just locally.
64+
bool validateGlobally = false;
65+
// 0, 1, 2 correspond to -O0, -O1, -O2, etc.
66+
int optimizeLevel = 0;
67+
// 0, 1, 2 correspond to -O0, -Os, -Oz
68+
int shrinkLevel = 0;
69+
// Optimize assuming things like div by 0, bad load/store, will not trap.
70+
bool ignoreImplicitTraps = false;
71+
// Optimize assuming that the low 1K of memory is not valid memory for the application
72+
// to use. In that case, we can optimize load/store offsets in many cases.
73+
bool lowMemoryUnused = false;
74+
enum { LowMemoryBound = 1024 };
75+
// Whether to try to preserve debug info through, which are special calls.
76+
bool debugInfo = false;
77+
// Which wasm features to accept, and be allowed to use.
78+
FeatureSet features = FeatureSet::All;
6779

6880
void setDefaultOptimizationOptions() {
6981
// -Os is our default

src/passes/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ SET(passes_SOURCES
3131
MinifyImportsAndExports.cpp
3232
NameList.cpp
3333
NoExitRuntime.cpp
34+
OptimizeAddedConstants.cpp
3435
OptimizeInstructions.cpp
3536
PickLoadSigns.cpp
3637
PostEmscripten.cpp

src/passes/CodePushing.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,8 @@ struct LocalAnalyzer : public PostWalker<LocalAnalyzer> {
7777
}
7878
};
7979

80-
// Implement core optimization logic in a struct, used and then discarded entirely
81-
// for each block
80+
// Implements core optimization logic. Used and then discarded entirely
81+
// for each block.
8282
class Pusher {
8383
ExpressionList& list;
8484
LocalAnalyzer& analyzer;
@@ -92,7 +92,7 @@ class Pusher {
9292
// continuing forward.
9393
Index relevant = list.size() - 1; // we never need to push past a final element, as
9494
// we couldn't be used after it.
95-
Index nothing = -1;
95+
const Index nothing = -1;
9696
Index i = 0;
9797
Index firstPushable = nothing;
9898
while (i < relevant) {

0 commit comments

Comments
 (0)