Skip to content

Commit 0a9ddae

Browse files
authored
Global optimization fixes (#1360)
* run dfe at the very end, as it may be more effective after inlining * optimize reorder-functions * do a final dfe in asm2wasm after all other opts * make inlining deterministic: std::atomic<T> values are not zero-initialized * do global post opts at the end of asm2wasm, and don't also do them in the module builder * fix function type removing * don't inline+optimize when preserving debug info
1 parent 01b2398 commit 0a9ddae

29 files changed

+638
-47717
lines changed

src/asm2wasm.h

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -966,21 +966,6 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
966966
}
967967
// optimize relooper label variable usage at the wasm level, where it is easy
968968
passRunner.add("relooper-jump-threading");
969-
}, [&]() {
970-
// beforeGlobalOptimizations
971-
// if we added any helper functions (like non-trapping i32-div, etc.), then those
972-
// have not been optimized (the optimizing builder has just been fed the asm.js
973-
// functions). Optimize those now. Typically there are very few, just do it
974-
// sequentially.
975-
PassRunner passRunner(&wasm, passOptions);
976-
passRunner.addDefaultFunctionOptimizationPasses();
977-
for (auto& pair : trappingFunctions.getFunctions()) {
978-
auto* func = pair.second;
979-
passRunner.runOnFunction(func);
980-
}
981-
for (auto* func : extraSupportFunctions) {
982-
passRunner.runOnFunction(func);
983-
}
984969
}, debug, false /* do not validate globally yet */);
985970
}
986971

@@ -1190,6 +1175,19 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
11901175

11911176
if (runOptimizationPasses) {
11921177
optimizingBuilder->finish();
1178+
// if we added any helper functions (like non-trapping i32-div, etc.), then those
1179+
// have not been optimized (the optimizing builder has just been fed the asm.js
1180+
// functions). Optimize those now. Typically there are very few, just do it
1181+
// sequentially.
1182+
PassRunner passRunner(&wasm, passOptions);
1183+
passRunner.addDefaultFunctionOptimizationPasses();
1184+
for (auto& pair : trappingFunctions.getFunctions()) {
1185+
auto* func = pair.second;
1186+
passRunner.runOnFunction(func);
1187+
}
1188+
for (auto* func : extraSupportFunctions) {
1189+
passRunner.runOnFunction(func);
1190+
}
11931191
}
11941192
wasm.debugInfoFileNames = std::move(preprocessor.debugInfoFileNames);
11951193

@@ -1382,7 +1380,7 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
13821380
}
13831381
};
13841382

1385-
PassRunner passRunner(&wasm);
1383+
PassRunner passRunner(&wasm, passOptions);
13861384
passRunner.setFeatures(passOptions.features);
13871385
if (debug) {
13881386
passRunner.setDebug(true);
@@ -1411,6 +1409,11 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
14111409
passRunner.add<ApplyDebugInfo>();
14121410
passRunner.add("vacuum"); // FIXME maybe just remove the nops that were debuginfo nodes, if not optimizing?
14131411
}
1412+
if (runOptimizationPasses) {
1413+
// do final global optimizations after all function work is done
1414+
// (e.g. duplicate funcs may appear thanks to that work)
1415+
passRunner.addDefaultGlobalOptimizationPostPasses();
1416+
}
14141417
passRunner.run();
14151418

14161419
// remove the debug info intrinsic

src/passes/Inlining.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,15 @@ static const int FLEXIBLE_SIZE_LIMIT = 20;
5050
struct FunctionInfo {
5151
std::atomic<Index> calls;
5252
Index size;
53-
bool lightweight = true;
54-
bool usedGlobally = false; // in a table or export
53+
std::atomic<bool> lightweight;
54+
bool usedGlobally; // in a table or export
55+
56+
FunctionInfo() {
57+
calls = 0;
58+
size = 0;
59+
lightweight = true;
60+
usedGlobally = false;
61+
}
5562

5663
bool worthInlining(PassOptions& options, bool allowMultipleInliningsPerFunction) {
5764
// if it's big, it's just not worth doing (TODO: investigate more)

src/passes/RemoveUnusedModuleElements.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,7 @@ struct RemoveUnusedModuleElements : public Pass {
268268
module->functionTypes.erase(std::remove_if(module->functionTypes.begin(), module->functionTypes.end(), [&needed](std::unique_ptr<FunctionType>& type) {
269269
return needed.count(type.get()) == 0;
270270
}), module->functionTypes.end());
271+
module->updateMaps();
271272
}
272273
};
273274

src/passes/ReorderFunctions.cpp

Lines changed: 45 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,13 @@
1919
// binaries because fewer bytes are needed to encode references to frequently
2020
// used functions.
2121
//
22+
// This may incur a tradeoff, though, as while it reduces binary size, it may
23+
// increase gzip size. This might be because the new order has the functions in
24+
// a less beneficial position for compression, that is, mutually-compressible
25+
// functions are no longer together (when they were before, in the original order,
26+
// the has some natural tendency one way or the other). TODO: investigate
27+
// similarity ordering here.
28+
//
2229

2330

2431
#include <memory>
@@ -28,10 +35,41 @@
2835

2936
namespace wasm {
3037

31-
struct ReorderFunctions : public WalkerPass<PostWalker<ReorderFunctions>> {
32-
std::map<Name, uint32_t> counts;
38+
typedef std::unordered_map<Name, std::atomic<Index>> NameCountMap;
39+
40+
struct CallCountScanner : public WalkerPass<PostWalker<CallCountScanner>> {
41+
bool isFunctionParallel() override { return true; }
42+
43+
CallCountScanner(NameCountMap* counts) : counts(counts) {}
44+
45+
CallCountScanner* create() override {
46+
return new CallCountScanner(counts);
47+
}
48+
49+
void visitCall(Call* curr) {
50+
assert(counts->count(curr->target) > 0); // can't add a new element in parallel
51+
(*counts)[curr->target]++;
52+
}
3353

34-
void visitModule(Module *module) {
54+
private:
55+
NameCountMap* counts;
56+
};
57+
58+
struct ReorderFunctions : public Pass {
59+
void run(PassRunner* runner, Module* module) override {
60+
NameCountMap counts;
61+
// fill in info, as we operate on it in parallel (each function to its own entry)
62+
for (auto& func : module->functions) {
63+
counts[func->name];
64+
}
65+
// find counts on function calls
66+
{
67+
PassRunner runner(module);
68+
runner.setIsNested(true);
69+
runner.add<CallCountScanner>(&counts);
70+
runner.run();
71+
}
72+
// find counts on global usages
3573
if (module->start.is()) {
3674
counts[module->start]++;
3775
}
@@ -43,19 +81,15 @@ struct ReorderFunctions : public WalkerPass<PostWalker<ReorderFunctions>> {
4381
counts[curr]++;
4482
}
4583
}
46-
std::sort(module->functions.begin(), module->functions.end(), [this](
84+
// sort
85+
std::sort(module->functions.begin(), module->functions.end(), [&counts](
4786
const std::unique_ptr<Function>& a,
4887
const std::unique_ptr<Function>& b) -> bool {
49-
if (this->counts[a->name] == this->counts[b->name]) {
88+
if (counts[a->name] == counts[b->name]) {
5089
return strcmp(a->name.str, b->name.str) > 0;
5190
}
52-
return this->counts[a->name] > this->counts[b->name];
91+
return counts[a->name] > counts[b->name];
5392
});
54-
counts.clear();
55-
}
56-
57-
void visitCall(Call *curr) {
58-
counts[curr->target]++;
5993
}
6094
};
6195

src/passes/pass.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -180,11 +180,14 @@ void PassRunner::addDefaultGlobalOptimizationPrePasses() {
180180
}
181181

182182
void PassRunner::addDefaultGlobalOptimizationPostPasses() {
183-
add("duplicate-function-elimination"); // optimizations show more functions as duplicate
184-
add("remove-unused-module-elements");
185-
if (options.optimizeLevel >= 2 || options.shrinkLevel >= 2) {
183+
// inline when working hard, and when not preserving debug info
184+
// (inlining+optimizing can remove the annotations)
185+
if ((options.optimizeLevel >= 2 || options.shrinkLevel >= 2) &&
186+
!options.debugInfo) {
186187
add("inlining-optimizing");
187188
}
189+
add("duplicate-function-elimination"); // optimizations show more functions as duplicate
190+
add("remove-unused-module-elements");
188191
add("memory-packing");
189192
}
190193

src/wasm-module-building.h

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,9 @@ static std::mutex debug;
3535
// Helps build wasm modules efficiently. If you build a module by
3636
// adding function by function, and you want to optimize them, this class
3737
// starts optimizing using worker threads *while you are still adding*.
38-
// It runs function optimization passes at that time, and then at the end
39-
// it runs global module-level optimization passes. The result is a fully
40-
// optimized module, optimized while being generated.
38+
// It runs function optimization passes at that time. This does not
39+
// run global optimization after that by default, but you can do that
40+
// to by calling optimizeGlobally().
4141
//
4242
// This might also be faster than normal module optimization since it
4343
// runs all passes on each function, then goes on to the next function
@@ -75,7 +75,6 @@ class OptimizingIncrementalModuleBuilder {
7575
uint32_t numFunctions;
7676
PassOptions passOptions;
7777
std::function<void (PassRunner&)> addPrePasses;
78-
std::function<void ()> beforeGlobalOptimizations;
7978
Function* endMarker;
8079
std::atomic<Function*>* list;
8180
uint32_t nextFunction; // only used on main thread
@@ -93,10 +92,9 @@ class OptimizingIncrementalModuleBuilder {
9392
// this bounds helps avoid locking.
9493
OptimizingIncrementalModuleBuilder(Module* wasm, Index numFunctions, PassOptions passOptions,
9594
std::function<void (PassRunner&)> addPrePasses,
96-
std::function<void ()> beforeGlobalOptimizations,
9795
bool debug, bool validateGlobally)
9896
: wasm(wasm), numFunctions(numFunctions), passOptions(passOptions),
99-
addPrePasses(addPrePasses), beforeGlobalOptimizations(beforeGlobalOptimizations),
97+
addPrePasses(addPrePasses),
10098
endMarker(nullptr), list(nullptr), nextFunction(0),
10199
numWorkers(0), liveWorkers(0), activeWorkers(0), availableFuncs(0), finishedFuncs(0),
102100
finishing(false), debug(debug), validateGlobally(validateGlobally) {
@@ -178,7 +176,6 @@ class OptimizingIncrementalModuleBuilder {
178176
wakeAllWorkers();
179177
waitUntilAllFinished();
180178
}
181-
optimizeGlobally();
182179
// TODO: clear side thread allocators from module allocator, as these threads were transient
183180
}
184181

@@ -230,11 +227,9 @@ class OptimizingIncrementalModuleBuilder {
230227
}
231228

232229
void optimizeGlobally() {
233-
beforeGlobalOptimizations();
234230
PassRunner passRunner(wasm, passOptions);
235231
passRunner.addDefaultGlobalOptimizationPostPasses();
236232
passRunner.run();
237-
238233
}
239234

240235
// worker code

src/wasm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -784,6 +784,7 @@ class Module {
784784
void removeImport(Name name);
785785
void removeExport(Name name);
786786
void removeFunction(Name name);
787+
void removeFunctionType(Name name);
787788
// TODO: remove* for other elements
788789

789790
void updateMaps();

src/wasm/wasm.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -747,7 +747,17 @@ void Module::removeFunction(Name name) {
747747
functionsMap.erase(name);
748748
}
749749

750-
// TODO: remove* for other elements
750+
void Module::removeFunctionType(Name name) {
751+
for (size_t i = 0; i < functionTypes.size(); i++) {
752+
if (functionTypes[i]->name == name) {
753+
functionTypes.erase(functionTypes.begin() + i);
754+
break;
755+
}
756+
}
757+
functionTypesMap.erase(name);
758+
}
759+
760+
// TODO: remove* for other elements
751761

752762
void Module::updateMaps() {
753763
functionsMap.clear();

test/debugInfo.fromasm

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
(module
2-
(type $FUNCSIG$vii (func (param i32 i32)))
32
(import "env" "memory" (memory $0 256 256))
43
(import "env" "memoryBase" (global $memoryBase i32))
54
(data (get_global $memoryBase) "debugInfo.asm.js")
@@ -30,8 +29,17 @@
3029
(i32.const 1)
3130
)
3231
)
33-
(func $opts (; 2 ;) (param $0 i32) (param $1 i32) (result i32)
34-
(local $2 i32)
32+
(func $i32s-rem (; 2 ;) (param $0 i32) (param $1 i32) (result i32)
33+
(if (result i32)
34+
(get_local $1)
35+
(i32.rem_s
36+
(get_local $0)
37+
(get_local $1)
38+
)
39+
(i32.const 0)
40+
)
41+
)
42+
(func $opts (; 3 ;) (param $0 i32) (param $1 i32) (result i32)
3543
;;@ even-opted.cpp:1:0
3644
(set_local $0
3745
(i32.add
@@ -46,22 +54,16 @@
4654
(get_local $0)
4755
)
4856
)
57+
;;@ even-opted.cpp:3:0
4958
(i32.add
50-
(if (result i32)
51-
;;@ even-opted.cpp:3:0
52-
(tee_local $2
53-
(get_local $1)
54-
)
55-
(i32.rem_s
56-
(get_local $0)
57-
(get_local $2)
58-
)
59-
(i32.const 0)
59+
(call $i32s-rem
60+
(get_local $0)
61+
(get_local $1)
6062
)
6163
(get_local $1)
6264
)
6365
)
64-
(func $fib (; 3 ;) (param $0 i32) (result i32)
66+
(func $fib (; 4 ;) (param $0 i32) (result i32)
6567
(local $1 i32)
6668
(local $2 i32)
6769
(local $3 i32)
@@ -119,7 +121,7 @@
119121
;;@ fib.c:8:0
120122
(get_local $1)
121123
)
122-
(func $switch_reach (; 4 ;) (param $0 i32) (result i32)
124+
(func $switch_reach (; 5 ;) (param $0 i32) (result i32)
123125
(local $1 i32)
124126
(set_local $1
125127
(block $__rjto$0 (result i32)
@@ -179,7 +181,7 @@
179181
;;@ /tmp/emscripten_test_binaryen2_28hnAe/src.c:59950:0
180182
(get_local $1)
181183
)
182-
(func $nofile (; 5 ;)
184+
(func $nofile (; 6 ;)
183185
;;@ (unknown):1337:0
184186
(call $nofile)
185187
)

0 commit comments

Comments
 (0)