// Inlining.
//
// For now, this does a conservative inlining of all functions that have
// exactly one use, and are fairly small. That should not increase code
// size, and may have speed benefits.
//

#include <atomic>

#include <wasm.h>
#include <pass.h>
#include <wasm-builder.h>
#include <ast_utils.h>
#include <parsing.h>

namespace wasm {

35+ // A limit on how big a function to inline.
36+ static const int INLINING_SIZE_LIMIT = 15 ;
37+
38+ // We only inline a function with a single use.
39+ static const int SINGLE_USE = 1 ;
40+
41+ // A number of uses of a function that is too high for us to
42+ // inline it to all those locations.
43+ static const int TOO_MANY_USES_TO_INLINE = SINGLE_USE + 1 ;
44+
45+ // Map of function name => number of uses. We build the values in
46+ // parallel, using atomic increments. This is safe because we never
47+ // update the map itself in parallel, we only update the values,
48+ // and so the map never allocates or moves values which could be
49+ // a problem with atomics (in fact it would be a problem in general
50+ // as well, not just with atomics, as we don't use a lock in
51+ // parallel access, we depend on the map itself being constant
52+ // when running multiple threads).
53+ typedef std::map<Name, std::atomic<Index>> NameToAtomicIndexMap;
54+
3255struct FunctionUseCounter : public WalkerPass <PostWalker<FunctionUseCounter>> {
3356 bool isFunctionParallel () override { return true ; }
3457
35- FunctionUseCounter (std::map<Name, Index>* output ) : output(output ) {}
58+ FunctionUseCounter (NameToAtomicIndexMap* uses ) : uses(uses ) {}
3659
3760 FunctionUseCounter* create () override {
38- return new FunctionUseCounter (output );
61+ return new FunctionUseCounter (uses );
3962 }
4063
4164 void visitCall (Call *curr) {
42- (*output)[curr->target ]++;
65+ assert (uses->count (curr->target ) > 0 ); // can't add a new element in parallel
66+ (*uses)[curr->target ]++;
4367 }
4468
4569private:
46- std::map<Name, Index>* output ;
70+ NameToAtomicIndexMap* uses ;
4771};
4872
49- struct Action {
50- Call* call;
51- Block* block; // the replacement for the call, into which we should inline
73+ struct InliningAction {
74+ Expression** callSite;
5275 Function* contents;
5376
54- Action (Call* call, Block* block , Function* contents) : call(call), block(block ), contents(contents) {}
77+ InliningAction (Expression** callSite , Function* contents) : callSite(callSite ), contents(contents) {}
5578};
5679
5780struct InliningState {
5881 std::set<Name> canInline;
59- std::map<Name, std::vector<Action >> actionsForFunction; // function name => actions that can be performed in it
82+ std::map<Name, std::vector<InliningAction >> actionsForFunction; // function name => actions that can be performed in it
6083};
6184
6285struct Planner : public WalkerPass <PostWalker<Planner>> {
@@ -68,12 +91,18 @@ struct Planner : public WalkerPass<PostWalker<Planner>> {
6891 return new Planner (state);
6992 }
7093
71- void visitCall (Call *curr) {
72- if (state->canInline .count (curr->target )) {
73- auto * block = Builder (*getModule ()).makeBlock ();
74- block->type = curr->type ;
94+ void visitCall (Call* curr) {
95+ // plan to inline if we know this is valid to inline, and if the call is
96+ // actually performed - if it is dead code, it's pointless to inline
97+ if (state->canInline .count (curr->target ) &&
98+ curr->type != unreachable) {
99+ // nest the call in a block. that way the location of the pointer to the call will not
100+ // change even if we inline multiple times into the same function, otherwise
101+ // call1(call2()) might be a problem
102+ auto * block = Builder (*getModule ()).makeBlock (curr);
75103 replaceCurrent (block);
76- state->actionsForFunction [getFunction ()->name ].emplace_back (curr, block, getModule ()->getFunction (curr->target ));
104+ assert (state->actionsForFunction .count (getFunction ()->name ) > 0 ); // can't add a new element in parallel
105+ state->actionsForFunction [getFunction ()->name ].emplace_back (&block->list [0 ], getModule ()->getFunction (curr->target ));
77106 }
78107 }
79108
@@ -91,13 +120,13 @@ struct Planner : public WalkerPass<PostWalker<Planner>> {
91120
92121// Core inlining logic. Modifies the outside function (adding locals as
93122// needed), and returns the inlined code.
94- // Since we only inline once, and do not need the function afterwards, we
95- // can just reuse all the nodes and even avoid copying.
96- static Expression* doInlining (Module* module , Function* into, Action& action) {
123+ static Expression* doInlining (Module* module , Function* into, InliningAction& action) {
124+ auto * call = (*action.callSite )->cast <Call>();
97125 Builder builder (*module );
98- auto * block = action.block ;
126+ auto * block = Builder (*module ).makeBlock ();
127+ block->type = call->type ;
99128 block->name = Name (std::string (" __inlined_func$" ) + action.contents ->name .str );
100- block-> type = action. contents -> result ;
129+ *action. callSite = block ;
101130 // set up a locals mapping
102131 struct Updater : public PostWalker <Updater> {
103132 std::map<Index, Index> localMapping;
@@ -121,49 +150,59 @@ static Expression* doInlining(Module* module, Function* into, Action& action) {
121150 }
122151 // assign the operands into the params
123152 for (Index i = 0 ; i < action.contents ->params .size (); i++) {
124- block->list .push_back (builder.makeSetLocal (updater.localMapping [i], action. call ->operands [i]));
153+ block->list .push_back (builder.makeSetLocal (updater.localMapping [i], call->operands [i]));
125154 }
126- // update the inlined contents
127- updater. walk (action.contents ->body );
128- block-> list . push_back (action. contents -> body );
129- action. contents -> body = builder. makeUnreachable (); // not strictly needed, since it's going away
155+ // generate and update the inlined contents
156+ auto * contents = ExpressionManipulator::copy (action.contents ->body , * module );
157+ updater. walk ( contents);
158+ block-> list . push_back (contents);
130159 return block;
131160}
132161
133162struct Inlining : public Pass {
163+ // whether to optimize where we inline
164+ bool optimize = false ;
165+
166+ NameToAtomicIndexMap uses;
167+
134168 void run (PassRunner* runner, Module* module ) override {
135169 // keep going while we inline, to handle nesting. TODO: optimize
170+ calculateUses (module );
136171 while (iteration (runner, module )) {}
137172 }
138173
139- bool iteration (PassRunner* runner, Module* module ) {
140- // Count uses
141- std::map<Name, Index> uses;
174+ void calculateUses (Module* module ) {
142175 // fill in uses, as we operate on it in parallel (each function to its own entry)
143176 for (auto & func : module ->functions ) {
144- uses[func->name ] = 0 ;
145- }
146- {
147- PassRunner runner (module );
148- runner.setIsNested (true );
149- runner.add <FunctionUseCounter>(&uses);
150- runner.run ();
177+ uses[func->name ].store (0 );
151178 }
179+ PassRunner runner (module );
180+ runner.setIsNested (true );
181+ runner.add <FunctionUseCounter>(&uses);
182+ runner.run ();
183+ // anything exported or used in a table should not be inlined
152184 for (auto & ex : module ->exports ) {
153185 if (ex->kind == ExternalKind::Function) {
154- uses[ex->value ] = 2 ; // too many, so we ignore it
186+ uses[ex->value ]. store (TOO_MANY_USES_TO_INLINE);
155187 }
156188 }
157189 for (auto & segment : module ->table .segments ) {
158190 for (auto name : segment.data ) {
159- uses[name]++;
191+ if (module ->getFunctionOrNull (name)) {
192+ uses[name].store (TOO_MANY_USES_TO_INLINE);
193+ }
160194 }
161195 }
196+ }
197+
198+ bool iteration (PassRunner* runner, Module* module ) {
162199 // decide which to inline
163200 InliningState state;
164- for (auto iter : uses) {
165- if (iter.second == 1 ) {
166- state.canInline .insert (iter.first );
201+ for (auto & func : module ->functions ) {
202+ auto name = func->name ;
203+ auto numUses = uses[name].load ();
204+ if (canInline (numUses) && worthInlining (module ->getFunction (name))) {
205+ state.canInline .insert (name);
167206 }
168207 }
169208 // fill in actionsForFunction, as we operate on it in parallel (each function to its own entry)
@@ -182,15 +221,21 @@ struct Inlining : public Pass {
182221 std::set<Function*> inlinedInto;
183222 for (auto & func : module ->functions ) {
184223 for (auto & action : state.actionsForFunction [func->name ]) {
224+ Name inlinedName = action.contents ->name ;
185225 doInlining (module , func.get (), action);
186- inlined.insert (action. contents -> name );
226+ inlined.insert (inlinedName );
187227 inlinedInto.insert (func.get ());
228+ uses[inlinedName]--;
229+ assert (uses[inlinedName].load () == 0 );
188230 }
189231 }
190232 // anything we inlined into may now have non-unique label names, fix it up
191233 for (auto func : inlinedInto) {
192234 wasm::UniqueNameMapper::uniquify (func->body );
193235 }
236+ if (optimize && inlinedInto.size () > 0 ) {
237+ doOptimize (inlinedInto, module , runner);
238+ }
194239 // remove functions that we managed to inline, their one use is gone
195240 auto & funcs = module ->functions ;
196241 funcs.erase (std::remove_if (funcs.begin (), funcs.end (), [&inlined](const std::unique_ptr<Function>& curr) {
@@ -199,11 +244,55 @@ struct Inlining : public Pass {
199244 // return whether we did any work
200245 return inlined.size () > 0 ;
201246 }
247+
248+ bool canInline (int numUses) {
249+ return numUses == SINGLE_USE;
250+ }
251+
252+ bool worthInlining (Function* func) {
253+ return Measurer::measure (func->body ) <= INLINING_SIZE_LIMIT;
254+ }
255+
256+ // Run useful optimizations after inlining, things like removing
257+ // unnecessary new blocks, sharing variables, etc.
258+ void doOptimize (std::set<Function*>& funcs, Module* module , PassRunner* parentRunner) {
259+ // save the full list of functions on the side
260+ std::vector<std::unique_ptr<Function>> all;
261+ all.swap (module ->functions );
262+ module ->updateMaps ();
263+ for (auto & func : funcs) {
264+ module ->addFunction (func);
265+ }
266+ PassRunner runner (module , parentRunner->options );
267+ runner.setIsNested (true );
268+ runner.setValidateGlobally (false ); // not a full valid module
269+ runner.add (" remove-unused-brs" );
270+ runner.add (" remove-unused-names" );
271+ runner.add (" coalesce-locals" );
272+ runner.add (" simplify-locals" );
273+ runner.add (" vacuum" );
274+ runner.add (" reorder-locals" );
275+ runner.add (" remove-unused-brs" );
276+ runner.add (" merge-blocks" );
277+ runner.run ();
278+ // restore all the funcs
279+ for (auto & func : module ->functions ) {
280+ func.release ();
281+ }
282+ all.swap (module ->functions );
283+ module ->updateMaps ();
284+ }
202285};
203286
204287Pass *createInliningPass () {
205288 return new Inlining ();
206289}
207290
291+ Pass *createInliningOptimizingPass () {
292+ auto * ret = new Inlining ();
293+ ret->optimize = true ;
294+ return ret;
295+ }
296+
} // namespace wasm
209298
0 commit comments