Skip to content

Commit 152caa3

Browse files
aykevl authored and deadprogram committed
compiler: do not create stack objects for functions that don't allocate
This is a useful optimization for targets with the portable garbage collector. It isn't as big as you might guess but it does optimize functions inside the garbage collector itself (which obviously should not allocate). WebAssembly output in one test is about 1% smaller.
1 parent 7ed6b45 commit 152caa3

File tree

1 file changed

+85
-1
lines changed

1 file changed

+85
-1
lines changed

compiler/gc.go

Lines changed: 85 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,9 @@ func typeHasPointers(t llvm.Type) bool {
124124
// makeGCStackSlots converts all calls to runtime.trackPointer to explicit
125125
// stores to stack slots that are scannable by the GC.
126126
func (c *Compiler) makeGCStackSlots() bool {
127-
if c.mod.NamedFunction("runtime.alloc").IsNil() {
127+
// Check whether there are allocations at all.
128+
alloc := c.mod.NamedFunction("runtime.alloc")
129+
if alloc.IsNil() {
128130
// Nothing to do. Make sure all remaining bits and pieces for stack
129131
// chains are neutralized.
130132
for _, call := range getUses(c.mod.NamedFunction("runtime.trackPointer")) {
@@ -142,6 +144,54 @@ func (c *Compiler) makeGCStackSlots() bool {
142144
return false // nothing to do
143145
}
144146

147+
// Look at *all* functions to see whether they are free of function pointer
148+
// calls.
149+
// This takes less than 5ms for ~100kB of WebAssembly but would perhaps be
150+
// faster when written in C++ (to avoid the CGo overhead).
151+
funcsWithFPCall := map[llvm.Value]struct{}{}
152+
n := 0
153+
for fn := c.mod.FirstFunction(); !fn.IsNil(); fn = llvm.NextFunction(fn) {
154+
n++
155+
if _, ok := funcsWithFPCall[fn]; ok {
156+
continue // already found
157+
}
158+
done := false
159+
for bb := fn.FirstBasicBlock(); !bb.IsNil() && !done; bb = llvm.NextBasicBlock(bb) {
160+
for call := bb.FirstInstruction(); !call.IsNil() && !done; call = llvm.NextInstruction(call) {
161+
if call.IsACallInst().IsNil() {
162+
continue // only looking at calls
163+
}
164+
called := call.CalledValue()
165+
if !called.IsAFunction().IsNil() {
166+
continue // only looking for function pointers
167+
}
168+
funcsWithFPCall[fn] = struct{}{}
169+
markParentFunctions(funcsWithFPCall, fn)
170+
done = true
171+
}
172+
}
173+
}
174+
175+
// Determine which functions need stack objects. Many leaf functions don't
176+
// need it: it only causes overhead for them.
177+
// Actually, in one test it was only able to eliminate stack objects from 12%
178+
// of functions that had a call to runtime.trackPointer (8 out of 68
179+
// functions), so this optimization is not as big as it may seem.
180+
allocatingFunctions := map[llvm.Value]struct{}{} // set of allocating functions
181+
182+
// Work from runtime.alloc and trace all parents to check which functions do
183+
// a heap allocation (and thus which functions do not).
184+
markParentFunctions(allocatingFunctions, alloc)
185+
186+
// Also trace all functions that call a function pointer.
187+
for fn := range funcsWithFPCall {
188+
// Assume that functions that call a function pointer do a heap
189+
// allocation as a conservative guess because the called function might
190+
// do a heap allocation.
191+
allocatingFunctions[fn] = struct{}{}
192+
markParentFunctions(allocatingFunctions, fn)
193+
}
194+
145195
// Collect some variables used below in the loop.
146196
stackChainStart := c.mod.NamedGlobal("runtime.stackChainStart")
147197
if stackChainStart.IsNil() {
@@ -161,6 +211,18 @@ func (c *Compiler) makeGCStackSlots() bool {
161211
// Pick the parent function.
162212
fn := call.InstructionParent().Parent()
163213

214+
if _, ok := allocatingFunctions[fn]; !ok {
215+
// Neither this function nor any of the functions it calls (recursively)
216+
// allocate anything from the heap, so it will not trigger a garbage
217+
// collection cycle. Thus, it does not need to track local pointer
218+
// values.
219+
// This is a useful optimization but not as big as you might guess,
220+
// as described above (it avoids stack objects for ~12% of
221+
// functions).
222+
call.EraseFromParentAsInstruction()
223+
continue
224+
}
225+
164226
// Find all calls to runtime.trackPointer in this function.
165227
var calls []llvm.Value
166228
var returns []llvm.Value
@@ -385,3 +447,25 @@ func (c *Compiler) getPointerBitmap(typ llvm.Type, name string) *big.Int {
385447
panic("unknown type kind of global: " + name)
386448
}
387449
}
450+
451+
// markParentFunctions traverses all parent function calls (recursively) and
452+
// adds them to the set of marked functions. It only considers function calls:
453+
// any other uses of such a function is ignored.
454+
func markParentFunctions(marked map[llvm.Value]struct{}, fn llvm.Value) {
455+
worklist := []llvm.Value{fn}
456+
for len(worklist) != 0 {
457+
fn := worklist[len(worklist)-1]
458+
worklist = worklist[:len(worklist)-1]
459+
for _, use := range getUses(fn) {
460+
if use.IsACallInst().IsNil() || use.CalledValue() != fn {
461+
// Not the parent function.
462+
continue
463+
}
464+
parent := use.InstructionParent().Parent()
465+
if _, ok := marked[parent]; !ok {
466+
marked[parent] = struct{}{}
467+
worklist = append(worklist, parent)
468+
}
469+
}
470+
}
471+
}

0 commit comments

Comments
 (0)