Skip to content

Commit 299c7e9

Browse files
authored
gopy,bind: fix storage leaks due to C.Cstring allocations w/ pybindgen
As noted in #186 the use of C.Cstring in conjunction with pybindgen leads to storage leaks. This CL attempts at fixing those by patching the output of pybindgen to call free on the leaked strings. NOTE, that I plan to look at pybindgen to add an option to it so that it can generate the correct code. In the meantime, I think this change is useful to have since it makes the generated code much more usable. Fixes #186.
1 parent 5266c83 commit 299c7e9

File tree

7 files changed

+338
-0
lines changed

7 files changed

+338
-0
lines changed

SUPPORT_MATRIX.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ Feature |py2 | py3
88
_examples/arrays | yes | yes
99
_examples/cgo | yes | yes
1010
_examples/consts | yes | yes
11+
_examples/cstrings | yes | yes
1112
_examples/empty | yes | yes
1213
_examples/funcs | yes | yes
1314
_examples/gopygc | yes | yes

_examples/cstrings/cstrings.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// Copyright 2020 The go-python Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package cstrings
6+
7+
import "strings"
8+
9+
func StringValue(s string, n int) string {
10+
return strings.Repeat(s, n)
11+
}
12+
13+
type StructWithString struct {
14+
V string
15+
T int
16+
}
17+
18+
type NestedStructWithString struct {
19+
A int
20+
S StructWithString
21+
}
22+
23+
func StringInStruct(s string, n int) StructWithString {
24+
return StructWithString{
25+
V: strings.Repeat(s, n),
26+
}
27+
}
28+
29+
func StringInNestedStruct(s string, n int) NestedStructWithString {
30+
return NestedStructWithString{
31+
A: 2,
32+
S: StructWithString{
33+
V: strings.Repeat(s, n),
34+
},
35+
}
36+
}
37+
38+
func StringSlice(s string, n int) []string {
39+
return []string{
40+
strings.Repeat(s, n),
41+
}
42+
}
43+
44+
func StringMap(s string, n int) map[string]string {
45+
return map[string]string{
46+
"a": strings.Repeat(s, n),
47+
}
48+
}

_examples/cstrings/test.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Copyright 2020 The go-python Authors. All rights reserved.
2+
# Use of this source code is governed by a BSD-style
3+
# license that can be found in the LICENSE file.
4+
5+
# py2/py3 compat
6+
from __future__ import print_function
7+
8+
import cstrings
9+
import gc
10+
import resource
11+
12+
verbose = False
13+
iterations = 10000
14+
size = 4096
15+
16+
17+
def gofnString():
18+
return cstrings.StringValue("a", size)
19+
20+
21+
def gofnStruct():
22+
s = cstrings.StringInStruct("a", size)
23+
return s.V
24+
25+
26+
def gofnNestedStruct():
27+
s = cstrings.StringInNestedStruct("a", size)
28+
return s.S.V
29+
30+
31+
def gofnSlice():
32+
s = cstrings.StringSlice("a", size)
33+
return s[0]
34+
35+
36+
def gofnMap():
37+
m = cstrings.StringMap("a", size)
38+
return m["a"]
39+
40+
41+
def print_memory(s):
42+
m = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
43+
if verbose:
44+
print(s, m)
45+
return m
46+
47+
48+
def _run_fn(fn):
49+
memoryvals = []
50+
t = [fn() for _ in range(iterations)]
51+
memoryvals.append(print_memory(
52+
"Memory usage after first list creation is:"))
53+
54+
t = [fn() for _ in range(iterations)]
55+
memoryvals.append(print_memory(
56+
"Memory usage after second list creation is:"))
57+
58+
gc.collect()
59+
memoryvals.append(print_memory("Memory usage after GC:"))
60+
61+
t = [fn() for _ in range(iterations)]
62+
memoryvals.append(print_memory(
63+
"Memory usage after third list creation is:"))
64+
65+
gc.collect()
66+
memoryvals.append(print_memory("Memory usage after GC:"))
67+
return memoryvals
68+
69+
70+
for fn in [gofnString, gofnStruct, gofnNestedStruct, gofnSlice, gofnMap]:
71+
alloced = size * iterations
72+
a = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
73+
pass1 = _run_fn(fn)
74+
b = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
75+
pass2 = _run_fn(fn)
76+
c = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
77+
if verbose:
78+
print(fn.__name__, pass1)
79+
print(fn.__name__, pass2)
80+
print(fn.__name__, a, b, c)
81+
82+
print(fn.__name__, "leaked: ", (c-b) > (size * iterations))
83+
84+
# bump up the size of each successive test to ensure that leaks
85+
# are not absorbed by previous rss growth.
86+
size += 4096
87+
88+
89+
print("OK")

bind/gen.go

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,8 @@ build:
316316
# use pybindgen to build the %[1]s.c file which are the CPython wrappers to cgo wrappers..
317317
# note: pip install pybindgen to get pybindgen if this fails
318318
$(PYTHON) build.py
319+
# patch storage leaks in pybindgen output
320+
go run patch-leaks.go %[1]s.c
319321
# build the _%[1]s$(LIBEXT) library that contains the cgo and CPython wrappers
320322
# generated %[1]s.py python wrapper imports this c-code package
321323
$(GCC) %[1]s.c %[6]s %[1]s_go$(LIBEXT) -o _%[1]s$(LIBEXT) $(CFLAGS) $(LDFLAGS) -fPIC --shared -w
@@ -357,11 +359,91 @@ build:
357359
# use pybindgen to build the %[1]s.c file which are the CPython wrappers to cgo wrappers..
358360
# note: pip install pybindgen to get pybindgen if this fails
359361
$(PYTHON) build.py
362+
# patch storage leaks in pybindgen output
363+
go run patch-leaks.go %[1]s.c
360364
# build the executable
361365
- rm %[1]s_go$(LIBEXT)
362366
$(GOBUILD) -o py%[1]s
363367
364368
`
369+
370+
patchLeaksPreamble = `// patch-leaks.go for post-processing %[1]s.
371+
// File is generated by gopy. Do not edit.
372+
// %[2]s
373+
// +build ignore
374+
375+
package main
376+
377+
import (
378+
"bufio"
379+
"bytes"
380+
"io/ioutil"
381+
"log"
382+
"os"
383+
"strings"
384+
)
385+
386+
const (
387+
cStringLine = " py_retval = Py_BuildValue((char *) \"s\", retval);"
388+
)
389+
var cstringFunctions = []string{
390+
`
391+
392+
patchLeaksPostamble = `
393+
}
394+
395+
func isCString(line string, names []string) bool {
396+
for _, cfn := range names {
397+
if strings.HasPrefix(line, cfn) {
398+
return true
399+
}
400+
}
401+
return false
402+
}
403+
404+
func patchCString(line string, out *bytes.Buffer) bool {
405+
out.WriteString(line)
406+
out.Write([]byte{'\n'})
407+
switch line {
408+
case "}":
409+
return false
410+
case cStringLine:
411+
out.WriteString(" free(retval);\n")
412+
return false
413+
}
414+
return true
415+
}
416+
417+
func main() {
418+
file := os.Args[1]
419+
buf, err := ioutil.ReadFile(file)
420+
if err != nil {
421+
log.Fatal(err)
422+
}
423+
sc := bufio.NewScanner(bytes.NewBuffer(buf))
424+
obuf := &bytes.Buffer{}
425+
var cstring bool
426+
for sc.Scan() {
427+
line := sc.Text()
428+
if cstring {
429+
cstring = patchCString(line, obuf)
430+
continue
431+
}
432+
cstring = isCString(line, cstringFunctions)
433+
obuf.WriteString(line)
434+
obuf.Write([]byte{'\n'})
435+
}
436+
437+
if err := sc.Err(); err != nil {
438+
log.Fatal(err)
439+
}
440+
441+
err = ioutil.WriteFile(file, obuf.Bytes(), 0644)
442+
if err != nil {
443+
log.Fatal(err)
444+
}
445+
}
446+
`
365447
)
366448

367449
// thePyGen is the current pyGen which is needed in symbols to lookup
@@ -398,6 +480,7 @@ func GenPyBind(mode BuildMode, odir, outname, cmdstr, vm, mainstr, libext, extra
398480

399481
type pyGen struct {
400482
gofile *printer
483+
leakfile *printer
401484
pybuild *printer
402485
pywrap *printer
403486
makefile *printer
@@ -445,9 +528,11 @@ func (g *pyGen) genPackageMap() {
445528

446529
func (g *pyGen) genPre() {
447530
g.gofile = &printer{buf: new(bytes.Buffer), indentEach: []byte("\t")}
531+
g.leakfile = &printer{buf: new(bytes.Buffer), indentEach: []byte("\t")}
448532
g.pybuild = &printer{buf: new(bytes.Buffer), indentEach: []byte("\t")}
449533
g.makefile = &printer{buf: new(bytes.Buffer), indentEach: []byte("\t")}
450534
g.genGoPreamble()
535+
g.genLeaksPreamble()
451536
g.genPyBuildPreamble()
452537
g.genMakefile()
453538
oinit, err := os.Create(filepath.Join(g.odir, "__init__.py"))
@@ -468,8 +553,10 @@ func (g *pyGen) genPrintOut(outfn string, pr *printer) {
468553
func (g *pyGen) genOut() {
469554
g.pybuild.Printf("\nmod.generate(open('%v.c', 'w'))\n\n", g.outname)
470555
g.gofile.Printf("\n\n")
556+
g.genLeaksPostamble()
471557
g.makefile.Printf("\n\n")
472558
g.genPrintOut(g.outname+".go", g.gofile)
559+
g.genPrintOut("patch-leaks.go", g.leakfile)
473560
g.genPrintOut("build.py", g.pybuild)
474561
g.genPrintOut("Makefile", g.makefile)
475562
}
@@ -537,6 +624,14 @@ func (g *pyGen) genGoPreamble() {
537624
g.gofile.Printf("\n// --- generated code for package: %[1]s below: ---\n\n", g.outname)
538625
}
539626

627+
func (g *pyGen) genLeaksPreamble() {
628+
g.leakfile.Printf(patchLeaksPreamble, g.outname, g.cmdstr)
629+
}
630+
631+
func (g *pyGen) genLeaksPostamble() {
632+
g.leakfile.Printf(patchLeaksPostamble)
633+
}
634+
540635
func (g *pyGen) genPyBuildPreamble() {
541636
g.pybuild.Printf(PyBuildPreamble, g.outname, g.cmdstr)
542637
}

bind/gen_func.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,73 @@ package bind
66

77
import (
88
"fmt"
9+
"go/types"
910
"strings"
1011
)
1112

13+
// genPatchLeakFuncName outputs the name of either the function or the class
14+
// 'getter' represented by its arguments that need to have a memory
15+
// leak induced by the interaction of pybindgen and gopy.
16+
// These leaks occur because the cgo bindings use C.CString
17+
// to allocate a string on the C heap, as in:
18+
//
19+
// return C.CString(cstrings.StringValue(C.GoString(s), int(n)))
20+
//
21+
// but the calling pybindgen code then takes a copy of this
22+
// newly allocated string indirectly by calling Py_BuildValue (see below)
23+
// without freeing the copy returned by the function or getter method.
24+
// Arguably pybindgen should provide a means of directly requesting
25+
// this behaviour (simularly to the transfer_ownership options it provides),
26+
// but as of now there is no means of doing so. As a work around the
27+
// output of pybindgen is modified to insert the free in the appropriate
28+
// functions identified by genPatchLeakFuncName.
29+
//
30+
// The patch-leaks.go (see gen.go) file patches these code fragments
31+
// to insert free(retval) immediately after the call to PyBuildValue.
32+
// The patch-leaks.go generated file will perform this patch for
33+
// functions whose names are embedded in it by genPatchLeakFuncName.
34+
//
35+
// .....
36+
// retval = cstrings_StringValue(s, n);
37+
// py_retval = Py_BuildValue((char *) "s", retval);
38+
// .....
39+
//
40+
// This patching is required for top level functions that return
41+
// strings directly and also for getters that return strings that
42+
// are embedded in maps, structs etc.
43+
// NOTE that it's ok to output the same function name multiple times (eg.
44+
// for a shared structs).
45+
func (g *pyGen) genPatchLeakFuncName(ret *Var, fsym *Func) {
46+
if ret == nil || fsym == nil || ret.GoType() == nil {
47+
return
48+
}
49+
g.recurse(ret.GoType(), "_wrap__"+fsym.pkg.Name()+"_", fsym.ID())
50+
}
51+
52+
func (g *pyGen) recurse(gotype types.Type, prefix, name string) {
53+
switch t := gotype.(type) {
54+
case *types.Basic:
55+
if t.Kind() == types.String {
56+
g.leakfile.Printf(" %q,\n", prefix+name)
57+
}
58+
case *types.Map:
59+
g.recurse(t.Elem(), prefix, "Map_"+t.Key().String()+"_string_elem")
60+
case *types.Named:
61+
o := t.Obj()
62+
if o == nil || o.Pkg() == nil {
63+
return
64+
}
65+
g.recurse(t.Underlying(), prefix, o.Pkg().Name()+"_"+o.Name())
66+
case *types.Struct:
67+
for i := 0; i < t.NumFields(); i++ {
68+
f := t.Field(i)
69+
g.recurse(f.Type(), prefix, name+"_"+f.Name()+"_Get")
70+
}
71+
case *types.Slice:
72+
g.recurse(t.Elem(), prefix, "Slice_string_elem")
73+
}
74+
}
75+
1276
// genFuncSig generates just the signature for binding
1377
// returns false if function is not suitable for python
1478
// binding (e.g., multiple return values)
@@ -110,6 +174,9 @@ func (g *pyGen) genFuncSig(sym *symbol, fsym *Func) bool {
110174
ret.Name(),
111175
))
112176
}
177+
178+
g.genPatchLeakFuncName(ret, fsym)
179+
113180
if sret.cpyname == "PyObject*" {
114181
g.pybuild.Printf("retval('%s', caller_owns_return=True)", sret.cpyname)
115182
} else {

0 commit comments

Comments
 (0)