Commit 1bb1f2b

mknyszek authored and gopherbot committed
runtime: put AddCleanup cleanup arguments in their own allocation
Currently, AddCleanup just creates a simple closure that calls
`cleanup(arg)` as the actual cleanup function tracked internally.
However, the argument ends up getting its own allocation. If it's tiny,
then it can also end up sharing a tiny allocation slot with the object
we're adding the cleanup to. Since the closure is a GC root, we can end
up with cleanups that never fire.

This change refactors the AddCleanup machinery to make the storage for
the argument separate and explicit. With that in place, it explicitly
allocates 16 bytes of storage for tiny arguments to side-step the tiny
allocator.

One would think this would cause an increase in memory use and more
bytes allocated, but that's actually wrong! It turns out that the
current "simple closure" actually creates _two_ closures. By making the
argument passing explicit, we eliminate one layer of closures, so this
actually results in a slightly faster AddCleanup overall and 16 bytes
less memory allocated.

goos: linux
goarch: amd64
pkg: runtime
cpu: AMD EPYC 7B13
                     │ before.bench │           after.bench            │
                     │    sec/op    │    sec/op     vs base            │
AddCleanupAndStop-64    124.5n ± 2%   103.7n ± 2%  -16.71% (p=0.002 n=6)

                     │ before.bench │           after.bench            │
                     │     B/op     │     B/op      vs base            │
AddCleanupAndStop-64     48.00 ± 0%    32.00 ± 0%  -33.33% (p=0.002 n=6)

                     │ before.bench │           after.bench            │
                     │  allocs/op   │  allocs/op    vs base            │
AddCleanupAndStop-64     3.000 ± 0%    2.000 ± 0%  -33.33% (p=0.002 n=6)

This change does, however, add 16 bytes of overhead to the cleanup
special itself, and makes each cleanup block entry 24 bytes instead of
8 bytes. This means the overall memory overhead delta with this change
is neutral, and we just have a faster cleanup. (Cleanup block entries
are transient, so I suspect any increase in memory overhead there is
negligible.)

Together with CL 719960, fixes #76007.

Change-Id: I81bf3e44339e71c016c30d80bb4ee151c8263d5c
Reviewed-on: https://go-review.googlesource.com/c/go/+/720321
Reviewed-by: Cherry Mui <[email protected]>
Auto-Submit: Michael Knyszek <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
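
For orientation, the following is a small usage-level sketch, not part of the commit; the object size, argument type, and GC/timing choices are illustrative assumptions. It shows the scenario the message describes: a tiny, pointer-free argument passed to runtime.AddCleanup. Before this change such an argument could land in the same tiny-allocator slot as the object the cleanup is attached to, and since the internal closure is a GC root, the cleanup could then never fire; after this change the argument gets its own 16-byte allocation.

package main

import (
	"fmt"
	"runtime"
	"time"
)

func main() {
	// The object we attach a cleanup to.
	obj := new([16]byte)
	done := make(chan struct{})

	// A tiny, pointer-free argument: exactly the kind of value the tiny
	// allocator could previously co-locate with another small object.
	arg := [8]byte{1, 2, 3}

	runtime.AddCleanup(obj, func(a [8]byte) {
		fmt.Println("cleanup ran with", a)
		close(done)
	}, arg)

	// Drop the last reference and give the GC a chance to notice.
	obj = nil
	runtime.GC()
	runtime.GC()

	select {
	case <-done:
		fmt.Println("cleanup fired")
	case <-time.After(time.Second):
		fmt.Println("cleanup did not fire within a second")
	}
}
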
1 parent 9fd2e44 commit 1bb1f2b

File tree

3 files changed: +78 −37 lines changed


src/runtime/mcleanup.go

Lines changed: 62 additions & 24 deletions
@@ -72,8 +72,9 @@ import (
 // pass the object to the [KeepAlive] function after the last point
 // where the object must remain reachable.
 func AddCleanup[T, S any](ptr *T, cleanup func(S), arg S) Cleanup {
-	// Explicitly force ptr to escape to the heap.
+	// Explicitly force ptr and cleanup to escape to the heap.
 	ptr = abi.Escape(ptr)
+	cleanup = abi.Escape(cleanup)
 
 	// The pointer to the object must be valid.
 	if ptr == nil {
@@ -82,7 +83,8 @@ func AddCleanup[T, S any](ptr *T, cleanup func(S), arg S) Cleanup {
 	usptr := uintptr(unsafe.Pointer(ptr))
 
 	// Check that arg is not equal to ptr.
-	if kind := abi.TypeOf(arg).Kind(); kind == abi.Pointer || kind == abi.UnsafePointer {
+	argType := abi.TypeOf(arg)
+	if kind := argType.Kind(); kind == abi.Pointer || kind == abi.UnsafePointer {
 		if unsafe.Pointer(ptr) == *((*unsafe.Pointer)(unsafe.Pointer(&arg))) {
 			panic("runtime.AddCleanup: ptr is equal to arg, cleanup will never run")
 		}
@@ -98,12 +100,23 @@ func AddCleanup[T, S any](ptr *T, cleanup func(S), arg S) Cleanup {
 		return Cleanup{}
 	}
 
-	fn := func() {
-		cleanup(arg)
+	// Create new storage for the argument.
+	var argv *S
+	if size := unsafe.Sizeof(arg); size < maxTinySize && argType.PtrBytes == 0 {
+		// Side-step the tiny allocator to avoid liveness issues, since this box
+		// will be treated like a root by the GC. We model the box as an array of
+		// uintptrs to guarantee maximum allocator alignment.
+		//
+		// TODO(mknyszek): Consider just making space in cleanupFn for this. The
+		// unfortunate part of this is it would grow specialCleanup by 16 bytes, so
+		// while there wouldn't be an allocation, *every* cleanup would take the
+		// memory overhead hit.
+		box := new([maxTinySize / goarch.PtrSize]uintptr)
+		argv = (*S)(unsafe.Pointer(box))
+	} else {
+		argv = new(S)
 	}
-	// Closure must escape.
-	fv := *(**funcval)(unsafe.Pointer(&fn))
-	fv = abi.Escape(fv)
+	*argv = arg
 
 	// Find the containing object.
 	base, _, _ := findObject(usptr, 0, 0)
@@ -120,7 +133,16 @@ func AddCleanup[T, S any](ptr *T, cleanup func(S), arg S) Cleanup {
 		gcCleanups.createGs()
 	}
 
-	id := addCleanup(unsafe.Pointer(ptr), fv)
+	id := addCleanup(unsafe.Pointer(ptr), cleanupFn{
+		// Instantiate a caller function to call the cleanup, that is cleanup(*argv).
+		//
+		// TODO(mknyszek): This allocates because the generic dictionary argument
+		// gets closed over, but callCleanup doesn't even use the dictionary argument,
+		// so theoretically that could be removed, eliminating an allocation.
+		call: callCleanup[S],
+		fn:   *(**funcval)(unsafe.Pointer(&cleanup)),
+		arg:  unsafe.Pointer(argv),
+	})
 	if debug.checkfinalizers != 0 {
 		cleanupFn := *(**funcval)(unsafe.Pointer(&cleanup))
 		setCleanupContext(unsafe.Pointer(ptr), abi.TypeFor[T](), sys.GetCallerPC(), cleanupFn.fn, id)
@@ -131,6 +153,16 @@ func AddCleanup[T, S any](ptr *T, cleanup func(S), arg S) Cleanup {
 	}
 }
 
+// callCleanup is a helper for calling cleanups in a polymorphic way.
+//
+// In practice, all it does is call fn(*arg). arg must be a *T.
+//
+//go:noinline
+func callCleanup[T any](fn *funcval, arg unsafe.Pointer) {
+	cleanup := *(*func(T))(unsafe.Pointer(&fn))
+	cleanup(*(*T)(arg))
+}
+
 // Cleanup is a handle to a cleanup call for a specific object.
 type Cleanup struct {
 	// id is the unique identifier for the cleanup within the arena.
@@ -216,7 +248,17 @@ const cleanupBlockSize = 512
 // that the cleanup queue does not grow during marking (but it can shrink).
 type cleanupBlock struct {
 	cleanupBlockHeader
-	cleanups [(cleanupBlockSize - unsafe.Sizeof(cleanupBlockHeader{})) / goarch.PtrSize]*funcval
+	cleanups [(cleanupBlockSize - unsafe.Sizeof(cleanupBlockHeader{})) / unsafe.Sizeof(cleanupFn{})]cleanupFn
+}
+
+var cleanupFnPtrMask = [...]uint8{0b111}
+
+// cleanupFn represents a cleanup function with its argument, yet to be called.
+type cleanupFn struct {
+	// call is an adapter function that understands how to safely call fn(*arg).
+	call func(*funcval, unsafe.Pointer)
+	fn   *funcval       // cleanup function passed to AddCleanup.
+	arg  unsafe.Pointer // pointer to argument to pass to cleanup function.
 }
 
 var cleanupBlockPtrMask [cleanupBlockSize / goarch.PtrSize / 8]byte
@@ -245,8 +287,8 @@ type cleanupBlockHeader struct {
 //
 // Must only be called if the GC is in the sweep phase (gcphase == _GCoff),
 // because it does not synchronize with the garbage collector.
-func (b *cleanupBlock) enqueue(fn *funcval) bool {
-	b.cleanups[b.n] = fn
+func (b *cleanupBlock) enqueue(c cleanupFn) bool {
+	b.cleanups[b.n] = c
 	b.n++
 	return b.full()
 }
@@ -375,7 +417,7 @@ func (q *cleanupQueue) tryTakeWork() bool {
 // enqueue queues a single cleanup for execution.
 //
 // Called by the sweeper, and only the sweeper.
-func (q *cleanupQueue) enqueue(fn *funcval) {
+func (q *cleanupQueue) enqueue(c cleanupFn) {
 	mp := acquirem()
 	pp := mp.p.ptr()
 	b := pp.cleanups
@@ -396,7 +438,7 @@ func (q *cleanupQueue) enqueue(fn *funcval) {
 		}
 		pp.cleanups = b
 	}
-	if full := b.enqueue(fn); full {
+	if full := b.enqueue(c); full {
 		q.full.push(&b.lfnode)
 		pp.cleanups = nil
 		q.addWork(1)
@@ -641,7 +683,8 @@ func runCleanups() {
 
 		gcCleanups.beginRunningCleanups()
 		for i := 0; i < int(b.n); i++ {
-			fn := b.cleanups[i]
+			c := b.cleanups[i]
+			b.cleanups[i] = cleanupFn{}
 
 			var racectx uintptr
 			if raceenabled {
@@ -650,20 +693,15 @@ func runCleanups() {
 				// the same goroutine.
 				//
 				// Synchronize on fn. This would fail to find races on the
-				// closed-over values in fn (suppose fn is passed to multiple
-				// AddCleanup calls) if fn was not unique, but it is. Update
-				// the synchronization on fn if you intend to optimize it
-				// and store the cleanup function and cleanup argument on the
-				// queue directly.
-				racerelease(unsafe.Pointer(fn))
+				// closed-over values in fn (suppose arg is passed to multiple
+				// AddCleanup calls) if arg was not unique, but it is.
+				racerelease(unsafe.Pointer(c.arg))
 				racectx = raceEnterNewCtx()
-				raceacquire(unsafe.Pointer(fn))
+				raceacquire(unsafe.Pointer(c.arg))
 			}
 
 			// Execute the next cleanup.
-			cleanup := *(*func())(unsafe.Pointer(&fn))
-			cleanup()
-			b.cleanups[i] = nil
+			c.call(c.fn, c.arg)
 
 			if raceenabled {
 				// Restore the old context.
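
To make the new dispatch path easier to read, here is a self-contained analogy of the cleanupFn technique above; it is not runtime code, and the names cleanupEntry, callAdapter, and makeEntry are invented for illustration. The argument is copied into its own allocation, the entry stores a type-erased function plus the argument pointer, and a per-type adapter reassembles the typed call fn(*arg), mirroring what callCleanup[S] does for the real cleanup queue.

package main

import (
	"fmt"
	"unsafe"
)

// cleanupEntry is the analogue of cleanupFn: a type-erased function,
// a pointer to the argument's own storage, and an adapter that knows
// how to put the typed call back together.
type cleanupEntry struct {
	call func(fn, arg unsafe.Pointer) // adapter instantiated per argument type
	fn   unsafe.Pointer               // type-erased cleanup function
	arg  unsafe.Pointer               // argument in its own allocation
}

// callAdapter plays the role of callCleanup[S]: recover the typed
// function and argument, then call fn(*arg).
func callAdapter[S any](fn, arg unsafe.Pointer) {
	f := *(*func(S))(fn)
	f(*(*S)(arg))
}

// makeEntry plays the role of AddCleanup's bookkeeping: copy the
// argument into separate storage and record the adapter for its type.
func makeEntry[S any](cleanup func(S), arg S) cleanupEntry {
	argv := new(S) // the argument gets its own allocation
	*argv = arg
	return cleanupEntry{
		call: callAdapter[S],
		fn:   unsafe.Pointer(&cleanup), // &cleanup escapes, so this stays valid
		arg:  unsafe.Pointer(argv),
	}
}

func main() {
	e := makeEntry(func(s string) { fmt.Println("cleanup:", s) }, "hello")
	e.call(e.fn, e.arg) // dispatches fn(*arg) without knowing S statically
}
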

src/runtime/mgcmark.go

Lines changed: 3 additions & 3 deletions
@@ -204,7 +204,7 @@ func gcMarkRootCheck() {
 	})
 }
 
-// ptrmask for an allocation containing a single pointer.
+// oneptrmask for an allocation containing a single pointer.
 var oneptrmask = [...]uint8{1}
 
 // markroot scans the i'th root.
@@ -251,7 +251,7 @@ func markroot(gcw *gcWork, i uint32, flushBgCredit bool) int64 {
 			// N.B. This only needs to synchronize with cleanup execution, which only resets these blocks.
 			// All cleanup queueing happens during sweep.
 			n := uintptr(atomic.Load(&cb.n))
-			scanblock(uintptr(unsafe.Pointer(&cb.cleanups[0])), n*goarch.PtrSize, &cleanupBlockPtrMask[0], gcw, nil)
+			scanblock(uintptr(unsafe.Pointer(&cb.cleanups[0])), n*unsafe.Sizeof(cleanupFn{}), &cleanupBlockPtrMask[0], gcw, nil)
		}
 
 	case work.baseSpans <= i && i < work.baseStacks:
@@ -489,7 +489,7 @@ func gcScanFinalizer(spf *specialfinalizer, s *mspan, gcw *gcWork) {
 // gcScanCleanup scans the relevant parts of a cleanup special as a root.
 func gcScanCleanup(spc *specialCleanup, gcw *gcWork) {
 	// The special itself is a root.
-	scanblock(uintptr(unsafe.Pointer(&spc.fn)), goarch.PtrSize, &oneptrmask[0], gcw, nil)
+	scanblock(uintptr(unsafe.Pointer(&spc.cleanup)), unsafe.Sizeof(cleanupFn{}), &cleanupFnPtrMask[0], gcw, nil)
 }
 
 // gcAssistAlloc performs GC work to make gp's assist debt positive.
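
As a reading aid for the mask change, the snippet below, which uses a stand-in struct rather than the runtime's own types, illustrates why cleanupFnPtrMask is 0b111: scanblock consumes one mask bit per pointer-sized word of the scanned region, and a cleanupFn is three words, each holding a pointer the GC must trace.

package main

import (
	"fmt"
	"unsafe"
)

// cleanupFnLike is a stand-in with the same shape as cleanupFn: three
// pointer-sized words, every one of which holds a pointer.
type cleanupFnLike struct {
	call func(unsafe.Pointer, unsafe.Pointer)
	fn   unsafe.Pointer
	arg  unsafe.Pointer
}

func main() {
	size := unsafe.Sizeof(cleanupFnLike{})
	words := size / unsafe.Sizeof(uintptr(0))
	fmt.Printf("%d bytes = %d pointer-sized words\n", size, words)
	// One mask bit per word; all three are set because every word is a pointer.
	fmt.Printf("pointer mask: %#b\n", 0b111)
}
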

src/runtime/mheap.go

Lines changed: 13 additions & 10 deletions
@@ -2161,7 +2161,7 @@ func removefinalizer(p unsafe.Pointer) {
 type specialCleanup struct {
 	_       sys.NotInHeap
 	special special
-	fn      *funcval
+	cleanup cleanupFn
 	// Globally unique ID for the cleanup, obtained from mheap_.cleanupID.
 	id uint64
 }
@@ -2170,14 +2170,18 @@ type specialCleanup struct {
 // cleanups are allowed on an object, and even the same pointer.
 // A cleanup id is returned which can be used to uniquely identify
 // the cleanup.
-func addCleanup(p unsafe.Pointer, f *funcval) uint64 {
+func addCleanup(p unsafe.Pointer, c cleanupFn) uint64 {
+	// TODO(mknyszek): Consider pooling specialCleanups on the P
+	// so we don't have to take the lock every time. Just locking
+	// is a considerable part of the cost of AddCleanup. This
+	// would also require reserving some cleanup IDs on the P.
 	lock(&mheap_.speciallock)
 	s := (*specialCleanup)(mheap_.specialCleanupAlloc.alloc())
 	mheap_.cleanupID++ // Increment first. ID 0 is reserved.
 	id := mheap_.cleanupID
 	unlock(&mheap_.speciallock)
 	s.special.kind = _KindSpecialCleanup
-	s.fn = f
+	s.cleanup = c
 	s.id = id
 
 	mp := acquirem()
@@ -2187,17 +2191,16 @@ func addCleanup(p unsafe.Pointer, f *funcval) uint64 {
 	// situation where it's possible that markrootSpans
 	// has already run but mark termination hasn't yet.
 	if gcphase != _GCoff {
-		gcw := &mp.p.ptr().gcw
 		// Mark the cleanup itself, since the
 		// special isn't part of the GC'd heap.
-		scanblock(uintptr(unsafe.Pointer(&s.fn)), goarch.PtrSize, &oneptrmask[0], gcw, nil)
+		gcScanCleanup(s, &mp.p.ptr().gcw)
 	}
 	releasem(mp)
-	// Keep f alive. There's a window in this function where it's
-	// only reachable via the special while the special hasn't been
-	// added to the specials list yet. This is similar to a bug
+	// Keep c and its referents alive. There's a window in this function
+	// where it's only reachable via the special while the special hasn't
+	// been added to the specials list yet. This is similar to a bug
 	// discovered for weak handles, see #70455.
-	KeepAlive(f)
+	KeepAlive(c)
 	return id
 }
 
@@ -2800,7 +2803,7 @@ func freeSpecial(s *special, p unsafe.Pointer, size uintptr) {
 		// Cleanups, unlike finalizers, do not resurrect the objects
 		// they're attached to, so we only need to pass the cleanup
 		// function, not the object.
-		gcCleanups.enqueue(sc.fn)
+		gcCleanups.enqueue(sc.cleanup)
 		lock(&mheap_.speciallock)
 		mheap_.specialCleanupAlloc.free(unsafe.Pointer(sc))
 		unlock(&mheap_.speciallock)
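
The KeepAlive change above guards a narrow lifetime window. The sketch below is an analogy with invented names (stash, published), not runtime code: when the only reference to a value temporarily lives somewhere the GC does not scan, like the not-yet-linked special here, runtime.KeepAlive must extend the value's lifetime past that window.

package main

import (
	"fmt"
	"runtime"
	"unsafe"
)

// published simulates storage the GC does not scan, much like the
// cleanup special before it is linked into the specials list.
var published uintptr

func stash(v *[64]byte) {
	// From here on, the uintptr below may be the only reference to v,
	// and the GC cannot see it.
	published = uintptr(unsafe.Pointer(v))

	// ... work that could overlap a GC cycle, before a scannable
	// reference to v is published again ...

	// Keep v alive until we are safely past that window.
	runtime.KeepAlive(v)
}

func main() {
	v := new([64]byte)
	v[0] = 42
	stash(v)
	fmt.Printf("stashed reference %#x\n", published)
}
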
