Skip to content

Commit dd53dca

Browse files
committed
Add trace logs for cache leaks.
Signed-off-by: Erik Sipsma <[email protected]>
1 parent 31c870e commit dd53dca

File tree

7 files changed

+109
-15
lines changed

7 files changed

+109
-15
lines changed

cache/refs.go

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,12 @@ func (cr *cacheRecord) ref(triggerLastUsed bool, descHandlers DescHandlers, pg p
107107
progress: pg,
108108
}
109109
cr.refs[ref] = struct{}{}
110+
bklog.G(context.TODO()).WithFields(map[string]any{
111+
"id": cr.ID(),
112+
"newRefCount": len(cr.refs),
113+
"mutable": false,
114+
"stack": bklog.LazyStackTrace{},
115+
}).Trace("acquired cache ref")
110116
return ref
111117
}
112118

@@ -118,6 +124,12 @@ func (cr *cacheRecord) mref(triggerLastUsed bool, descHandlers DescHandlers) *mu
118124
descHandlers: descHandlers,
119125
}
120126
cr.refs[ref] = struct{}{}
127+
bklog.G(context.TODO()).WithFields(map[string]any{
128+
"id": cr.ID(),
129+
"newRefCount": len(cr.refs),
130+
"mutable": true,
131+
"stack": bklog.LazyStackTrace{},
132+
}).Trace("acquired cache ref")
121133
return ref
122134
}
123135

@@ -438,7 +450,19 @@ func (cr *cacheRecord) mount(ctx context.Context, s session.Group) (_ snapshot.M
438450
}
439451

440452
// call when holding the manager lock
441-
func (cr *cacheRecord) remove(ctx context.Context, removeSnapshot bool) error {
453+
func (cr *cacheRecord) remove(ctx context.Context, removeSnapshot bool) (rerr error) {
454+
defer func() {
455+
l := bklog.G(ctx).WithFields(map[string]any{
456+
"id": cr.ID(),
457+
"refCount": len(cr.refs),
458+
"removeSnapshot": removeSnapshot,
459+
"stack": bklog.LazyStackTrace{},
460+
})
461+
if rerr != nil {
462+
l = l.WithError(rerr)
463+
}
464+
l.Trace("removed cache record")
465+
}()
442466
delete(cr.cm.records, cr.ID())
443467
if removeSnapshot {
444468
if err := cr.cm.LeaseManager.Delete(ctx, leases.Lease{
@@ -1294,9 +1318,21 @@ func (sr *immutableRef) updateLastUsedNow() bool {
12941318
return true
12951319
}
12961320

1297-
func (sr *immutableRef) release(ctx context.Context) error {
1298-
delete(sr.refs, sr)
1321+
func (sr *immutableRef) release(ctx context.Context) (rerr error) {
1322+
defer func() {
1323+
l := bklog.G(ctx).WithFields(map[string]any{
1324+
"id": sr.ID(),
1325+
"newRefCount": len(sr.refs),
1326+
"mutable": false,
1327+
"stack": bklog.LazyStackTrace{},
1328+
})
1329+
if rerr != nil {
1330+
l = l.WithError(rerr)
1331+
}
1332+
l.Trace("released cache ref")
1333+
}()
12991334

1335+
delete(sr.refs, sr)
13001336
if sr.updateLastUsedNow() {
13011337
sr.updateLastUsed()
13021338
if sr.equalMutable != nil {
@@ -1476,8 +1512,21 @@ func (sr *mutableRef) Release(ctx context.Context) error {
14761512
return sr.release(ctx)
14771513
}
14781514

1479-
func (sr *mutableRef) release(ctx context.Context) error {
1515+
func (sr *mutableRef) release(ctx context.Context) (rerr error) {
1516+
defer func() {
1517+
l := bklog.G(ctx).WithFields(map[string]any{
1518+
"id": sr.ID(),
1519+
"newRefCount": len(sr.refs),
1520+
"mutable": true,
1521+
"stack": bklog.LazyStackTrace{},
1522+
})
1523+
if rerr != nil {
1524+
l = l.WithError(rerr)
1525+
}
1526+
l.Trace("released cache ref")
1527+
}()
14801528
delete(sr.refs, sr)
1529+
14811530
if !sr.HasCachePolicyRetain() {
14821531
if sr.equalImmutable != nil {
14831532
if sr.equalImmutable.HasCachePolicyRetain() {

cmd/buildkitd/config/config.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
// Config provides containerd configuration data for the server
88
type Config struct {
99
Debug bool `toml:"debug"`
10+
Trace bool `toml:"trace"`
1011

1112
// Root is the path to a directory where buildkit will store persistent data
1213
Root string `toml:"root"`

cmd/buildkitd/config/load_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ func TestLoad(t *testing.T) {
1212
const testConfig = `
1313
root = "/foo/bar"
1414
debug=true
15+
trace=true
1516
insecure-entitlements = ["security.insecure"]
1617
1718
[gc]
@@ -72,6 +73,7 @@ searchDomains=["example.com"]
7273

7374
require.Equal(t, "/foo/bar", cfg.Root)
7475
require.Equal(t, true, cfg.Debug)
76+
require.Equal(t, true, cfg.Trace)
7577
require.Equal(t, "security.insecure", cfg.Entitlements[0])
7678

7779
require.Equal(t, "buildkit.sock", cfg.GRPC.Address[0])

cmd/buildkitd/main.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,10 @@ func main() {
157157
Name: "debug",
158158
Usage: "enable debug output in logs",
159159
},
160+
cli.BoolFlag{
161+
Name: "trace",
162+
Usage: "enable trace output in logs (highly verbose, could affect performance)",
163+
},
160164
cli.StringFlag{
161165
Name: "root",
162166
Usage: "path to state directory",
@@ -223,6 +227,9 @@ func main() {
223227
if cfg.Debug {
224228
logrus.SetLevel(logrus.DebugLevel)
225229
}
230+
if cfg.Trace {
231+
logrus.SetLevel(logrus.TraceLevel)
232+
}
226233

227234
if cfg.GRPC.DebugAddress != "" {
228235
if err := setupDebugHandlers(cfg.GRPC.DebugAddress); err != nil {
@@ -457,6 +464,9 @@ func applyMainFlags(c *cli.Context, cfg *config.Config) error {
457464
if c.IsSet("debug") {
458465
cfg.Debug = c.Bool("debug")
459466
}
467+
if c.IsSet("trace") {
468+
cfg.Trace = c.Bool("trace")
469+
}
460470
if c.IsSet("root") {
461471
cfg.Root = c.String("root")
462472
}

session/grpc.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ func monitorHealth(ctx context.Context, cc *grpc.ClientConn, cancelConn func())
129129
}
130130
}
131131

132-
bklog.G(ctx).WithFields(logFields).Debug("healthcheck completed")
132+
bklog.G(ctx).WithFields(logFields).Trace("healthcheck completed")
133133
}
134134
}
135135
}

solver/llbsolver/mounts/mount.go

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,16 @@ type MountManager struct {
4545
}
4646

4747
func (mm *MountManager) getRefCacheDir(ctx context.Context, ref cache.ImmutableRef, id string, m *pb.Mount, sharing pb.CacheSharingOpt, s session.Group) (mref cache.MutableRef, err error) {
48+
name := fmt.Sprintf("cached mount %s from %s", m.Dest, mm.managerName)
49+
if id != m.Dest {
50+
name += fmt.Sprintf(" with id %q", id)
51+
}
4852
g := &cacheRefGetter{
4953
locker: &mm.cacheMountsMu,
5054
cacheMounts: mm.cacheMounts,
5155
cm: mm.cm,
5256
globalCacheRefs: sharedCacheRefs,
53-
name: fmt.Sprintf("cached mount %s from %s", m.Dest, mm.managerName),
57+
name: name,
5458
session: s,
5559
}
5660
return g.getRefCacheDir(ctx, ref, id, sharing)
@@ -75,19 +79,19 @@ func (g *cacheRefGetter) getRefCacheDir(ctx context.Context, ref cache.Immutable
7579
defer mu.Unlock()
7680

7781
if ref, ok := g.cacheMounts[key]; ok {
78-
return ref.clone(), nil
82+
return ref.clone(ctx), nil
7983
}
8084
defer func() {
8185
if err == nil {
8286
share := &cacheRefShare{MutableRef: mref, refs: map[*cacheRef]struct{}{}}
8387
g.cacheMounts[key] = share
84-
mref = share.clone()
88+
mref = share.clone(ctx)
8589
}
8690
}()
8791

8892
switch sharing {
8993
case pb.CacheSharingOpt_SHARED:
90-
return g.globalCacheRefs.get(key, func() (cache.MutableRef, error) {
94+
return g.globalCacheRefs.get(ctx, key, func() (cache.MutableRef, error) {
9195
return g.getRefCacheDirNoCache(ctx, key, ref, id, false)
9296
})
9397
case pb.CacheSharingOpt_PRIVATE:
@@ -101,7 +105,12 @@ func (g *cacheRefGetter) getRefCacheDir(ctx context.Context, ref cache.Immutable
101105

102106
func (g *cacheRefGetter) getRefCacheDirNoCache(ctx context.Context, key string, ref cache.ImmutableRef, id string, block bool) (cache.MutableRef, error) {
103107
makeMutable := func(ref cache.ImmutableRef) (cache.MutableRef, error) {
104-
return g.cm.New(ctx, ref, g.session, cache.WithRecordType(client.UsageRecordTypeCacheMount), cache.WithDescription(g.name), cache.CachePolicyRetain)
108+
newRef, err := g.cm.New(ctx, ref, g.session, cache.WithRecordType(client.UsageRecordTypeCacheMount), cache.WithDescription(g.name), cache.CachePolicyRetain)
109+
if err != nil {
110+
return nil, err
111+
}
112+
bklog.G(ctx).Debugf("created new ref for cache dir %q: %s", id, newRef.ID())
113+
return newRef, nil
105114
}
106115

107116
cacheRefsLocker.Lock(key)
@@ -114,10 +123,12 @@ func (g *cacheRefGetter) getRefCacheDirNoCache(ctx context.Context, key string,
114123
locked := false
115124
for _, si := range sis {
116125
if mRef, err := g.cm.GetMutable(ctx, si.ID()); err == nil {
117-
bklog.G(ctx).Debugf("reusing ref for cache dir: %s", mRef.ID())
126+
bklog.G(ctx).Debugf("reusing ref for cache dir %q: %s", id, mRef.ID())
118127
return mRef, nil
119128
} else if errors.Is(err, cache.ErrLocked) {
120129
locked = true
130+
} else {
131+
bklog.G(ctx).WithError(err).Errorf("failed to get reuse ref for cache dir %q: %s", id, si.ID())
121132
}
122133
}
123134
if block && locked {
@@ -438,7 +449,7 @@ func CacheMountsLocker() sync.Locker {
438449
return &sharedCacheRefs.mu
439450
}
440451

441-
func (r *cacheRefs) get(key string, fn func() (cache.MutableRef, error)) (cache.MutableRef, error) {
452+
func (r *cacheRefs) get(ctx context.Context, key string, fn func() (cache.MutableRef, error)) (cache.MutableRef, error) {
442453
r.mu.Lock()
443454
defer r.mu.Unlock()
444455

@@ -448,7 +459,7 @@ func (r *cacheRefs) get(key string, fn func() (cache.MutableRef, error)) (cache.
448459

449460
share, ok := r.shares[key]
450461
if ok {
451-
return share.clone(), nil
462+
return share.clone(ctx), nil
452463
}
453464

454465
mref, err := fn()
@@ -458,7 +469,7 @@ func (r *cacheRefs) get(key string, fn func() (cache.MutableRef, error)) (cache.
458469

459470
share = &cacheRefShare{MutableRef: mref, main: r, key: key, refs: map[*cacheRef]struct{}{}}
460471
r.shares[key] = share
461-
return share.clone(), nil
472+
return share.clone(ctx), nil
462473
}
463474

464475
type cacheRefShare struct {
@@ -469,7 +480,11 @@ type cacheRefShare struct {
469480
key string
470481
}
471482

472-
func (r *cacheRefShare) clone() cache.MutableRef {
483+
func (r *cacheRefShare) clone(ctx context.Context) cache.MutableRef {
484+
bklog.G(ctx).WithFields(map[string]any{
485+
"key": r.key,
486+
"stack": bklog.LazyStackTrace{},
487+
}).Trace("cloning cache mount ref share")
473488
cacheRef := &cacheRef{cacheRefShare: r}
474489
if cacheRefCloneHijack != nil {
475490
cacheRefCloneHijack()
@@ -481,6 +496,10 @@ func (r *cacheRefShare) clone() cache.MutableRef {
481496
}
482497

483498
func (r *cacheRefShare) release(ctx context.Context) error {
499+
bklog.G(ctx).WithFields(map[string]any{
500+
"key": r.key,
501+
"stack": bklog.LazyStackTrace{},
502+
}).Trace("releasing cache mount ref share main")
484503
if r.main != nil {
485504
delete(r.main.shares, r.key)
486505
}
@@ -495,6 +514,10 @@ type cacheRef struct {
495514
}
496515

497516
func (r *cacheRef) Release(ctx context.Context) error {
517+
bklog.G(ctx).WithFields(map[string]any{
518+
"key": r.key,
519+
"stack": bklog.LazyStackTrace{},
520+
}).Trace("releasing cache mount ref share")
498521
if r.main != nil {
499522
r.main.mu.Lock()
500523
defer r.main.mu.Unlock()

util/bklog/log.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package bklog
22

33
import (
44
"context"
5+
"runtime/debug"
56

67
"github.com/containerd/containerd/log"
78
"github.com/sirupsen/logrus"
@@ -61,3 +62,11 @@ func GetLogger(ctx context.Context) (l *logrus.Entry) {
6162

6263
return l
6364
}
65+
66+
// LazyStackTrace lets you include a stack trace as a field's value in a log but only
67+
// call it when the log level is actually enabled.
68+
type LazyStackTrace struct{}
69+
70+
func (LazyStackTrace) String() string {
71+
return string(debug.Stack())
72+
}

0 commit comments

Comments
 (0)