Skip to content

Commit 3cf4692

Browse files
security: fix memory accounting issue in client certificate cache
The client certificate cache is a simple utility cache which keeps track of the nearest certificate expiration times by user. It does this by reading in certificates as they are used, and then updating the gauge associated with the user so that our operators can alert on expiring certificates. It has been identified that there is a memory accounting issue with this utility class, in that if we see the same certificate multiple times, we report that our cache is growing for each one. In reality however, though we may change the reference in our cache, the old value will be cleaned up, and memory should remain relatively low. This PR both fixes the accounting bug, and adds a limit so that the client certificate cache can't negatively affect SQL operation. Epic: none Fixes: #151040 Release note: Fixes a memory accounting issue with the client certificate cache, where we were accidentally reporting multiple allocations for the same memory.
1 parent db47b48 commit 3cf4692

File tree

4 files changed

+33
-6
lines changed

4 files changed

+33
-6
lines changed

pkg/security/certificate_manager.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ import (
1313
"github.com/cockroachdb/cockroach/pkg/security/certnames"
1414
"github.com/cockroachdb/cockroach/pkg/security/clientcert"
1515
"github.com/cockroachdb/cockroach/pkg/security/username"
16+
"github.com/cockroachdb/cockroach/pkg/settings"
17+
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
1618
"github.com/cockroachdb/cockroach/pkg/util/log"
1719
"github.com/cockroachdb/cockroach/pkg/util/log/eventpb"
1820
"github.com/cockroachdb/cockroach/pkg/util/log/severity"
@@ -196,15 +198,24 @@ func (cm *CertificateManager) RegisterSignalHandler(
196198
})
197199
}
198200

201+
var CertCacheMemLimit = settings.RegisterIntSetting(
202+
settings.SystemVisible,
203+
"security.clienc_cert.cache_memory_limit",
204+
"memory limit for the client certificate expiration cache",
205+
1<<29, // 512MiB
206+
)
207+
199208
// RegisterExpirationCache registers a cache for client certificate expiration.
200209
// It is called during server startup.
201210
func (cm *CertificateManager) RegisterExpirationCache(
202211
ctx context.Context,
203212
stopper *stop.Stopper,
204213
timeSrc timeutil.TimeSource,
205214
parentMon *mon.BytesMonitor,
215+
st *cluster.Settings,
206216
) error {
207-
m := mon.NewMonitorInheritWithLimit(mon.MakeName("client-expiration-caches"), 0 /* limit */, parentMon, true /* longLiving */)
217+
limit := CertCacheMemLimit.Get(&st.SV)
218+
m := mon.NewMonitorInheritWithLimit(mon.MakeName("client-expiration-caches"), limit, parentMon, true /* longLiving */)
208219
acc := m.MakeConcurrentBoundAccount()
209220
m.StartNoReserved(ctx, parentMon)
210221

pkg/security/clientcert/cert_expiry_cache.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -105,10 +105,13 @@ func (c *Cache) Upsert(ctx context.Context, user string, serial string, newExpir
105105
c.cache[user] = map[string]certInfo{}
106106
}
107107

108-
err := c.account.Grow(ctx, CertInfoSize)
109-
if err != nil {
110-
log.Warningf(ctx, "no memory available to cache cert expiry: %v", err)
111-
c.evictLocked(ctx, user, serial)
108+
// if the serial hasn't been seen, report it in the memory accounting.
109+
if _, ok := c.cache[user][serial]; !ok {
110+
err := c.account.Grow(ctx, CertInfoSize)
111+
if err != nil {
112+
log.Warningf(ctx, "no memory available to cache cert expiry: %v", err)
113+
return
114+
}
112115
}
113116

114117
// insert / update the certificate expiration time.

pkg/security/clientcert/cert_expiry_cache_test.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,7 @@ func TestAllocationTracking(t *testing.T) {
313313
cache.Upsert(ctx, "user1", "serial2", 120)
314314
cache.Upsert(ctx, "user2", "serial3", 65)
315315
clock.Advance(time.Minute)
316+
require.Equal(t, (3*certInfoSize)+(4*gaugeSize), account.Used())
316317
cache.Purge(ctx)
317318
cache.Upsert(ctx, "user2", "serial4", 75)
318319
require.Equal(t, certInfoSize+(2*gaugeSize), account.Used())
@@ -324,9 +325,21 @@ func TestAllocationTracking(t *testing.T) {
324325
cache.Upsert(ctx, "user1", "serial2", 120)
325326
cache.Upsert(ctx, "user2", "serial3", 65)
326327
cache.Upsert(ctx, "user2", "serial4", 75)
328+
require.Equal(t, (4*certInfoSize)+(4*gaugeSize), account.Used())
327329
cache.Clear()
328330
require.Equal(t, int64(0), account.Used())
329331
})
332+
333+
t.Run("overwriting an existing certificate does not change the reported memory allocation", func(t *testing.T) {
334+
cache, account := newCacheAndAccount(ctx, clock, stopper)
335+
require.Equal(t, int64(0), account.Used())
336+
cache.Upsert(ctx, "user1", "serial1", 100)
337+
require.Equal(t, certInfoSize+(2*gaugeSize), account.Used())
338+
cache.Upsert(ctx, "user1", "serial1", 100)
339+
require.Equal(t, certInfoSize+(2*gaugeSize), account.Used())
340+
cache.Upsert(ctx, "user1", "serial1", 100)
341+
require.Equal(t, certInfoSize+(2*gaugeSize), account.Used())
342+
})
330343
}
331344

332345
func TestExpiration(t *testing.T) {

pkg/server/server_sql.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -947,7 +947,7 @@ func newSQLServer(ctx context.Context, cfg sqlServerArgs) (*SQLServer, error) {
947947
return nil, errors.Wrap(err, "initializing certificate manager")
948948
}
949949
err = certMgr.RegisterExpirationCache(
950-
ctx, cfg.stopper, &timeutil.DefaultTimeSource{}, rootSQLMemoryMonitor,
950+
ctx, cfg.stopper, &timeutil.DefaultTimeSource{}, rootSQLMemoryMonitor, cfg.Settings,
951951
)
952952
if err != nil {
953953
return nil, errors.Wrap(err, "adding clientcert cache")

0 commit comments

Comments
 (0)