Skip to content

Commit 807ff6b

Browse files
craig[bot]fqazi
andcommitted
Merge #149264
149264: catalog/lease: add memory monitoring for lease manager r=fqazi a=fqazi Previously, there was no memory monitoring to track how much memory was being used by the lease manager. This made it hard to understand how memory was being utilized with a large number of schema objects. This patch adds support for memory monitoring inside the lease manager. Fixes: #71282 Release note: None Co-authored-by: Faizan Qazi <[email protected]>
2 parents 31baea9 + d4f6ff4 commit 807ff6b

File tree

9 files changed

+155
-13
lines changed

9 files changed

+155
-13
lines changed

pkg/server/drain.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,7 @@ func (s *drainServer) drainClients(
486486
// Drain all SQL table leases. This must be done after the pgServer has
487487
// given sessions a chance to finish ongoing work and after the background
488488
// tasks that may issue SQL statements have shut down.
489-
s.sqlServer.leaseMgr.SetDraining(ctx, true /* drain */, reporter)
489+
s.sqlServer.leaseMgr.SetDraining(ctx, true /* drain */, reporter, true /*assertOnLeakedDescriptor*/)
490490

491491
session, err := s.sqlServer.sqlLivenessProvider.Release(ctx)
492492
if err != nil {

pkg/server/server_sql.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -667,6 +667,7 @@ func newSQLServer(ctx context.Context, cfg sqlServerArgs) (*SQLServer, error) {
667667
}
668668

669669
leaseMgr := lease.NewLeaseManager(
670+
ctx,
670671
cfg.AmbientCtx,
671672
cfg.nodeIDContainer,
672673
cfg.internalDB,
@@ -678,6 +679,7 @@ func newSQLServer(ctx context.Context, cfg sqlServerArgs) (*SQLServer, error) {
678679
lmKnobs,
679680
cfg.stopper,
680681
cfg.rangeFeedFactory,
682+
cfg.monitorAndMetrics.rootSQLMemoryMonitor,
681683
)
682684
cfg.registry.AddMetricStruct(leaseMgr.MetricsStruct())
683685

pkg/sql/catalog/lease/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ go_library(
5454
"//pkg/util/log",
5555
"//pkg/util/log/logcrash",
5656
"//pkg/util/metric",
57+
"//pkg/util/mon",
5758
"//pkg/util/quotapool",
5859
"//pkg/util/retry",
5960
"//pkg/util/startup",

pkg/sql/catalog/lease/descriptor_state.go

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,12 @@ func (t *descriptorState) upsertLeaseLocked(
135135
log.Infof(ctx, "new lease: %s", desc)
136136
}
137137
descState := newDescriptorVersionState(t, desc, hlc.Timestamp{}, session, regionEnumPrefix, true /* isLease */)
138+
if err := t.m.boundAccount.Grow(ctx, descState.getByteSize()); err != nil {
139+
// If we don't have sufficient memory, then release the lease so
140+
// that the system.lease table doesn't have a reference.
141+
t.m.storage.release(ctx, t.m.stopper, descState.mu.lease)
142+
return wrapMemoryError(err)
143+
}
138144
t.mu.active.insert(descState)
139145
t.m.names.insert(ctx, descState)
140146
return nil
@@ -196,7 +202,7 @@ func newDescriptorVersionState(
196202

197203
// removeInactiveVersions removes inactive versions in t.mu.active.data with
198204
// refcount 0. t.mu must be locked. It returns leases that need to be released.
199-
func (t *descriptorState) removeInactiveVersions() []*storedLease {
205+
func (t *descriptorState) removeInactiveVersions(ctx context.Context) []*storedLease {
200206
var leases []*storedLease
201207
// A copy of t.mu.active.data must be made since t.mu.active.data will be changed
202208
// within the loop.
@@ -206,10 +212,12 @@ func (t *descriptorState) removeInactiveVersions() []*storedLease {
206212
func() {
207213
desc.mu.Lock()
208214
defer desc.mu.Unlock()
215+
// Ensure we have a lock to allow us to clean up the usage
216+
// by the stored lease.
217+
t.m.boundAccount.Shrink(ctx, desc.getByteSize())
209218
if l := desc.mu.lease; l != nil {
210219
desc.mu.lease = nil
211220
leases = append(leases, l)
212-
213221
}
214222
}()
215223
}
@@ -272,14 +280,15 @@ func (t *descriptorState) release(ctx context.Context, s *descriptorVersionState
272280
leaseReleased := true
273281
// For testing, we will synchronously release leases, but that
274282
// exposes us to the danger of the context getting cancelled. To
275-
// eliminate this risk, we are going first remove the lease from
276-
// storage and then delete if from mqemory.
283+
// eliminate this risk, we are going to first remove the lease from
284+
// storage and then delete it from memory.
277285
if t.m.storage.testingKnobs.RemoveOnceDereferenced {
278286
leaseReleased = releaseLease(ctx, l, t.m)
279287
l = nil
280288
}
281289
if leaseReleased {
282290
t.mu.active.remove(s)
291+
s.t.m.boundAccount.Shrink(ctx, s.getByteSize())
283292
}
284293
return l
285294
}

pkg/sql/catalog/lease/descriptor_version_state.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ package lease
88
import (
99
"context"
1010
"sync/atomic"
11+
"unsafe"
1112

1213
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
1314
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
@@ -39,6 +40,12 @@ func (s *storedLease) SafeFormat(w redact.SafePrinter, _ rune) {
3940
w.Printf("ID=%d ver=%d expiration=%s", s.id, s.version, s.expiration)
4041
}
4142

43+
// getByteSize returns the full size of stored lease.
44+
func (s *storedLease) getByteSize() int64 {
45+
return int64(len(s.prefix)+len(s.sessionID)+int(s.expiration.Size())) +
46+
int64(unsafe.Sizeof(*s))
47+
}
48+
4249
// descriptorVersionState holds the state for a descriptor version. This
4350
// includes the lease information for a descriptor version.
4451
// TODO(vivek): A node only needs to manage lease information on what it
@@ -94,6 +101,15 @@ func (s *descriptorVersionState) SafeFormat(w redact.SafePrinter, _ rune) {
94101
w.Print(s.redactedString())
95102
}
96103

104+
// getByteSize returns the full size of a descriptor version state structure.
105+
func (s *descriptorVersionState) getByteSize() int64 {
106+
size := s.ByteSize() + int64(unsafe.Sizeof(*s))
107+
if s.mu.lease != nil {
108+
size += s.mu.lease.getByteSize()
109+
}
110+
return size
111+
}
112+
97113
func (s *descriptorVersionState) String() string {
98114
return redact.StringWithoutMarkers(s)
99115
}

pkg/sql/catalog/lease/lease.go

Lines changed: 80 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,12 @@ import (
4141
"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
4242
"github.com/cockroachdb/cockroach/pkg/sql/sqlliveness"
4343
kvstorage "github.com/cockroachdb/cockroach/pkg/storage"
44+
"github.com/cockroachdb/cockroach/pkg/util/buildutil"
4445
"github.com/cockroachdb/cockroach/pkg/util/hlc"
4546
"github.com/cockroachdb/cockroach/pkg/util/log"
4647
"github.com/cockroachdb/cockroach/pkg/util/log/logcrash"
4748
"github.com/cockroachdb/cockroach/pkg/util/metric"
49+
"github.com/cockroachdb/cockroach/pkg/util/mon"
4850
"github.com/cockroachdb/cockroach/pkg/util/quotapool"
4951
"github.com/cockroachdb/cockroach/pkg/util/retry"
5052
"github.com/cockroachdb/cockroach/pkg/util/startup"
@@ -777,6 +779,12 @@ func (m *Manager) readOlderVersionForTimestamp(
777779
return descs, nil
778780
}
779781

782+
// wrapMemoryError adds a hint on memory errors to indicate
783+
// which setting should be bumped.
784+
func wrapMemoryError(err error) error {
785+
return errors.WithHint(err, "Consider increasing --max-sql-memory startup parameter.")
786+
}
787+
780788
// Insert descriptor versions. The versions provided are not in
781789
// any particular order.
782790
func (m *Manager) insertDescriptorVersions(
@@ -789,16 +797,25 @@ func (m *Manager) insertDescriptorVersions(
789797
}
790798
t.mu.Lock()
791799
defer t.mu.Unlock()
800+
newVersionsToInsert := make([]*descriptorVersionState, 0, len(versions))
792801
for i := range versions {
793802
// Since we gave up the lock while reading the versions from
794803
// the store we have to ensure that no one else inserted the
795804
// same version.
796805
existingVersion := t.mu.active.findVersion(versions[i].desc.GetVersion())
797806
if existingVersion == nil {
798-
t.mu.active.insert(
799-
newDescriptorVersionState(t, versions[i].desc, versions[i].expiration, session, nil, false))
807+
descState := newDescriptorVersionState(t, versions[i].desc, versions[i].expiration, session, nil, false)
808+
if err := t.m.boundAccount.Grow(ctx, descState.getByteSize()); err != nil {
809+
return wrapMemoryError(err)
810+
}
811+
newVersionsToInsert = append(newVersionsToInsert, descState)
800812
}
801813
}
814+
// Only insert if all versions were allocated.
815+
for _, descState := range newVersionsToInsert {
816+
t.mu.active.insert(descState)
817+
}
818+
802819
return nil
803820
}
804821

@@ -978,7 +995,7 @@ func purgeOldVersions(
978995
t.mu.Lock()
979996
defer t.mu.Unlock()
980997
t.mu.takenOffline = dropped
981-
return t.removeInactiveVersions(), t.mu.active.findPreviousToExpire(dropped)
998+
return t.removeInactiveVersions(ctx), t.mu.active.findPreviousToExpire(dropped)
982999
}()
9831000
for _, l := range leases {
9841001
releaseLease(ctx, l, m)
@@ -1122,6 +1139,11 @@ type Manager struct {
11221139
// initComplete is a fast check to confirm that initialization is complete, since
11231140
// performance testing showed select on the waitForInit channel can be expensive.
11241141
initComplete atomic.Bool
1142+
1143+
// bytesMonitor tracks the memory usage from leased descriptors.
1144+
bytesMonitor *mon.BytesMonitor
1145+
// boundAccount tracks the memory usage from leased descriptors.
1146+
boundAccount *mon.ConcurrentBoundAccount
11251147
}
11261148

11271149
const leaseConcurrencyLimit = 5
@@ -1133,6 +1155,7 @@ const leaseConcurrencyLimit = 5
11331155
//
11341156
// stopper is used to run async tasks. Can be nil in tests.
11351157
func NewLeaseManager(
1158+
ctx context.Context,
11361159
ambientCtx log.AmbientContext,
11371160
nodeIDContainer *base.SQLIDContainer,
11381161
db isql.DB,
@@ -1144,7 +1167,11 @@ func NewLeaseManager(
11441167
testingKnobs ManagerTestingKnobs,
11451168
stopper *stop.Stopper,
11461169
rangeFeedFactory *rangefeed.Factory,
1170+
rootBytesMonitor *mon.BytesMonitor,
11471171
) *Manager {
1172+
// See pkg/sql/mem_metrics.go
1173+
// log10int64times1000 = log10(math.MaxInt64) * 1000, rounded up somewhat
1174+
const log10int64times1000 = 19 * 1000
11481175
lm := &Manager{
11491176
storage: storage{
11501177
nodeIDContainer: nodeIDContainer,
@@ -1199,6 +1226,24 @@ func NewLeaseManager(
11991226
Measurement: "Number of wait for initial version routines executing",
12001227
Unit: metric.Unit_COUNT,
12011228
}),
1229+
leaseCurBytesCount: metric.NewGauge(metric.Metadata{
1230+
Name: "sql.leases.lease_cur_bytes_count",
1231+
Help: "The current number of bytes used by the lease manager.",
1232+
Measurement: "Number of bytes used by the lease manager.",
1233+
Unit: metric.Unit_BYTES,
1234+
}),
1235+
leaseMaxBytesHist: metric.NewHistogram(metric.HistogramOptions{
1236+
Metadata: metric.Metadata{
1237+
Name: "sql.leases.lease_max_bytes_hist",
1238+
Help: "Memory used by the lease manager.",
1239+
Measurement: "Number of bytes used by the lease manager.",
1240+
Unit: metric.Unit_BYTES,
1241+
},
1242+
Duration: base.DefaultHistogramWindowInterval(),
1243+
MaxVal: log10int64times1000,
1244+
SigFigs: 3,
1245+
BucketConfig: metric.MemoryUsage64MBBuckets,
1246+
}),
12021247
},
12031248
},
12041249
settings: settings,
@@ -1226,6 +1271,22 @@ func NewLeaseManager(
12261271
lm.descUpdateCh = make(chan catalog.Descriptor)
12271272
lm.descDelCh = make(chan descpb.ID)
12281273
lm.rangefeedErrCh = make(chan error)
1274+
lm.bytesMonitor = mon.NewMonitor(mon.Options{
1275+
Name: mon.MakeName("leased-descriptors"),
1276+
CurCount: lm.storage.leasingMetrics.leaseCurBytesCount,
1277+
MaxHist: lm.storage.leasingMetrics.leaseMaxBytesHist,
1278+
Res: mon.MemoryResource,
1279+
Settings: settings,
1280+
LongLiving: true,
1281+
})
1282+
lm.bytesMonitor.StartNoReserved(context.Background(), rootBytesMonitor)
1283+
lm.boundAccount = lm.bytesMonitor.MakeConcurrentBoundAccount()
1284+
// Add a stopper for the bound account that we are using to
1285+
// track memory usage.
1286+
lm.stopper.AddCloser(stop.CloserFn(func() {
1287+
lm.boundAccount.Close(ctx)
1288+
lm.bytesMonitor.Stop(ctx)
1289+
}))
12291290
return lm
12301291
}
12311292

@@ -1517,7 +1578,10 @@ func (m *Manager) IsDraining() bool {
15171578
// been done by the time this call returns. See the explanation in
15181579
// pkg/server/drain.go for details.
15191580
func (m *Manager) SetDraining(
1520-
ctx context.Context, drain bool, reporter func(int, redact.SafeString),
1581+
ctx context.Context,
1582+
drain bool,
1583+
reporter func(int, redact.SafeString),
1584+
assertOnLeakedDescriptor bool,
15211585
) {
15221586
m.draining.Store(drain)
15231587
if !drain {
@@ -1530,7 +1594,13 @@ func (m *Manager) SetDraining(
15301594
leases := func() []*storedLease {
15311595
t.mu.Lock()
15321596
defer t.mu.Unlock()
1533-
return t.removeInactiveVersions()
1597+
leasesToRelease := t.removeInactiveVersions(ctx)
1598+
// Ensure that all leases are released at this time.
1599+
if buildutil.CrdbTestBuild && assertOnLeakedDescriptor && len(t.mu.active.data) > 0 {
1600+
// Panic that a descriptor may have leaked.
1601+
panic(errors.AssertionFailedf("descriptor leak was detected for: %d (%s)", t.id, t.mu.active))
1602+
}
1603+
return leasesToRelease
15341604
}()
15351605
for _, l := range leases {
15361606
releaseLease(ctx, l, m)
@@ -2478,3 +2548,8 @@ func (m *Manager) deleteOrphanedLeasesWithSameInstanceID(
24782548
}
24792549
}
24802550
}
2551+
2552+
// TestingGetBoundAccount returns the bound account used by the lease manager.
2553+
func (m *Manager) TestingGetBoundAccount() *mon.ConcurrentBoundAccount {
2554+
return m.boundAccount
2555+
}

pkg/sql/catalog/lease/lease_test.go

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@ func (t *leaseTest) node(nodeID uint32) *lease.Manager {
271271
)
272272
cfgCpy.SQLLiveness.Start(context.Background(), nil)
273273
mgr = lease.NewLeaseManager(
274+
context.Background(),
274275
ambientCtx,
275276
nc,
276277
cfgCpy.InternalDB,
@@ -282,6 +283,7 @@ func (t *leaseTest) node(nodeID uint32) *lease.Manager {
282283
t.leaseManagerTestingKnobs,
283284
t.server.AppStopper(),
284285
cfgCpy.RangeFeedFactory,
286+
cfgCpy.RootMemoryMonitor,
285287
)
286288
ctx := logtags.AddTag(context.Background(), "leasemgr", nodeID)
287289
mgr.RunBackgroundLeasingTask(ctx)
@@ -520,8 +522,8 @@ func TestLeaseManagerDrain(testingT *testing.T) {
520522
// starts draining.
521523
l1RemovalTracker := leaseRemovalTracker.TrackRemoval(l1.Underlying())
522524

523-
t.node(1).SetDraining(ctx, true, nil /* reporter */)
524-
t.node(2).SetDraining(ctx, true, nil /* reporter */)
525+
t.node(1).SetDraining(ctx, true, nil /* reporter */, false)
526+
t.node(2).SetDraining(ctx, true, nil /* reporter */, false)
525527

526528
// Leases cannot be acquired when in draining mode.
527529
if _, err := t.acquire(1, descID); !testutils.IsError(err, "cannot acquire lease when draining") {
@@ -544,7 +546,7 @@ func TestLeaseManagerDrain(testingT *testing.T) {
544546
{
545547
// Check that leases with a refcount of 0 are correctly kept in the
546548
// store once the drain mode has been exited.
547-
t.node(1).SetDraining(ctx, false, nil /* reporter */)
549+
t.node(1).SetDraining(ctx, false, nil /* reporter */, false /* assertOnLeakedDescriptor */)
548550
l1 := t.mustAcquire(1, descID)
549551
t.mustRelease(1, l1, nil)
550552
t.expectLeases(descID, "/1/1")
@@ -3635,3 +3637,36 @@ func BenchmarkAcquireLeaseConcurrent(b *testing.B) {
36353637
}
36363638

36373639
}
3640+
3641+
// TestLeaseManagerIsMemoryMonitored basic sanity test to confirm memory monitoring
3642+
// is working.
3643+
func TestLeaseManagerIsMemoryMonitored(t *testing.T) {
3644+
defer leaktest.AfterTest(t)()
3645+
defer log.Scope(t).Close(t)
3646+
// This test creates a large number of objects, which
3647+
// can timeout under stress / race.
3648+
skip.UnderDuress(t)
3649+
3650+
ctx := context.Background()
3651+
s, sqlDB, _ := serverutils.StartServer(t, base.TestServerArgs{})
3652+
defer s.Stopper().Stop(ctx)
3653+
lm := s.LeaseManager().(*lease.Manager)
3654+
startBytes := lm.TestingGetBoundAccount().Used()
3655+
lastBytes := startBytes
3656+
runner := sqlutils.MakeSQLRunner(sqlDB)
3657+
// First, acquire leases on all the tables
3658+
for i := 0; i < 100; i++ {
3659+
runner.Exec(t, fmt.Sprintf("CREATE TABLE t%d(n int)", i))
3660+
runner.Exec(t, fmt.Sprintf("INSERT INTO t%d VALUES (1)", i))
3661+
currentBytes := lm.TestingGetBoundAccount().Used()
3662+
require.Greaterf(t, currentBytes, lastBytes, "memory usage should increase after using a table")
3663+
lastBytes = currentBytes
3664+
}
3665+
// Next we will release the leases on all of them.
3666+
lastBytes = lm.TestingGetBoundAccount().Used()
3667+
for i := 0; i < 100; i++ {
3668+
runner.Exec(t, fmt.Sprintf("DROP TABLE t%d", i))
3669+
}
3670+
currentBytes := lm.TestingGetBoundAccount().Used()
3671+
require.Lessf(t, currentBytes, lastBytes, "memory usage should be decreasing after dropping a table")
3672+
}

pkg/sql/catalog/lease/storage.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ type leasingMetrics struct {
7272
longWaitForNoVersionsActive *metric.Gauge
7373
longTwoVersionInvariantViolationWaitActive *metric.Gauge
7474
longWaitForInitialVersionActive *metric.Gauge
75+
leaseMaxBytesHist metric.IHistogram
76+
leaseCurBytesCount *metric.Gauge
7577
}
7678

7779
type leaseFields struct {

0 commit comments

Comments
 (0)