Skip to content

Commit 835eda7

Browse files
craig[bot]angles-n-daemonsfqazijaywritescode
committed
151887: optbuilder: gate crdb_internal builtins from unauthorized access r=angles-n-daemons a=angles-n-daemons The epic below outlines a set of work to prevent users from unauthorized access to what we deem unsafe internals. These unsafe internals lie mostly in the crdb_internal schema and the system database. This PR makes it so crdb_internal builtins are disallowed from external use unless the specified override is enabled. Fixes: #151501 Epic: CRDB-24527 Release note (ops change): restricts access to all crdb_internal builtins unless session variable `allow_unsafe_internals` is set to true, or if the caller is internal. 152021: catalog/lease: fix assertion for no outstanding leases r=fqazi a=fqazi Previously, the assertion confirming that no leases were outstanding was too strict because it checked for remaining descriptor versions immediately after setting the drain flag. While all user queries should be drained by this point, this check was too strict for internal operations. To address this, this patch moves the assertion into a closure and ensures that anything remaining has a reference count of zero. Fixes: #151902 Release note: None 152121: authors: add jaywritescode to authors r=jaywritescode a=jaywritescode Epic: None Release note: None Co-authored-by: Brian Dillmann <[email protected]> Co-authored-by: Faizan Qazi <[email protected]> Co-authored-by: Jay Harris <[email protected]>
4 parents 8197306 + 47262e3 + 03b4077 + 8ae31ad commit 835eda7

File tree

11 files changed

+166
-113
lines changed

11 files changed

+166
-113
lines changed

AUTHORS

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ Jason Chan <[email protected]> <[email protected]>
242242
Jason Chen <[email protected]>
243243
Jason E. Aten <[email protected]>
244244
Jason Young <[email protected]>
245+
245246
Jay Kominek <[email protected]>
246247
247248
Jay Rauchenstein <[email protected]>
@@ -576,4 +577,4 @@ Zachary Smith <[email protected]> Zachary.smith <[email protected]>
576577
Zakir Hussain Shaik <[email protected]> shaikzakiriitm <[email protected]>
577578
578579
何羿宏 <[email protected]>
579-
智雅楠 <[email protected]>
580+
智雅楠 <[email protected]>

pkg/server/api_v2_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -547,7 +547,7 @@ func drain(ctx context.Context, ts1 serverutils.TestServerInterface, t *testing.
547547
timeoutCtx, cancel := context.WithTimeout(ctx, testutils.SucceedsSoonDuration())
548548
defer cancel()
549549

550-
err := ts1.DrainClientsWithoutLeakCheck(ctx)
550+
err := ts1.DrainClients(ctx)
551551
require.NoError(t, err)
552552

553553
for timeoutCtx.Err() == nil {

pkg/server/drain.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -492,7 +492,7 @@ func (s *drainServer) drainClientsInternal(
492492
// Drain all SQL table leases. This must be done after the pgServer has
493493
// given sessions a chance to finish ongoing work and after the background
494494
// tasks that may issue SQL statements have shut down.
495-
s.sqlServer.leaseMgr.SetDraining(ctx, true /* drain */, reporter, assertOnLeakedDescriptor)
495+
s.sqlServer.leaseMgr.SetDraining(ctx, true, reporter)
496496

497497
session, err := s.sqlServer.sqlLivenessProvider.Release(ctx)
498498
if err != nil {

pkg/server/testserver.go

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1242,11 +1242,6 @@ func (t *testTenant) DrainClients(ctx context.Context) error {
12421242
return t.drain.drainClients(ctx, nil /* reporter */)
12431243
}
12441244

1245-
// DrainClients exports the drainClientsInternal() method for use by tests.
1246-
func (t *testTenant) DrainClientsWithoutLeakCheck(ctx context.Context) error {
1247-
return t.drain.drainClientsInternal(ctx, nil /* reporter */, false)
1248-
}
1249-
12501245
// Readiness is part of the serverutils.ApplicationLayerInterface.
12511246
func (t *testTenant) Readiness(ctx context.Context) error {
12521247
return t.t.admin.checkReadinessForHealthCheck(ctx)
@@ -1948,13 +1943,6 @@ func (ts *testServer) DrainClients(ctx context.Context) error {
19481943
return ts.drain.drainClients(ctx, nil /* reporter */)
19491944
}
19501945

1951-
// DrainClientsWithoutLeakCheck exports the drainClients() method for use by tests
1952-
// with descriptor leak checking disabled. This is useful for tests that focus on
1953-
// restart safety logic rather than lease management correctness.
1954-
func (ts *testServer) DrainClientsWithoutLeakCheck(ctx context.Context) error {
1955-
return ts.drain.drainClientsInternal(ctx, nil, false /* assertOnLeakedDescriptor */)
1956-
}
1957-
19581946
// Readiness is part of the serverutils.ApplicationLayerInterface.
19591947
func (ts *testServer) Readiness(ctx context.Context) error {
19601948
return ts.admin.checkReadinessForHealthCheck(ctx)

pkg/sql/authorization_test.go

Lines changed: 87 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ func checkUnsafeErr(t *testing.T, err error) {
311311
t.Fatal("expected unsafe access error, got", err)
312312
}
313313

314-
func TestCheckUnsafeSystemAccess(t *testing.T) {
314+
func TestCheckUnsafeInternalsAccess(t *testing.T) {
315315
defer leaktest.AfterTest(t)()
316316
defer log.Scope(t).Close(t)
317317

@@ -321,92 +321,105 @@ func TestCheckUnsafeSystemAccess(t *testing.T) {
321321
})
322322
defer s.Stopper().Stop(ctx)
323323

324-
q := "SELECT * FROM system.namespace"
325-
326-
t.Run("as an external querier", func(t *testing.T) {
327-
for _, test := range []struct {
328-
AllowUnsafeInternals bool
329-
Passes bool
330-
}{
331-
{AllowUnsafeInternals: false, Passes: false},
332-
{AllowUnsafeInternals: true, Passes: true},
333-
} {
334-
t.Run(fmt.Sprintf("%t", test), func(t *testing.T) {
335-
conn := s.SQLConn(t)
336-
_, err := conn.Exec("SET allow_unsafe_internals = $1", test.AllowUnsafeInternals)
337-
require.NoError(t, err)
338-
339-
_, err = conn.Query(q)
340-
if test.Passes {
324+
t.Run("accessing the system database", func(t *testing.T) {
325+
q := "SELECT * FROM system.namespace"
326+
327+
t.Run("as an external querier", func(t *testing.T) {
328+
for _, test := range []struct {
329+
AllowUnsafeInternals bool
330+
Passes bool
331+
}{
332+
{AllowUnsafeInternals: false, Passes: false},
333+
{AllowUnsafeInternals: true, Passes: true},
334+
} {
335+
t.Run(fmt.Sprintf("%t", test), func(t *testing.T) {
336+
conn := s.SQLConn(t)
337+
_, err := conn.Exec("SET allow_unsafe_internals = $1", test.AllowUnsafeInternals)
341338
require.NoError(t, err)
342-
} else {
343-
checkUnsafeErr(t, err)
344-
}
345-
})
346-
}
347-
})
348339

349-
t.Run("as an internal querier", func(t *testing.T) {
350-
for _, test := range []struct {
351-
AllowUnsafeInternals bool
352-
Passes bool
353-
}{
354-
{AllowUnsafeInternals: false, Passes: true},
355-
{AllowUnsafeInternals: true, Passes: true},
356-
} {
357-
t.Run(fmt.Sprintf("%t", test), func(t *testing.T) {
358-
idb := s.InternalDB().(isql.DB)
359-
err := idb.Txn(ctx, func(ctx context.Context, txn isql.Txn) error {
360-
txn.SessionData().LocalOnlySessionData.AllowUnsafeInternals = test.AllowUnsafeInternals
361-
362-
_, err := txn.QueryBuffered(ctx, "internal-query", txn.KV(), q)
363-
return err
340+
_, err = conn.Query(q)
341+
if test.Passes {
342+
require.NoError(t, err)
343+
} else {
344+
checkUnsafeErr(t, err)
345+
}
364346
})
347+
}
348+
})
365349

366-
require.NoError(t, err)
350+
t.Run("as an internal querier", func(t *testing.T) {
351+
for _, test := range []struct {
352+
AllowUnsafeInternals bool
353+
Passes bool
354+
}{
355+
{AllowUnsafeInternals: false, Passes: true},
356+
{AllowUnsafeInternals: true, Passes: true},
357+
} {
358+
t.Run(fmt.Sprintf("%t", test), func(t *testing.T) {
359+
idb := s.InternalDB().(isql.DB)
360+
err := idb.Txn(ctx, func(ctx context.Context, txn isql.Txn) error {
361+
txn.SessionData().LocalOnlySessionData.AllowUnsafeInternals = test.AllowUnsafeInternals
362+
363+
_, err := txn.QueryBuffered(ctx, "internal-query", txn.KV(), q)
364+
return err
365+
})
367366

368-
if test.Passes {
369367
require.NoError(t, err)
370-
} else {
371-
checkUnsafeErr(t, err)
372-
}
373-
})
374-
}
375-
})
376-
377-
}
378-
379-
func TestCheckUnsafeCRDBInternalAccess(t *testing.T) {
380-
defer leaktest.AfterTest(t)()
381-
defer log.Scope(t).Close(t)
382368

383-
ctx := context.Background()
384-
s := serverutils.StartServerOnly(t, base.TestServerArgs{
385-
DefaultTestTenant: base.TestControlsTenantsExplicitly,
369+
if test.Passes {
370+
require.NoError(t, err)
371+
} else {
372+
checkUnsafeErr(t, err)
373+
}
374+
})
375+
}
376+
})
386377
})
387-
defer s.Stopper().Stop(ctx)
388378

389-
// Test that with allow_unsafe_internals = false:
390-
// - Supported tables (zones) are allowed
391-
// - Unsupported tables (gossip_alerts) are denied
379+
t.Run("accessing the crdb_internal schema", func(t *testing.T) {
380+
t.Run("supported table allowed", func(t *testing.T) {
381+
conn := s.SQLConn(t)
382+
_, err := conn.Exec("SET allow_unsafe_internals = false")
383+
require.NoError(t, err)
392384

393-
t.Run("supported table allowed", func(t *testing.T) {
394-
conn := s.SQLConn(t)
395-
_, err := conn.Exec("SET allow_unsafe_internals = false")
396-
require.NoError(t, err)
385+
// Supported crdb_internal tables should be allowed even when allow_unsafe_internals = false
386+
_, err = conn.Query("SELECT * FROM crdb_internal.zones")
387+
require.NoError(t, err, "supported crdb_internal table (zones) should be accessible when allow_unsafe_internals = false")
388+
})
397389

398-
// Supported crdb_internal tables should be allowed even when allow_unsafe_internals = false
399-
_, err = conn.Query("SELECT * FROM crdb_internal.zones")
400-
require.NoError(t, err, "supported crdb_internal table (zones) should be accessible when allow_unsafe_internals = false")
390+
t.Run("unsupported table denied", func(t *testing.T) {
391+
conn := s.SQLConn(t)
392+
_, err := conn.Exec("SET allow_unsafe_internals = false")
393+
require.NoError(t, err)
394+
395+
// Unsupported crdb_internal tables should be denied when allow_unsafe_internals = false
396+
_, err = conn.Query("SELECT * FROM crdb_internal.gossip_alerts")
397+
checkUnsafeErr(t, err)
398+
})
401399
})
402400

403-
t.Run("unsupported table denied", func(t *testing.T) {
404-
conn := s.SQLConn(t)
405-
_, err := conn.Exec("SET allow_unsafe_internals = false")
406-
require.NoError(t, err)
401+
// The functionality for this lies in the pkg/sql/optbuilder/scalar.go
402+
// file, but it is tested here as that package does not setup a test
403+
// server.
404+
t.Run("accessing crdb_internal builtins", func(t *testing.T) {
405+
t.Run("non crdb_internal builtin allowed", func(t *testing.T) {
406+
conn := s.SQLConn(t)
407+
_, err := conn.Exec("SET allow_unsafe_internals = false")
408+
require.NoError(t, err)
409+
410+
// Non crdb_internal tables should be allowed.
411+
_, err = conn.Query("SELECT * FROM generate_series(1,5)")
412+
require.NoError(t, err)
413+
})
414+
415+
t.Run("crdb_internal builtin not allowed", func(t *testing.T) {
416+
conn := s.SQLConn(t)
417+
_, err := conn.Exec("SET allow_unsafe_internals = false")
418+
require.NoError(t, err)
407419

408-
// Unsupported crdb_internal tables should be denied when allow_unsafe_internals = false
409-
_, err = conn.Query("SELECT * FROM crdb_internal.gossip_alerts")
410-
checkUnsafeErr(t, err)
420+
// Unsupported crdb_internal builtins should be denied.
421+
_, err = conn.Query("SELECT * FROM crdb_internal.tenant_span_stats()")
422+
checkUnsafeErr(t, err)
423+
})
411424
})
412425
}

pkg/sql/catalog/lease/lease.go

Lines changed: 43 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1267,7 +1267,7 @@ type Manager struct {
12671267
// that we have all the updates for.
12681268
closeTimestamp atomic.Value
12691269

1270-
draining atomic.Value
1270+
draining atomic.Bool
12711271

12721272
// names is a cache for name -> id mappings. A mapping for the cache
12731273
// should only be used if we currently have an active lease on the respective
@@ -1417,6 +1417,7 @@ func NewLeaseManager(
14171417
lm.storage.regionPrefix.Store(enum.One)
14181418
lm.storage.writer = newKVWriter(codec, db.KV(), keys.LeaseTableID, settingsWatcher)
14191419
lm.stopper.AddCloser(lm.sem.Closer("stopper"))
1420+
lm.stopper.AddCloser(stop.CloserFn(lm.AssertAllLeasesAreReleasedAfterDrain))
14201421
lm.mu.descriptors = make(map[descpb.ID]*descriptorState)
14211422
lm.waitForInit = make(chan struct{})
14221423
// We are going to start the range feed later when StartRefreshLeasesTask
@@ -1731,7 +1732,7 @@ func (m *Manager) removeOnceDereferenced() bool {
17311732

17321733
// IsDraining returns true if this node's lease manager is draining.
17331734
func (m *Manager) IsDraining() bool {
1734-
return m.draining.Load().(bool)
1735+
return m.draining.Load()
17351736
}
17361737

17371738
// SetDraining (when called with 'true') removes all inactive leases. Any leases
@@ -1742,10 +1743,7 @@ func (m *Manager) IsDraining() bool {
17421743
// been done by the time this call returns. See the explanation in
17431744
// pkg/server/drain.go for details.
17441745
func (m *Manager) SetDraining(
1745-
ctx context.Context,
1746-
drain bool,
1747-
reporter func(int, redact.SafeString),
1748-
assertOnLeakedDescriptor bool,
1746+
ctx context.Context, drain bool, reporter func(int, redact.SafeString),
17491747
) {
17501748
m.draining.Store(drain)
17511749
if !drain {
@@ -1759,15 +1757,6 @@ func (m *Manager) SetDraining(
17591757
t.mu.Lock()
17601758
defer t.mu.Unlock()
17611759
leasesToRelease := t.removeInactiveVersions(ctx)
1762-
// Ensure that all leases are released at this time. Ignore any descriptors
1763-
// that have acquisitions in progress.
1764-
if buildutil.CrdbTestBuild &&
1765-
assertOnLeakedDescriptor &&
1766-
len(t.mu.active.data) > 0 &&
1767-
t.mu.acquisitionsInProgress == 0 {
1768-
// Panic that a descriptor may have leaked.
1769-
panic(errors.AssertionFailedf("descriptor leak was detected for: %d (%s)", t.id, t.mu.active))
1770-
}
17711760
return leasesToRelease
17721761
}()
17731762
for _, l := range leases {
@@ -1780,6 +1769,45 @@ func (m *Manager) SetDraining(
17801769
}
17811770
}
17821771

1772+
// AssertAllLeasesAreReleasedAfterDrain asserts that all leases are released after
1773+
// draining.
1774+
func (m *Manager) AssertAllLeasesAreReleasedAfterDrain() {
1775+
if !buildutil.CrdbTestBuild || !m.draining.Load() {
1776+
return
1777+
}
1778+
m.mu.Lock()
1779+
defer m.mu.Unlock()
1780+
for _, t := range m.mu.descriptors {
1781+
func() {
1782+
descriptorStr := strings.Builder{}
1783+
panicWithErr := false
1784+
t.mu.Lock()
1785+
defer t.mu.Unlock()
1786+
// Ensure that all leases are released at this time.
1787+
if len(t.mu.active.data) > 0 {
1788+
// Check if any of these have a non-zero ref count indicating some type of
1789+
// leak. It may be possible for the entry to exist if we were interrupted
1790+
// mid-acquisition by the draining process. But the reference count should
1791+
// *never* be non-zero.
1792+
for _, l := range t.mu.active.data {
1793+
if l.refcount.Load() == 0 {
1794+
continue
1795+
}
1796+
panicWithErr = true
1797+
if descriptorStr.Len() > 0 {
1798+
descriptorStr.WriteString(",")
1799+
}
1800+
descriptorStr.WriteString(fmt.Sprintf("{%s}", l.String()))
1801+
}
1802+
if panicWithErr {
1803+
panic(errors.AssertionFailedf("descriptor leak was detected for ID: %d, "+
1804+
"with versions [%s]", t.id, descriptorStr.String()))
1805+
}
1806+
}
1807+
}()
1808+
}
1809+
}
1810+
17831811
// isDescriptorStateEmpty determines if a descriptor state exists and
17841812
// has any active versions inside it.
17851813
func (m *Manager) isDescriptorStateEmpty(id descpb.ID) bool {

pkg/sql/catalog/lease/lease_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -522,8 +522,8 @@ func TestLeaseManagerDrain(testingT *testing.T) {
522522
// starts draining.
523523
l1RemovalTracker := leaseRemovalTracker.TrackRemoval(l1.Underlying())
524524

525-
t.node(1).SetDraining(ctx, true, nil /* reporter */, false)
526-
t.node(2).SetDraining(ctx, true, nil /* reporter */, false)
525+
t.node(1).SetDraining(ctx, true, nil /* reporter */)
526+
t.node(2).SetDraining(ctx, true, nil)
527527

528528
// Leases cannot be acquired when in draining mode.
529529
if _, err := t.acquire(1, descID); !testutils.IsError(err, "cannot acquire lease when draining") {
@@ -546,7 +546,7 @@ func TestLeaseManagerDrain(testingT *testing.T) {
546546
{
547547
// Check that leases with a refcount of 0 are correctly kept in the
548548
// store once the drain mode has been exited.
549-
t.node(1).SetDraining(ctx, false, nil /* reporter */, false /* assertOnLeakedDescriptor */)
549+
t.node(1).SetDraining(ctx, false, nil)
550550
l1 := t.mustAcquire(1, descID)
551551
t.mustRelease(1, l1, nil)
552552
t.expectLeases(descID, "/1/1")

pkg/sql/opt/optbuilder/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ go_library(
101101
"//pkg/sql/sqltelemetry",
102102
"//pkg/sql/syntheticprivilege",
103103
"//pkg/sql/types",
104+
"//pkg/sql/unsafesql",
104105
"//pkg/util",
105106
"//pkg/util/buildutil",
106107
"//pkg/util/errorutil",

0 commit comments

Comments
 (0)