Skip to content

Commit d40ce69

Browse files
committed
sql/catalog/lease: skip dropped regions when counting leases
countLeasesByRegion now skips regions that were just dropped instead of reporting “failed to count leases”. This was found in the TestMultiRegionTenantRegions test. We did have pre-existing logic to handle missing regions in handleRegionLivenessErrors, but it wasn't handled correctly by all the callers. Resolves: #157122 Release note: none
1 parent 55edba2 commit d40ce69

File tree

4 files changed

+56
-11
lines changed

4 files changed

+56
-11
lines changed

pkg/sql/catalog/lease/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ go_test(
7777
name = "lease_test",
7878
size = "large",
7979
srcs = [
80+
"count_test.go",
8081
"helpers_test.go",
8182
"ie_writer_test.go",
8283
"kv_writer_test.go",

pkg/sql/catalog/lease/count.go

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -208,9 +208,13 @@ func countLeasesByRegion(
208208
} else {
209209
err = queryRegionRows(ctx)
210210
}
211-
if err := handleRegionLivenessErrors(ctx, prober, region, err); err != nil {
211+
skipRegion, err := handleRegionLivenessErrors(ctx, prober, region, err)
212+
if err != nil {
212213
return err
213214
}
215+
if skipRegion {
216+
return nil
217+
}
214218
if values == nil {
215219
return errors.New("failed to count leases")
216220
}
@@ -231,27 +235,27 @@ func getCountLeaseColumns() string {
231235
}
232236

233237
// handleRegionLivenessErrors handles errors that are linked to region liveness
234-
// timeouts.
238+
// timeouts. Return true if the region should be skipped.
235239
func handleRegionLivenessErrors(
236240
ctx context.Context, prober regionliveness.Prober, region string, err error,
237-
) error {
241+
) (bool, error) {
238242
if err != nil {
239243
if regionliveness.IsQueryTimeoutErr(err) {
240244
// Probe and mark the region potentially.
241245
probeErr := prober.ProbeLiveness(ctx, region)
242246
if probeErr != nil {
243247
err = errors.WithSecondaryError(err, probeErr)
244-
return err
248+
return false, err
245249
}
246-
return errors.Wrapf(err, "count-lease timed out reading from a region")
250+
return false, errors.Wrapf(err, "count-lease timed out reading from a region")
247251
} else if regionliveness.IsMissingRegionEnumErr(err) {
248252
// Skip this region because we were unable to find region in
249253
// type descriptor. Since the database regions are cached, they
250254
// may be stale and have dropped regions.
251255
log.Dev.Infof(ctx, "count-lease skipping region %s due to error: %v", region, err)
252-
return nil
256+
return true, nil
253257
}
254-
return err
258+
return false, err
255259
}
256-
return err
260+
return false, err
257261
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package lease
7+
8+
import (
9+
"context"
10+
"testing"
11+
12+
"github.com/cockroachdb/cockroach/pkg/sql/types"
13+
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
14+
"github.com/cockroachdb/cockroach/pkg/util/log"
15+
"github.com/stretchr/testify/require"
16+
)
17+
18+
func TestHandleRegionLivenessErrorsSkipsMissingRegion(t *testing.T) {
19+
defer leaktest.AfterTest(t)()
20+
defer log.Scope(t).Close(t)
21+
22+
ctx := context.Background()
23+
skip, err := handleRegionLivenessErrors(ctx, nil /* prober */, "us-east2", types.EnumValueNotYetPublicError)
24+
require.NoError(t, err)
25+
require.True(t, skip)
26+
}

pkg/sql/catalog/lease/lease.go

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,11 @@ func (m *Manager) WaitForInitialVersion(
425425
sessionIDs, err = getSessionsHoldingDescriptor(ctx, txn, schemaID, region)
426426
}
427427
if err != nil {
428-
return handleRegionLivenessErrors(ctx, prober, region, err)
428+
skipRegion, handledErr := handleRegionLivenessErrors(ctx, prober, region, err)
429+
if skipRegion {
430+
return nil
431+
}
432+
return handledErr
429433
}
430434
sessionsPerRegion[region] = sessionIDs
431435
expectedSessions += len(sessionIDs)
@@ -469,9 +473,13 @@ func (m *Manager) WaitForInitialVersion(
469473
} else {
470474
regionCount, err = countDescriptorsHeldBySessionIDs(ctx, txn, descIDsForSchema, region, sessionIDs)
471475
}
472-
if err := handleRegionLivenessErrors(ctx, prober, region, err); err != nil {
476+
skipRegion, err := handleRegionLivenessErrors(ctx, prober, region, err)
477+
if err != nil {
473478
return err
474479
}
480+
if skipRegion {
481+
return nil
482+
}
475483
count += regionCount
476484
return nil
477485
})
@@ -620,7 +628,13 @@ func (m *Manager) WaitForNewVersion(
620628
regionStaleSessionCount, err = countSessionsHoldingStaleDescriptor(ctx, txn, desc, region)
621629
}
622630
if err != nil {
623-
return handleRegionLivenessErrors(ctx, prober, region, err)
631+
skipRegion, handledErr := handleRegionLivenessErrors(ctx, prober, region, err)
632+
if handledErr != nil {
633+
return handledErr
634+
}
635+
if skipRegion {
636+
continue
637+
}
624638
}
625639

626640
staleSessionCount += regionStaleSessionCount

0 commit comments

Comments
 (0)