Skip to content

Commit 1781fc1

Browse files
craig[bot]spilchen
andcommitted
Merge #158003
158003: sql/catalog/lease: skip dropped regions when counting leases r=spilchen a=spilchen countLeasesByRegion now skips regions that were just dropped instead of reporting “failed to count leases”. This was found in the TestMultiRegionTenantRegions test. We did have pre-existing logic to handle missing regions in handleRegionLivenessErrors, but it wasn't handled correctly by all the callers. Resolves: #157122 Release note: none Co-authored-by: Matt Spilchen <[email protected]>
2 parents 736723e + d40ce69 commit 1781fc1

File tree

4 files changed

+56
-11
lines changed

4 files changed

+56
-11
lines changed

pkg/sql/catalog/lease/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ go_test(
7777
name = "lease_test",
7878
size = "large",
7979
srcs = [
80+
"count_test.go",
8081
"helpers_test.go",
8182
"ie_writer_test.go",
8283
"kv_writer_test.go",

pkg/sql/catalog/lease/count.go

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -208,9 +208,13 @@ func countLeasesByRegion(
208208
} else {
209209
err = queryRegionRows(ctx)
210210
}
211-
if err := handleRegionLivenessErrors(ctx, prober, region, err); err != nil {
211+
skipRegion, err := handleRegionLivenessErrors(ctx, prober, region, err)
212+
if err != nil {
212213
return err
213214
}
215+
if skipRegion {
216+
return nil
217+
}
214218
if values == nil {
215219
return errors.New("failed to count leases")
216220
}
@@ -231,27 +235,27 @@ func getCountLeaseColumns() string {
231235
}
232236

233237
// handleRegionLivenessErrors handles errors that are linked to region liveness
234-
// timeouts.
238+
// timeouts. Return true if the region should be skipped.
235239
func handleRegionLivenessErrors(
236240
ctx context.Context, prober regionliveness.Prober, region string, err error,
237-
) error {
241+
) (bool, error) {
238242
if err != nil {
239243
if regionliveness.IsQueryTimeoutErr(err) {
240244
// Probe and mark the region potentially.
241245
probeErr := prober.ProbeLiveness(ctx, region)
242246
if probeErr != nil {
243247
err = errors.WithSecondaryError(err, probeErr)
244-
return err
248+
return false, err
245249
}
246-
return errors.Wrapf(err, "count-lease timed out reading from a region")
250+
return false, errors.Wrapf(err, "count-lease timed out reading from a region")
247251
} else if regionliveness.IsMissingRegionEnumErr(err) {
248252
// Skip this region because we were unable to find region in
249253
// type descriptor. Since the database regions are cached, they
250254
// may be stale and have dropped regions.
251255
log.Dev.Infof(ctx, "count-lease skipping region %s due to error: %v", region, err)
252-
return nil
256+
return true, nil
253257
}
254-
return err
258+
return false, err
255259
}
256-
return err
260+
return false, err
257261
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package lease
7+
8+
import (
9+
"context"
10+
"testing"
11+
12+
"github.com/cockroachdb/cockroach/pkg/sql/types"
13+
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
14+
"github.com/cockroachdb/cockroach/pkg/util/log"
15+
"github.com/stretchr/testify/require"
16+
)
17+
18+
func TestHandleRegionLivenessErrorsSkipsMissingRegion(t *testing.T) {
19+
defer leaktest.AfterTest(t)()
20+
defer log.Scope(t).Close(t)
21+
22+
ctx := context.Background()
23+
skip, err := handleRegionLivenessErrors(ctx, nil /* prober */, "us-east2", types.EnumValueNotYetPublicError)
24+
require.NoError(t, err)
25+
require.True(t, skip)
26+
}

pkg/sql/catalog/lease/lease.go

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,11 @@ func (m *Manager) WaitForInitialVersion(
425425
sessionIDs, err = getSessionsHoldingDescriptor(ctx, txn, schemaID, region)
426426
}
427427
if err != nil {
428-
return handleRegionLivenessErrors(ctx, prober, region, err)
428+
skipRegion, handledErr := handleRegionLivenessErrors(ctx, prober, region, err)
429+
if skipRegion {
430+
return nil
431+
}
432+
return handledErr
429433
}
430434
sessionsPerRegion[region] = sessionIDs
431435
expectedSessions += len(sessionIDs)
@@ -469,9 +473,13 @@ func (m *Manager) WaitForInitialVersion(
469473
} else {
470474
regionCount, err = countDescriptorsHeldBySessionIDs(ctx, txn, descIDsForSchema, region, sessionIDs)
471475
}
472-
if err := handleRegionLivenessErrors(ctx, prober, region, err); err != nil {
476+
skipRegion, err := handleRegionLivenessErrors(ctx, prober, region, err)
477+
if err != nil {
473478
return err
474479
}
480+
if skipRegion {
481+
return nil
482+
}
475483
count += regionCount
476484
return nil
477485
})
@@ -620,7 +628,13 @@ func (m *Manager) WaitForNewVersion(
620628
regionStaleSessionCount, err = countSessionsHoldingStaleDescriptor(ctx, txn, desc, region)
621629
}
622630
if err != nil {
623-
return handleRegionLivenessErrors(ctx, prober, region, err)
631+
skipRegion, handledErr := handleRegionLivenessErrors(ctx, prober, region, err)
632+
if handledErr != nil {
633+
return handledErr
634+
}
635+
if skipRegion {
636+
continue
637+
}
624638
}
625639

626640
staleSessionCount += regionStaleSessionCount

0 commit comments

Comments
 (0)