Skip to content

Commit 2f88a29

Browse files
craig[bot]jeffswensonfqaziRaduBerindeasg0451
committed
154684: backup: split up the multiregion datadriven test r=jeffswenson a=jeffswenson This splits up the multiregion datadriven test so that each test has at most 2 clusters in it. We've been seeing some stuck server shutdowns and this should make them easier to troubleshoot. Release note: none Informs: #145079 154687: backup: improve datadriven test cleanup r=jeffswenson a=jeffswenson Previously, the datadriven test harness would tear down clusters in order. This makes it difficult to troubleshoot stuck tear downs because there are goroutines for a running server mixed in with goroutines for a server with a stuck shutdown. Release note: none Informs: #145079 154752: sql/schemachanger: fix incorrect filter for pk index swaps r=fqazi a=fqazi Previously, we adjusted the schema changer rules to ensure that old secondary indexes are only dropped when the new secondary index is usable. Unfortunately, one of the rules had an incorrect filter. To address this, this patch fixes the filter to expect the new secondary index, which will have the new flag. Additionally, the index recreation logic for ALTER PRIMARY KEY was delaying when the new secondary indexes could be made public. Fixes: #154751 Release note: None 154867: go.mod: bump datadriven r=RaduBerinde a=RaduBerinde Bump datadriven to incorporate a fix (cockroachdb/datadriven#60). Epic: none Release note: None 154870: changefeedccl: make bulk delivery of rangefeed events optional r=aerfrei a=asg0451 This is a temporary opt-out until we can properly test the performance impact of bulk delivery. Epic: none Release note (general change): The changefeed bulk delivery setting was made optional. Co-authored-by: Jeff Swenson <[email protected]> Co-authored-by: Faizan Qazi <[email protected]> Co-authored-by: Radu Berinde <[email protected]> Co-authored-by: Miles Frankel <[email protected]>
6 parents e7cb6d4 + 0599a96 + 7170345 + 3e026ce + 10c45ca + de7c778 commit 2f88a29

File tree

107 files changed

+3946
-2944
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

107 files changed

+3946
-2944
lines changed

DEPS.bzl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1764,10 +1764,10 @@ def go_deps():
17641764
name = "com_github_cockroachdb_datadriven",
17651765
build_file_proto_mode = "disable_global",
17661766
importpath = "github.com/cockroachdb/datadriven",
1767-
sha256 = "417027b5ff27774000129768f79e9ae62021b95d3ac9d3181e132dbe46b44da3",
1768-
strip_prefix = "github.com/cockroachdb/[email protected].20250911232732-d959cf14706c",
1767+
sha256 = "a7ffcef0b264d9c28c36b2f9b737ff739542f472d7614938ae507e2da269f6c2",
1768+
strip_prefix = "github.com/cockroachdb/[email protected].20251006155849-f84f9e519edd",
17691769
urls = [
1770-
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/datadriven/com_github_cockroachdb_datadriven-v1.0.3-0.20250911232732-d959cf14706c.zip",
1770+
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/datadriven/com_github_cockroachdb_datadriven-v1.0.3-0.20251006155849-f84f9e519edd.zip",
17711771
],
17721772
)
17731773
go_repository(

build/bazelutil/distdir_files.bzl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@ DISTDIR_FILES = {
347347
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/cockroach-go/v2/com_github_cockroachdb_cockroach_go_v2-v2.4.1.zip": "ba646db91152f3121a6812c7b74d12d8c0e126f7b4d3b927618b159692ceb424",
348348
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/crlfmt/com_github_cockroachdb_crlfmt-v0.0.0-20221214225007-b2fc5c302548.zip": "fedc01bdd6d964da0425d5eaac8efadc951e78e13f102292cc0774197f09ab63",
349349
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/crlib/com_github_cockroachdb_crlib-v0.0.0-20251001180057-2a49e1873587.zip": "539dd737ca1da53ee5c296ea0f3aad92f6d59f577e0d169dffb5a4ad706e1728",
350-
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/datadriven/com_github_cockroachdb_datadriven-v1.0.3-0.20250911232732-d959cf14706c.zip": "417027b5ff27774000129768f79e9ae62021b95d3ac9d3181e132dbe46b44da3",
350+
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/datadriven/com_github_cockroachdb_datadriven-v1.0.3-0.20251006155849-f84f9e519edd.zip": "a7ffcef0b264d9c28c36b2f9b737ff739542f472d7614938ae507e2da269f6c2",
351351
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/drpc/com_github_cockroachdb_drpc-v0.0.0-20250924114114-78d4e121902a.zip": "98b44a51f82873f93f77da80230212ab40f35044e8d38645cb1392ae03462f0b",
352352
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/errors/com_github_cockroachdb_errors-v1.12.0.zip": "f73d8a5f4d8fcbc4ed61db2b47f17e2601d8b32e9a49c0665667489d9d9d6e7c",
353353
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/go-test-teamcity/com_github_cockroachdb_go_test_teamcity-v0.0.0-20191211140407-cff980ad0a55.zip": "bac30148e525b79d004da84d16453ddd2d5cd20528e9187f1d7dac708335674b",

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ require (
137137
github.com/cockroachdb/cockroach-go/v2 v2.4.1
138138
github.com/cockroachdb/crlfmt v0.0.0-20221214225007-b2fc5c302548
139139
github.com/cockroachdb/crlib v0.0.0-20251001180057-2a49e1873587
140-
github.com/cockroachdb/datadriven v1.0.3-0.20250911232732-d959cf14706c
140+
github.com/cockroachdb/datadriven v1.0.3-0.20251006155849-f84f9e519edd
141141
github.com/cockroachdb/errors v1.12.0
142142
github.com/cockroachdb/go-test-teamcity v0.0.0-20191211140407-cff980ad0a55
143143
github.com/cockroachdb/gostdlib v1.19.0

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -559,8 +559,8 @@ github.com/cockroachdb/crlib v0.0.0-20251001180057-2a49e1873587 h1:qjG2TrBrPbGRV
559559
github.com/cockroachdb/crlib v0.0.0-20251001180057-2a49e1873587/go.mod h1:ae57yNis2F1FThSNdPdoXfiPOVi8G1TLreCBQYPOdqo=
560560
github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8=
561561
github.com/cockroachdb/datadriven v1.0.2/go.mod h1:a9RdTaap04u637JoCzcUoIcDmvwSUtcUFtT/C3kJlTU=
562-
github.com/cockroachdb/datadriven v1.0.3-0.20250911232732-d959cf14706c h1:a0m7gmtv2mzJQ4wP9BkxCmJAnjZ7fsvCi2IORGD1als=
563-
github.com/cockroachdb/datadriven v1.0.3-0.20250911232732-d959cf14706c/go.mod h1:jsaKMvD3RBCATk1/jbUZM8C9idWBJME9+VRZ5+Liq1g=
562+
github.com/cockroachdb/datadriven v1.0.3-0.20251006155849-f84f9e519edd h1:vpWCe7VvdQbQ/9wGtlH3i+Oj+9OggKci3lsASL1ydvg=
563+
github.com/cockroachdb/datadriven v1.0.3-0.20251006155849-f84f9e519edd/go.mod h1:jsaKMvD3RBCATk1/jbUZM8C9idWBJME9+VRZ5+Liq1g=
564564
github.com/cockroachdb/drpc v0.0.0-20250924114114-78d4e121902a h1:zXCfk52Hpu2IoejmDm4Bkxmb5Nh9vxwaYOCiqA6f3YA=
565565
github.com/cockroachdb/drpc v0.0.0-20250924114114-78d4e121902a/go.mod h1:Ag2/Yfl22WZ8ywFUasRQ2brdltpX5QvY63jnYTZ3N5U=
566566
github.com/cockroachdb/errors v1.12.0 h1:d7oCs6vuIMUQRVbi6jWWWEJZahLCfJpnJSVobd1/sUo=

pkg/backup/datadriven_test.go

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ import (
4545
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
4646
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
4747
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
48+
"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
4849
"github.com/cockroachdb/datadriven"
4950
"github.com/cockroachdb/errors"
5051
"github.com/lib/pq"
@@ -91,10 +92,9 @@ type sqlDBKey struct {
9192

9293
type datadrivenTestState struct {
9394
// cluster maps the user defined cluster name to its cluster
94-
clusters map[string]serverutils.TestClusterInterface
95+
clusters map[string]serverutils.TestClusterInterface
96+
clusterCleanup []func()
9597

96-
// firstNode maps the cluster name to the first node in the cluster
97-
firstNode map[string]serverutils.TestServerInterface
9898
dataDirs map[string]string
9999
sqlDBs map[sqlDBKey]*gosql.DB
100100
jobTags map[string]jobspb.JobID
@@ -107,7 +107,6 @@ type datadrivenTestState struct {
107107
func newDatadrivenTestState() datadrivenTestState {
108108
return datadrivenTestState{
109109
clusters: make(map[string]serverutils.TestClusterInterface),
110-
firstNode: make(map[string]serverutils.TestServerInterface),
111110
dataDirs: make(map[string]string),
112111
sqlDBs: make(map[sqlDBKey]*gosql.DB),
113112
jobTags: make(map[string]jobspb.JobID),
@@ -120,16 +119,27 @@ func (d *datadrivenTestState) cleanup(ctx context.Context, t *testing.T) {
120119
// While the testCluster cleanupFns would close the dbConn and clusters, close
121120
// them manually to ensure all queries finish on tests that share these
122121
// resources.
122+
for _, f := range d.cleanupFns {
123+
f()
124+
}
125+
123126
for _, db := range d.sqlDBs {
124127
backuptestutils.CheckForInvalidDescriptors(t, db)
125128
db.Close()
126129
}
127-
for _, s := range d.firstNode {
128-
s.Stopper().Stop(ctx)
129-
}
130-
for _, f := range d.cleanupFns {
131-
f()
130+
131+
// Clean up the clusters in parallel so that if one gets stuck we don't see
132+
// goroutines for running clusters mixed in with goroutines for the stuck
133+
// cluster.
134+
group := ctxgroup.WithContext(ctx)
135+
for _, cleanup := range d.clusterCleanup {
136+
group.Go(func() error {
137+
cleanup()
138+
return nil
139+
})
132140
}
141+
require.NoError(t, group.Wait())
142+
133143
d.noticeBuffer = nil
134144
}
135145

@@ -223,9 +233,8 @@ func (d *datadrivenTestState) addCluster(t *testing.T, cfg clusterCfg) error {
223233
var cleanup func()
224234
tc, _, cfg.iodir, cleanup = backuptestutils.StartBackupRestoreTestCluster(t, clusterSize, opts...)
225235
d.clusters[cfg.name] = tc
226-
d.firstNode[cfg.name] = tc.Server(0)
227236
d.dataDirs[cfg.name] = cfg.iodir
228-
d.cleanupFns = append(d.cleanupFns, cleanup)
237+
d.clusterCleanup = append(d.clusterCleanup, cleanup)
229238

230239
return nil
231240
}
@@ -262,15 +271,15 @@ func (d *datadrivenTestState) getSQLDBForVC(
262271
serverutils.User(user),
263272
}
264273

265-
s := d.firstNode[name].ApplicationLayer()
274+
s := d.clusters[name].ApplicationLayer(0)
266275
switch vc {
267276
case "default":
268277
// Nothing to do.
269278
case "system":
270279
// We use the system layer since in the case of
271280
// external SQL server's the application layer can't
272281
// route to the system tenant.
273-
s = d.firstNode[name].SystemLayer()
282+
s = d.clusters[name].SystemLayer(0)
274283
default:
275284
opts = append(opts, serverutils.DBName("cluster:"+vc))
276285
}
@@ -956,7 +965,7 @@ func runTestDataDriven(t *testing.T, testFilePathFromWorkspace string) {
956965
return ""
957966

958967
case "create-dummy-system-table":
959-
al := ds.firstNode[lastCreatedCluster].ApplicationLayer()
968+
al := ds.clusters[lastCreatedCluster].ApplicationLayer(0)
960969
db := al.DB()
961970
execCfg := al.ExecutorConfig().(sql.ExecutorConfig)
962971
codec := execCfg.Codec
@@ -1037,7 +1046,7 @@ func handleKVRequest(
10371046
},
10381047
UseRangeTombstone: true,
10391048
}
1040-
if _, err := kv.SendWrapped(ctx, ds.firstNode[cluster].SystemLayer().DistSenderI().(*kvcoord.DistSender), &dr); err != nil {
1049+
if _, err := kv.SendWrapped(ctx, ds.clusters[cluster].SystemLayer(0).DistSenderI().(*kvcoord.DistSender), &dr); err != nil {
10411050
t.Fatal(err)
10421051
}
10431052
} else {
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# disabled to run within tenant because multiregion primitives are not supported within tenant
2+
3+
skip-under-duress
4+
----
5+
6+
new-cluster name=s1 allow-implicit-access disable-tenant localities=us-east-1,us-west-1,eu-central-1
7+
----
8+
9+
set-cluster-setting setting=sql.multiregion.system_database_multiregion.enabled value=true
10+
----
11+
12+
exec-sql
13+
ALTER DATABASE system SET PRIMARY REGION "us-east-1";
14+
CREATE DATABASE d PRIMARY REGION "us-east-1" REGIONS "us-west-1", "eu-central-1" SURVIVE REGION FAILURE;
15+
CREATE TABLE d.t (x INT);
16+
INSERT INTO d.t VALUES (1), (2), (3);
17+
----
18+
19+
query-sql
20+
SELECT region FROM [SHOW REGIONS FROM DATABASE d] ORDER BY 1;
21+
----
22+
eu-central-1
23+
us-east-1
24+
us-west-1
25+
26+
exec-sql
27+
BACKUP DATABASE d INTO 'nodelocal://1/database_backup/';
28+
----
29+
30+
exec-sql
31+
BACKUP INTO 'nodelocal://1/full_cluster_backup/';
32+
----
33+
34+
# A new cluster with different localities settings.
35+
new-cluster name=s3 share-io-dir=s1 allow-implicit-access disable-tenant localities=eu-central-1,eu-north-1
36+
----
37+
38+
exec-sql
39+
RESTORE DATABASE d FROM LATEST IN 'nodelocal://1/database_backup/';
40+
----
41+
pq: detected a mismatch in regions between the restore cluster and the backup cluster, missing regions detected: us-east-1, us-west-1.
42+
HINT: there are two ways you can resolve this issue: 1) update the cluster to which you're restoring to ensure that the regions present on the nodes' --locality flags match those present in the backup image, or 2) restore with the "skip_localities_check" option
43+
44+
exec-sql
45+
RESTORE FROM LATEST IN 'nodelocal://1/full_cluster_backup/';
46+
----
47+
pq: detected a mismatch in regions between the restore cluster and the backup cluster, missing regions detected: us-east-1, us-west-1.
48+
HINT: there are two ways you can resolve this issue: 1) update the cluster to which you're restoring to ensure that the regions present on the nodes' --locality flags match those present in the backup image, or 2) restore with the "skip_localities_check" option
49+
50+
exec-sql
51+
RESTORE FROM LATEST IN 'nodelocal://1/full_cluster_backup/' WITH skip_localities_check;
52+
----
53+
54+
exec-sql
55+
INSERT INTO d.t VALUES (4);
56+
----
57+
58+
exec-sql
59+
SET enable_multiregion_placement_policy='true';
60+
ALTER DATABASE d SURVIVE ZONE FAILURE;
61+
ALTER DATABASE d PLACEMENT RESTRICTED;
62+
ALTER DATABASE d SET PRIMARY REGION 'eu-central-1';
63+
ALTER DATABASE d DROP REGION 'us-east-1';
64+
ALTER DATABASE d DROP REGION 'us-west-1';
65+
ALTER DATABASE d ADD REGION 'eu-north-1';
66+
ALTER DATABASE d SET SECONDARY REGION 'eu-north-1';
67+
----
68+
69+
exec-sql
70+
RESTORE DATABASE d FROM LATEST IN 'nodelocal://1/database_backup/' WITH skip_localities_check, new_db_name='d_new';
71+
----
72+
73+
exec-sql
74+
INSERT INTO d_new.t VALUES (4);
75+
----
76+
77+
exec-sql
78+
SET enable_multiregion_placement_policy='true';
79+
ALTER DATABASE d_new SURVIVE ZONE FAILURE;
80+
ALTER DATABASE d PLACEMENT RESTRICTED;
81+
ALTER DATABASE d_new SET PRIMARY REGION 'eu-central-1';
82+
ALTER DATABASE d_new DROP REGION 'us-east-1';
83+
ALTER DATABASE d_new DROP REGION 'us-west-1';
84+
ALTER DATABASE d_new ADD REGION 'eu-north-1';
85+
ALTER DATABASE d_new SET SECONDARY REGION 'eu-north-1';
86+
----
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# disabled to run within tenant because multiregion primitives are not supported within tenant
2+
3+
skip-under-duress
4+
----
5+
6+
new-cluster name=s1 allow-implicit-access disable-tenant localities=us-east-1,us-west-1,eu-central-1
7+
----
8+
9+
set-cluster-setting setting=sql.multiregion.system_database_multiregion.enabled value=true
10+
----
11+
12+
exec-sql
13+
ALTER DATABASE system SET PRIMARY REGION "us-east-1";
14+
CREATE DATABASE d PRIMARY REGION "us-east-1" REGIONS "us-west-1", "eu-central-1" SURVIVE REGION FAILURE;
15+
CREATE TABLE d.t (x INT);
16+
INSERT INTO d.t VALUES (1), (2), (3);
17+
----
18+
19+
query-sql
20+
SELECT region FROM [SHOW REGIONS FROM DATABASE d] ORDER BY 1;
21+
----
22+
eu-central-1
23+
us-east-1
24+
us-west-1
25+
26+
exec-sql
27+
BACKUP DATABASE d INTO 'nodelocal://1/database_backup/';
28+
----
29+
30+
exec-sql
31+
BACKUP INTO 'nodelocal://1/full_cluster_backup/';
32+
----
33+
34+
# A new cluster with the same locality settings.
35+
new-cluster name=s2 share-io-dir=s1 allow-implicit-access disable-tenant localities=us-east-1,us-west-1,eu-central-1
36+
----
37+
38+
exec-sql
39+
RESTORE FROM LATEST IN 'nodelocal://1/full_cluster_backup/';
40+
----
41+
42+
exec-sql
43+
DROP DATABASE d;
44+
----
45+
46+
exec-sql
47+
RESTORE DATABASE d FROM LATEST IN 'nodelocal://1/database_backup/';
48+
----
49+
50+
query-sql
51+
SHOW DATABASES;
52+
----
53+
d root us-east-1 {eu-central-1,us-east-1,us-west-1} region
54+
data root <nil> <nil> {} <nil>
55+
defaultdb root <nil> <nil> {} <nil>
56+
postgres root <nil> <nil> {} <nil>
57+
system node <nil> <nil> {} <nil>

0 commit comments

Comments
 (0)