
Commit cc94980

craig[bot], jeffswenson, and msbutler committed
147774: conflict: create conflict workload for testing LDR r=jeffswenson a=jeffswenson

This creates a conflict workload and roachtest for stress testing LDR with random schemas. The `conflict` workload accepts connections for two independent clusters. For each randomly generated row, mutated versions of the row are inserted into the peer clusters at the same time. The test validates that the two clusters eventually converge and there are no entries in the DLQ.

Release note: none

Part of: #148386

153890: backup: add deprecation warning for incremental_location r=dt a=msbutler

Epic: none

Release note (ops change): the incremental_location option is now deprecated and will be removed in a future release. This feature was added so customers could define different TTL policies for incremental backups vs full backups. Users can still do this since incremental backups are by default stored in a distinct directory relative to full backups ({collection_root}/incrementals).

Co-authored-by: Jeff Swenson <[email protected]>
Co-authored-by: Michael Butler <[email protected]>
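For context, the conflict-generation idea described above can be pictured with a short sketch. This is not the actual pkg/workload/conflict implementation: the conflict.conflict table, its (k, v) schema, and the connection URLs are hypothetical placeholders. Two SQL connections, one per cluster, receive different mutations of the same randomly chosen row at roughly the same time, so the bidirectional LDR jobs must resolve the conflict and the clusters must still converge.

// Hypothetical sketch, not the real workload: write conflicting versions of
// the same row into two clusters that replicate to each other via LDR.
package main

import (
	"context"
	"database/sql"
	"fmt"
	"log"
	"math/rand"

	_ "github.com/lib/pq" // any Postgres-wire driver works against CockroachDB
)

// writeConflictingRows picks a random key and upserts a different value for it
// into each cluster at (roughly) the same time, forcing LDR to resolve the
// resulting conflict.
func writeConflictingRows(ctx context.Context, left, right *sql.DB, rounds int) error {
	for i := 0; i < rounds; i++ {
		k := rand.Intn(16)
		errs := make(chan error, 2)
		upsert := func(db *sql.DB, v string) {
			_, err := db.ExecContext(ctx,
				`UPSERT INTO conflict.conflict (k, v) VALUES ($1, $2)`, k, v)
			errs <- err
		}
		go upsert(left, fmt.Sprintf("left-%d", i))
		go upsert(right, fmt.Sprintf("right-%d", i))
		for j := 0; j < 2; j++ {
			if err := <-errs; err != nil {
				return err
			}
		}
	}
	return nil
}

func main() {
	// Placeholder URLs; the real roachtest passes the peer cluster via --peer_url.
	left, err := sql.Open("postgres", "postgresql://root@left-cluster:26257/conflict?sslmode=disable")
	if err != nil {
		log.Fatal(err)
	}
	right, err := sql.Open("postgres", "postgresql://root@right-cluster:26257/conflict?sslmode=disable")
	if err != nil {
		log.Fatal(err)
	}
	if err := writeConflictingRows(context.Background(), left, right, 100); err != nil {
		log.Fatal(err)
	}
}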
3 parents 84fd608 + e47879d + eb46297 commit cc94980

23 files changed (+919 lines, -129 lines)

pkg/BUILD.bazel

Lines changed: 6 additions & 0 deletions

@@ -177,6 +177,7 @@ ALL_TESTS = [
     "//pkg/config/zonepb:zonepb_test",
     "//pkg/config:config_disallowed_imports_test",
     "//pkg/config:config_test",
+    "//pkg/crosscluster/ldrrandgen:ldrrandgen_test",
     "//pkg/crosscluster/logical:logical_disallowed_imports_test",
     "//pkg/crosscluster/logical:logical_test",
     "//pkg/crosscluster/physical:physical_test",
@@ -848,6 +849,7 @@ ALL_TESTS = [
     "//pkg/util:util_test",
     "//pkg/workload/bank:bank_test",
     "//pkg/workload/cli:cli_test",
+    "//pkg/workload/conflict:conflict_test",
     "//pkg/workload/faker:faker_test",
     "//pkg/workload/histogram/exporter:exporter_test",
     "//pkg/workload/histogram:histogram_test",
@@ -1354,6 +1356,8 @@ GO_TARGETS = [
     "//pkg/config/zonepb:zonepb_test",
     "//pkg/config:config",
     "//pkg/config:config_test",
+    "//pkg/crosscluster/ldrrandgen:ldrrandgen",
+    "//pkg/crosscluster/ldrrandgen:ldrrandgen_test",
     "//pkg/crosscluster/logical:logical",
     "//pkg/crosscluster/logical:logical_test",
     "//pkg/crosscluster/physical:physical",
@@ -2863,6 +2867,8 @@ GO_TARGETS = [
     "//pkg/workload/bulkingest:bulkingest",
     "//pkg/workload/cli:cli",
     "//pkg/workload/cli:cli_test",
+    "//pkg/workload/conflict:conflict",
+    "//pkg/workload/conflict:conflict_test",
     "//pkg/workload/connectionlatency:connectionlatency",
     "//pkg/workload/debug:debug",
     "//pkg/workload/examples:examples",

pkg/backup/backup_planning.go

Lines changed: 6 additions & 0 deletions

@@ -49,6 +49,8 @@ const (
 	deprecatedPrivilegesRestorePreamble = "The existing privileges are being deprecated " +
 		"in favour of a fine-grained privilege model explained here " +
 		"https://www.cockroachlabs.com/docs/stable/restore.html#required-privileges. In a future release, to run"
+
+	deprecatedIncrementalLocationMessage = "the incremental_location option is deprecated and will be removed in a future release"
 )
 
 type tableAndIndex struct {
@@ -442,6 +444,10 @@ func backupPlanHook(
 			return nil, nil, false, err
 		}
 
+		if len(incrementalStorage) > 0 {
+			p.BufferClientNotice(ctx, pgnotice.Newf(deprecatedIncrementalLocationMessage))
+		}
+
 		var revisionHistory bool
 		if backupStmt.Options.CaptureRevisionHistory != nil {
 			revisionHistory, err = exprEval.Bool(

pkg/backup/create_scheduled_backup.go

Lines changed: 1 addition & 0 deletions

@@ -339,6 +339,7 @@ func doCreateBackupSchedules(
 
 	var incDests []string
 	if eval.incrementalStorage != nil {
+		p.BufferClientNotice(ctx, pgnotice.Newf(deprecatedIncrementalLocationMessage))
 		incDests = eval.incrementalStorage
 		for _, incDest := range incDests {
 			backupNode.Options.IncrementalStorage = append(backupNode.Options.IncrementalStorage, tree.NewStrVal(incDest))

pkg/backup/testdata/backup-restore/alter-schedule/backup-options

Lines changed: 1 addition & 0 deletions

@@ -74,6 +74,7 @@ exec-sql
 alter backup schedule $incID set with encryption_passphrase = '';
 alter backup schedule $incID set with kms = ('aws:///key1?region=r1', 'aws:///key2?region=r2'), set with incremental_location = 'inc';
 ----
+NOTICE: the incremental_location option is deprecated and will be removed in a future release
 
 query-sql
 with schedules as (show schedules for backup) select command from schedules where label='datatest' order by backup_type asc;

pkg/backup/testdata/backup-restore/encrypted-backups

Lines changed: 2 additions & 0 deletions

@@ -16,6 +16,7 @@ BACKUP INTO 'nodelocal://1/full' WITH encryption_passphrase='123';
 exec-sql
 BACKUP INTO 'nodelocal://1/full2' WITH encryption_passphrase='456', incremental_location='nodelocal://1/inc';
 ----
+NOTICE: the incremental_location option is deprecated and will be removed in a future release
 
 exec-sql
 BACKUP INTO 'nodelocal://1/full3' WITH kms='testkms:///cmk?AUTH=implicit';
@@ -92,6 +93,7 @@ BACKUP INTO LATEST IN 'nodelocal://1/full' WITH encryption_passphrase='123';
 exec-sql
 BACKUP INTO LATEST IN 'nodelocal://1/full2' WITH encryption_passphrase='456', incremental_location='nodelocal://1/inc';
 ----
+NOTICE: the incremental_location option is deprecated and will be removed in a future release
 
 exec-sql
 BACKUP INTO LATEST IN 'nodelocal://1/full3' WITH kms='testkms:///cmk?AUTH=implicit';

pkg/backup/testdata/backup-restore/external-connections-nodelocal

Lines changed: 2 additions & 0 deletions

@@ -178,6 +178,7 @@ BACKUP DATABASE d INTO 'external://full';
 exec-sql
 BACKUP DATABASE d INTO LATEST IN 'external://full' WITH incremental_location = 'external://inc';
 ----
+NOTICE: the incremental_location option is deprecated and will be removed in a future release
 
 query-sql
 SELECT object_name, object_type, backup_type FROM [SHOW BACKUP LATEST IN 'external://full' WITH
@@ -203,6 +204,7 @@ BACKUP DATABASE d INTO 'external://full/nested';
 exec-sql
 BACKUP DATABASE d INTO LATEST IN 'external://full/nested' WITH incremental_location = 'external://inc/nested';
 ----
+NOTICE: the incremental_location option is deprecated and will be removed in a future release
 
 query-sql
 SELECT object_name, object_type, backup_type FROM [SHOW BACKUP LATEST IN 'external://full/nested'

pkg/backup/testdata/backup-restore/external-connections-userfile

Lines changed: 2 additions & 0 deletions

@@ -157,6 +157,7 @@ BACKUP DATABASE d INTO 'external://full';
 exec-sql
 BACKUP DATABASE d INTO LATEST IN 'external://full' WITH incremental_location = 'external://inc';
 ----
+NOTICE: the incremental_location option is deprecated and will be removed in a future release
 
 query-sql
 SELECT object_name, object_type, backup_type FROM [SHOW BACKUP LATEST IN 'external://full' WITH
@@ -182,6 +183,7 @@ BACKUP DATABASE d INTO 'external://full/nested';
 exec-sql
 BACKUP DATABASE d INTO LATEST IN 'external://full/nested' WITH incremental_location = 'external://inc/nested';
 ----
+NOTICE: the incremental_location option is deprecated and will be removed in a future release
 
 query-sql
 SELECT object_name, object_type, backup_type FROM [SHOW BACKUP LATEST IN 'external://full/nested'

pkg/ccl/workloadccl/allccl/BUILD.bazel

Lines changed: 1 addition & 0 deletions

@@ -9,6 +9,7 @@ go_library(
         "//pkg/ccl/workloadccl/roachmartccl",
         "//pkg/workload/bank",
         "//pkg/workload/bulkingest",
+        "//pkg/workload/conflict",
         "//pkg/workload/connectionlatency",
         "//pkg/workload/debug",
         "//pkg/workload/examples",

pkg/ccl/workloadccl/allccl/all.go

Lines changed: 1 addition & 0 deletions

@@ -13,6 +13,7 @@ import (
 	_ "github.com/cockroachdb/cockroach/pkg/ccl/workloadccl/roachmartccl"
 	_ "github.com/cockroachdb/cockroach/pkg/workload/bank"
 	_ "github.com/cockroachdb/cockroach/pkg/workload/bulkingest"
+	_ "github.com/cockroachdb/cockroach/pkg/workload/conflict"
 	_ "github.com/cockroachdb/cockroach/pkg/workload/connectionlatency"
 	_ "github.com/cockroachdb/cockroach/pkg/workload/debug"
 	_ "github.com/cockroachdb/cockroach/pkg/workload/examples"

pkg/cmd/roachtest/tests/logical_data_replication.go

Lines changed: 94 additions & 10 deletions

@@ -216,18 +216,35 @@ func registerLogicalDataReplicationTests(r registry.Registry) {
 			},
 			run: TestLDRCreateTablesTPCC,
 		},
+		{
+			name: "ldr/conflict",
+			clusterSpec: multiClusterSpec{
+				leftNodes:  3,
+				rightNodes: 3,
+				clusterOpts: []spec.Option{
+					spec.CPU(4),
+					spec.WorkloadNode(),
+					spec.WorkloadNodeCPU(4),
+					spec.VolumeSize(100),
+				},
+			},
+			ldrConfig: ldrConfig{
+				createTables: true,
+			},
+			run: TestLDRConflict,
+		},
 	}
 
 	for _, sp := range specs {
-
 		r.Add(registry.TestSpec{
-			Name:             sp.name,
-			Owner:            registry.OwnerDisasterRecovery,
-			Timeout:          60 * time.Minute,
-			CompatibleClouds: registry.OnlyGCE,
-			Suites:           registry.Suites(registry.Nightly),
-			Cluster:          sp.clusterSpec.ToSpec(r),
-			Leases:           registry.MetamorphicLeases,
+			Name:                       sp.name,
+			Owner:                      registry.OwnerDisasterRecovery,
+			Timeout:                    60 * time.Minute,
+			CompatibleClouds:           registry.OnlyGCE,
+			Suites:                     registry.Suites(registry.Nightly),
+			Cluster:                    sp.clusterSpec.ToSpec(r),
+			Leases:                     registry.MetamorphicLeases,
+			RequiresDeprecatedWorkload: true, // TODO(jeffswenson): require this only for conflict test.
 			Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
 				rng, seed := randutil.NewPseudoRand()
 				t.L().Printf("random seed is %d", seed)
@@ -388,6 +405,73 @@ func TestLDRTPCC(
 	VerifyCorrectness(ctx, c, t, setup, leftJobID, rightJobID, 2*time.Minute, workload)
 }
 
+func TestLDRConflict(
+	ctx context.Context, t test.Test, c cluster.Cluster, setup multiClusterSetup, ldrConfig ldrConfig,
+) {
+	setup.left.sysSQL.Exec(t, "CREATE DATABASE conflict")
+	setup.right.sysSQL.Exec(t, "CREATE DATABASE conflict")
+
+	var leftJobID, rightJobID int
+	setup.right.sysSQL.Exec(t, fmt.Sprintf("CREATE EXTERNAL CONNECTION IF NOT EXISTS left AS '%s'", setup.left.PgURLForDatabase("conflict")))
+	setup.left.sysSQL.Exec(t, fmt.Sprintf("CREATE EXTERNAL CONNECTION IF NOT EXISTS right AS '%s'", setup.right.PgURLForDatabase("conflict")))
+
+	leftURLs, err := c.InternalPGUrl(ctx, t.L(), setup.left.gatewayNodes, roachprod.PGURLOptions{
+		Database: "conflict",
+	})
+	require.NoError(t, err)
+	rightURLs, err := c.InternalPGUrl(ctx, t.L(), setup.right.gatewayNodes, roachprod.PGURLOptions{
+		Database: "conflict",
+	})
+	require.NoError(t, err)
+
+	leftURL := fmt.Sprintf("\"%s\"", leftURLs[0])
+	rightURL := fmt.Sprintf("\"%s\"", rightURLs[0])
+
+	c.Run(ctx, option.WithNodes(setup.workloadNode), "./workload", "init", "conflict", leftURL)
+
+	t.Status("creating bidirectional replication job")
+	setup.right.sysSQL.QueryRow(t, `
+		CREATE LOGICALLY REPLICATED TABLE conflict.conflict FROM TABLE conflict.conflict
+		ON 'external://left'
+		WITH BIDIRECTIONAL ON 'external://right'
+	`).Scan(&rightJobID)
+
+	t.Status("waiting for right job to start up")
+	waitForReplicatedTime(t, rightJobID, setup.right.db, getLogicalDataReplicationJobInfo, 2*time.Minute)
+
+	t.Status("waiting for left job to be created")
+	testutils.SucceedsWithin(t, func() error {
+		return setup.left.db.QueryRow("SELECT job_id FROM [SHOW JOBS] WHERE job_type = 'LOGICAL REPLICATION'").Scan(&leftJobID)
+	}, 2*time.Minute)
+
+	t.Status("waiting for left job to start up")
+	waitForReplicatedTime(t, leftJobID, setup.left.db, getLogicalDataReplicationJobInfo, 2*time.Minute)
+
+	// TODO(jeffswenson): mix in random schema changes. The high level plan is:
+	// 1. Pause the workload.
+	// 2. Wait for LDR replication to catch up.
+	// 3. Stop the LDR jobs.
+	// 4. Make random schema changes.
+	// 5. Start the LDR jobs again using now() as the cursor.
+	// 6. Resume the workload.
+	t.Status("running workload")
+	c.Run(ctx, option.WithNodes(setup.workloadNode),
+		"./workload", "run", "conflict", "--duration=15m",
+		// Tolerate errors because there are some we can't easily avoid in random schemas that
+		// contain computed columns. For example, the computed column a+b may cause an insert error
+		// if a+b overflows the type.
+		"--tolerate-errors",
+		"--peer_url", rightURL,
+		leftURL)
+
+	t.Status("verifying results")
+	VerifyCorrectness(ctx, c, t, setup, leftJobID, rightJobID, 2*time.Minute, LDRWorkload{
+		dbName:            "conflict",
+		tableNames:        []string{"conflict"},
+		manualSchemaSetup: true,
+	})
+}
+
 // TestLDRCreateTablesTPCC inits the left cluster with 1000 warehouse tpcc,
 // begins unidirectional fast initial scan LDR, starts a tpcc 1000 wh workload
 // on the left, and observes initial scan, catchup scan, and steady state
@@ -990,15 +1074,15 @@ func VerifyCorrectness(
 	ldrWorkload LDRWorkload,
 ) {
 	now := timeutil.Now()
-	t.L().Printf("Waiting for replicated times to catchup before verifying left and right clusters")
+	t.Status("waiting for replicated times to catchup before verifying left and right clusters")
 	if leftJobID != 0 {
 		waitForReplicatedTimeToReachTimestamp(t, leftJobID, setup.left.db, getLogicalDataReplicationJobInfo, waitTime, now)
 		require.NoError(t, replicationtestutils.CheckEmptyDLQs(ctx, setup.left.db, ldrWorkload.dbName))
 	}
 	waitForReplicatedTimeToReachTimestamp(t, rightJobID, setup.right.db, getLogicalDataReplicationJobInfo, waitTime, now)
 	require.NoError(t, replicationtestutils.CheckEmptyDLQs(ctx, setup.right.db, ldrWorkload.dbName))
 
-	t.L().Printf("Verifying equality of left and right clusters")
+	t.Status("verifying equality of left and right clusters")
 
 	type fingerprint struct {
 		table string
