@@ -216,18 +216,35 @@ func registerLogicalDataReplicationTests(r registry.Registry) {
216
216
},
217
217
run : TestLDRCreateTablesTPCC ,
218
218
},
219
+ {
220
+ name : "ldr/conflict" ,
221
+ clusterSpec : multiClusterSpec {
222
+ leftNodes : 3 ,
223
+ rightNodes : 3 ,
224
+ clusterOpts : []spec.Option {
225
+ spec .CPU (4 ),
226
+ spec .WorkloadNode (),
227
+ spec .WorkloadNodeCPU (4 ),
228
+ spec .VolumeSize (100 ),
229
+ },
230
+ },
231
+ ldrConfig : ldrConfig {
232
+ createTables : true ,
233
+ },
234
+ run : TestLDRConflict ,
235
+ },
219
236
}
220
237
221
238
for _ , sp := range specs {
222
-
223
239
r .Add (registry.TestSpec {
224
- Name : sp .name ,
225
- Owner : registry .OwnerDisasterRecovery ,
226
- Timeout : 60 * time .Minute ,
227
- CompatibleClouds : registry .OnlyGCE ,
228
- Suites : registry .Suites (registry .Nightly ),
229
- Cluster : sp .clusterSpec .ToSpec (r ),
230
- Leases : registry .MetamorphicLeases ,
240
+ Name : sp .name ,
241
+ Owner : registry .OwnerDisasterRecovery ,
242
+ Timeout : 60 * time .Minute ,
243
+ CompatibleClouds : registry .OnlyGCE ,
244
+ Suites : registry .Suites (registry .Nightly ),
245
+ Cluster : sp .clusterSpec .ToSpec (r ),
246
+ Leases : registry .MetamorphicLeases ,
247
+ RequiresDeprecatedWorkload : true , // TODO(jeffswenson): require this only for conflict test.
231
248
Run : func (ctx context.Context , t test.Test , c cluster.Cluster ) {
232
249
rng , seed := randutil .NewPseudoRand ()
233
250
t .L ().Printf ("random seed is %d" , seed )
@@ -388,6 +405,73 @@ func TestLDRTPCC(
388
405
VerifyCorrectness (ctx , c , t , setup , leftJobID , rightJobID , 2 * time .Minute , workload )
389
406
}
390
407
408
// TestLDRConflict runs the "conflict" workload against two clusters joined by
// bidirectional logical data replication (LDR) and then checks that both sides
// agree.
//
// Steps, in order:
//  1. Create a "conflict" database on both clusters and an external connection
//     on each side that points at the other side's "conflict" database.
//  2. Initialize the workload on the left cluster only.
//  3. From the right cluster, create the logically replicated table with the
//     BIDIRECTIONAL option and record the resulting job ID.
//  4. Wait for the right job, then look up and wait for the left job.
//  5. Run the workload for 15 minutes with a peer URL on the right cluster.
//  6. Call VerifyCorrectness on the conflict.conflict table.
//
// The ldrConfig parameter is not read anywhere in this body.
+ func TestLDRConflict (
409
+ ctx context.Context , t test.Test , c cluster.Cluster , setup multiClusterSetup , ldrConfig ldrConfig ,
410
+ ) {
411
+ setup .left .sysSQL .Exec (t , "CREATE DATABASE conflict" )
412
+ setup .right .sysSQL .Exec (t , "CREATE DATABASE conflict" )
413
+
414
+ var leftJobID , rightJobID int
415
// The connection named "left" is created on the right cluster and the one
// named "right" on the left cluster: each side stores how to reach its peer.
+ setup .right .sysSQL .Exec (t , fmt .Sprintf ("CREATE EXTERNAL CONNECTION IF NOT EXISTS left AS '%s'" , setup .left .PgURLForDatabase ("conflict" )))
416
+ setup .left .sysSQL .Exec (t , fmt .Sprintf ("CREATE EXTERNAL CONNECTION IF NOT EXISTS right AS '%s'" , setup .right .PgURLForDatabase ("conflict" )))
417
+
418
+ leftURLs , err := c .InternalPGUrl (ctx , t .L (), setup .left .gatewayNodes , roachprod.PGURLOptions {
419
+ Database : "conflict" ,
420
+ })
421
+ require .NoError (t , err )
422
+ rightURLs , err := c .InternalPGUrl (ctx , t .L (), setup .right .gatewayNodes , roachprod.PGURLOptions {
423
+ Database : "conflict" ,
424
+ })
425
+ require .NoError (t , err )
426
+
427
// Only the first URL from each side is used. It is wrapped in double quotes,
// presumably so it survives the command line built by c.Run - TODO confirm.
+ leftURL := fmt .Sprintf ("\" %s\" " , leftURLs [0 ])
428
+ rightURL := fmt .Sprintf ("\" %s\" " , rightURLs [0 ])
429
+
430
// The workload is initialized against the left URL only; the right side gets
// its table from the CREATE LOGICALLY REPLICATED TABLE statement below.
+ c .Run (ctx , option .WithNodes (setup .workloadNode ), "./workload" , "init" , "conflict" , leftURL )
431
+
432
+ t .Status ("creating bidirectional replication job" )
433
+ setup .right .sysSQL .QueryRow (t , `
434
+ CREATE LOGICALLY REPLICATED TABLE conflict.conflict FROM TABLE conflict.conflict
435
+ ON 'external://left'
436
+ WITH BIDIRECTIONAL ON 'external://right'
437
+ ` ).Scan (& rightJobID )
438
+
439
+ t .Status ("waiting for right job to start up" )
440
+ waitForReplicatedTime (t , rightJobID , setup .right .db , getLogicalDataReplicationJobInfo , 2 * time .Minute )
441
+
442
+ t .Status ("waiting for left job to be created" )
443
// The statement above only returns the right job's ID, so the left job's ID
// is read from SHOW JOBS on the left cluster. The lookup is given up to two
// minutes (presumably retried until Scan succeeds - TODO confirm).
+ testutils .SucceedsWithin (t , func () error {
444
+ return setup .left .db .QueryRow ("SELECT job_id FROM [SHOW JOBS] WHERE job_type = 'LOGICAL REPLICATION'" ).Scan (& leftJobID )
445
+ }, 2 * time .Minute )
446
+
447
+ t .Status ("waiting for left job to start up" )
448
+ waitForReplicatedTime (t , leftJobID , setup .left .db , getLogicalDataReplicationJobInfo , 2 * time .Minute )
449
+
450
+ // TODO(jeffswenson): mix in random schema changes. The high level plan is:
451
+ // 1. Pause the workload.
452
+ // 2. Wait for LDR replication to catch up.
453
+ // 3. Stop the LDR jobs.
454
+ // 4. Make random schema changes.
455
+ // 5. Start the LDR jobs again using now() as the cursor.
456
+ // 6. Resume the workload.
457
+ t .Status ("running workload" )
458
+ c .Run (ctx , option .WithNodes (setup .workloadNode ),
459
+ "./workload" , "run" , "conflict" , "--duration=15m" ,
460
+ // Tolerate errors because there are some we can't easily avoid in random schemas that
461
+ // contain computed columns. For example, the computed column a+b may cause an insert error
462
+ // if a+b overflows the type.
463
+ "--tolerate-errors" ,
464
+ "--peer_url" , rightURL ,
465
+ leftURL )
466
+
467
+ t .Status ("verifying results" )
468
// Both job IDs are passed, so VerifyCorrectness waits on both directions of
// replication before comparing the clusters.
+ VerifyCorrectness (ctx , c , t , setup , leftJobID , rightJobID , 2 * time .Minute , LDRWorkload {
469
+ dbName : "conflict" ,
470
+ tableNames : []string {"conflict" },
471
+ manualSchemaSetup : true ,
472
+ })
473
+ }
474
+
391
475
// TestLDRCreateTablesTPCC inits the left cluster with 1000 warehouse tpcc,
392
476
// begins unidirectional fast initial scan LDR, starts a tpcc 1000 wh workload
393
477
// on the left, and observes initial scan, catchup scan, and steady state
@@ -990,15 +1074,15 @@ func VerifyCorrectness(
990
1074
ldrWorkload LDRWorkload ,
991
1075
) {
992
1076
now := timeutil .Now ()
993
- t .L (). Printf ( "Waiting for replicated times to catchup before verifying left and right clusters" )
1077
+ t .Status ( "waiting for replicated times to catchup before verifying left and right clusters" )
994
1078
if leftJobID != 0 {
995
1079
waitForReplicatedTimeToReachTimestamp (t , leftJobID , setup .left .db , getLogicalDataReplicationJobInfo , waitTime , now )
996
1080
require .NoError (t , replicationtestutils .CheckEmptyDLQs (ctx , setup .left .db , ldrWorkload .dbName ))
997
1081
}
998
1082
waitForReplicatedTimeToReachTimestamp (t , rightJobID , setup .right .db , getLogicalDataReplicationJobInfo , waitTime , now )
999
1083
require .NoError (t , replicationtestutils .CheckEmptyDLQs (ctx , setup .right .db , ldrWorkload .dbName ))
1000
1084
1001
- t .L (). Printf ( "Verifying equality of left and right clusters" )
1085
+ t .Status ( "verifying equality of left and right clusters" )
1002
1086
1003
1087
type fingerprint struct {
1004
1088
table string
0 commit comments