Skip to content

Commit 75ff3ab

Browse files
committed
cmd/roachtest: fix cdc/multi-region-execution-locality-tpcc
This roachtest hung waiting for the changefeed to complete; the changefeed is now created as an initial-scan-only changefeed so that it finishes. The test was also flaky. This is fixed by removing the check of the exact span distribution and checking only that more than one aggregator was planned. In addition, previously not all leaseholders were guaranteed to be in the same region; now all leaseholders will be in the specified region. Finally, the test is added to the nightly test suite. Epic: CRDB-38755 Fixes: #153825 Release note: None
1 parent df939a7 commit 75ff3ab

File tree

1 file changed

+39
-52
lines changed

1 file changed

+39
-52
lines changed

pkg/cmd/roachtest/tests/cdc.go

Lines changed: 39 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"encoding/pem"
2121
"fmt"
2222
"io"
23+
"maps"
2324
"math/big"
2425
"math/rand"
2526
"net"
@@ -31,6 +32,7 @@ import (
3132
"path/filepath"
3233
"regexp"
3334
"runtime"
35+
"slices"
3436
"sort"
3537
"strconv"
3638
"strings"
@@ -1951,26 +1953,18 @@ func getDiagramProcessors(ctx context.Context, db *gosql.DB) ([]any, error) {
19511953
}
19521954

19531955
type ChangefeedDistribution struct {
1954-
NodeToSpansWatched map[int]int
19551956
ZoneToSpansWatched map[string]int
19561957
TotalSpansWatched int
19571958
TotalAggregators int
1958-
TotalLeaseHolders int
1959-
TotalRanges int
1960-
NodeToZone map[int]string
19611959
}
19621960

19631961
func getChangefeedDistribution(
19641962
processors []any, nodeToZone map[int]string, t test.Test,
19651963
) ChangefeedDistribution {
19661964
changefeedDistribution := ChangefeedDistribution{
1967-
NodeToSpansWatched: make(map[int]int),
19681965
ZoneToSpansWatched: make(map[string]int),
19691966
TotalSpansWatched: 0,
19701967
TotalAggregators: 0,
1971-
TotalLeaseHolders: 0,
1972-
TotalRanges: 0,
1973-
NodeToZone: nodeToZone,
19741968
}
19751969
for _, p := range processors {
19761970
procMap, ok := p.(map[string]any)
@@ -1993,10 +1987,8 @@ func getChangefeedDistribution(
19931987
if len(matches) > 1 {
19941988
numWatches, err := strconv.Atoi(matches[1])
19951989
require.NoError(t, err)
1996-
changefeedDistribution.NodeToSpansWatched[int(nodeIdx)] += numWatches
19971990
changefeedDistribution.TotalSpansWatched += numWatches
1998-
changefeedDistribution.ZoneToSpansWatched[changefeedDistribution.NodeToZone[int(nodeIdx)]] += numWatches
1999-
1991+
changefeedDistribution.ZoneToSpansWatched[nodeToZone[int(nodeIdx)]] += numWatches
20001992
}
20011993
}
20021994
}
@@ -2005,42 +1997,36 @@ func getChangefeedDistribution(
20051997
return changefeedDistribution
20061998
}
20071999

2008-
func veryifyLeaseHolderDistribution(
2009-
db *gosql.DB, t test.Test, nodeToZone map[int]string,
2010-
) map[string]int {
2011-
var rows *gosql.Rows
2012-
// Get lease holders for all ranges in tpcc database.
2013-
leaseHolderQuery := `SELECT r.start_pretty, r.replicas, r.replica_localities, r.lease_holder
2014-
FROM crdb_internal.ranges r
2015-
JOIN crdb_internal.tables t ON r.start_pretty like concat('/Table/', t.table_id::STRING,'%')
2016-
WHERE t.database_name = 'tpcc'`
2017-
rows, err := db.Query(leaseHolderQuery)
2018-
zoneToLeaseHolderCount := make(map[string]int)
2019-
require.NoError(t, err)
2020-
defer rows.Close()
2021-
for rows.Next() {
2022-
var startKeyPretty string
2023-
var replicas []uint8
2024-
var replicaLocalities []uint8
2025-
var leaseHolder int
2026-
require.NoError(t, rows.Scan(&startKeyPretty, &replicas, &replicaLocalities, &leaseHolder))
2027-
for indx := range replicas {
2028-
require.NotEqual(t, replicas[indx], 0)
2029-
replicas[indx]--
2000+
func verifyLeaseHolderLocality(db *gosql.DB, t test.Test, primaryRegion string) {
2001+
leaseHolderQuery := `SELECT NOT EXISTS (
2002+
SELECT 1
2003+
FROM [SHOW CLUSTER RANGES WITH TABLES, DETAILS]
2004+
WHERE database_name = 'tpcc'
2005+
AND (lease_holder_locality IS DISTINCT FROM $1::STRING OR lease_holder_locality IS NULL)
2006+
)`
2007+
t.L().Printf("Waiting for all lease holders to be in region %s", primaryRegion)
2008+
start := timeutil.Now()
2009+
ok := false
2010+
for {
2011+
if timeutil.Since(start) > 5*time.Minute {
2012+
t.Fatalf("Timeout waiting for lease holders to be in region %s; waited for %s", primaryRegion, timeutil.Since(start))
20302013
}
2031-
leaseHolder--
2032-
zoneToLeaseHolderCount[nodeToZone[leaseHolder]]++
2014+
require.NoError(t, db.QueryRow(leaseHolderQuery, primaryRegion).Scan(&ok))
2015+
if ok {
2016+
break
2017+
}
2018+
time.Sleep(time.Second)
20332019
}
2034-
return zoneToLeaseHolderCount
20352020
}
20362021

20372022
func registerCDC(r registry.Registry) {
20382023
r.Add(registry.TestSpec{
20392024
// This test
2040-
// 1. Creates a cluster with 3 nodes each in us-east and us-west
2041-
// 2. Runs a tpcc workload, then sets tpcc database to primary region us-west
2042-
// 3. Creates a changefeed with execution locality set to us-east
2043-
// 4. Gets the changefeed diagram and creates mappings
2025+
// 1. Creates a cluster with 3 nodes each in us-east and us-west;
2026+
// 2. Runs a tpcc workload, then configures the tpcc database to have lease holders in region us-west;
2027+
// 3. Creates a changefeed with execution locality set to us-east;
2028+
// 4. Gets the changefeed diagram and creates mappings;
2029+
// 5. Verifies that spans are assigned to multiple change aggregators in region us-east.
20442030

20452031
// This test is used to verify that ranges are evenly distributed across
20462032
// change aggregators in the execution_locality region while targeting tables
@@ -2052,7 +2038,7 @@ func registerCDC(r registry.Registry) {
20522038
Owner: registry.OwnerCDC,
20532039
Cluster: r.MakeClusterSpec(7, spec.Geo(), spec.GatherCores(), spec.GCEZones("us-east1-b,us-west1-b")),
20542040
CompatibleClouds: registry.OnlyGCE,
2055-
Suites: registry.Suites(),
2041+
Suites: registry.Suites(registry.Nightly),
20562042
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
20572043
nodeToZone := map[int]string{
20582044
0: "us-east1-b",
@@ -2065,17 +2051,24 @@ func registerCDC(r registry.Registry) {
20652051
ct := newCDCTester(ctx, t, c)
20662052
defer ct.Close()
20672053

2068-
ct.runTPCCWorkload(tpccArgs{warehouses: 100})
2054+
ct.runTPCCWorkload(tpccArgs{warehouses: 20})
20692055

20702056
var err error
2071-
_, err = ct.DB().Exec("ALTER DATABASE tpcc SET PRIMARY REGION 'us-west1'")
2057+
_, err = ct.DB().Exec(`ALTER DATABASE tpcc
2058+
CONFIGURE ZONE USING
2059+
constraints = '{+region=us-west1: 1, +region=us-east1: 1}',
2060+
lease_preferences = '[[+region=us-west1]]', num_replicas = 3`)
20722061
require.NoError(t, err)
20732062

2063+
// Verify lease holders are in us-west1-b.
2064+
verifyLeaseHolderLocality(ct.DB(), t, "cloud=gce,region=us-west1,zone=us-west1-b")
2065+
20742066
feed := ct.newChangefeed(feedArgs{
20752067
sinkType: cloudStorageSink,
20762068
targets: allTpccTargets,
20772069
opts: map[string]string{
20782070
"execution_locality": "'region=us-east1'",
2071+
"initial_scan": "'only'",
20792072
},
20802073
})
20812074
ct.waitForWorkload()
@@ -2084,18 +2077,12 @@ func registerCDC(r registry.Registry) {
20842077
processors, err := getDiagramProcessors(ctx, ct.DB())
20852078
require.NoError(t, err)
20862079

2080+
// Verify changefeed aggregators are distributed across nodes in region us-east.
20872081
changefeedDistribution := getChangefeedDistribution(processors, nodeToZone, t)
20882082
require.Greater(t, changefeedDistribution.TotalAggregators, 1)
2089-
for nodeIdx, spansWatched := range changefeedDistribution.NodeToSpansWatched {
2090-
require.LessOrEqual(t, spansWatched, changefeedDistribution.TotalSpansWatched/2, "nodeIdx %d watched %d spans, total spans watched %d", nodeIdx, spansWatched, changefeedDistribution.TotalSpansWatched)
2091-
}
2092-
require.Equal(t, 1, len(changefeedDistribution.ZoneToSpansWatched))
2083+
require.ElementsMatch(t, []string{"us-east1-b"}, slices.Collect(maps.Keys(changefeedDistribution.ZoneToSpansWatched)))
20932084
require.Equal(t, changefeedDistribution.ZoneToSpansWatched["us-east1-b"], changefeedDistribution.TotalSpansWatched)
2094-
zoneToLeaseHolderCount := veryifyLeaseHolderDistribution(ct.DB(), t, nodeToZone)
2095-
// Majority of lease holders should be in us-west1-b. Some may not, but most should.
2096-
if zoneToLeaseHolderCount["us-east1-b"] != 0 {
2097-
require.Greater(t, zoneToLeaseHolderCount["us-west1-b"]/zoneToLeaseHolderCount["us-east1-b"], 10)
2098-
}
2085+
require.Greater(t, changefeedDistribution.TotalSpansWatched, 0)
20992086
},
21002087
})
21012088
r.Add(registry.TestSpec{

0 commit comments

Comments
 (0)