@@ -41,7 +41,6 @@ import (
4141 "github.com/cockroachdb/cockroach/pkg/server/serverpb"
4242 "github.com/cockroachdb/cockroach/pkg/settings/cluster"
4343 "github.com/cockroachdb/cockroach/pkg/spanconfig"
44- "github.com/cockroachdb/cockroach/pkg/spanconfig/spanconfigstore"
4544 "github.com/cockroachdb/cockroach/pkg/sql/catalog/bootstrap"
4645 "github.com/cockroachdb/cockroach/pkg/testutils"
4746 "github.com/cockroachdb/cockroach/pkg/testutils/skip"
@@ -618,7 +617,7 @@ func checkReplicaCount(
618617 rangeDesc * roachpb.RangeDescriptor ,
619618 voterCount , nonVoterCount int ,
620619) (bool , error ) {
621- err := forceScanOnAllReplicationQueues (tc )
620+ err := forceScanOnAllReplicationAndSplitQueues (tc )
622621 if err != nil {
623622 log .KvDistribution .Infof (ctx , "store.ForceReplicationScanAndProcess() failed with: %s" , err )
624623 return false , err
@@ -1544,7 +1543,7 @@ func TestReplicateQueueSwapVotersWithNonVoters(t *testing.T) {
15441543 " num_replicas=5, num_voters=1" )
15451544 require .NoError (t , err )
15461545 testutils .SucceedsSoon (t , func () error {
1547- if err := forceScanOnAllReplicationQueues (tc ); err != nil {
1546+ if err := forceScanOnAllReplicationAndSplitQueues (tc ); err != nil {
15481547 return err
15491548 }
15501549 scratchRange := tc .LookupRangeOrFatal (t , scratchKey )
@@ -1559,7 +1558,7 @@ func TestReplicateQueueSwapVotersWithNonVoters(t *testing.T) {
15591558
15601559 checkRelocated := func (t * testing.T , voterStores , nonVoterStores []roachpb.StoreID ) {
15611560 testutils .SucceedsSoon (t , func () error {
1562- if err := forceScanOnAllReplicationQueues (tc ); err != nil {
1561+ if err := forceScanOnAllReplicationAndSplitQueues (tc ); err != nil {
15631562 return err
15641563 }
15651564 scratchRange := tc .LookupRangeOrFatal (t , scratchKey )
@@ -1665,7 +1664,7 @@ func TestReplicateQueueShouldQueueNonVoter(t *testing.T) {
16651664 // Make sure that the range has conformed to the constraints we just set
16661665 // above.
16671666 require .Eventually (t , func () bool {
1668- if err := forceScanOnAllReplicationQueues (tc ); err != nil {
1667+ if err := forceScanOnAllReplicationAndSplitQueues (tc ); err != nil {
16691668 log .KvDistribution .Warningf (ctx , "received error while forcing a replicateQueue scan: %s" , err )
16701669 return false
16711670 }
@@ -1785,13 +1784,24 @@ func toggleReplicationQueues(tc *testcluster.TestCluster, active bool) {
17851784 }
17861785}
17871786
1788- func forceScanOnAllReplicationQueues (tc * testcluster.TestCluster ) (err error ) {
1787+ func forceScanOnAllReplicationAndSplitQueues (tc * testcluster.TestCluster ) (err error ) {
1788+ // We force scans on the replication and split queues because sometimes splits
1789+ // are necessary to apply zone config changes, which then lead to further
1790+ // replication decisions. See #156530 for an example of this.
17891791 for _ , s := range tc .Servers {
1790- err = s .GetStores ().(* kvserver.Stores ).VisitStores (func (store * kvserver.Store ) error {
1791- return store .ForceReplicationScanAndProcess ()
1792- })
1792+ if err := s .GetStores ().(* kvserver.Stores ).VisitStores (func (store * kvserver.Store ) error {
1793+ if err := store .ForceReplicationScanAndProcess (); err != nil {
1794+ return err
1795+ }
1796+ if err := store .ForceSplitScanAndProcess (); err != nil {
1797+ return err
1798+ }
1799+ return nil
1800+ }); err != nil {
1801+ return err
1802+ }
17931803 }
1794- return err
1804+ return nil
17951805}
17961806
17971807func toggleSplitQueues (tc * testcluster.TestCluster , active bool ) {
@@ -2189,20 +2199,13 @@ func TestPromoteNonVoterInAddVoter(t *testing.T) {
21892199 defer testutils .StartExecTrace (t , scope .GetDirectory ()).Finish (t )
21902200
21912201 ctx := context .Background ()
2192- st := cluster .MakeTestingClusterSettings ()
2193- // NB: Ensure that tables created by the test start off on their own range.
2194- // This ensures that any span config changes made by the test don't have to
2195- // first induce a split, which is a known source of flakiness.
2196- spanconfigstore .StorageCoalesceAdjacentSetting .Override (ctx , & st .SV , false )
2197- spanconfigstore .TenantCoalesceAdjacentSetting .Override (ctx , & st .SV , false )
21982202
21992203 // Create 7 stores: 3 in Region 1, 2 in Region 2, and 2 in Region 3.
22002204 const numNodes = 7
22012205 serverArgs := make (map [int ]base.TestServerArgs )
22022206 regions := [numNodes ]int {1 , 1 , 1 , 2 , 2 , 3 , 3 }
22032207 for i := 0 ; i < numNodes ; i ++ {
22042208 serverArgs [i ] = base.TestServerArgs {
2205- Settings : st ,
22062209 Locality : roachpb.Locality {
22072210 Tiers : []roachpb.Tier {
22082211 {
@@ -2237,7 +2240,7 @@ func TestPromoteNonVoterInAddVoter(t *testing.T) {
22372240 setConstraintFn ("RANGE meta" , 7 , 7 , "" )
22382241 setConstraintFn ("RANGE default" , 7 , 7 , "" )
22392242 testutils .SucceedsSoon (t , func () error {
2240- if err := forceScanOnAllReplicationQueues (tc ); err != nil {
2243+ if err := forceScanOnAllReplicationAndSplitQueues (tc ); err != nil {
22412244 return err
22422245 }
22432246 s , err := sqlutils .RowsToDataDrivenOutput (sqlutils .MakeSQLRunner (tc .Conns [0 ]).Query (t , `
@@ -2271,36 +2274,64 @@ SELECT * FROM (
22712274 t * testing.T ,
22722275 tc * testcluster.TestCluster ,
22732276 db * gosql.DB ,
2274- ) (numVoters , numNonVoters int , err error ) {
2275- if err := forceScanOnAllReplicationQueues (tc ); err != nil {
2276- return 0 , 0 , err
2277+ ) (numVoters , numNonVoters int , _ roachpb. RangeDescriptor , err error ) {
2278+ if err := forceScanOnAllReplicationAndSplitQueues (tc ); err != nil {
2279+ return 0 , 0 , roachpb. RangeDescriptor {}, err
22772280 }
22782281
2279- var rangeID roachpb.RangeID
2280- if err := db .QueryRow ("SELECT range_id FROM [SHOW RANGES FROM TABLE t] LIMIT 1" ).Scan (& rangeID ); err != nil {
2281- return 0 , 0 , err
2282+ var key roachpb.Key
2283+ require .NoError (t ,
2284+ db .QueryRow (`
2285+ SELECT start_key from crdb_internal.ranges_no_leases WHERE range_id IN
2286+ (SELECT range_id FROM [SHOW RANGES FROM TABLE t] LIMIT 1);
2287+ ` ).Scan (& key ))
2288+ desc , err := tc .LookupRange (key )
2289+ if err != nil {
2290+ return 0 , 0 , roachpb.RangeDescriptor {}, err
22822291 }
2292+
22832293 iterateOverAllStores (t , tc , func (s * kvserver.Store ) error {
2284- if replica , err := s .GetReplica (rangeID ); err == nil && replica .OwnsValidLease (ctx , replica .Clock ().NowAsClockTimestamp ()) {
2294+ if replica , err := s .GetReplica (desc .RangeID ); err == nil && replica .OwnsValidLease (ctx ,
2295+ replica .Clock ().NowAsClockTimestamp ()) {
22852296 desc := replica .Desc ()
22862297 numVoters = len (desc .Replicas ().VoterDescriptors ())
22872298 numNonVoters = len (desc .Replicas ().NonVoterDescriptors ())
22882299 }
22892300 return nil
22902301 })
2291- return numVoters , numNonVoters , nil
2302+ return numVoters , numNonVoters , desc , nil
22922303 }
22932304
22942305 // Ensure we are meeting our ZONE survival configuration.
2306+ logMore := time .After (15 * time .Second )
22952307 testutils .SucceedsSoon (t , func () error {
2296- numVoters , numNonVoters , err := computeNumberOfReplicas (t , tc , db )
2308+ numVoters , numNonVoters , desc , err := computeNumberOfReplicas (t , tc , db )
2309+
22972310 require .NoError (t , err )
2311+ select {
2312+ default :
2313+ case <- logMore :
2314+ // If the retry loop has been stuck for a while, log the span config
2315+ // as seen by each store. For it to apply, the range's span needs to
2316+ // be contained in the start key's span config. If this is not the
2317+ // case, it can explain why the replication changes are not being made.
2318+ iterateOverAllStores (t , tc , func (s * kvserver.Store ) error {
2319+ cfg , sp , err := s .GetStoreConfig ().SpanConfigSubscriber .GetSpanConfigForKey (ctx , desc .StartKey )
2320+ if err != nil {
2321+ return err
2322+ }
2323+ t .Logf ("s%d: r%d %s -> span config %s %s" , s .StoreID (), desc .RangeID , desc .RSpan (), sp , & cfg )
2324+ return nil
2325+ })
2326+ }
2327+
22982328 if numVoters != 3 {
2299- return errors .Newf ("expected 3 voters; got %d" , numVoters )
2329+ return errors .Newf ("expected 3 voters for r%d ; got %d" , desc . RangeID , numVoters )
23002330 }
23012331 if numNonVoters != 2 {
2302- return errors .Newf ("expected 2 non-voters; got %v" , numNonVoters )
2332+ return errors .Newf ("expected 2 non-voters for r%d ; got %v" , desc . RangeID , numNonVoters )
23032333 }
2334+ t .Logf ("success: %d has %d voters and %d non-voters" , desc .RangeID , numVoters , numNonVoters )
23042335 return nil
23052336 })
23062337
@@ -2316,13 +2347,13 @@ SELECT * FROM (
23162347
23172348 // Ensure we are meeting our REGION survival configuration.
23182349 testutils .SucceedsSoon (t , func () error {
2319- numVoters , numNonVoters , err := computeNumberOfReplicas (t , tc , db )
2350+ numVoters , numNonVoters , desc , err := computeNumberOfReplicas (t , tc , db )
23202351 require .NoError (t , err )
23212352 if numVoters != 5 {
2322- return errors .Newf ("expected 5 voters; got %d" , numVoters )
2353+ return errors .Newf ("expected 5 voters for r%d ; got %d" , desc . RangeID , numVoters )
23232354 }
23242355 if numNonVoters != 0 {
2325- return errors .Newf ("expected 0 non-voters; got %v" , numNonVoters )
2356+ return errors .Newf ("expected 0 non-voters for r%d ; got %v" , desc . RangeID , numNonVoters )
23262357 }
23272358 return nil
23282359 })
0 commit comments