
Commit 61e9ff7

kvserver: add more logging to TestProcessSplitAfterRightHandSideHasBeenRemoved
This is a complex test that modifies the state of the test server multiple times. As a result, the server logs can be hard to interpret because many errors are expected. Here we add some logging via our own `log` package so that the test actions can be correlated with errors and other entries in the server logs.

Informs #149669

Release note: None
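For reference, the mechanism this commit leans on is context-carried log tags: `logtags.AddTag` derives a new context carrying a key/value tag, and CockroachDB's `log` package renders the tags of the supplied context in the bracketed prefix of each entry. A minimal sketch of the pattern (the rendered log line below is illustrative, not output captured from this commit):

	// Tag the context once at the top of each test case...
	ctx := logtags.AddTag(context.Background(), "gotest", t.Name())
	// ...and every entry logged through it carries the tag, roughly:
	//   I250701 ... [gotest=TestProcessSplitAfterRightHandSideHasBeenRemoved/...] activating LHS partition: ...
	log.Infof(ctx, "activating LHS partition: %s", lhsPartition)

This lets a reader grep the combined server logs for the test name and line the test's own actions up against the expected errors surrounding them.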
1 parent 9ddf582 commit 61e9ff7

3 files changed: +52, -10 lines changed

pkg/kv/kvserver/client_raft_helpers_test.go

Lines changed: 11 additions & 0 deletions
@@ -8,6 +8,8 @@ package kvserver_test
 import (
 	"context"
 	"fmt"
+	"maps"
+	"slices"
 	"sync/atomic"
 	"testing"

@@ -244,6 +246,15 @@ type testClusterPartitionedRange struct {
 	handlers []kvserver.IncomingRaftMessageHandler
 }
 
+func (p *testClusterPartitionedRange) String() string {
+	p.mu.RLock()
+	defer p.mu.RUnlock()
+	return fmt.Sprintf("partition[rangeID: %s, replicas: %v; stores: %v]",
+		p.rangeID,
+		slices.Collect(maps.Keys(p.mu.partitionedReplicas)),
+		slices.Collect(maps.Keys(p.mu.partitionedStores)))
+}
+
 // setupPartitionedRange sets up an testClusterPartitionedRange for the provided
 // TestCluster, rangeID, and node index in the TestCluster. The range is
 // initially not partitioned.
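The new `String` method uses the Go 1.23 iterator helpers, which is why `maps` and `slices` are imported above: `maps.Keys` yields an `iter.Seq` over the map's keys, and `slices.Collect` drains that sequence into a slice. A self-contained sketch of the idiom (toy map types, not the test's actual fields):

	package main

	import (
		"fmt"
		"maps"
		"slices"
	)

	func main() {
		partitionedReplicas := map[int]bool{1: true, 3: true}
		// Collect the keys; Go randomizes map iteration order, so sort
		// when a stable rendering matters.
		keys := slices.Collect(maps.Keys(partitionedReplicas))
		slices.Sort(keys)
		fmt.Printf("partition[replicas: %v]\n", keys) // partition[replicas: [1 3]]
	}

Note that the method above does not sort, so the order of replicas and stores in the log output can vary from run to run.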

pkg/kv/kvserver/client_raft_test.go

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ import (
6969
"github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb"
7070
"github.com/cockroachdb/cockroach/pkg/util/uuid"
7171
"github.com/cockroachdb/errors"
72+
"github.com/cockroachdb/logtags"
7273
"github.com/cockroachdb/redact"
7374
"github.com/stretchr/testify/require"
7475
"google.golang.org/grpc"
@@ -5289,6 +5290,7 @@ func TestProcessSplitAfterRightHandSideHasBeenRemoved(t *testing.T) {
 	ctx := context.Background()
 
 	testutils.RunValues(t, "lease-type", roachpb.TestingAllLeaseTypes(), func(t *testing.T, leaseType roachpb.LeaseType) {
+		ctx = logtags.AddTag(ctx, "gotest", t.Name())
 		noopProposalFilter := kvserverbase.ReplicaProposalFilter(func(args kvserverbase.ProposalFilterArgs) *kvpb.Error {
 			return nil
 		})
@@ -5299,6 +5301,7 @@ func TestProcessSplitAfterRightHandSideHasBeenRemoved(t *testing.T) {
 	}
 
 	increment := func(t *testing.T, db *kv.DB, key roachpb.Key, by int64) {
+		t.Helper()
 		b := &kv.Batch{}
 		b.AddRawRequest(incrementArgs(key, by))
 		require.NoError(t, db.Run(ctx, b))
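The `t.Helper()` calls added in this and the following hunk mark the enclosing functions as test helpers: when an assertion inside one fails, the testing package reports the caller's file and line rather than the helper's, which matters for helpers like `increment` that are invoked from many steps of this test. A standalone illustration with a hypothetical helper (not from the diff):

	package example

	import "testing"

	// mustIncrement fails the test at the call site when by is negative,
	// because t.Helper() tells the testing package to skip this frame
	// when attributing the failure.
	func mustIncrement(t *testing.T, counter *int64, by int64) {
		t.Helper()
		if by < 0 {
			t.Fatalf("negative increment %d", by)
		}
		*counter += by
	}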
@@ -5312,6 +5315,7 @@ func TestProcessSplitAfterRightHandSideHasBeenRemoved(t *testing.T) {
 	getHardState := func(
 		t *testing.T, store *kvserver.Store, rangeID roachpb.RangeID,
 	) raftpb.HardState {
+		t.Helper()
 		hs, err := stateloader.Make(rangeID).LoadHardState(ctx, store.TODOEngine())
 		require.NoError(t, err)
 		return hs
@@ -5340,6 +5344,7 @@ func TestProcessSplitAfterRightHandSideHasBeenRemoved(t *testing.T) {
 		var err error
 		*partRange, err = basePartition.extend(tc, split.RightDesc.RangeID, replDesc.ReplicaID,
 			0 /* partitionedNode */, true /* activated */, unreliableRaftHandlerFuncs{})
+		log.Infof(ctx, "partition installed before proposing split: %s", *partRange)
 		require.NoError(t, err)
 		proposalFilter.Store(noopProposalFilter)
 	})
@@ -5432,6 +5437,7 @@ func TestProcessSplitAfterRightHandSideHasBeenRemoved(t *testing.T) {
 		// Make sure everybody knows about that transfer.
 		increment(t, db, keyA, 1)
 		tc.WaitForValues(t, keyA, []int64{6, 6, 6})
+		log.Infof(ctx, "activating LHS partition: %s", lhsPartition)
 		lhsPartition.activate()
 
 		increment(t, db, keyA, 1)
@@ -5467,13 +5473,15 @@ func TestProcessSplitAfterRightHandSideHasBeenRemoved(t *testing.T) {
 
 		// Remove and re-add the RHS to create a new uninitialized replica at
 		// a higher replica ID. This will lead to a tombstone being written.
-		tc.RemoveVotersOrFatal(t, keyB, tc.Target(0))
+		target := tc.Target(0)
+		log.Infof(ctx, "removing voter: %v", target)
+		tc.RemoveVotersOrFatal(t, keyB, target)
 		// Unsuccessful because the RHS will not accept the learner snapshot
 		// and will be rolled back. Nevertheless it will have learned that it
 		// has been removed at the old replica ID.
 		_, err = tc.Servers[0].DB().AdminChangeReplicas(
 			ctx, keyB, tc.LookupRangeOrFatal(t, keyB),
-			kvpb.MakeReplicationChanges(roachpb.ADD_VOTER, tc.Target(0)),
+			kvpb.MakeReplicationChanges(roachpb.ADD_VOTER, target),
 		)
 		require.True(t, kvserver.IsRetriableReplicationChangeError(err), err)

@@ -5483,12 +5491,14 @@ func TestProcessSplitAfterRightHandSideHasBeenRemoved(t *testing.T) {
 		// it will find out about its new replica ID and write a tombstone for the
 		// old one.
 		waitForTombstone(t, tc.GetFirstStoreFromServer(t, 0).StateEngine(), rhsID)
+		log.Infof(ctx, "deactivating LHS partition: %s", lhsPartition)
 		lhsPartition.deactivate()
 		tc.WaitForValues(t, keyA, []int64{8, 8, 8})
 		hs := getHardState(t, tc.GetFirstStoreFromServer(t, 0), rhsID)
 		require.Equal(t, uint64(0), hs.Commit)
+		log.Infof(ctx, "adding voter: %v", target)
 		testutils.SucceedsSoon(t, func() error {
-			_, err := tc.AddVoters(keyB, tc.Target(0))
+			_, err := tc.AddVoters(keyB, target)
 			return err
 		})
 		tc.WaitForValues(t, keyB, []int64{6, 6, 6})
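`testutils.SucceedsSoon`, used above around `tc.AddVoters`, retries the closure until it returns nil or a timeout expires, which is how the test tolerates transient learner-snapshot failures. A simplified stand-in, assuming a fixed poll interval and timeout (the real CockroachDB helper uses backoff and its own default deadline):

	package example

	import (
		"testing"
		"time"
	)

	// succeedsSoon is a toy version of testutils.SucceedsSoon: poll fn
	// until it succeeds or the deadline passes, then fail the test.
	func succeedsSoon(t *testing.T, fn func() error) {
		t.Helper()
		deadline := time.Now().Add(45 * time.Second)
		for {
			err := fn()
			if err == nil {
				return
			}
			if time.Now().After(deadline) {
				t.Fatalf("condition never succeeded: %v", err)
			}
			time.Sleep(100 * time.Millisecond)
		}
	}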
@@ -5519,13 +5529,15 @@ func TestProcessSplitAfterRightHandSideHasBeenRemoved(t *testing.T) {
 
 		// Remove and re-add the RHS to create a new uninitialized replica at
 		// a higher replica ID. This will lead to a tombstone being written.
-		tc.RemoveVotersOrFatal(t, keyB, tc.Target(0))
+		target := tc.Target(0)
+		log.Infof(ctx, "removing voter: %v", target)
+		tc.RemoveVotersOrFatal(t, keyB, target)
 		// Unsuccessfuly because the RHS will not accept the learner snapshot
 		// and will be rolled back. Nevertheless it will have learned that it
 		// has been removed at the old replica ID.
 		_, err = tc.Servers[0].DB().AdminChangeReplicas(
 			ctx, keyB, tc.LookupRangeOrFatal(t, keyB),
-			kvpb.MakeReplicationChanges(roachpb.ADD_VOTER, tc.Target(0)),
+			kvpb.MakeReplicationChanges(roachpb.ADD_VOTER, target),
 		)
 		require.True(t, kvserver.IsRetriableReplicationChangeError(err), err)

@@ -5549,15 +5561,19 @@ func TestProcessSplitAfterRightHandSideHasBeenRemoved(t *testing.T) {
 		}
 
 		// Restart store 0 so that it forgets about the newer replicaID.
+		log.Infof(ctx, "stopping server 0")
 		tc.StopServer(0)
+		log.Infof(ctx, "deactivating LHS partition: %s", lhsPartition)
 		lhsPartition.deactivate()
+		log.Infof(ctx, "starting server 0")
 		require.NoError(t, tc.RestartServer(0))
 
 		tc.WaitForValues(t, keyA, []int64{8, 8, 8})
 		hs := getHardState(t, tc.GetFirstStoreFromServer(t, 0), rhsID)
 		require.Equal(t, uint64(0), hs.Commit)
+		log.Infof(ctx, "adding voter: %v", target)
 		testutils.SucceedsSoon(t, func() error {
-			_, err := tc.AddVoters(keyB, tc.Target(0))
+			_, err := tc.AddVoters(keyB, target)
 			return err
 		})
 		tc.WaitForValues(t, keyB, []int64{curB, curB, curB})
@@ -5592,35 +5608,41 @@ func TestProcessSplitAfterRightHandSideHasBeenRemoved(t *testing.T) {
 
 		// Remove and re-add the RHS to create a new uninitialized replica at
 		// a higher replica ID. This will lead to a tombstone being written.
-		tc.RemoveVotersOrFatal(t, keyB, tc.Target(0))
+		target := tc.Target(0)
+		log.Infof(ctx, "removing voter: %v", target)
+		tc.RemoveVotersOrFatal(t, keyB, target)
 		// Unsuccessful because the RHS will not accept the learner snapshot and
 		// will be rolled back. Nevertheless it will have learned that it has been
 		// removed at the old replica ID. We don't use tc.AddVoters because that
 		// will retry until it runs out of time, since we're creating a
 		// retriable-looking situation here that will persist.
 		_, err = tc.Servers[0].DB().AdminChangeReplicas(
 			ctx, keyB, tc.LookupRangeOrFatal(t, keyB),
-			kvpb.MakeReplicationChanges(roachpb.ADD_VOTER, tc.Target(0)),
+			kvpb.MakeReplicationChanges(roachpb.ADD_VOTER, target),
 		)
 		require.True(t, kvserver.IsRetriableReplicationChangeError(err), err)
 		// Ensure that the replica exists with the higher replica ID.
 		repl, err := tc.GetFirstStoreFromServer(t, 0).GetReplica(rhsInfo.Desc.RangeID)
 		require.NoError(t, err)
 		require.Equal(t, repl.ReplicaID(), rhsInfo.Desc.NextReplicaID)
 		rhsPartition.addReplica(rhsInfo.Desc.NextReplicaID)
+		log.Infof(ctx, "added %d to RHS partition %s", rhsInfo.Desc.NextReplicaID, rhsPartition)
 		// Ensure that there's no tombstone.
 		// The RHS on store 0 never should have heard about its original ID.
 		ensureNoTombstone(t, tc.GetFirstStoreFromServer(t, 0), rhsID)
+		log.Infof(ctx, "deactivating LHS partition: %s", lhsPartition)
 		lhsPartition.deactivate()
+		log.Infof(ctx, "deactivating RHS partition: %s", rhsPartition)
 		rhsPartition.deactivate()
 		tc.WaitForValues(t, keyA, []int64{8, 8, 8})
 		hs := getHardState(t, tc.GetFirstStoreFromServer(t, 0), rhsID)
 		require.Equal(t, uint64(0), hs.Commit)
 		// Now succeed in adding the RHS. Use SucceedsSoon because in rare cases
 		// the learner snapshot can fail due to a race with a raft snapshot from
 		// a raft leader on a different node.
+		log.Infof(ctx, "adding voter: %v", target)
 		testutils.SucceedsSoon(t, func() error {
-			_, err := tc.AddVoters(keyB, tc.Target(0))
+			_, err := tc.AddVoters(keyB, target)
 			return err
 		})
 		tc.WaitForValues(t, keyB, []int64{6, 6, 6})
@@ -5666,7 +5688,9 @@ func TestProcessSplitAfterRightHandSideHasBeenRemoved(t *testing.T) {
 
 		// Remove and re-add the RHS to create a new uninitialized replica at
 		// a higher replica ID. This will lead to a tombstone being written.
-		tc.RemoveVotersOrFatal(t, keyB, tc.Target(0))
+		target := tc.Target(0)
+		log.Infof(ctx, "removing voter: %v", target)
+		tc.RemoveVotersOrFatal(t, keyB, target)
 		// Unsuccessfuly because the RHS will not accept the learner snapshot
 		// and will be rolled back. Nevertheless it will have learned that it
 		// has been removed at the old replica ID.
@@ -5685,6 +5709,7 @@ func TestProcessSplitAfterRightHandSideHasBeenRemoved(t *testing.T) {
 		// Now, before we deactivate the LHS partition, partition the newer replica
 		// on the RHS too.
 		rhsPartition.addReplica(rhsInfo.Desc.NextReplicaID)
+		log.Infof(ctx, "added %d to RHS partition: %s", rhsInfo.Desc.NextReplicaID, rhsPartition)
 
 		// We do all of this incrementing to ensure that nobody will ever
 		// succeed in sending a message the new RHS replica after we restart
@@ -5698,8 +5723,11 @@ func TestProcessSplitAfterRightHandSideHasBeenRemoved(t *testing.T) {
 			tc.WaitForValues(t, keyB, []int64{0, curB, curB})
 		}
 
+		log.Infof(ctx, "stopping server 0")
 		tc.StopServer(0)
+		log.Infof(ctx, "deactivate LHS partition: %s", lhsPartition)
 		lhsPartition.deactivate()
+		log.Infof(ctx, "restarting server 0")
 		require.NoError(t, tc.RestartServer(0))
 
 		tc.WaitForValues(t, keyA, []int64{8, 8, 8})
@@ -5714,7 +5742,9 @@ func TestProcessSplitAfterRightHandSideHasBeenRemoved(t *testing.T) {
 			}
 			return nil
 		})
+		log.Infof(ctx, "deactivate RHS partition: %s", rhsPartition)
 		rhsPartition.deactivate()
+		log.Infof(ctx, "adding voter: %v", target)
 		testutils.SucceedsSoon(t, func() error {
 			_, err := tc.AddVoters(keyB, tc.Target(0))
 			return err

pkg/kv/kvserver/client_test.go

Lines changed: 1 addition & 0 deletions
@@ -207,6 +207,7 @@ func assertRecomputedStats(
 func waitForTombstone(
 	t *testing.T, reader storage.Reader, rangeID roachpb.RangeID,
 ) (tombstone kvserverpb.RangeTombstone) {
+	t.Helper()
 	sl := stateloader.Make(rangeID)
 	testutils.SucceedsSoon(t, func() error {
 		ts, err := sl.LoadRangeTombstone(context.Background(), reader)
