Skip to content

Commit 1c3040a

Browse files
craig[bot]arulajmani
andcommitted
Merge #156472
156472: kvserver: add destroy replica to TestReplicaLifecycleDataDriven r=pav-kv a=arulajmani See individual commits for details. Co-authored-by: Arul Ajmani <[email protected]>
2 parents 0c7c037 + a91324d commit 1c3040a

File tree

5 files changed

+266
-79
lines changed

5 files changed

+266
-79
lines changed

pkg/kv/kvserver/batcheval/cmd_end_transaction.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1302,6 +1302,9 @@ type SplitTriggerHelperInput struct {
13021302
// splitTriggerHelper continues the work begun by splitTrigger, but has a
13031303
// reduced scope that has all stats-related concerns bundled into a
13041304
// splitStatsHelper.
1305+
//
1306+
// TODO(arul): consider having this function write keys to the batch in sorted
1307+
// order, much like how destroyReplicaImpl does.
13051308
func splitTriggerHelper(
13061309
ctx context.Context,
13071310
rec EvalContext,

pkg/kv/kvserver/replica_lifecycle_datadriven_test.go

Lines changed: 175 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,19 @@ import (
1212
"cmp"
1313
"context"
1414
"fmt"
15+
"strconv"
1516
"strings"
1617
"testing"
1718
"time"
1819

20+
"github.com/cockroachdb/cockroach/pkg/keys"
21+
"github.com/cockroachdb/cockroach/pkg/kv/kvpb"
1922
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/abortspan"
2023
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval"
24+
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock"
2125
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
2226
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvstorage"
27+
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/logstore"
2328
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/print"
2429
"github.com/cockroachdb/cockroach/pkg/raft/raftpb"
2530
"github.com/cockroachdb/cockroach/pkg/roachpb"
@@ -31,6 +36,7 @@ import (
3136
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
3237
"github.com/cockroachdb/cockroach/pkg/util/log"
3338
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
39+
"github.com/cockroachdb/cockroach/pkg/util/uuid"
3440
"github.com/cockroachdb/datadriven"
3541
"github.com/cockroachdb/errors"
3642
"github.com/stretchr/testify/require"
@@ -81,6 +87,26 @@ import (
8187
//
8288
// Executes the split trigger for the specified range on n1.
8389
//
90+
// destroy-replica range-id=<int>
91+
// ----
92+
//
93+
// Destroys the replica on n1 for the specified range. The replica's state
94+
// must have already been created via create-replica.
95+
//
96+
// append-raft-entries range-id=<int> num-entries=<int>
97+
// ----
98+
//
99+
// Appends the specified number of dummy raft entries to the raft log for
100+
// the replica on n1/s1. The replica must have already been created via
101+
// create-replica.
102+
//
103+
// create-range-data range-id=<int> [num-user-keys=<int>] [num-system-keys=<int>] [num-lock-table-keys=<int>]
104+
// ----
105+
//
106+
// Creates the specified number of user, system, and lock table keys for the
107+
// range. At least one parameter should be non-zero to ensure this directive is
108+
// not nonsensical.
109+
//
84110
// print-range-state [sort-keys=<bool>]
85111
// ----
86112
//
@@ -91,6 +117,8 @@ func TestReplicaLifecycleDataDriven(t *testing.T) {
91117
defer leaktest.AfterTest(t)()
92118
defer log.Scope(t).Close(t)
93119

120+
storage.DisableMetamorphicSimpleValueEncoding(t) // for deterministic output
121+
94122
datadriven.Walk(t, "testdata/replica_lifecycle", func(t *testing.T, path string) {
95123
tc := newTestCtx()
96124
defer tc.close()
@@ -149,37 +177,24 @@ func TestReplicaLifecycleDataDriven(t *testing.T) {
149177
if rs.replica != nil {
150178
return errors.New("initialized replica already exists on n1/s1").Error()
151179
}
152-
repl := rs.getReplicaDescriptor(t, roachpb.NodeID(1))
153-
154-
batch := tc.storage.NewBatch()
155-
defer batch.Close()
156-
157-
if initialized {
158-
require.NoError(t, kvstorage.WriteInitialRangeState(
159-
ctx, batch, batch,
160-
rs.desc, repl.ReplicaID, rs.version,
161-
))
162-
} else {
163-
require.NoError(t, kvstorage.CreateUninitializedReplica(
164-
ctx, kvstorage.TODOState(batch), batch, 1, /* StoreID */
165-
roachpb.FullReplicaID{RangeID: rs.desc.RangeID, ReplicaID: repl.ReplicaID},
166-
))
167-
}
168-
tc.updatePostReplicaCreateState(t, ctx, rs, batch)
180+
repl := rs.mustGetReplicaDescriptor(t, roachpb.NodeID(1))
181+
182+
output := tc.mutate(t, func(batch storage.Batch) {
183+
if initialized {
184+
require.NoError(t, kvstorage.WriteInitialRangeState(
185+
ctx, batch, batch,
186+
rs.desc, repl.ReplicaID, rs.version,
187+
))
188+
} else {
189+
require.NoError(t, kvstorage.CreateUninitializedReplica(
190+
ctx, kvstorage.TODOState(batch), batch, 1, /* StoreID */
191+
roachpb.FullReplicaID{RangeID: rs.desc.RangeID, ReplicaID: repl.ReplicaID},
192+
))
193+
}
194+
tc.updatePostReplicaCreateState(t, ctx, rs, batch)
195+
})
169196

170-
// Print the descriptor and batch output.
171-
var sb strings.Builder
172-
output, err := print.DecodeWriteBatch(batch.Repr())
173-
require.NoError(t, err, "error decoding batch")
174-
sb.WriteString(fmt.Sprintf("created replica: %v", repl))
175-
if output != "" {
176-
sb.WriteString("\n")
177-
sb.WriteString(output)
178-
}
179-
// Commit the batch.
180-
err = batch.Commit(true)
181-
require.NoError(t, err, "error committing batch")
182-
return sb.String()
197+
return fmt.Sprintf("created replica: %v\n%s", repl, output)
183198

184199
case "create-split":
185200
rangeID := dd.ScanArg[roachpb.RangeID](t, d, "range-id")
@@ -210,7 +225,8 @@ func TestReplicaLifecycleDataDriven(t *testing.T) {
210225
replicaNodeID := dd.ScanArg[roachpb.NodeID](t, d, "replica")
211226
leaseType := dd.ScanArgOr(t, d, "lease-type", "leader-lease")
212227
rs := tc.mustGetRangeState(t, rangeID)
213-
targetReplica := rs.getReplicaDescriptor(t, replicaNodeID)
228+
targetReplica := rs.mustGetReplicaDescriptor(t, replicaNodeID)
229+
214230
// NB: The details of the lease are not important to the test;
215231
// only the type is.
216232
var lease roachpb.Lease
@@ -243,45 +259,108 @@ func TestReplicaLifecycleDataDriven(t *testing.T) {
243259
require.True(t, ok, "split trigger not found for range-id %d", rangeID)
244260
rs := tc.mustGetRangeState(t, rangeID)
245261
desc := rs.desc
246-
batch := tc.storage.NewBatch()
247-
defer batch.Close()
248-
249-
rec := (&batcheval.MockEvalCtx{
250-
ClusterSettings: tc.st,
251-
Desc: &desc,
252-
Clock: tc.clock,
253-
AbortSpan: rs.abortspan,
254-
LastReplicaGCTimestamp: rs.lastGCTimestamp,
255-
RangeLeaseDuration: tc.rangeLeaseDuration,
256-
}).EvalContext()
257-
258-
in := batcheval.SplitTriggerHelperInput{
259-
LeftLease: rs.lease,
260-
GCThreshold: &rs.gcThreshold,
261-
GCHint: &rs.gcHint,
262-
ReplicaVersion: rs.version,
262+
263+
return tc.mutate(t, func(batch storage.Batch) {
264+
rec := (&batcheval.MockEvalCtx{
265+
ClusterSettings: tc.st,
266+
Desc: &desc,
267+
Clock: tc.clock,
268+
AbortSpan: rs.abortspan,
269+
LastReplicaGCTimestamp: rs.lastGCTimestamp,
270+
RangeLeaseDuration: tc.rangeLeaseDuration,
271+
}).EvalContext()
272+
273+
in := batcheval.SplitTriggerHelperInput{
274+
LeftLease: rs.lease,
275+
GCThreshold: &rs.gcThreshold,
276+
GCHint: &rs.gcHint,
277+
ReplicaVersion: rs.version,
278+
}
279+
_, _, err := batcheval.TestingSplitTrigger(
280+
ctx, rec, batch, enginepb.MVCCStats{}, split, in, hlc.Timestamp{},
281+
)
282+
require.NoError(t, err)
283+
284+
tc.updatePostSplitRangeState(t, ctx, batch, rangeID, split)
285+
})
286+
287+
case "destroy-replica":
288+
rangeID := dd.ScanArg[roachpb.RangeID](t, d, "range-id")
289+
rs := tc.mustGetRangeState(t, rangeID)
290+
rs.mustGetReplicaDescriptor(t, roachpb.NodeID(1)) // ensure replica exists
291+
292+
output := tc.mutate(t, func(batch storage.Batch) {
293+
require.NoError(t, kvstorage.DestroyReplica(
294+
ctx,
295+
kvstorage.TODOReadWriter(batch),
296+
kvstorage.DestroyReplicaInfo{
297+
FullReplicaID: rs.replica.FullReplicaID,
298+
Keys: rs.desc.RSpan(),
299+
},
300+
rs.desc.NextReplicaID,
301+
))
302+
})
303+
rs.replica = nil // clear the replica from the range state
304+
return output
305+
306+
case "append-raft-entries":
307+
rangeID := dd.ScanArg[roachpb.RangeID](t, d, "range-id")
308+
numEntries := dd.ScanArg[int](t, d, "num-entries")
309+
rs := tc.mustGetRangeState(t, rangeID)
310+
require.NotNil(t, rs.replica, "replica must be created before appending entries")
311+
312+
sl := logstore.NewStateLoader(rangeID)
313+
lastIndex := rs.replica.lastIdx
314+
rs.replica.lastIdx += kvpb.RaftIndex(numEntries)
315+
term := rs.replica.hs.Term
316+
317+
return tc.mutate(t, func(batch storage.Batch) {
318+
for i := 0; i < numEntries; i++ {
319+
entryIndex := lastIndex + 1 + kvpb.RaftIndex(i)
320+
require.NoError(t, storage.MVCCBlindPutProto(
321+
ctx, batch,
322+
sl.RaftLogKey(entryIndex), hlc.Timestamp{},
323+
&raftpb.Entry{Index: uint64(entryIndex), Term: term},
324+
storage.MVCCWriteOptions{},
325+
))
326+
}
327+
})
328+
329+
case "create-range-data":
330+
rangeID := dd.ScanArg[roachpb.RangeID](t, d, "range-id")
331+
numUserKeys := dd.ScanArgOr(t, d, "num-user-keys", 0)
332+
numSystemKeys := dd.ScanArgOr(t, d, "num-system-keys", 0)
333+
numLockTableKeys := dd.ScanArgOr(t, d, "num-lock-table-keys", 0)
334+
require.True(t, numUserKeys > 0 || numSystemKeys > 0 || numLockTableKeys > 0)
335+
336+
rs := tc.mustGetRangeState(t, rangeID)
337+
ts := hlc.Timestamp{WallTime: 1}
338+
getUserKey := func(i int) roachpb.Key {
339+
return append(rs.desc.StartKey.AsRawKey(), strconv.Itoa(i)...)
263340
}
264-
// Actually run the split trigger.
265-
_, _, err := batcheval.TestingSplitTrigger(
266-
ctx, rec, batch /* bothDeltaMS */, enginepb.MVCCStats{}, split, in, hlc.Timestamp{},
267-
)
268-
require.NoError(t, err)
269-
270-
// Update the test context's notion of the range state after the
271-
// split.
272-
tc.updatePostSplitRangeState(t, ctx, batch, rangeID, split)
273-
// Print the state of the batch (all keys/values written as part
274-
// of the split trigger).
275-
output, err := print.DecodeWriteBatch(batch.Repr())
276-
require.NoError(t, err)
277-
// Commit the batch.
278-
err = batch.Commit(true)
279-
require.NoError(t, err, "error committing batch")
280-
// TODO(arul): There are double lines in the output (see tryTxn
281-
// in debug_print.go) that we need to strip out for the benefit
282-
// of the datadriven test driver. Until that TODO is addressed,
283-
// we manually split things out here.
284-
return strings.ReplaceAll(output, "\n\n", "\n")
341+
342+
return tc.mutate(t, func(batch storage.Batch) {
343+
// 1. User keys.
344+
for i := 0; i < numUserKeys; i++ {
345+
require.NoError(t, batch.PutMVCC(
346+
storage.MVCCKey{Key: getUserKey(i), Timestamp: ts}, storage.MVCCValue{},
347+
))
348+
}
349+
// 2. System keys.
350+
for i := 0; i < numSystemKeys; i++ {
351+
key := keys.TransactionKey(getUserKey(i), uuid.NamespaceDNS)
352+
require.NoError(t, batch.PutMVCC(
353+
storage.MVCCKey{Key: key, Timestamp: ts}, storage.MVCCValue{},
354+
))
355+
}
356+
// 3. Lock table keys.
357+
for i := 0; i < numLockTableKeys; i++ {
358+
ek, _ := storage.LockTableKey{
359+
Key: getUserKey(i), Strength: lock.Intent, TxnUUID: uuid.UUID{},
360+
}.ToEngineKey(nil)
361+
require.NoError(t, batch.PutEngineKey(ek, nil))
362+
}
363+
})
285364

286365
case "print-range-state":
287366
var sb strings.Builder
@@ -329,8 +408,9 @@ type rangeState struct {
329408
// engine (both raft log and state machine) state.
330409
type replicaInfo struct {
331410
roachpb.FullReplicaID
332-
hs raftpb.HardState
333-
ts kvserverpb.RaftTruncatedState
411+
hs raftpb.HardState
412+
ts kvserverpb.RaftTruncatedState
413+
lastIdx kvpb.RaftIndex
334414
}
335415

336416
// testCtx is a single test's context. It tracks the state of all ranges and any
@@ -369,6 +449,23 @@ func (tc *testCtx) close() {
369449
tc.storage.Close()
370450
}
371451

452+
// mutate executes a write operation on a batch and commits it. All KVs written
453+
// as part of the batch are returned as a string for the benefit of the
454+
// datadriven test output.
455+
func (tc *testCtx) mutate(t *testing.T, write func(storage.Batch)) string {
456+
batch := tc.storage.NewBatch()
457+
defer batch.Close()
458+
write(batch)
459+
output, err := print.DecodeWriteBatch(batch.Repr())
460+
require.NoError(t, err)
461+
require.NoError(t, batch.Commit(false))
462+
// TODO(arul): There may be double new lines in the output (see tryTxn in
463+
// debug_print.go) that we need to strip out for the benefit of the
464+
// datadriven test driver. Until that TODO is addressed, we manually split
465+
// things out here.
466+
return strings.ReplaceAll(output, "\n\n", "\n")
467+
}
468+
372469
// newRangeState constructs a new rangeState for the supplied descriptor.
373470
func newRangeState(desc roachpb.RangeDescriptor) *rangeState {
374471
gcThreshold := hlc.Timestamp{WallTime: 4}
@@ -408,8 +505,9 @@ func (tc *testCtx) updatePostReplicaCreateState(
408505
RangeID: rs.desc.RangeID,
409506
ReplicaID: replID.ReplicaID,
410507
},
411-
hs: hs,
412-
ts: ts,
508+
hs: hs,
509+
ts: ts,
510+
lastIdx: ts.Index,
413511
}
414512
}
415513

@@ -440,7 +538,7 @@ func (tc *testCtx) updatePostSplitRangeState(
440538
tc.ranges[split.RightDesc.RangeID] = rhsRangeState
441539
}
442540

443-
func (rs *rangeState) getReplicaDescriptor(
541+
func (rs *rangeState) mustGetReplicaDescriptor(
444542
t *testing.T, nodeID roachpb.NodeID,
445543
) *roachpb.ReplicaDescriptor {
446544
for i, repl := range rs.desc.InternalReplicas {
@@ -470,8 +568,9 @@ func (r *replicaInfo) String() string {
470568
if r.hs == (raftpb.HardState{}) {
471569
sb.WriteString("uninitialized")
472570
} else {
473-
sb.WriteString(fmt.Sprintf("HardState={Term:%d,Vote:%d,Commit:%d} ", r.hs.Term, r.hs.Vote, r.hs.Commit))
474-
sb.WriteString(fmt.Sprintf("TruncatedState={Index:%d,Term:%d}", r.ts.Index, r.ts.Term))
571+
sb.WriteString(fmt.Sprintf("HardState={Term:%d,Vote:%d,Commit:%d}", r.hs.Term, r.hs.Vote, r.hs.Commit))
572+
sb.WriteString(fmt.Sprintf(" TruncatedState={Index:%d,Term:%d}", r.ts.Index, r.ts.Term))
573+
sb.WriteString(fmt.Sprintf(" LastIdx=%d", r.lastIdx))
475574
}
476575
return sb.String()
477576
}

pkg/kv/kvserver/testdata/replica_lifecycle/create_replica.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,4 @@ print-range-state
2929
range desc: r1:{a-d} [(n1,s1):1, (n2,s2):2, (n3,s3):3, next=4, gen=0]
3030
replica (n1/s1): id=1 uninitialized
3131
range desc: r2:{d-k} [(n1,s1):1, (n2,s2):2, (n3,s3):3, next=4, gen=0]
32-
replica (n1/s1): id=1 HardState={Term:5,Vote:0,Commit:10} TruncatedState={Index:10,Term:5}
32+
replica (n1/s1): id=1 HardState={Term:5,Vote:0,Commit:10} TruncatedState={Index:10,Term:5} LastIdx=10

0 commit comments

Comments
 (0)