@@ -12,6 +12,8 @@ import (
12
12
"github.com/cockroachdb/cockroach/pkg/base"
13
13
"github.com/cockroachdb/cockroach/pkg/jobs"
14
14
"github.com/cockroachdb/cockroach/pkg/keys"
15
+ "github.com/cockroachdb/cockroach/pkg/kv/kvpb"
16
+ "github.com/cockroachdb/cockroach/pkg/kv/kvserver"
15
17
"github.com/cockroachdb/cockroach/pkg/roachpb"
16
18
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
17
19
"github.com/cockroachdb/cockroach/pkg/sql"
@@ -26,6 +28,8 @@ import (
26
28
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
27
29
"github.com/cockroachdb/cockroach/pkg/util/log"
28
30
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
31
+ "github.com/cockroachdb/cockroach/pkg/util/randutil"
32
+ "github.com/cockroachdb/cockroach/pkg/util/syncutil"
29
33
"github.com/cockroachdb/errors"
30
34
"github.com/stretchr/testify/require"
31
35
)
@@ -243,6 +247,128 @@ func TestRunGenerativeSplitAndScatterRandomizedDestOnFailScatter(t *testing.T) {
243
247
))
244
248
}
245
249
250
+ func TestGenerativeSplitAndScatterWithAdminSplitFailures (t * testing.T ) {
251
+ defer leaktest .AfterTest (t )()
252
+ defer log .Scope (t ).Close (t )
253
+
254
+ const numAccounts = 1000
255
+ rng , seed := randutil .NewPseudoRand ()
256
+ t .Logf ("random seed: %d" , seed )
257
+
258
+ ctx := context .Background ()
259
+
260
+ var mu syncutil.Mutex
261
+ allowAdminSplitFailures := false
262
+ keySplitFailures := make (map [string ]int )
263
+
264
+ clusterArgs := base.TestClusterArgs {
265
+ ServerArgs : base.TestServerArgs {
266
+ DefaultTestTenant : base .TestIsSpecificToStorageLayerAndNeedsASystemTenant ,
267
+ Knobs : base.TestingKnobs {
268
+ Store : & kvserver.StoreTestingKnobs {
269
+ TestingRequestFilter : func (ctx context.Context , ba * kvpb.BatchRequest ) * kvpb.Error {
270
+ mu .Lock ()
271
+ defer mu .Unlock ()
272
+ if ! allowAdminSplitFailures {
273
+ return nil
274
+ }
275
+ for _ , req := range ba .Requests {
276
+ kvReq := req .GetInner ()
277
+ if kvReq .Method () == kvpb .AdminSplit {
278
+ splitKey := kvReq .Header ().Key .String ()
279
+ nFails := keySplitFailures [splitKey ]
280
+ if nFails < maxAdminSplitAttempts - 1 && rng .Intn (2 ) == 0 {
281
+ keySplitFailures [splitKey ]++
282
+ return kvpb .NewErrorf ("injected admin split failure for testing" )
283
+ }
284
+ }
285
+ }
286
+ return nil
287
+ },
288
+ },
289
+ },
290
+ },
291
+ }
292
+ tc , sqlDB , _ , cleanupFn := backupRestoreTestSetupWithParams (
293
+ t , singleNode , numAccounts , InitManualReplication , clusterArgs ,
294
+ )
295
+ defer cleanupFn ()
296
+
297
+ st := cluster .MakeTestingClusterSettings ()
298
+ evalCtx := eval .MakeTestingEvalContext (st )
299
+ testDiskMonitor := execinfra .NewTestDiskMonitor (ctx , st )
300
+ defer testDiskMonitor .Stop (ctx )
301
+
302
+ s0 := tc .ApplicationLayer (0 )
303
+ registry := s0 .JobRegistry ().(* jobs.Registry )
304
+ execCfg := s0 .ExecutorConfig ().(sql.ExecutorConfig )
305
+ flowCtx := execinfra.FlowCtx {
306
+ Cfg : & execinfra.ServerConfig {
307
+ Settings : st ,
308
+ DB : s0 .InternalDB ().(descs.DB ),
309
+ JobRegistry : registry ,
310
+ ExecutorConfig : & execCfg ,
311
+ },
312
+ EvalCtx : & evalCtx ,
313
+ Mon : evalCtx .TestingMon ,
314
+ DiskMonitor : testDiskMonitor ,
315
+ NodeID : evalCtx .NodeID ,
316
+ }
317
+
318
+ sqlDB .Exec (t , `SET CLUSTER SETTING bulkio.backup.file_size = '1'` )
319
+ sqlDB .Exec (t , `BACKUP INTO $1` , localFoo )
320
+
321
+ backups := sqlDB .QueryStr (t , `SHOW BACKUPS IN $1` , localFoo )
322
+ require .Equal (t , 1 , len (backups ))
323
+ uri := localFoo + "/" + backups [0 ][0 ]
324
+
325
+ codec := keys .MakeSQLCodec (s0 .RPCContext ().TenantID )
326
+ backupTableDesc := desctestutils .TestingGetPublicTableDescriptor (s0 .DB (), codec , "data" , "bank" )
327
+ backupStartKey := backupTableDesc .PrimaryIndexSpan (codec ).Key
328
+
329
+ spec := makeTestingGenerativeSplitAndScatterSpec (
330
+ []string {uri },
331
+ []roachpb.Span {{
332
+ Key : backupStartKey ,
333
+ EndKey : backupStartKey .PrefixEnd (),
334
+ }},
335
+ )
336
+
337
+ oldID := backupTableDesc .GetID ()
338
+ newID := backupTableDesc .GetID () + 1
339
+ newDesc := protoutil .Clone (backupTableDesc .TableDesc ()).(* descpb.TableDescriptor )
340
+ newDesc .ID = newID
341
+ tableRekeys := []execinfrapb.TableRekey {
342
+ {
343
+ OldID : uint32 (oldID ),
344
+ NewDesc : mustMarshalDesc (t , newDesc ),
345
+ },
346
+ }
347
+
348
+ kr , err := MakeKeyRewriterFromRekeys (keys .SystemSQLCodec , tableRekeys , nil , false )
349
+ require .NoError (t , err )
350
+
351
+ baseSplitScatter := makeSplitAndScatterer (flowCtx .Cfg .DB .KV (), kr )
352
+ chunkSplitScatterers := []splitAndScatterer {makeSplitAndScatterer (flowCtx .Cfg .DB .KV (), kr )}
353
+ chunkEntrySpliterScatterers := []splitAndScatterer {makeSplitAndScatterer (flowCtx .Cfg .DB .KV (), kr )}
354
+
355
+ cache := routingDatumCache {
356
+ cache : make (map [roachpb.NodeID ]rowenc.EncDatum ),
357
+ }
358
+
359
+ // Large enough so doneScatterCh never blocks.
360
+ doneScatterCh := make (chan entryNode , 1000 )
361
+ mu .Lock ()
362
+ allowAdminSplitFailures = true
363
+ mu .Unlock ()
364
+ err = runGenerativeSplitAndScatter (
365
+ ctx , & flowCtx , & spec , baseSplitScatter , chunkSplitScatterers ,
366
+ chunkEntrySpliterScatterers , doneScatterCh , & cache ,
367
+ )
368
+
369
+ require .NoError (t , err )
370
+ }
371
+
246
372
// scatterAlwaysFailsSplitScatterer always fails the scatter and returns 0 as
247
373
// the chunk destination.
248
374
type scatterAlwaysFailsSplitScatterer struct {
0 commit comments