@@ -60,6 +60,7 @@ type OperationConfig struct {
60
60
ChangeLease ChangeLeaseConfig
61
61
ChangeSetting ChangeSettingConfig
62
62
ChangeZone ChangeZoneConfig
63
+ Fault FaultConfig
63
64
}
64
65
65
66
// ClosureTxnConfig configures the relative probability of running some
@@ -392,6 +393,20 @@ type SavepointConfig struct {
392
393
SavepointRollback int
393
394
}
394
395
396
// FaultConfig holds the relative probabilities with which the different kinds
// of fault operations are generated.
//
// A network partition may be symmetric or asymmetric, and partial or full.
// Setting one up can take more than one operation: for example, a symmetric
// partition between node A and node B requires two partitions, one from A to
// B and one from B to A.
type FaultConfig struct {
	// AddNetworkPartition is the relative probability of generating an
	// operation that simulates a network partition.
	AddNetworkPartition int

	// RemoveNetworkPartition is the relative probability of generating an
	// operation that simulates healing a network partition.
	RemoveNetworkPartition int

	// Other fault types, such as disk stalls and node crashes, belong here.
}
409
+
395
410
// newAllOperationsConfig returns a GeneratorConfig that exercises *all*
396
411
// options. You probably want NewDefaultConfig. Most of the time, these will be
397
412
// the same, but having both allows us to merge code for operations that do not
@@ -511,6 +526,10 @@ func newAllOperationsConfig() GeneratorConfig {
511
526
ChangeZone : ChangeZoneConfig {
512
527
ToggleGlobalReads : 1 ,
513
528
},
529
+ Fault : FaultConfig {
530
+ AddNetworkPartition : 1 ,
531
+ RemoveNetworkPartition : 1 ,
532
+ },
514
533
}}
515
534
}
516
535
@@ -603,6 +622,11 @@ func NewDefaultConfig() GeneratorConfig {
603
622
config .Ops .ClosureTxn .CommitBatchOps .FlushLockTable = 0
604
623
config .Ops .ClosureTxn .TxnClientOps .FlushLockTable = 0
605
624
config .Ops .ClosureTxn .TxnBatchOps .Ops .FlushLockTable = 0
625
+
626
+ // Network partitions can result in unavailability and need to be enabled with
627
+ // care by specific test variants.
628
+ config .Ops .Fault .AddNetworkPartition = 0
629
+ config .Ops .Fault .RemoveNetworkPartition = 0
606
630
return config
607
631
}
608
632
@@ -663,13 +687,27 @@ func MakeGenerator(config GeneratorConfig, replicasFn GetReplicasFn) (*Generator
663
687
return nil , errors .Errorf (`NumReplicas (%d) must <= NumNodes (%d)` ,
664
688
config .NumReplicas , config .NumNodes )
665
689
}
690
+ p := partitions {
691
+ healthy : make (map [connection ]struct {}),
692
+ partitioned : make (map [connection ]struct {}),
693
+ }
694
+ for i := 1 ; i <= config .NumNodes ; i ++ {
695
+ for j := 1 ; j <= config .NumNodes ; j ++ {
696
+ if i == j {
697
+ continue
698
+ }
699
+ conn := connection {from : i , to : j }
700
+ p .healthy [conn ] = struct {}{}
701
+ }
702
+ }
666
703
g := & Generator {}
667
704
g .mu .generator = generator {
668
705
Config : config ,
669
706
replicasFn : replicasFn ,
670
707
keys : make (map [string ]string ),
671
708
currentSplits : make (map [string ]struct {}),
672
709
historicalSplits : make (map [string ]struct {}),
710
+ partitions : p ,
673
711
}
674
712
return g , nil
675
713
}
@@ -703,6 +741,20 @@ type generator struct {
703
741
// emitted, regardless of whether the split has since been applied or been
704
742
// merged away again.
705
743
historicalSplits map [string ]struct {}
744
+
745
+ // partitions contains the sets of healthy and partitioned connections
746
+ // between nodes.
747
+ partitions
748
+ }
749
+
750
// connection identifies a directed link between two nodes. It is a comparable
// value type, which allows it to be used as a map key.
type connection struct {
	from, to int // node IDs
}
754
+
755
+ type partitions struct {
756
+ healthy map [connection ]struct {}
757
+ partitioned map [connection ]struct {}
706
758
}
707
759
708
760
// RandStep returns a single randomly generated next operation to execute.
@@ -763,6 +815,8 @@ func (g *generator) RandStep(rng *rand.Rand) Step {
763
815
764
816
addOpGen (& allowed , setLeaseType , g .Config .Ops .ChangeSetting .SetLeaseType )
765
817
addOpGen (& allowed , toggleGlobalReads , g .Config .Ops .ChangeZone .ToggleGlobalReads )
818
+ addOpGen (& allowed , addRandNetworkPartition , g .Config .Ops .Fault .AddNetworkPartition )
819
+ addOpGen (& allowed , removeRandNetworkPartition , g .Config .Ops .Fault .RemoveNetworkPartition )
766
820
767
821
return step (g .selectOp (rng , allowed ))
768
822
}
@@ -1643,6 +1697,34 @@ func toggleGlobalReads(_ *generator, _ *rand.Rand) Operation {
1643
1697
return changeZone (ChangeZoneType_ToggleGlobalReads )
1644
1698
}
1645
1699
1700
+ func addRandNetworkPartition (g * generator , rng * rand.Rand ) Operation {
1701
+ if len (g .partitions .healthy ) == 0 {
1702
+ return addNetworkPartition (0 , 0 )
1703
+ }
1704
+ all := make ([]connection , 0 , len (g .partitions .healthy ))
1705
+ for conn := range g .partitions .healthy {
1706
+ all = append (all , conn )
1707
+ }
1708
+ randConn := all [rng .Intn (len (all ))]
1709
+ delete (g .partitions .healthy , randConn )
1710
+ g .partitions .partitioned [randConn ] = struct {}{}
1711
+ return addNetworkPartition (randConn .from , randConn .to )
1712
+ }
1713
+
1714
+ func removeRandNetworkPartition (g * generator , rng * rand.Rand ) Operation {
1715
+ if len (g .partitions .partitioned ) == 0 {
1716
+ return removeNetworkPartition (0 , 0 )
1717
+ }
1718
+ all := make ([]connection , 0 , len (g .partitions .partitioned ))
1719
+ for conn := range g .partitions .partitioned {
1720
+ all = append (all , conn )
1721
+ }
1722
+ randConn := all [rng .Intn (len (all ))]
1723
+ delete (g .partitions .partitioned , randConn )
1724
+ g .partitions .healthy [randConn ] = struct {}{}
1725
+ return removeNetworkPartition (randConn .from , randConn .to )
1726
+ }
1727
+
1646
1728
func makeRandBatch (c * ClientOperationConfig ) opGenFunc {
1647
1729
return func (g * generator , rng * rand.Rand ) Operation {
1648
1730
var allowed []opGen
@@ -2252,6 +2334,18 @@ func rollbackSavepoint(id int) Operation {
2252
2334
return Operation {SavepointRollback : & SavepointRollbackOperation {ID : int32 (id )}}
2253
2335
}
2254
2336
2337
+ func addNetworkPartition (from int , to int ) Operation {
2338
+ return Operation {
2339
+ AddNetworkPartition : & AddNetworkPartitionOperation {FromNode : int32 (from ), ToNode : int32 (to )},
2340
+ }
2341
+ }
2342
+
2343
+ func removeNetworkPartition (from int , to int ) Operation {
2344
+ return Operation {
2345
+ RemoveNetworkPartition : & RemoveNetworkPartitionOperation {FromNode : int32 (from ), ToNode : int32 (to )},
2346
+ }
2347
+ }
2348
+
2255
2349
type countingRandSource struct {
2256
2350
count atomic.Uint64
2257
2351
inner rand.Source64
0 commit comments