@@ -129,12 +129,16 @@ func (d disablingClientStream) RecvMsg(m interface{}) error {
129
129
130
130
// Partitioner is used to create partial partitions between nodes at the GRPC
131
131
// layer. It uses StreamInterceptors to fail requests to nodes that are not
132
- // connected. Usage of it is something like the following:
132
+ // connected. Node addresses need to be registered before enabling the
133
+ // partition, but partitions can be added and removed at any point (before or
134
+ // after starting the cluster or enabling the partition).
135
+ //
136
+ // Usage of it is something like the following:
133
137
//
134
138
// var p rpc.Partitioner
135
139
//
136
140
// for i := 0; i < numServers; i++ {
137
- // p.RegisterTestingKnobs(id, partitions, ContextTestingKnobs{})
141
+ // p.RegisterTestingKnobs(id, ContextTestingKnobs{})
138
142
// }
139
143
//
140
144
// TestCluster.Start()
@@ -143,57 +147,102 @@ func (d disablingClientStream) RecvMsg(m interface{}) error {
143
147
// p.RegisterNodeAddr()
144
148
// }
145
149
//
146
- // p.EnablePartition(true)
147
- // ... run operations
150
+ // p.AddPartition(from, to)
151
+ //
152
+ // p.EnablePartitions(true)
148
153
//
149
- // TODO(baptist): This could be enhanced to allow dynamic partition injection.
154
+ // p.{Add,Remove}Partition(from, to)
155
+ // ... run operations
156
+ // p.{Add,Remove}Partition(from, to)
150
157
type Partitioner struct {
151
- partitionEnabled atomic.Bool
152
- nodeAddrMap syncutil.Map [string , roachpb.NodeID ]
158
+ partitionsEnabled atomic.Bool
159
+ nodeAddrMap syncutil.Map [string , roachpb.NodeID ]
160
+ mu struct {
161
+ syncutil.Mutex
162
+ // partitions is a map from NodeID to a set of NodeIDs that the node should
163
+ // not be able to connect to.
164
+ partitions map [roachpb.NodeID ]map [roachpb.NodeID ]struct {}
165
+ }
153
166
}
154
167
155
- // EnablePartition will enable or disable the partition.
156
- func (p * Partitioner ) EnablePartition (enable bool ) {
157
- p .partitionEnabled .Store (enable )
168
+ // EnablePartitions will enable or disable the partition.
169
+ func (p * Partitioner ) EnablePartitions (enable bool ) {
170
+ p .partitionsEnabled .Store (enable )
158
171
}
159
172
160
173
// RegisterNodeAddr is called after the cluster is started, but before
161
- // EnablePartition is called on every node to register the mapping from the
174
+ // EnablePartitions is called on every node to register the mapping from the
162
175
// address of the node to the NodeID.
163
176
func (p * Partitioner ) RegisterNodeAddr (addr string , id roachpb.NodeID ) {
164
- if p .partitionEnabled .Load () {
177
+ if p .partitionsEnabled .Load () {
165
178
panic ("Can not register node addresses with a partition enabled" )
166
179
}
167
180
p .nodeAddrMap .Store (addr , & id )
168
181
}
169
182
170
- // RegisterTestingKnobs creates the testing knobs for this node. It will
171
- // override both the Unary and Stream Interceptors to return errors once
172
- // EnablePartition is called.
173
- func (p * Partitioner ) RegisterTestingKnobs (
174
- id roachpb.NodeID , partition [][2 ]roachpb.NodeID , knobs * ContextTestingKnobs ,
175
- ) {
176
- // Structure the partition list for indexed lookup. We are partitioned from
177
- // the other node if we are found on either side of the pair.
178
- partitionedServers := make (map [roachpb.NodeID ]bool )
179
- for _ , p := range partition {
180
- if p [0 ] == id {
181
- partitionedServers [p [1 ]] = true
183
+ func (p * Partitioner ) AddPartition (from roachpb.NodeID , to roachpb.NodeID ) error {
184
+ if from == to {
185
+ return errors .Newf ("cannot add partition from node %d to itself" , from )
186
+ }
187
+ p .mu .Lock ()
188
+ defer p .mu .Unlock ()
189
+ if p .mu .partitions == nil {
190
+ p .mu .partitions = make (map [roachpb.NodeID ]map [roachpb.NodeID ]struct {})
191
+ }
192
+ if p .mu .partitions [from ] == nil {
193
+ p .mu .partitions [from ] = make (map [roachpb.NodeID ]struct {})
194
+ }
195
+ p .mu .partitions [from ][to ] = struct {}{}
196
+ return nil
197
+ }
198
+
199
+ func (p * Partitioner ) RemovePartition (from roachpb.NodeID , to roachpb.NodeID ) error {
200
+ err := errors .Newf ("cannot remove partition from node %d to %d; it doesn't exist" , from , to )
201
+ p .mu .Lock ()
202
+ defer p .mu .Unlock ()
203
+ if p .mu .partitions == nil {
204
+ return err
205
+ }
206
+ if toNodes , ok := p .mu .partitions [from ]; ok {
207
+ if _ , ok = toNodes [to ]; ok {
208
+ delete (toNodes , to )
209
+ if len (toNodes ) == 0 {
210
+ delete (p .mu .partitions , from )
211
+ }
212
+ return nil
182
213
}
183
- if p [1 ] == id {
184
- partitionedServers [p [0 ]] = true
214
+ }
215
+ return err
216
+ }
217
+
218
+ func (p * Partitioner ) isPartitioned (from roachpb.NodeID , to roachpb.NodeID ) bool {
219
+ p .mu .Lock ()
220
+ defer p .mu .Unlock ()
221
+ if p .mu .partitions == nil {
222
+ return false
223
+ }
224
+ if toPartitions , ok := p .mu .partitions [from ]; ok {
225
+ if _ , ok := toPartitions [to ]; ok {
226
+ return true
185
227
}
186
228
}
229
+ return false
230
+ }
231
+
232
+ // RegisterTestingKnobs creates the testing knobs for this node. It will
233
+ // override both the Unary and Stream Interceptors to return errors once
234
+ // EnablePartitions is called.
235
+ func (p * Partitioner ) RegisterTestingKnobs (id roachpb.NodeID , knobs * ContextTestingKnobs ) {
187
236
isPartitioned := func (addr string ) error {
188
- if ! p .partitionEnabled .Load () {
237
+ if ! p .partitionsEnabled .Load () {
189
238
return nil
190
239
}
191
- idPtr , ok := p .nodeAddrMap .Load (addr )
240
+ toNodePtr , ok := p .nodeAddrMap .Load (addr )
192
241
if ! ok {
193
242
panic ("address not mapped, call RegisterNodeAddr before enabling the partition" + addr )
194
243
}
195
- id := * idPtr
196
- if partitionedServers [ id ] {
244
+ toNodeId := * toNodePtr
245
+ if p . isPartitioned ( id , toNodeId ) {
197
246
return errors .Newf ("rpc error: partitioned from %s, n%d" , addr , id )
198
247
}
199
248
return nil
0 commit comments