@@ -46,7 +46,18 @@ class HostDistance(object):
     connections opened to it.
     """

-    LOCAL = 0
+    LOCAL_RACK = 0
+    """
+    Nodes with ``LOCAL_RACK`` distance will be preferred for operations
+    under some load balancing policies (such as :class:`.RackAwareRoundRobinPolicy`)
+    and will have a greater number of connections opened against
+    them by default.
+
+    This distance is typically used for nodes within the same
+    datacenter and the same rack as the client.
+    """
+
+    LOCAL = 1
     """
     Nodes with ``LOCAL`` distance will be preferred for operations
     under some load balancing policies (such as :class:`.DCAwareRoundRobinPolicy`)
@@ -57,12 +68,12 @@ class HostDistance(object):
     datacenter as the client.
     """

-    REMOTE = 1
+    REMOTE = 2
     """
     Nodes with ``REMOTE`` distance will be treated as a last resort
-    by some load balancing policies (such as :class:`.DCAwareRoundRobinPolicy`)
-    and will have a smaller number of connections opened against
-    them by default.
+    by some load balancing policies (such as :class:`.DCAwareRoundRobinPolicy`
+    and :class:`.RackAwareRoundRobinPolicy`) and will have a smaller number of
+    connections opened against them by default.

     This distance is typically used for nodes outside of the
     datacenter that the client is running in.
@@ -102,6 +113,11 @@ class LoadBalancingPolicy(HostStateListener):

     You may also use subclasses of :class:`.LoadBalancingPolicy` for
     custom behavior.
+
+    You should always use immutable collections (e.g., tuples or
+    frozensets) to store information about hosts to prevent accidental
+    modification. When there are changes to the hosts (e.g., a host is
+    down or up), the old collection should be replaced with a new one.
     """

     _hosts_lock = None
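As an aside (not part of the diff), a minimal sketch of the replace-rather-than-mutate pattern this new docstring text describes; the class name and the single `_live_hosts` tuple are illustrative assumptions, not the driver's API:

```python
# Illustrative only: keep hosts in an immutable tuple and swap in a new
# tuple on every change, as the LoadBalancingPolicy docstring advises.
from cassandra.policies import LoadBalancingPolicy


class ExamplePolicy(LoadBalancingPolicy):
    # distance() and make_query_plan() are omitted for brevity.

    def populate(self, cluster, hosts):
        self._live_hosts = tuple(hosts)  # immutable snapshot of live hosts

    def on_up(self, host):
        with self._hosts_lock:
            if host not in self._live_hosts:
                # build a new tuple instead of appending to the old one
                self._live_hosts = self._live_hosts + (host,)

    def on_down(self, host):
        with self._hosts_lock:
            # replace the collection rather than removing in place
            self._live_hosts = tuple(h for h in self._live_hosts if h != host)
```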
@@ -316,6 +332,130 @@ def on_add(self, host):
     def on_remove(self, host):
         self.on_down(host)

+class RackAwareRoundRobinPolicy(LoadBalancingPolicy):
+    """
+    Similar to :class:`.DCAwareRoundRobinPolicy`, but prefers hosts
+    in the local rack, before hosts in the local datacenter but a
+    different rack, before hosts in all other datacenters.
+    """
+
+    local_dc = None
+    local_rack = None
+    used_hosts_per_remote_dc = 0
+
+    def __init__(self, local_dc, local_rack, used_hosts_per_remote_dc=0):
+        """
+        The `local_dc` and `local_rack` parameters should be the name of the
+        datacenter and rack (such as is reported by ``nodetool ring``) that
+        should be considered local.
+
+        `used_hosts_per_remote_dc` controls how many nodes in
+        each remote datacenter will have connections opened
+        against them. In other words, `used_hosts_per_remote_dc` hosts
+        will be considered :attr:`~.HostDistance.REMOTE` and the
+        rest will be considered :attr:`~.HostDistance.IGNORED`.
+        By default, all remote hosts are ignored.
+        """
+        self.local_rack = local_rack
+        self.local_dc = local_dc
+        self.used_hosts_per_remote_dc = used_hosts_per_remote_dc
+        self._live_hosts = {}
+        self._dc_live_hosts = {}
+        self._endpoints = []
+        self._position = 0
+        LoadBalancingPolicy.__init__(self)
+
+    def _rack(self, host):
+        return host.rack or self.local_rack
+
+    def _dc(self, host):
+        return host.datacenter or self.local_dc
+
+    def populate(self, cluster, hosts):
+        for (dc, rack), rack_hosts in groupby(hosts, lambda host: (self._dc(host), self._rack(host))):
+            self._live_hosts[(dc, rack)] = tuple(set(rack_hosts))
+        for dc, dc_hosts in groupby(hosts, lambda host: self._dc(host)):
+            self._dc_live_hosts[dc] = tuple(set(dc_hosts))
+
+        self._position = randint(0, len(hosts) - 1) if hosts else 0
+
+    def distance(self, host):
+        rack = self._rack(host)
+        dc = self._dc(host)
+        if rack == self.local_rack and dc == self.local_dc:
+            return HostDistance.LOCAL_RACK
+
+        if dc == self.local_dc:
+            return HostDistance.LOCAL
+
+        if not self.used_hosts_per_remote_dc:
+            return HostDistance.IGNORED
+
+        dc_hosts = self._dc_live_hosts.get(dc, ())
+        if not dc_hosts:
+            return HostDistance.IGNORED
+        if host in dc_hosts and dc_hosts.index(host) < self.used_hosts_per_remote_dc:
+            return HostDistance.REMOTE
+        else:
+            return HostDistance.IGNORED
+
+    def make_query_plan(self, working_keyspace=None, query=None):
+        pos = self._position
+        self._position += 1
+
+        local_rack_live = self._live_hosts.get((self.local_dc, self.local_rack), ())
+        pos = (pos % len(local_rack_live)) if local_rack_live else 0
+        # Slice the cyclic iterator to start from pos and include the next len(local_rack_live) elements
+        # This ensures we get exactly one full cycle starting from pos
+        for host in islice(cycle(local_rack_live), pos, pos + len(local_rack_live)):
+            yield host
+
+        local_live = [host for host in self._dc_live_hosts.get(self.local_dc, ()) if host.rack != self.local_rack]
+        pos = (pos % len(local_live)) if local_live else 0
+        for host in islice(cycle(local_live), pos, pos + len(local_live)):
+            yield host
+
+        # the dict can change, so get candidate DCs iterating over keys of a copy
+        for dc, remote_live in self._dc_live_hosts.copy().items():
+            if dc != self.local_dc:
+                for host in remote_live[:self.used_hosts_per_remote_dc]:
+                    yield host
+
+    def on_up(self, host):
+        dc = self._dc(host)
+        rack = self._rack(host)
+        with self._hosts_lock:
+            current_rack_hosts = self._live_hosts.get((dc, rack), ())
+            if host not in current_rack_hosts:
+                self._live_hosts[(dc, rack)] = current_rack_hosts + (host, )
+            current_dc_hosts = self._dc_live_hosts.get(dc, ())
+            if host not in current_dc_hosts:
+                self._dc_live_hosts[dc] = current_dc_hosts + (host, )
+
+    def on_down(self, host):
+        dc = self._dc(host)
+        rack = self._rack(host)
+        with self._hosts_lock:
+            current_rack_hosts = self._live_hosts.get((dc, rack), ())
+            if host in current_rack_hosts:
+                hosts = tuple(h for h in current_rack_hosts if h != host)
+                if hosts:
+                    self._live_hosts[(dc, rack)] = hosts
+                else:
+                    del self._live_hosts[(dc, rack)]
+            current_dc_hosts = self._dc_live_hosts.get(dc, ())
+            if host in current_dc_hosts:
+                hosts = tuple(h for h in current_dc_hosts if h != host)
+                if hosts:
+                    self._dc_live_hosts[dc] = hosts
+                else:
+                    del self._dc_live_hosts[dc]
+
+    def on_add(self, host):
+        self.on_up(host)
+
+    def on_remove(self, host):
+        self.on_down(host)

 class TokenAwarePolicy(LoadBalancingPolicy):
     """
@@ -390,7 +530,7 @@ def make_query_plan(self, working_keyspace=None, query=None):
                     shuffle(replicas)

                 for replica in replicas:
-                    if replica.is_up and child.distance(replica) == HostDistance.LOCAL:
+                    if replica.is_up and child.distance(replica) in [HostDistance.LOCAL, HostDistance.LOCAL_RACK]:
                         yield replica

                 for host in child.make_query_plan(keyspace, query):
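Finally, a hedged usage sketch of the new policy; the contact point, datacenter name "dc1", and rack name "rack1" are placeholders, and wrapping in :class:`.TokenAwarePolicy` is optional:

```python
# Placeholder addresses and names; adjust to your own topology.
from cassandra.cluster import Cluster, ExecutionProfile, EXEC_PROFILE_DEFAULT
from cassandra.policies import RackAwareRoundRobinPolicy, TokenAwarePolicy

# Prefer replicas in rack "rack1" of datacenter "dc1", then other racks in
# "dc1", then at most two hosts in each remote datacenter.
profile = ExecutionProfile(
    load_balancing_policy=TokenAwarePolicy(
        RackAwareRoundRobinPolicy("dc1", "rack1", used_hosts_per_remote_dc=2)
    )
)

cluster = Cluster(["127.0.0.1"], execution_profiles={EXEC_PROFILE_DEFAULT: profile})
session = cluster.connect()
```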