@@ -341,6 +341,8 @@ func (n *clusterNode) Close() error {
341341 return n .Client .Close ()
342342}
343343
344+ const maximumNodeLatency = 1 * time .Minute // sentinel latency assigned to a node when none of its pings succeed, so it gets least priority
345+
344346func (n * clusterNode ) updateLatency () {
345347 const numProbe = 10
346348 var dur uint64
@@ -361,7 +363,7 @@ func (n *clusterNode) updateLatency() {
361363 if successes == 0 {
362364 // If none of the pings worked, set latency to some arbitrarily high value so this node gets
363365 // least priority.
364- latency = float64 ((1 * time . Minute ) / time .Microsecond )
366+ latency = float64 ((maximumNodeLatency ) / time .Microsecond )
365367 } else {
366368 latency = float64 (dur ) / float64 (successes )
367369 }
@@ -735,20 +737,40 @@ func (c *clusterState) slotClosestNode(slot int) (*clusterNode, error) {
735737 return c .nodes .Random ()
736738 }
737739
738- var node * clusterNode
740+ var allNodesFailing = true
741+ var (
742+ closestNonFailingNode * clusterNode
743+ closestNode * clusterNode
744+ minLatency time.Duration
745+ )
746+
747+ // start minLatency at the maximum possible duration so that any node's latency compares lower
748+ minLatency = time .Duration (math .MaxInt64 )
749+
739750 for _ , n := range nodes {
740- if n .Failing () {
741- continue
742- }
743- if node == nil || n .Latency () < node .Latency () {
744- node = n
751+ if closestNode == nil || n .Latency () < minLatency {
752+ closestNode = n
753+ minLatency = n .Latency ()
754+ if ! n .Failing () {
755+ closestNonFailingNode = n
756+ allNodesFailing = false
757+ }
745758 }
746759 }
747- if node != nil {
748- return node , nil
760+
761+ // pick the healthy node with the lowest latency
762+ if ! allNodesFailing && closestNonFailingNode != nil {
763+ return closestNonFailingNode , nil
764+ }
765+
766+ // if all nodes are failing, we will pick the temporarily failing node with lowest latency
767+ if minLatency < maximumNodeLatency && closestNode != nil {
768+ internal .Logger .Printf (context .TODO (), "redis: all nodes are marked as failed, picking the temporarily failing node with lowest latency" )
769+ return closestNode , nil
749770 }
750771
751- // If all nodes are failing - return random node
772+ // If every node has the maximum latency (all pings are failing), return a random node from the cluster
773+ internal .Logger .Printf (context .TODO (), "redis: pings to all nodes are failing, picking a random node across the cluster" )
752774 return c .nodes .Random ()
753775}
754776
0 commit comments