Skip to content

Commit 64f972f

Browse files
fix: disregard failed pings in updateLatency() for cluster nodes
A ping command can fail very quickly, for example because of an immediate "connection refused" error or some other server failure condition. In that case the measured latency is not reliable and should not be used to compare this node with other nodes in the cluster. With this change, only successful Ping commands count toward the average latency; if no Ping succeeds, the latency is set to an arbitrarily high value (one minute) so that this node gets the lowest priority when routing by latency.
1 parent 89d6dfe commit 64f972f

File tree

1 file changed

+14
-3
lines changed

1 file changed

+14
-3
lines changed

cluster.go

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -204,15 +204,26 @@ func (n *clusterNode) updateLatency() {
204204
const numProbe = 10
205205
var dur uint64
206206

207+
successes := 0
207208
for i := 0; i < numProbe; i++ {
208209
time.Sleep(time.Duration(10+rand.Intn(10)) * time.Millisecond)
209210

210211
start := time.Now()
211-
n.Client.Ping(context.TODO())
212-
dur += uint64(time.Since(start) / time.Microsecond)
212+
err := n.Client.Ping(context.TODO()).Err()
213+
if err == nil {
214+
dur += uint64(time.Since(start) / time.Microsecond)
215+
successes++
216+
}
213217
}
214218

215-
latency := float64(dur) / float64(numProbe)
219+
var latency float64
220+
if successes == 0 {
221+
// If none of the pings worked, set latency to some arbitrarily high value so this node gets
222+
// least priority.
223+
latency = float64((1 * time.Minute) / time.Microsecond)
224+
} else {
225+
latency = float64(dur) / float64(successes)
226+
}
216227
atomic.StoreUint32(&n.latency, uint32(latency+0.5))
217228
}
218229

0 commit comments

Comments
 (0)