Skip to content

Commit e2ce75e

Browse files
authored
server selection: modify round robin to shuffle retry order (#21)
In case of non-random server selection, the servers were previously retried in the same order every time, which is problematic if the first server(s) in the list are broken. Let's do a modified round robin, where we always try the next available server on retries.
1 parent aff85ae commit e2ce75e

File tree

1 file changed

+21
-12
lines changed

1 file changed

+21
-12
lines changed

robustsession/robustsession.go

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,10 @@ func CopyNetworks() []*Network {
9595
// This type is only exported so that you can expose internal network state
9696
// for debugging via CopyNetworks().
9797
type Network struct {
98-
servers []string
99-
mu sync.RWMutex
100-
backoff map[string]backoffState
98+
servers []string
99+
idxOffset int
100+
mu sync.RWMutex
101+
backoff map[string]backoffState
101102
}
102103

103104
func (n *Network) String() string {
@@ -112,9 +113,9 @@ func (n *Network) String() string {
112113
}
113114
lines = append(lines, fmt.Sprintf("\tserver %v (backoff: next possible reconnect: %v)", srv, reconnect))
114115
}
115-
return fmt.Sprintf("[network %p with %d servers]\n",
116-
n,
117-
len(n.servers)) + strings.Join(lines, "\n")
116+
return fmt.Sprintf("[network %p with %d servers]\n", n, len(n.servers)) +
117+
fmt.Sprintf("index offset to the next server=%v\n", n.idxOffset) +
118+
strings.Join(lines, "\n")
118119
}
119120

120121
func newNetwork(networkname string) (*Network, error) {
@@ -166,8 +167,9 @@ func newNetwork(networkname string) (*Network, error) {
166167
}
167168

168169
return &Network{
169-
servers: servers,
170-
backoff: make(map[string]backoffState),
170+
servers: servers,
171+
idxOffset: 0,
172+
backoff: make(map[string]backoffState),
171173
}, nil
172174
}
173175

@@ -180,7 +182,8 @@ func (n *Network) server(random bool) string {
180182

181183
for {
182184
soonest := time.Duration(math.MaxInt64)
183-
// Try to use a random server, but fall back to using the next
185+
186+
// If random, try to use a random server, but fall back to using the next
184187
// available server in case the randomly picked server is unhealthy.
185188
if random {
186189
server := n.servers[rand.Intn(len(n.servers))]
@@ -189,10 +192,14 @@ func (n *Network) server(random bool) string {
189192
return server
190193
}
191194
}
192-
for _, server := range n.servers {
193-
wait := n.backoff[server].next.Sub(time.Now())
195+
// Try to use the next available server, searching in offset order
196+
// (modified round robin).
197+
for i := 0; i < len(n.servers); i++ {
198+
idx := (i + n.idxOffset) % len(n.servers)
199+
wait := n.backoff[n.servers[idx]].next.Sub(time.Now())
194200
if wait <= 0 {
195-
return server
201+
n.idxOffset = (idx + 1) % len(n.servers)
202+
return n.servers[idx]
196203
}
197204
if wait < soonest {
198205
soonest = wait
@@ -212,6 +219,7 @@ func (n *Network) setServers(servers []string) {
212219

213220
// TODO(secure): we should clean up n.backoff from servers which no longer exist
214221
n.servers = servers
222+
n.idxOffset = 0
215223
}
216224

217225
// prefer moves (or adds, if it doesn't already exist) the specified server to
@@ -229,6 +237,7 @@ func (n *Network) prefer(server string) {
229237
}
230238
}
231239
n.servers = servers
240+
n.idxOffset = 0
232241
}
233242

234243
func (n *Network) failed(server string) {

0 commit comments

Comments
 (0)