@@ -51,18 +51,12 @@ type ClientSet struct {
51
51
// Address is the proxy server address. Assuming HA proxy server
52
52
address string
53
53
54
- // leaseCounter counts number of proxy server leases
55
- leaseCounter ServerCounter
54
+ // serverCounter counts number of proxy server leases
55
+ serverCounter ServerCounter
56
56
57
57
// lastReceivedServerCount is the last serverCount value received when connecting to a proxy server
58
58
lastReceivedServerCount int
59
59
60
- // lastServerCount is the most-recently observed serverCount value from either lease system or proxy server,
61
- // former takes priority unless it is an HA server.
62
- // Initialized when the ClientSet creates the first client.
63
- // When syncForever is set, it will be the most recently seen.
64
- lastServerCount int
65
-
66
60
// syncInterval is the interval at which the agent periodically checks
67
61
// that it has connections to all instances of the proxy server.
68
62
syncInterval time.Duration
@@ -108,6 +102,11 @@ func (cs *ClientSet) ClientsCount() int {
108
102
return len (cs .clients )
109
103
}
110
104
105
+ // SetServerCounter sets the strategy for determining the server count.
106
+ func (cs * ClientSet ) SetServerCounter (counter ServerCounter ) {
107
+ cs .serverCounter = counter
108
+ }
109
+
111
110
func (cs * ClientSet ) HealthyClientsCount () int {
112
111
cs .mu .Lock ()
113
112
defer cs .mu .Unlock ()
@@ -175,7 +174,6 @@ type ClientSetConfig struct {
175
174
WarnOnChannelLimit bool
176
175
SyncForever bool
177
176
XfrChannelSize int
178
- ServerLeaseCounter ServerCounter
179
177
ServerCountSource string
180
178
}
181
179
@@ -195,7 +193,6 @@ func (cc *ClientSetConfig) NewAgentClientSet(drainCh, stopCh <-chan struct{}) *C
195
193
drainCh : drainCh ,
196
194
xfrChannelSize : cc .XfrChannelSize ,
197
195
stopCh : stopCh ,
198
- leaseCounter : cc .ServerLeaseCounter ,
199
196
serverCountSource : cc .ServerCountSource ,
200
197
}
201
198
}
@@ -214,30 +211,40 @@ func (cs *ClientSet) resetBackoff() *wait.Backoff {
214
211
}
215
212
}
216
213
217
- // sync makes sure that #clients >= #proxy servers
214
+ // determineServerCount determines the number of proxy servers by delegating to its configured counter strategy.
215
+ func (cs * ClientSet ) determineServerCount () int {
216
+ serverCount := cs .serverCounter .Count ()
217
+ metrics .Metrics .SetServerCount (serverCount )
218
+ return serverCount
219
+ }
220
+
221
+ // sync manages the backoff and the connection attempts to the proxy server.
222
+ // sync runs until stopCh is closed
218
223
func (cs * ClientSet ) sync () {
219
224
defer cs .shutdown ()
220
225
backoff := cs .resetBackoff ()
221
226
var duration time.Duration
222
227
for {
223
- if serverCount , err := cs .connectOnce (); err != nil {
228
+ if err := cs .connectOnce (); err != nil {
224
229
if dse , ok := err .(* DuplicateServerError ); ok {
225
- clientsCount := cs .ClientsCount ()
226
- klog .V (4 ).InfoS ("duplicate server" , "serverID" , dse .ServerID , "serverCount" , serverCount , "clientsCount" , clientsCount )
227
- if serverCount != 0 && clientsCount >= serverCount {
228
- duration = backoff .Step ()
229
- } else {
230
- backoff = cs .resetBackoff ()
231
- duration = wait .Jitter (backoff .Duration , backoff .Jitter )
232
- }
230
+ klog .V (4 ).InfoS ("duplicate server connection attempt" , "serverID" , dse .ServerID )
231
+ // We connected to a server we already have a connection to.
232
+ // This is expected in syncForever mode. We just wait for the
233
+ // next sync period to try again. No need for backoff.
234
+ backoff = cs .resetBackoff ()
235
+ duration = wait .Jitter (backoff .Duration , backoff .Jitter )
233
236
} else {
237
+ // A 'real' error, so we backoff.
234
238
klog .ErrorS (err , "cannot connect once" )
235
239
duration = backoff .Step ()
236
240
}
237
241
} else {
242
+ // A successful connection was made, or no new connection was needed.
243
+ // Reset the backoff and wait for the next sync period.
238
244
backoff = cs .resetBackoff ()
239
245
duration = wait .Jitter (backoff .Duration , backoff .Jitter )
240
246
}
247
+
241
248
time .Sleep (duration )
242
249
select {
243
250
case <- cs .stopCh :
@@ -247,76 +254,37 @@ func (cs *ClientSet) sync() {
247
254
}
248
255
}
249
256
250
- func (cs * ClientSet ) ServerCount () int {
251
-
252
- var serverCount int
253
- var countSourceLabel string
257
+ func (cs * ClientSet ) connectOnce () error {
258
+ serverCount := cs .determineServerCount ()
254
259
255
- switch cs .serverCountSource {
256
- case "" , "default" :
257
- if cs .leaseCounter != nil {
258
- serverCount = cs .leaseCounter .Count ()
259
- countSourceLabel = fromLeases
260
- } else {
261
- serverCount = cs .lastReceivedServerCount
262
- countSourceLabel = fromResponses
263
- }
264
- case "max" :
265
- countFromLeases := 0
266
- if cs .leaseCounter != nil {
267
- countFromLeases = cs .leaseCounter .Count ()
268
- }
269
- countFromResponses := cs .lastReceivedServerCount
270
-
271
- serverCount = countFromLeases
272
- countSourceLabel = fromLeases
273
- if countFromResponses > serverCount {
274
- serverCount = countFromResponses
275
- countSourceLabel = fromResponses
276
- }
277
- if serverCount == 0 {
278
- serverCount = 1
279
- countSourceLabel = fromFallback
280
- }
281
-
282
- }
283
-
284
- if serverCount != cs .lastServerCount {
285
- klog .Warningf ("change detected in proxy server count (was: %d, now: %d, source: %q)" , cs .lastServerCount , serverCount , countSourceLabel )
286
- cs .lastServerCount = serverCount
260
+ // If not in syncForever mode, we only connect if we have fewer connections than the server count.
261
+ if ! cs .syncForever && cs .ClientsCount () >= serverCount && serverCount > 0 {
262
+ return nil // Nothing to do.
287
263
}
288
264
289
- metrics .Metrics .SetServerCount (serverCount )
290
- return serverCount
291
- }
292
-
293
- func (cs * ClientSet ) connectOnce () (int , error ) {
294
- serverCount := cs .ServerCount ()
295
-
296
- if ! cs .syncForever && serverCount != 0 && cs .ClientsCount () >= serverCount {
297
- return serverCount , nil
298
- }
265
+ // In syncForever mode, we always try to connect, to discover new servers.
299
266
c , receivedServerCount , err := cs .newAgentClient ()
300
267
if err != nil {
301
- return serverCount , err
268
+ return err
302
269
}
270
+
303
271
if err := cs .AddClient (c .serverID , c ); err != nil {
304
272
c .Close ()
305
- return serverCount , err
273
+ return err // likely *DuplicateServerError
306
274
}
307
- // By moving the update to here, we only accept the server count from a server
308
- // that we have successfully added to our active client set, implicitly ignoring
309
- // stale data from duplicate connection attempts.
275
+ // SUCCESS: We connected to a new, unique server.
276
+ // Only now do we update our view of the server count.
310
277
cs .lastReceivedServerCount = receivedServerCount
311
- klog .V (2 ).InfoS ("sync added client connecting to proxy server" , "serverID" , c .serverID )
278
+ klog .V (2 ).InfoS ("successfully connected to new proxy server" , "serverID" , c .serverID , "newServerCount" , receivedServerCount )
312
279
313
280
labels := runpprof .Labels (
314
281
"agentIdentifiers" , cs .agentIdentifiers ,
315
282
"serverAddress" , cs .address ,
316
283
"serverID" , c .serverID ,
317
284
)
318
285
go runpprof .Do (context .Background (), labels , func (context.Context ) { c .Serve () })
319
- return serverCount , nil
286
+
287
+ return nil
320
288
}
321
289
322
290
func (cs * ClientSet ) Serve () {
0 commit comments