Skip to content

Commit 26bd064

Browse files
committed
fallback and agent side stop sync
This commit adds a fallback for the case where all the agents in the system are draining. Rather than dropping the request with an error, we fall back to the existing behavior, i.e. we continue to send the request to an agent even if it is draining. As for the agent-side issue: once the agent has sent the DRAIN signal to the server, it should stop doing the syncOnce with the server, because doing so misleads the server into thinking the agent is ready again. Signed-off-by: Imran Pochi <[email protected]>
1 parent 53580b0 commit 26bd064

File tree

3 files changed

+34
-3
lines changed

3 files changed

+34
-3
lines changed

pkg/agent/clientset.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,14 @@ func (cs *ClientSet) sync() {
255255
}
256256

257257
func (cs *ClientSet) connectOnce() error {
258+
// Skip establishing new connections if draining
259+
select {
260+
case <-cs.drainCh:
261+
klog.V(2).InfoS("Skipping connectOnce - agent is draining")
262+
return nil
263+
default:
264+
}
265+
258266
serverCount := cs.determineServerCount()
259267

260268
// If not in syncForever mode, we only connect if we have fewer connections than the server count.

pkg/server/backend_manager.go

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,8 @@ func (s *DefaultBackendStorage) GetRandomBackend() (*Backend, error) {
366366
return nil, &ErrNotFound{}
367367
}
368368

369+
var firstDrainingBackend *Backend
370+
369371
// Start at a random agent and check each agent in sequence
370372
startIdx := s.random.Intn(len(s.agentIDs))
371373
for i := 0; i < len(s.agentIDs); i++ {
@@ -380,9 +382,19 @@ func (s *DefaultBackendStorage) GetRandomBackend() (*Backend, error) {
380382
klog.V(3).InfoS("Pick agent as backend", "agentID", agentID)
381383
return backend, nil
382384
}
385+
386+
// Keep track of first draining backend as fallback
387+
if firstDrainingBackend == nil {
388+
firstDrainingBackend = backend
389+
}
390+
}
391+
392+
// All agents are draining, use one as fallback
393+
if firstDrainingBackend != nil {
394+
agentID := firstDrainingBackend.id
395+
klog.V(2).InfoS("No non-draining backends available, using draining backend as fallback", "agentID", agentID)
396+
return firstDrainingBackend, nil
383397
}
384398

385-
// All agents are draining
386-
klog.V(2).InfoS("No non-draining backends available")
387399
return nil, &ErrNotFound{}
388400
}

pkg/server/desthost_backend_manager.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,14 +79,25 @@ func (dibm *DestHostBackendManager) Backend(ctx context.Context) (*Backend, erro
7979
if destHost != "" {
8080
bes, exist := dibm.backends[destHost]
8181
if exist && len(bes) > 0 {
82+
var firstDrainingBackend *Backend
83+
8284
// Find a non-draining backend for this destination host
8385
for _, backend := range bes {
8486
if !backend.IsDraining() {
8587
klog.V(5).InfoS("Get the backend through the DestHostBackendManager", "destHost", destHost)
8688
return backend, nil
8789
}
90+
// Keep track of first draining backend as fallback
91+
if firstDrainingBackend == nil {
92+
firstDrainingBackend = backend
93+
}
94+
}
95+
96+
// All backends for this destination are draining, use one as fallback
97+
if firstDrainingBackend != nil {
98+
klog.V(4).InfoS("All backends for destination host are draining, using one as fallback", "destHost", destHost)
99+
return firstDrainingBackend, nil
88100
}
89-
klog.V(4).InfoS("All backends for destination host are draining", "destHost", destHost)
90101
}
91102
}
92103
return nil, &ErrNotFound{}

0 commit comments

Comments
 (0)