Skip to content

Commit e69a618

Browse files
author
Piotr Stankiewicz
committed
Reload defunct runners
In case a runner becomes defunct, e.g. as a result of a backend crash, it would be neat to be able to reload it. So, if the loader finds a runner, have it check whether the runner is still alive, and create a new one if the runner is defunct. Signed-off-by: Piotr Stankiewicz <piotr.stankiewicz@docker.com>
1 parent 8aa7a28 commit e69a618

File tree

1 file changed

+18
-4
lines changed

1 file changed

+18
-4
lines changed

pkg/inference/scheduling/loader.go

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,13 @@ func (l *loader) evict(idleOnly bool) int {
162162
for r, slot := range l.runners {
163163
unused := l.references[slot] == 0
164164
idle := unused && now.Sub(l.timestamps[slot]) > runnerIdleTimeout
165-
if unused && (!idleOnly || idle) {
165+
defunct := false
166+
select {
167+
case <-l.slots[slot].done:
168+
defunct = true
169+
default:
170+
}
171+
if unused && (!idleOnly || idle || defunct) {
166172
l.log.Infof("Evicting %s backend runner with model %s in %s mode",
167173
r.backend, r.model, r.mode,
168174
)
@@ -372,9 +378,17 @@ func (l *loader) load(ctx context.Context, backendName, model string, mode infer
372378
// See if we can satisfy the request with an existing runner.
373379
existing, ok := l.runners[runnerKey{backendName, model, mode}]
374380
if ok {
375-
l.references[existing] += 1
376-
l.timestamps[existing] = time.Time{}
377-
return l.slots[existing], nil
381+
select {
382+
case <-l.slots[existing].done:
383+
l.log.Warnf("Will reload defunct %s runner for %s. Runner error: %s.", backendName, model,
384+
l.slots[existing].err)
385+
// Evict the defunct runner if it is not in use by anyone else.
386+
l.evictRunner(backendName, model)
387+
default:
388+
l.references[existing] += 1
389+
l.timestamps[existing] = time.Time{}
390+
return l.slots[existing], nil
391+
}
378392
}
379393

380394
// If there's not sufficient memory or all slots are full, then try

0 commit comments

Comments
 (0)