
Commit 423e1d6

Author: Piotr Stankiewicz

Allow unloading multiple models at once

Once we enable running multiple models at once, it will be useful to be able to unload multiple at a time. In preparation for that, make the unload request accept multiple model tags, and evict those models.

Signed-off-by: Piotr Stankiewicz <[email protected]>
1 parent d73a30c commit 423e1d6

2 files changed: +7 additions, −4 deletions

pkg/inference/scheduling/api.go

Lines changed: 3 additions & 3 deletions

```diff
@@ -64,9 +64,9 @@ type DiskUsage struct {

 // UnloadRequest is used to specify which models to unload.
 type UnloadRequest struct {
-	All     bool   `json:"all"`
-	Backend string `json:"backend"`
-	Model   string `json:"model"`
+	All     bool     `json:"all"`
+	Backend string   `json:"backend"`
+	Models  []string `json:"models"`
 }

 // UnloadResponse is used to return the number of unloaded runners (backend, model).
```

pkg/inference/scheduling/loader.go

Lines changed: 4 additions & 1 deletion

```diff
@@ -209,7 +209,10 @@ func (l *loader) Unload(ctx context.Context, unload UnloadRequest) int {
 		if unload.All {
 			return l.evict(false)
 		} else {
-			return l.evictRunner(unload.Backend, unload.Model)
+			for _, model := range unload.Models {
+				l.evictRunner(unload.Backend, model)
+			}
+			return len(l.runners)
 		}
 	}()
 }
```
