Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ go 1.23.7
require (
github.com/containerd/containerd/v2 v2.0.4
github.com/containerd/platforms v1.0.0-rc.1
github.com/docker/model-distribution v0.0.0-20250710123110-a633223e127e
github.com/docker/model-distribution v0.0.0-20250724114133-a11d745e582c
github.com/elastic/go-sysinfo v1.15.3
github.com/google/go-containerregistry v0.20.3
github.com/gpustack/gguf-parser-go v0.14.1
github.com/jaypipes/ghw v0.16.0
github.com/mattn/go-shellwords v1.0.12
github.com/opencontainers/go-digest v1.0.0
Expand All @@ -29,13 +31,11 @@ require (
github.com/docker/cli v27.5.0+incompatible // indirect
github.com/docker/distribution v2.8.3+incompatible // indirect
github.com/docker/docker-credential-helpers v0.8.2 // indirect
github.com/elastic/go-sysinfo v1.15.3 // indirect
github.com/elastic/go-windows v1.0.2 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/gpustack/gguf-parser-go v0.14.1 // indirect
github.com/henvic/httpretty v0.1.4 // indirect
github.com/jaypipes/pcidb v1.0.1 // indirect
github.com/json-iterator/go v1.1.12 // indirect
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ github.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBi
github.com/docker/distribution v2.8.3+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=
github.com/docker/docker-credential-helpers v0.8.2 h1:bX3YxiGzFP5sOXWc3bTPEXdEaZSeVMrFgOr3T+zrFAo=
github.com/docker/docker-credential-helpers v0.8.2/go.mod h1:P3ci7E3lwkZg6XiHdRKft1KckHiO9a2rNtyFbZ/ry9M=
github.com/docker/model-distribution v0.0.0-20250710123110-a633223e127e h1:qBkjP4A20f3RXvtstitIPiStQ4p+bK8xcjosrXLBQZ0=
github.com/docker/model-distribution v0.0.0-20250710123110-a633223e127e/go.mod h1:dThpO9JoG5Px3i+rTluAeZcqLGw8C0qepuEL4gL2o/c=
github.com/docker/model-distribution v0.0.0-20250724114133-a11d745e582c h1:w9MekYamXmWLe9ZWXWgNXJ7BLDDemXwB8WcF7wzHF5Q=
github.com/docker/model-distribution v0.0.0-20250724114133-a11d745e582c/go.mod h1:dThpO9JoG5Px3i+rTluAeZcqLGw8C0qepuEL4gL2o/c=
github.com/elastic/go-sysinfo v1.15.3 h1:W+RnmhKFkqPTCRoFq2VCTmsT4p/fwpo+3gKNQsn1XU0=
github.com/elastic/go-sysinfo v1.15.3/go.mod h1:K/cNrqYTDrSoMh2oDkYEMS2+a72GRxMvNP+GC+vRIlo=
github.com/elastic/go-windows v1.0.2 h1:yoLLsAsV5cfg9FLhZ9EXZ2n2sQFKeDYrHenkcivY4vI=
Expand Down
15 changes: 15 additions & 0 deletions pkg/inference/models/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ func (m *Manager) RebuildRoutes(allowedOrigins []string) {
func (m *Manager) routeHandlers(allowedOrigins []string) map[string]http.HandlerFunc {
handlers := map[string]http.HandlerFunc{
"POST " + inference.ModelsPrefix + "/create": m.handleCreateModel,
"POST " + inference.ModelsPrefix + "/load": m.handleLoadModel,
"GET " + inference.ModelsPrefix: m.handleGetModels,
"GET " + inference.ModelsPrefix + "/{name...}": m.handleGetModel,
"DELETE " + inference.ModelsPrefix + "/{name...}": m.handleDeleteModel,
Expand Down Expand Up @@ -187,6 +188,20 @@ func (m *Manager) handleCreateModel(w http.ResponseWriter, r *http.Request) {
}
}

// handleLoadModel handles POST <inference-prefix>/models/load requests.
//
// The request body is handed to the distribution client's LoadModel together
// with the response writer. The handler replies with 503 Service Unavailable
// when no distribution client is configured, and with 500 Internal Server
// Error (carrying the error text) when LoadModel returns an error. The first
// value returned by LoadModel is discarded.
func (m *Manager) handleLoadModel(w http.ResponseWriter, r *http.Request) {
	if m.distributionClient == nil {
		http.Error(w, "model distribution service unavailable", http.StatusServiceUnavailable)
		return
	}

	// NOTE(review): w is passed into LoadModel. If LoadModel writes to w before
	// failing, the 200 status has already been sent and the status code set by
	// http.Error below will not take effect — confirm whether that can happen.
	if _, err := m.distributionClient.LoadModel(r.Body, w); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
	}
}

// handleGetModels handles GET <inference-prefix>/models requests.
func (m *Manager) handleGetModels(w http.ResponseWriter, r *http.Request) {
if m.distributionClient == nil {
Expand Down