diff --git a/go.mod b/go.mod index 2d93f6f36..09aeeb041 100644 --- a/go.mod +++ b/go.mod @@ -5,8 +5,10 @@ go 1.23.7 require ( github.com/containerd/containerd/v2 v2.0.4 github.com/containerd/platforms v1.0.0-rc.1 - github.com/docker/model-distribution v0.0.0-20250710123110-a633223e127e + github.com/docker/model-distribution v0.0.0-20250724114133-a11d745e582c + github.com/elastic/go-sysinfo v1.15.3 github.com/google/go-containerregistry v0.20.3 + github.com/gpustack/gguf-parser-go v0.14.1 github.com/jaypipes/ghw v0.16.0 github.com/mattn/go-shellwords v1.0.12 github.com/opencontainers/go-digest v1.0.0 @@ -29,13 +31,11 @@ require ( github.com/docker/cli v27.5.0+incompatible // indirect github.com/docker/distribution v2.8.3+incompatible // indirect github.com/docker/docker-credential-helpers v0.8.2 // indirect - github.com/elastic/go-sysinfo v1.15.3 // indirect github.com/elastic/go-windows v1.0.2 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.2.6 // indirect - github.com/gpustack/gguf-parser-go v0.14.1 // indirect github.com/henvic/httpretty v0.1.4 // indirect github.com/jaypipes/pcidb v1.0.1 // indirect github.com/json-iterator/go v1.1.12 // indirect diff --git a/go.sum b/go.sum index d008aff19..68f041d60 100644 --- a/go.sum +++ b/go.sum @@ -38,8 +38,8 @@ github.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBi github.com/docker/distribution v2.8.3+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= github.com/docker/docker-credential-helpers v0.8.2 h1:bX3YxiGzFP5sOXWc3bTPEXdEaZSeVMrFgOr3T+zrFAo= github.com/docker/docker-credential-helpers v0.8.2/go.mod h1:P3ci7E3lwkZg6XiHdRKft1KckHiO9a2rNtyFbZ/ry9M= -github.com/docker/model-distribution v0.0.0-20250710123110-a633223e127e h1:qBkjP4A20f3RXvtstitIPiStQ4p+bK8xcjosrXLBQZ0= -github.com/docker/model-distribution v0.0.0-20250710123110-a633223e127e/go.mod h1:dThpO9JoG5Px3i+rTluAeZcqLGw8C0qepuEL4gL2o/c= +github.com/docker/model-distribution v0.0.0-20250724114133-a11d745e582c h1:w9MekYamXmWLe9ZWXWgNXJ7BLDDemXwB8WcF7wzHF5Q= +github.com/docker/model-distribution v0.0.0-20250724114133-a11d745e582c/go.mod h1:dThpO9JoG5Px3i+rTluAeZcqLGw8C0qepuEL4gL2o/c= github.com/elastic/go-sysinfo v1.15.3 h1:W+RnmhKFkqPTCRoFq2VCTmsT4p/fwpo+3gKNQsn1XU0= github.com/elastic/go-sysinfo v1.15.3/go.mod h1:K/cNrqYTDrSoMh2oDkYEMS2+a72GRxMvNP+GC+vRIlo= github.com/elastic/go-windows v1.0.2 h1:yoLLsAsV5cfg9FLhZ9EXZ2n2sQFKeDYrHenkcivY4vI= diff --git a/pkg/inference/models/manager.go b/pkg/inference/models/manager.go index 34d01247f..b0cf68bf4 100644 --- a/pkg/inference/models/manager.go +++ b/pkg/inference/models/manager.go @@ -120,6 +120,7 @@ func (m *Manager) RebuildRoutes(allowedOrigins []string) { func (m *Manager) routeHandlers(allowedOrigins []string) map[string]http.HandlerFunc { handlers := map[string]http.HandlerFunc{ "POST " + inference.ModelsPrefix + "/create": m.handleCreateModel, + "POST " + inference.ModelsPrefix + "/load": m.handleLoadModel, "GET " + inference.ModelsPrefix: m.handleGetModels, "GET " + inference.ModelsPrefix + "/{name...}": m.handleGetModel, "DELETE " + inference.ModelsPrefix + "/{name...}": m.handleDeleteModel, @@ -187,6 +188,20 @@ func (m *Manager) handleCreateModel(w http.ResponseWriter, r *http.Request) { } } +// handleLoadModel handles POST /models/load requests. +func (m *Manager) handleLoadModel(w http.ResponseWriter, r *http.Request) { + if m.distributionClient == nil { + http.Error(w, "model distribution service unavailable", http.StatusServiceUnavailable) + return + } + + if _, err := m.distributionClient.LoadModel(r.Body, w); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + return +} + // handleGetModels handles GET /models requests. func (m *Manager) handleGetModels(w http.ResponseWriter, r *http.Request) { if m.distributionClient == nil {