diff --git a/commands/rm.go b/commands/rm.go index fb4faab3..95159fb2 100644 --- a/commands/rm.go +++ b/commands/rm.go @@ -29,7 +29,7 @@ func newRemoveCmd() *cobra.Command { } response, err := desktopClient.Remove(args, force) if response != "" { - cmd.Println(response) + cmd.Print(response) } if err != nil { err = handleClientError(err, "Failed to remove model") diff --git a/desktop/desktop.go b/desktop/desktop.go index 2b43c329..06d43ec7 100644 --- a/desktop/desktop.go +++ b/desktop/desktop.go @@ -13,7 +13,7 @@ import ( "time" "github.com/docker/go-units" - + "github.com/docker/model-distribution/distribution" "github.com/docker/model-runner/pkg/inference" dmrm "github.com/docker/model-runner/pkg/inference/models" "github.com/docker/model-runner/pkg/inference/scheduling" @@ -443,20 +443,34 @@ func (c *Client) Remove(models []string, force bool) (string, error) { } defer resp.Body.Close() - if resp.StatusCode != http.StatusOK { + var bodyStr string + body, err := io.ReadAll(resp.Body) + if err != nil { + bodyStr = fmt.Sprintf("(failed to read response body: %v)", err) + } else { + bodyStr = string(body) + } + + if resp.StatusCode == http.StatusOK { + var deleteResponse distribution.DeleteModelResponse + if err := json.Unmarshal(body, &deleteResponse); err != nil { + modelRemoved += fmt.Sprintf("Model %s removed successfully, but failed to parse response: %v\n", model, err) + } else { + for _, msg := range deleteResponse { + if msg.Untagged != nil { + modelRemoved += fmt.Sprintf("Untagged: %s\n", *msg.Untagged) + } + if msg.Deleted != nil { + modelRemoved += fmt.Sprintf("Deleted: %s\n", *msg.Deleted) + } + } + } + } else { if resp.StatusCode == http.StatusNotFound { return modelRemoved, fmt.Errorf("no such model: %s", model) } - var bodyStr string - body, err := io.ReadAll(resp.Body) - if err != nil { - bodyStr = fmt.Sprintf("(failed to read response body: %v)", err) - } else { - bodyStr = string(body) - } return modelRemoved, fmt.Errorf("removing %s failed with status %s: %s", model, resp.Status, bodyStr) } - modelRemoved += fmt.Sprintf("Model %s removed successfully\n", model) } return modelRemoved, nil } diff --git a/go.mod b/go.mod index 83368b74..7fba2fa7 100644 --- a/go.mod +++ b/go.mod @@ -11,8 +11,8 @@ require ( github.com/docker/docker v28.2.2+incompatible github.com/docker/go-connections v0.5.0 github.com/docker/go-units v0.5.0 - github.com/docker/model-distribution v0.0.0-20250627163720-aff34abcf3e0 - github.com/docker/model-runner v0.0.0-20250627142917-26a0a73fbbc0 + github.com/docker/model-distribution v0.0.0-20250710123110-a633223e127e + github.com/docker/model-runner v0.0.0-20250711130825-8907b3ddf82e github.com/google/go-containerregistry v0.20.6 github.com/mattn/go-isatty v0.0.17 github.com/nxadm/tail v1.4.8 diff --git a/go.sum b/go.sum index 8b6587c3..0d5c0dc2 100644 --- a/go.sum +++ b/go.sum @@ -78,10 +78,10 @@ github.com/docker/go-metrics v0.0.1/go.mod h1:cG1hvH2utMXtqgqqYE9plW6lDxS3/5ayHz github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/docker/libtrust v0.0.0-20160708172513-aabc10ec26b7/go.mod h1:cyGadeNEkKy96OOhEzfZl+yxihPEzKnqJwvfuSUqbZE= -github.com/docker/model-distribution v0.0.0-20250627163720-aff34abcf3e0 h1:bve4JZI06Admw+NewtPfrpJXsvRnGKTQvBOEICNC1C0= -github.com/docker/model-distribution v0.0.0-20250627163720-aff34abcf3e0/go.mod h1:dThpO9JoG5Px3i+rTluAeZcqLGw8C0qepuEL4gL2o/c= -github.com/docker/model-runner v0.0.0-20250627142917-26a0a73fbbc0 h1:yajuhlGe1xhpWW3eMehQi2RrqiBQiGoi6c6OWiPxMaQ= -github.com/docker/model-runner v0.0.0-20250627142917-26a0a73fbbc0/go.mod h1:vZJiUZH/7O1CyNsEGi1o4khUT4DVRjcwluuamU9fhuM= +github.com/docker/model-distribution v0.0.0-20250710123110-a633223e127e h1:qBkjP4A20f3RXvtstitIPiStQ4p+bK8xcjosrXLBQZ0= +github.com/docker/model-distribution v0.0.0-20250710123110-a633223e127e/go.mod h1:dThpO9JoG5Px3i+rTluAeZcqLGw8C0qepuEL4gL2o/c= +github.com/docker/model-runner v0.0.0-20250711130825-8907b3ddf82e h1:oafd84kAFBgv/DAYgtXGLkC1KmRpDN+7G3be5+2+hA0= +github.com/docker/model-runner v0.0.0-20250711130825-8907b3ddf82e/go.mod h1:QmSoUNAbqolMY1Aq9DaC+sR/M/OPga0oCT/DBA1z9ow= github.com/dvsekhvalnov/jose2go v0.0.0-20170216131308-f21a8cedbbae/go.mod h1:7BvyPhdbLxMXIYTFPLsyJRFMsKmOZnQmzh6Gb+uquuM= github.com/erikstmartin/go-testdb v0.0.0-20160219214506-8d10e4a1bae5/go.mod h1:a2zkGnVExMxdzMo3M0Hi/3sEU+cWnZpSni0O6/Yb/P0= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= diff --git a/vendor/github.com/docker/model-distribution/builder/builder.go b/vendor/github.com/docker/model-distribution/builder/builder.go index e23d7f6b..871e462a 100644 --- a/vendor/github.com/docker/model-distribution/builder/builder.go +++ b/vendor/github.com/docker/model-distribution/builder/builder.go @@ -44,6 +44,17 @@ func (b *Builder) WithContextSize(size uint64) *Builder { } } +// WithMultimodalProjector adds a Multimodal projector file to the artifact +func (b *Builder) WithMultimodalProjector(path string) (*Builder, error) { + mmprojLayer, err := partial.NewLayer(path, types.MediaTypeMultimodalProjector) + if err != nil { + return nil, fmt.Errorf("mmproj layer from %q: %w", path, err) + } + return &Builder{ + model: mutate.AppendLayers(b.model, mmprojLayer), + }, nil +} + // Target represents a build target type Target interface { Write(context.Context, types.ModelArtifact, io.Writer) error diff --git a/vendor/github.com/docker/model-distribution/distribution/client.go b/vendor/github.com/docker/model-distribution/distribution/client.go index 1057944e..a601888e 100644 --- a/vendor/github.com/docker/model-distribution/distribution/client.go +++ b/vendor/github.com/docker/model-distribution/distribution/client.go @@ -242,45 +242,62 @@ func (c *Client) GetModel(reference string) (types.Model, error) { return model, nil } +type DeleteModelAction struct { + Untagged *string `json:"Untagged,omitempty"` + Deleted *string `json:"Deleted,omitempty"` +} + +type DeleteModelResponse []DeleteModelAction + // DeleteModel deletes a model -func (c *Client) DeleteModel(reference string, force bool) error { +func (c *Client) DeleteModel(reference string, force bool) (*DeleteModelResponse, error) { mdl, err := c.store.Read(reference) if err != nil { - return err + return &DeleteModelResponse{}, err } id, err := mdl.ID() if err != nil { - return fmt.Errorf("getting model ID: %w", err) + return &DeleteModelResponse{}, fmt.Errorf("getting model ID: %w", err) } isTag := id != reference + resp := DeleteModelResponse{} + if isTag { c.log.Infoln("Untagging model:", reference) - if err := c.store.RemoveTags([]string{reference}); err != nil { + tags, err := c.store.RemoveTags([]string{reference}) + if err != nil { c.log.Errorln("Failed to untag model:", err, "tag:", reference) - return fmt.Errorf("untagging model: %w", err) + return &DeleteModelResponse{}, fmt.Errorf("untagging model: %w", err) + } + for _, t := range tags { + resp = append(resp, DeleteModelAction{Untagged: &t}) + } + if len(mdl.Tags()) > 1 { + return &resp, nil } } - if len(mdl.Tags()) > 1 { - if isTag { - return nil // we are done after untagging - } else if !force { - // if the reference is not a tag and there are multiple tags, return an error unless forced - return fmt.Errorf( - "unable to delete %q (must be forced) due to multiple tag references: %w", - reference, ErrConflict, - ) - } + if len(mdl.Tags()) > 1 && !force { + // if the reference is not a tag and there are multiple tags, return an error unless forced + return &DeleteModelResponse{}, fmt.Errorf( + "unable to delete %q (must be forced) due to multiple tag references: %w", + reference, ErrConflict, + ) } c.log.Infoln("Deleting model:", id) - if err := c.store.Delete(id); err != nil { + deletedID, tags, err := c.store.Delete(id) + if err != nil { c.log.Errorln("Failed to delete model:", err, "tag:", reference) - return fmt.Errorf("deleting model: %w", err) + return &DeleteModelResponse{}, fmt.Errorf("deleting model: %w", err) } c.log.Infoln("Successfully deleted model:", reference) - return nil + for _, t := range tags { + resp = append(resp, DeleteModelAction{Untagged: &t}) + } + resp = append(resp, DeleteModelAction{Deleted: &deletedID}) + return &resp, nil } // Tag adds a tag to a model diff --git a/vendor/github.com/docker/model-distribution/internal/partial/partial.go b/vendor/github.com/docker/model-distribution/internal/partial/partial.go index e31814ca..3def14ae 100644 --- a/vendor/github.com/docker/model-distribution/internal/partial/partial.go +++ b/vendor/github.com/docker/model-distribution/internal/partial/partial.go @@ -7,7 +7,6 @@ import ( v1 "github.com/google/go-containerregistry/pkg/v1" "github.com/google/go-containerregistry/pkg/v1/partial" ggcr "github.com/google/go-containerregistry/pkg/v1/types" - "github.com/pkg/errors" "github.com/docker/model-distribution/types" ) @@ -67,22 +66,31 @@ type WithLayers interface { } func GGUFPath(i WithLayers) (string, error) { + return layerPathByMediaType(i, types.MediaTypeGGUF) +} + +func MMPROJPath(i WithLayers) (string, error) { + return layerPathByMediaType(i, types.MediaTypeMultimodalProjector) +} + +// layerPathByMediaType is a generic helper function that finds a layer by media type and returns its path +func layerPathByMediaType(i WithLayers, mediaType ggcr.MediaType) (string, error) { layers, err := i.Layers() if err != nil { return "", fmt.Errorf("get layers: %w", err) } for _, l := range layers { mt, err := l.MediaType() - if err != nil || mt != types.MediaTypeGGUF { + if err != nil || mt != mediaType { continue } - ggufLayer, ok := l.(*Layer) + layer, ok := l.(*Layer) if !ok { - return "", errors.New("gguf Layer is not available locally") + return "", fmt.Errorf("%s Layer is not available locally", mediaType) } - return ggufLayer.Path, nil + return layer.Path, nil } - return "", errors.New("model does not contain a GGUF layer") + return "", fmt.Errorf("model does not contain a %s layer", mediaType) } func ManifestForLayers(i WithLayers) (*v1.Manifest, error) { diff --git a/vendor/github.com/docker/model-distribution/internal/progress/reporter.go b/vendor/github.com/docker/model-distribution/internal/progress/reporter.go index be583bfd..d1a36385 100644 --- a/vendor/github.com/docker/model-distribution/internal/progress/reporter.go +++ b/vendor/github.com/docker/model-distribution/internal/progress/reporter.go @@ -24,21 +24,21 @@ type Layer struct { // Message represents a structured message for progress reporting type Message struct { - Type string `json:"type"` // "progress", "success", or "error" - Message string `json:"message"` // Human-readable message - Total uint64 `json:"total"` // Deprecated: use Layer.Size - Pulled uint64 `json:"pulled"` // Deprecated: use Layer.Current - Layer Layer `json:"layer,omitempty"` // Current layer information + Type string `json:"type"` // "progress", "success", or "error" + Message string `json:"message"` // Deprecated: the message should be defined by clients based on Message.Total and Message.Layer + Total uint64 `json:"total"` + Pulled uint64 `json:"pulled"` // Deprecated: use Layer.Current + Layer Layer `json:"layer"` // Current layer information } type Reporter struct { - progress chan v1.Update - done chan struct{} - err error - out io.Writer - format progressF - layer v1.Layer - TotalLayers int // Total number of layers + progress chan v1.Update + done chan struct{} + err error + out io.Writer + format progressF + layer v1.Layer + imageSize uint64 } type progressF func(update v1.Update) string @@ -51,13 +51,14 @@ func PushMsg(update v1.Update) string { return fmt.Sprintf("Uploaded: %.2f MB", float64(update.Complete)/1024/1024) } -func NewProgressReporter(w io.Writer, msgF progressF, layer v1.Layer) *Reporter { +func NewProgressReporter(w io.Writer, msgF progressF, imageSize int64, layer v1.Layer) *Reporter { return &Reporter{ - out: w, - progress: make(chan v1.Update, 1), - done: make(chan struct{}), - format: msgF, - layer: layer, + out: w, + progress: make(chan v1.Update, 1), + done: make(chan struct{}), + format: msgF, + layer: layer, + imageSize: safeUint64(imageSize), } } @@ -81,7 +82,7 @@ func (r *Reporter) Updates() chan<- v1.Update { continue // If we fail to write progress, don't try again } now := time.Now() - var total int64 + var layerSize uint64 var layerID string if r.layer != nil { // In case of Push there is no layer yet id, err := r.layer.DiffID() @@ -95,16 +96,17 @@ func (r *Reporter) Updates() chan<- v1.Update { r.err = err continue } - total = size + layerSize = safeUint64(size) } else { - total = p.Total + layerSize = safeUint64(p.Total) } incrementalBytes := p.Complete - lastComplete // Only update if enough time has passed or enough bytes downloaded or finished if now.Sub(lastUpdate) >= UpdateInterval || - incrementalBytes >= MinBytesForUpdate { - if err := WriteProgress(r.out, r.format(p), safeUint64(total), safeUint64(p.Complete), layerID); err != nil { + incrementalBytes >= MinBytesForUpdate || + safeUint64(p.Complete) == layerSize { + if err := WriteProgress(r.out, r.format(p), r.imageSize, layerSize, safeUint64(p.Complete), layerID); err != nil { r.err = err } lastUpdate = now @@ -123,15 +125,15 @@ func (r *Reporter) Wait() error { } // WriteProgress writes a progress update message -func WriteProgress(w io.Writer, msg string, total, current uint64, layerID string) error { +func WriteProgress(w io.Writer, msg string, imageSize, layerSize, current uint64, layerID string) error { return write(w, Message{ Type: "progress", Message: msg, - Total: total, + Total: imageSize, Pulled: current, Layer: Layer{ ID: layerID, - Size: total, + Size: layerSize, Current: current, }, }) diff --git a/vendor/github.com/docker/model-distribution/internal/store/index.go b/vendor/github.com/docker/model-distribution/internal/store/index.go index 641a4d5d..6cff7c98 100644 --- a/vendor/github.com/docker/model-distribution/internal/store/index.go +++ b/vendor/github.com/docker/model-distribution/internal/store/index.go @@ -39,10 +39,10 @@ func (i Index) Tag(reference string, tag string) (Index, error) { return result, nil } -func (i Index) UnTag(tag string) Index { +func (i Index) UnTag(tag string) (name.Tag, Index, error) { tagRef, err := name.NewTag(tag) if err != nil { - return Index{} + return name.Tag{}, Index{}, err } result := Index{ @@ -52,7 +52,7 @@ func (i Index) UnTag(tag string) Index { result.Models = append(result.Models, entry.UnTag(tagRef)) } - return result + return tagRef, result, nil } func (i Index) Find(reference string) (IndexEntry, int, bool) { diff --git a/vendor/github.com/docker/model-distribution/internal/store/model.go b/vendor/github.com/docker/model-distribution/internal/store/model.go index 85eba02f..e48a386a 100644 --- a/vendor/github.com/docker/model-distribution/internal/store/model.go +++ b/vendor/github.com/docker/model-distribution/internal/store/model.go @@ -114,6 +114,10 @@ func (m *Model) GGUFPath() (string, error) { return mdpartial.GGUFPath(m) } +func (m *Model) MMPROJPath() (string, error) { + return mdpartial.MMPROJPath(m) +} + func (m *Model) Tags() []string { return m.tags } diff --git a/vendor/github.com/docker/model-distribution/internal/store/store.go b/vendor/github.com/docker/model-distribution/internal/store/store.go index 3ba8ec53..233608b4 100644 --- a/vendor/github.com/docker/model-distribution/internal/store/store.go +++ b/vendor/github.com/docker/model-distribution/internal/store/store.go @@ -7,7 +7,6 @@ import ( "path/filepath" "github.com/docker/model-distribution/internal/progress" - v1 "github.com/google/go-containerregistry/pkg/v1" ) @@ -93,14 +92,14 @@ func (s *LocalStore) List() ([]IndexEntry, error) { } // Delete deletes a model by reference -func (s *LocalStore) Delete(ref string) error { +func (s *LocalStore) Delete(ref string) (string, []string, error) { idx, err := s.readIndex() if err != nil { - return fmt.Errorf("reading models file: %w", err) + return "", nil, fmt.Errorf("reading models file: %w", err) } model, _, ok := idx.Find(ref) if !ok { - return ErrModelNotFound + return "", nil, ErrModelNotFound } // Remove manifest file @@ -140,7 +139,7 @@ func (s *LocalStore) Delete(ref string) error { idx = idx.Remove(model.ID) - return s.writeIndex(idx) + return model.ID, model.Tags, s.writeIndex(idx) } // AddTags adds tags to an existing model @@ -160,15 +159,25 @@ func (s *LocalStore) AddTags(ref string, newTags []string) error { } // RemoveTags removes tags from models -func (s *LocalStore) RemoveTags(tags []string) error { +func (s *LocalStore) RemoveTags(tags []string) ([]string, error) { index, err := s.readIndex() if err != nil { - return fmt.Errorf("reading modelss index: %w", err) + return nil, fmt.Errorf("reading modelss index: %w", err) } + var tagRefs []string for _, tag := range tags { - index = index.UnTag(tag) + tagRef, newIndex, err := index.UnTag(tag) + if err != nil { + // Try to save progress before returning error. + if writeIndexErr := s.writeIndex(newIndex); writeIndexErr != nil { + return tagRefs, fmt.Errorf("untagging model: %w, also failed to save: %w", err, writeIndexErr) + } + return tagRefs, fmt.Errorf("untagging model: %w", err) + } + tagRefs = append(tagRefs, tagRef.Name()) + index = newIndex } - return s.writeIndex(index) + return tagRefs, s.writeIndex(index) } // Version returns the store version @@ -188,17 +197,25 @@ func (s *LocalStore) Write(mdl v1.Image, tags []string, w io.Writer) error { return fmt.Errorf("writing config file: %w", err) } - // Write the blobs layers, err := mdl.Layers() if err != nil { return fmt.Errorf("getting layers: %w", err) } + imageSize := int64(0) + for _, layer := range layers { + size, err := layer.Size() + if err != nil { + return fmt.Errorf("getting layer size: %w", err) + } + imageSize += size + } + for _, layer := range layers { var pr *progress.Reporter var progressChan chan<- v1.Update if w != nil { - pr = progress.NewProgressReporter(w, progress.PullMsg, layer) + pr = progress.NewProgressReporter(w, progress.PullMsg, imageSize, layer) progressChan = pr.Updates() } diff --git a/vendor/github.com/docker/model-distribution/registry/client.go b/vendor/github.com/docker/model-distribution/registry/client.go index 6d2d59f8..b0a1fd2a 100644 --- a/vendor/github.com/docker/model-distribution/registry/client.go +++ b/vendor/github.com/docker/model-distribution/registry/client.go @@ -132,7 +132,20 @@ func (c *Client) NewTarget(tag string) (*Target, error) { } func (t *Target) Write(ctx context.Context, model types.ModelArtifact, progressWriter io.Writer) error { - pr := progress.NewProgressReporter(progressWriter, progress.PushMsg, nil) + layers, err := model.Layers() + if err != nil { + return fmt.Errorf("getting layers: %w", err) + } + + imageSize := int64(0) + for _, layer := range layers { + size, err := layer.Size() + if err != nil { + return fmt.Errorf("getting layer size: %w", err) + } + imageSize += size + } + pr := progress.NewProgressReporter(progressWriter, progress.PushMsg, imageSize, nil) defer pr.Wait() // Set up authentication options diff --git a/vendor/github.com/docker/model-distribution/types/config.go b/vendor/github.com/docker/model-distribution/types/config.go index dbaf2ea1..8211dd2a 100644 --- a/vendor/github.com/docker/model-distribution/types/config.go +++ b/vendor/github.com/docker/model-distribution/types/config.go @@ -1,7 +1,6 @@ package types import ( - "strings" "time" v1 "github.com/google/go-containerregistry/pkg/v1" @@ -21,13 +20,12 @@ const ( // MediaTypeLicense indicates a plain text file containing a license MediaTypeLicense = types.MediaType("application/vnd.docker.ai.license") + // MediaTypeMultimodalProjector indicates a Multimodal projector file + MediaTypeMultimodalProjector = types.MediaType("application/vnd.docker.ai.mmproj") + FormatGGUF = Format("gguf") ) -func IsModelConfig(mt types.MediaType) bool { - return strings.HasPrefix(string(mt), string(MediaTypeModelConfigV01)) -} - type Format string type ConfigFile struct { diff --git a/vendor/github.com/docker/model-distribution/types/model.go b/vendor/github.com/docker/model-distribution/types/model.go index 575a7d0e..7a592ba4 100644 --- a/vendor/github.com/docker/model-distribution/types/model.go +++ b/vendor/github.com/docker/model-distribution/types/model.go @@ -7,6 +7,7 @@ import ( type Model interface { ID() (string, error) GGUFPath() (string, error) + MMPROJPath() (string, error) Config() (Config, error) Tags() []string Descriptor() (Descriptor, error) diff --git a/vendor/github.com/docker/model-runner/pkg/inference/backends/llamacpp/llamacpp.go b/vendor/github.com/docker/model-runner/pkg/inference/backends/llamacpp/llamacpp.go index 00fa57b8..f745320d 100644 --- a/vendor/github.com/docker/model-runner/pkg/inference/backends/llamacpp/llamacpp.go +++ b/vendor/github.com/docker/model-runner/pkg/inference/backends/llamacpp/llamacpp.go @@ -11,7 +11,6 @@ import ( "os/exec" "path/filepath" "runtime" - "strconv" "strings" "github.com/docker/model-runner/pkg/diskusage" @@ -122,10 +121,9 @@ func (l *llamaCpp) Install(ctx context.Context, httpClient *http.Client) error { // Run implements inference.Backend.Run. func (l *llamaCpp) Run(ctx context.Context, socket, model string, mode inference.BackendMode, config *inference.BackendConfiguration) error { - modelPath, err := l.modelManager.GetModelPath(model) - l.log.Infof("Model path: %s", modelPath) + mdl, err := l.modelManager.GetModel(model) if err != nil { - return fmt.Errorf("failed to get model path: %w", err) + return fmt.Errorf("failed to get model: %w", err) } if err := os.RemoveAll(socket); err != nil && !errors.Is(err, fs.ErrNotExist) { @@ -138,13 +136,9 @@ func (l *llamaCpp) Run(ctx context.Context, socket, model string, mode inference binPath = l.updatedServerStoragePath } - args := l.config.GetArgs(modelPath, socket, mode) - - if config != nil { - if config.ContextSize >= 0 { - args = append(args, "--ctx-size", strconv.Itoa(int(config.ContextSize))) - } - args = append(args, config.RuntimeFlags...) + args, err := l.config.GetArgs(mdl, socket, mode, config) + if err != nil { + return fmt.Errorf("failed to get args for llama.cpp: %w", err) } l.log.Infof("llamaCppArgs: %v", args) diff --git a/vendor/github.com/docker/model-runner/pkg/inference/backends/llamacpp/llamacpp_config.go b/vendor/github.com/docker/model-runner/pkg/inference/backends/llamacpp/llamacpp_config.go index c7990ce9..5c8822d3 100644 --- a/vendor/github.com/docker/model-runner/pkg/inference/backends/llamacpp/llamacpp_config.go +++ b/vendor/github.com/docker/model-runner/pkg/inference/backends/llamacpp/llamacpp_config.go @@ -1,9 +1,11 @@ package llamacpp import ( + "fmt" "runtime" "strconv" + "github.com/docker/model-distribution/types" "github.com/docker/model-runner/pkg/inference" ) @@ -33,10 +35,20 @@ func NewDefaultLlamaCppConfig() *Config { } // GetArgs implements BackendConfig.GetArgs. -func (c *Config) GetArgs(modelPath, socket string, mode inference.BackendMode) []string { +func (c *Config) GetArgs(model types.Model, socket string, mode inference.BackendMode, config *inference.BackendConfiguration) ([]string, error) { // Start with the arguments from LlamaCppConfig args := append([]string{}, c.Args...) + modelPath, err := model.GGUFPath() + if err != nil { + return nil, fmt.Errorf("get gguf path: %w", err) + } + + modelCfg, err := model.Config() + if err != nil { + return nil, fmt.Errorf("get model config: %w", err) + } + // Add model and socket arguments args = append(args, "--model", modelPath, "--host", socket) @@ -45,7 +57,26 @@ func (c *Config) GetArgs(modelPath, socket string, mode inference.BackendMode) [ args = append(args, "--embeddings") } - return args + // Add arguments from model config + if modelCfg.ContextSize != nil { + args = append(args, "--ctx-size", strconv.FormatUint(*modelCfg.ContextSize, 10)) + } + + // Add arguments from backend config + if config != nil { + if config.ContextSize > 0 && !containsArg(args, "--ctx-size") { + args = append(args, "--ctx-size", strconv.FormatInt(config.ContextSize, 10)) + } + args = append(args, config.RuntimeFlags...) + } + + // Add arguments for Multimodal projector + path, err := model.MMPROJPath() + if path != "" && err == nil { + args = append(args, "--mmproj", path) + } + + return args, nil } // containsArg checks if the given argument is already in the args slice. diff --git a/vendor/github.com/docker/model-runner/pkg/inference/config/config.go b/vendor/github.com/docker/model-runner/pkg/inference/config/config.go index 6d05d1af..8163759d 100644 --- a/vendor/github.com/docker/model-runner/pkg/inference/config/config.go +++ b/vendor/github.com/docker/model-runner/pkg/inference/config/config.go @@ -1,6 +1,7 @@ package config import ( + "github.com/docker/model-distribution/types" "github.com/docker/model-runner/pkg/inference" ) @@ -11,5 +12,5 @@ type BackendConfig interface { // GetArgs returns the command-line arguments for the backend. // It takes the model path, socket, and mode as input and returns // the appropriate arguments for the backend. - GetArgs(modelPath, socket string, mode inference.BackendMode) []string + GetArgs(model types.Model, socket string, mode inference.BackendMode, config *inference.BackendConfiguration) ([]string, error) } diff --git a/vendor/github.com/docker/model-runner/pkg/inference/models/manager.go b/vendor/github.com/docker/model-runner/pkg/inference/models/manager.go index c87efa83..1c083abc 100644 --- a/vendor/github.com/docker/model-runner/pkg/inference/models/manager.go +++ b/vendor/github.com/docker/model-runner/pkg/inference/models/manager.go @@ -333,7 +333,8 @@ func (m *Manager) handleDeleteModel(w http.ResponseWriter, r *http.Request) { } } - if err := m.distributionClient.DeleteModel(r.PathValue("name"), force); err != nil { + resp, err := m.distributionClient.DeleteModel(r.PathValue("name"), force) + if err != nil { if errors.Is(err, distribution.ErrModelNotFound) { http.Error(w, err.Error(), http.StatusNotFound) return @@ -346,6 +347,11 @@ func (m *Manager) handleDeleteModel(w http.ResponseWriter, r *http.Request) { http.Error(w, err.Error(), http.StatusInternalServerError) return } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(resp); err != nil { + http.Error(w, fmt.Sprintf("error writing response: %v", err), http.StatusInternalServerError) + } } // handleOpenAIGetModels handles GET //v1/models and diff --git a/vendor/github.com/docker/model-runner/pkg/inference/scheduling/scheduler.go b/vendor/github.com/docker/model-runner/pkg/inference/scheduling/scheduler.go index 6ee978b1..10b7e6e6 100644 --- a/vendor/github.com/docker/model-runner/pkg/inference/scheduling/scheduler.go +++ b/vendor/github.com/docker/model-runner/pkg/inference/scheduling/scheduler.go @@ -235,7 +235,7 @@ func (s *Scheduler) handleOpenAIInference(w http.ResponseWriter, r *http.Request return } // Non-blocking call to track the model usage. - s.tracker.TrackModel(model) + s.tracker.TrackModel(model, r.UserAgent()) } // Request a runner to execute the request and defer its release. @@ -410,6 +410,11 @@ func (s *Scheduler) Configure(w http.ResponseWriter, r *http.Request) { runnerConfig.ContextSize = configureRequest.ContextSize runnerConfig.RuntimeFlags = runtimeFlags + if model, err := s.modelManager.GetModel(configureRequest.Model); err == nil { + // Configure is called by compose for each model. + s.tracker.TrackModel(model, r.UserAgent()) + } + if err := s.loader.setRunnerConfig(r.Context(), backend.Name(), configureRequest.Model, inference.BackendModeCompletion, runnerConfig); err != nil { s.log.Warnf("Failed to configure %s runner for %s: %s", backend.Name(), configureRequest.Model, err) if errors.Is(err, errRunnerAlreadyActive) { diff --git a/vendor/github.com/docker/model-runner/pkg/metrics/metrics.go b/vendor/github.com/docker/model-runner/pkg/metrics/metrics.go index 11521985..5b58f6f8 100644 --- a/vendor/github.com/docker/model-runner/pkg/metrics/metrics.go +++ b/vendor/github.com/docker/model-runner/pkg/metrics/metrics.go @@ -60,32 +60,38 @@ func NewTracker(httpClient *http.Client, log logging.Logger, userAgent string, d } } -func (t *Tracker) TrackModel(model types.Model) { +func (t *Tracker) TrackModel(model types.Model, userAgent string) { if t.doNotTrack { return } - go t.trackModel(model) + go t.trackModel(model, userAgent) } -func (t *Tracker) trackModel(model types.Model) { +func (t *Tracker) trackModel(model types.Model, userAgent string) { tags := model.Tags() t.log.Debugln("Tracking model:", tags) if len(tags) == 0 { return } - ref, err := name.ParseReference(tags[0]) - if err != nil { - t.log.Errorf("Error parsing reference: %v\n", err) - return + ua := t.userAgent + if userAgent != "" { + ua += " " + userAgent } - if _, err = remote.Head(ref, - remote.WithAuthFromKeychain(authn.DefaultKeychain), - remote.WithTransport(t.transport), - remote.WithUserAgent(t.userAgent), - ); err != nil { - t.log.Debugf("Manifest does not exist or error occurred: %v\n", err) - return + for _, tag := range tags { + ref, err := name.ParseReference(tag) + if err != nil { + t.log.Errorf("Error parsing reference: %v\n", err) + return + } + if _, err = remote.Head(ref, + remote.WithAuthFromKeychain(authn.DefaultKeychain), + remote.WithTransport(t.transport), + remote.WithUserAgent(ua), + ); err != nil { + t.log.Debugf("Manifest does not exist or error occurred: %v\n", err) + continue + } + t.log.Debugln("Tracked", ref.Name(), ref.Identifier(), "with user agent:", ua) } - t.log.Debugln("Tracked", ref.Name(), ref.Identifier()) } diff --git a/vendor/modules.txt b/vendor/modules.txt index a7df29ea..c01f238c 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -144,7 +144,7 @@ github.com/docker/go-connections/tlsconfig # github.com/docker/go-units v0.5.0 ## explicit github.com/docker/go-units -# github.com/docker/model-distribution v0.0.0-20250627163720-aff34abcf3e0 +# github.com/docker/model-distribution v0.0.0-20250710123110-a633223e127e ## explicit; go 1.23.0 github.com/docker/model-distribution/builder github.com/docker/model-distribution/distribution @@ -155,7 +155,7 @@ github.com/docker/model-distribution/internal/progress github.com/docker/model-distribution/internal/store github.com/docker/model-distribution/registry github.com/docker/model-distribution/types -# github.com/docker/model-runner v0.0.0-20250627142917-26a0a73fbbc0 +# github.com/docker/model-runner v0.0.0-20250711130825-8907b3ddf82e ## explicit; go 1.23.7 github.com/docker/model-runner/pkg/diskusage github.com/docker/model-runner/pkg/environment