commands/compose.go (49 changes: 28 additions & 21 deletions)
```diff
@@ -13,6 +13,13 @@ import (
 	"github.com/spf13/cobra"
 )
 
+type composeCommandFlags struct {
+	Models          []string
+	CtxSize         int64
+	RawRuntimeFlags string
+	Backend         string
+}
+
 func newComposeCmd() *cobra.Command {
 
 	c := &cobra.Command{
@@ -26,15 +33,19 @@ func newComposeCmd() *cobra.Command {
 	return c
 }
 
+func setupComposeCommandFlags(c *cobra.Command, flags *composeCommandFlags) {
+	c.Flags().StringArrayVar(&flags.Models, "model", nil, "model to use")
+	c.Flags().Int64Var(&flags.CtxSize, "context-size", -1, "context size for the model")
+	c.Flags().StringVar(&flags.RawRuntimeFlags, "runtime-flags", "", "raw runtime flags to pass to the inference engine")
+	c.Flags().StringVar(&flags.Backend, "backend", llamacpp.Name, "inference backend to use")
+}
+
 func newUpCommand() *cobra.Command {
-	var models []string
-	var ctxSize int64
-	var rawRuntimeFlags string
-	var backend string
+	flags := &composeCommandFlags{}
 	c := &cobra.Command{
 		Use: "up",
 		RunE: func(cmd *cobra.Command, args []string) error {
-			if len(models) == 0 {
+			if len(flags.Models) == 0 {
 				err := errors.New("options.model is required")
 				_ = sendError(err.Error())
 				return err
@@ -52,26 +63,26 @@ func newUpCommand() *cobra.Command {
 				return errors.New("unable to determine standalone runner endpoint")
 			}
 
-			if err := downloadModelsOnlyIfNotFound(desktopClient, models); err != nil {
+			if err := downloadModelsOnlyIfNotFound(desktopClient, flags.Models); err != nil {
 				return err
 			}
 
-			if ctxSize > 0 {
-				sendInfo(fmt.Sprintf("Setting context size to %d", ctxSize))
+			if flags.CtxSize > 0 {
+				sendInfo(fmt.Sprintf("Setting context size to %d", flags.CtxSize))
 			}
-			if rawRuntimeFlags != "" {
-				sendInfo("Setting raw runtime flags to " + rawRuntimeFlags)
+			if flags.RawRuntimeFlags != "" {
+				sendInfo("Setting raw runtime flags to " + flags.RawRuntimeFlags)
 			}
 
-			for _, model := range models {
+			for _, model := range flags.Models {
 				if err := desktopClient.ConfigureBackend(scheduling.ConfigureRequest{
 					Model:           model,
-					ContextSize:     ctxSize,
-					RawRuntimeFlags: rawRuntimeFlags,
+					ContextSize:     flags.CtxSize,
+					RawRuntimeFlags: flags.RawRuntimeFlags,
 				}); err != nil {
 					configErrFmtString := "failed to configure backend for model %s with context-size %d and runtime-flags %s"
-					_ = sendErrorf(configErrFmtString+": %v", model, ctxSize, rawRuntimeFlags, err)
-					return fmt.Errorf(configErrFmtString+": %w", model, ctxSize, rawRuntimeFlags, err)
+					_ = sendErrorf(configErrFmtString+": %v", model, flags.CtxSize, flags.RawRuntimeFlags, err)
+					return fmt.Errorf(configErrFmtString+": %w", model, flags.CtxSize, flags.RawRuntimeFlags, err)
 				}
 				sendInfo("Successfully configured backend for model " + model)
 			}
@@ -91,23 +102,19 @@
 			return nil
 		},
 	}
-	c.Flags().StringArrayVar(&models, "model", nil, "model to use")
-	c.Flags().Int64Var(&ctxSize, "context-size", -1, "context size for the model")
-	c.Flags().StringVar(&rawRuntimeFlags, "runtime-flags", "", "raw runtime flags to pass to the inference engine")
-	c.Flags().StringVar(&backend, "backend", llamacpp.Name, "inference backend to use")
+	setupComposeCommandFlags(c, flags)
 	return c
 }
 
 func newDownCommand() *cobra.Command {
-	var model []string
 	c := &cobra.Command{
 		Use: "down",
 		RunE: func(cmd *cobra.Command, args []string) error {
 			// No required cleanup on down
 			return nil
 		},
 	}
-	c.Flags().StringArrayVar(&model, "model", nil, "model to use")
+	setupComposeCommandFlags(c, &composeCommandFlags{})
 	return c
 }
```
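The change above deduplicates flag registration: instead of each subcommand declaring its own local flag variables, a shared `composeCommandFlags` struct and a `setupComposeCommandFlags` helper register the same flag set on both `up` and `down`. The sketch below is a minimal, self-contained illustration of that cobra pattern; it mirrors the names in the diff, but hard-codes the backend default as the literal "llama.cpp" (the real code uses `llamacpp.Name`) and stubs out the Docker Desktop client calls, so treat it as a simplified standalone example rather than the repository's actual wiring.

```go
// Minimal sketch of the shared-flag pattern introduced in this diff.
// Assumptions: backend default is hard-coded as "llama.cpp", and RunE
// bodies print instead of calling the Docker Desktop client.
package main

import (
	"errors"
	"fmt"
	"os"

	"github.com/spf13/cobra"
)

// composeCommandFlags mirrors the struct added in the diff: one place
// that defines every flag the compose subcommands accept.
type composeCommandFlags struct {
	Models          []string
	CtxSize         int64
	RawRuntimeFlags string
	Backend         string
}

// setupComposeCommandFlags registers the shared flag set on a command,
// binding each flag to a field of the caller's struct instance.
func setupComposeCommandFlags(c *cobra.Command, flags *composeCommandFlags) {
	c.Flags().StringArrayVar(&flags.Models, "model", nil, "model to use")
	c.Flags().Int64Var(&flags.CtxSize, "context-size", -1, "context size for the model")
	c.Flags().StringVar(&flags.RawRuntimeFlags, "runtime-flags", "", "raw runtime flags to pass to the inference engine")
	c.Flags().StringVar(&flags.Backend, "backend", "llama.cpp", "inference backend to use")
}

func main() {
	root := &cobra.Command{Use: "compose"}

	upFlags := &composeCommandFlags{}
	up := &cobra.Command{
		Use: "up",
		RunE: func(cmd *cobra.Command, args []string) error {
			if len(upFlags.Models) == 0 {
				return errors.New("options.model is required")
			}
			// Stand-in for the configure-and-start logic in the diff.
			fmt.Printf("up: models=%v ctx=%d backend=%s\n",
				upFlags.Models, upFlags.CtxSize, upFlags.Backend)
			return nil
		},
	}
	setupComposeCommandFlags(up, upFlags)

	down := &cobra.Command{
		Use: "down",
		RunE: func(cmd *cobra.Command, args []string) error {
			// Mirrors the diff: down accepts the same flags but has no
			// required cleanup, so the values are simply ignored.
			return nil
		},
	}
	setupComposeCommandFlags(down, &composeCommandFlags{})

	root.AddCommand(up, down)
	if err := root.Execute(); err != nil {
		os.Exit(1)
	}
}
```

Giving each subcommand its own struct instance keeps parsed flag values isolated per command, while the single helper guarantees `up` and `down` can never drift apart in the flags they accept.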
docs/reference/docker_model_compose_down.yaml (29 changes: 29 additions & 0 deletions)
```diff
@@ -3,6 +3,26 @@ usage: docker model compose down
 pname: docker model compose
 plink: docker_model_compose.yaml
 options:
+  - option: backend
+    value_type: string
+    default_value: llama.cpp
+    description: inference backend to use
+    deprecated: false
+    hidden: false
+    experimental: false
+    experimentalcli: false
+    kubernetes: false
+    swarm: false
+  - option: context-size
+    value_type: int64
+    default_value: "-1"
+    description: context size for the model
+    deprecated: false
+    hidden: false
+    experimental: false
+    experimentalcli: false
+    kubernetes: false
+    swarm: false
   - option: model
     value_type: stringArray
     default_value: '[]'
@@ -13,6 +33,15 @@ options:
     experimentalcli: false
     kubernetes: false
     swarm: false
+  - option: runtime-flags
+    value_type: string
+    description: raw runtime flags to pass to the inference engine
+    deprecated: false
+    hidden: false
+    experimental: false
+    experimentalcli: false
+    kubernetes: false
+    swarm: false
 inherited_options:
   - option: project-name
    value_type: string
```
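Because `down` now runs the same `setupComposeCommandFlags` helper, its reference YAML gains the full flag set (`--backend`, default llama.cpp; `--context-size`, default -1; `--model`; `--runtime-flags`) even though the `down` handler currently ignores the values, presumably so a caller can pass identical options to both lifecycle commands without an unknown-flag error. A hypothetical invocation, with the model reference and runtime-flag values chosen purely for illustration: `docker model compose up --model <model-ref> --context-size 4096 --runtime-flags "--threads 4" --backend llama.cpp`; `down` accepts the same options.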