Skip to content
This repository was archived by the owner on Oct 6, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
29 changes: 29 additions & 0 deletions commands/compose.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import (
"strings"

"github.com/docker/model-cli/desktop"
"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
"github.com/docker/model-runner/pkg/inference/scheduling"
"github.com/spf13/cobra"
)

Expand All @@ -26,6 +28,9 @@ func newComposeCmd() *cobra.Command {

func newUpCommand() *cobra.Command {
var models []string
var ctxSize int64
var rawRuntimeFlags string
var backend string
c := &cobra.Command{
Use: "up",
RunE: func(cmd *cobra.Command, args []string) error {
Expand All @@ -35,6 +40,14 @@ func newUpCommand() *cobra.Command {
return err
}

sendInfo("Initializing model runner...")
// Announce the context size only when the user actually supplied one.
// The --context-size flag defaults to -1, so comparing against 4096 made
// every default invocation report "Setting context size to -1".
// NOTE(review): -1 presumably means "use the backend's default" — confirm.
if ctxSize > 0 {
	sendInfo(fmt.Sprintf("Setting context size to %d", ctxSize))
}
if rawRuntimeFlags != "" {
sendInfo("Setting raw runtime flags to " + rawRuntimeFlags)
}

kind := modelRunner.EngineKind()
standalone, err := ensureStandaloneRunnerAvailable(cmd.Context(), nil)
if err != nil {
Expand All @@ -50,6 +63,19 @@ func newUpCommand() *cobra.Command {
return err
}

for _, model := range models {
if err := desktopClient.ConfigureBackend(scheduling.ConfigureRequest{
Model: model,
ContextSize: ctxSize,
RawRuntimeFlags: rawRuntimeFlags,
}); err != nil {
configErrFmtString := "failed to configure backend for model %s with context-size %d and runtime-flags %s"
_ = sendErrorf(configErrFmtString+": %v", model, ctxSize, rawRuntimeFlags, err)
return fmt.Errorf(configErrFmtString+": %w", model, ctxSize, rawRuntimeFlags, err)
}
sendInfo("Successfully configured backend for model " + model)
}

switch kind {
case desktop.ModelRunnerEngineKindDesktop:
_ = setenv("URL", "http://model-runner.docker.internal/engines/v1/")
Expand All @@ -66,6 +92,9 @@ func newUpCommand() *cobra.Command {
},
}
c.Flags().StringArrayVar(&models, "model", nil, "model to use")
c.Flags().Int64Var(&ctxSize, "context-size", -1, "context size for the model")
c.Flags().StringVar(&rawRuntimeFlags, "runtime-flags", "", "raw runtime flags to pass to the inference engine")
c.Flags().StringVar(&backend, "backend", llamacpp.Name, "inference backend to use")
return c
}

Expand Down
25 changes: 25 additions & 0 deletions desktop/desktop.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (

"github.com/docker/model-runner/pkg/inference"
"github.com/docker/model-runner/pkg/inference/models"
"github.com/docker/model-runner/pkg/inference/scheduling"
"github.com/pkg/errors"
"go.opentelemetry.io/otel"
)
Expand Down Expand Up @@ -542,6 +543,30 @@ func (c *Client) Unload(req UnloadRequest) (UnloadResponse, error) {
return unloadResp, nil
}

func (c *Client) ConfigureBackend(request scheduling.ConfigureRequest) error {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks fine, just change paths and status if we decide to apply comments in docker/model-runner#76.

configureBackendPath := inference.InferencePrefix + "/_configure"
jsonData, err := json.Marshal(request)
if err != nil {
return fmt.Errorf("error marshaling request: %w", err)
}

resp, err := c.doRequest(http.MethodPost, configureBackendPath, bytes.NewReader(jsonData))
if err != nil {
return c.handleQueryError(err, configureBackendPath)
}
defer resp.Body.Close()

if resp.StatusCode != http.StatusAccepted {
body, _ := io.ReadAll(resp.Body)
if resp.StatusCode == http.StatusConflict {
return fmt.Errorf("%s", body)
}
return fmt.Errorf("%s (%s)", body, resp.Status)
}

return nil
}

// doRequest is a helper function that performs HTTP requests and handles 503 responses
func (c *Client) doRequest(method, path string, body io.Reader) (*http.Response, error) {
req, err := http.NewRequest(method, c.modelRunner.URL(path), body)
Expand Down
29 changes: 29 additions & 0 deletions docs/reference/docker_model_compose_up.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,26 @@ usage: docker model compose up
pname: docker model compose
plink: docker_model_compose.yaml
options:
- option: backend
value_type: string
default_value: llama.cpp
description: inference backend to use
deprecated: false
hidden: false
experimental: false
experimentalcli: false
kubernetes: false
swarm: false
- option: context-size
value_type: int64
default_value: "-1"
description: context size for the model
deprecated: false
hidden: false
experimental: false
experimentalcli: false
kubernetes: false
swarm: false
- option: model
value_type: stringArray
default_value: '[]'
Expand All @@ -13,6 +33,15 @@ options:
experimentalcli: false
kubernetes: false
swarm: false
- option: runtime-flags
value_type: string
description: raw runtime flags to pass to the inference engine
deprecated: false
hidden: false
experimental: false
experimentalcli: false
kubernetes: false
swarm: false
inherited_options:
- option: project-name
value_type: string
Expand Down
13 changes: 12 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ require (
github.com/docker/go-connections v0.5.0
github.com/docker/go-units v0.5.0
github.com/docker/model-distribution v0.0.0-20250512190053-b3792c042d57
github.com/docker/model-runner v0.0.0-20250512190413-96af7b750f88
github.com/docker/model-runner v0.0.0-20250613083629-6b8c3b816f00
github.com/google/go-containerregistry v0.20.3
github.com/nxadm/tail v1.4.8
github.com/olekukonko/tablewriter v0.0.5
Expand All @@ -23,15 +23,20 @@ require (
)

require (
github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 // indirect
github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect
github.com/BurntSushi/toml v1.4.0 // indirect
github.com/Microsoft/go-winio v0.6.2 // indirect
github.com/Shopify/logrus-bugsnag v0.0.0-20171204204709-577dee27f20d // indirect
github.com/StackExchange/wmi v1.2.1 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/bugsnag/panicwrap v1.3.4 // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/containerd/containerd/v2 v2.0.4 // indirect
github.com/containerd/errdefs v1.0.0 // indirect
github.com/containerd/log v0.1.0 // indirect
github.com/containerd/platforms v1.0.0-rc.1 // indirect
github.com/containerd/stargz-snapshotter/estargz v0.16.3 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.6 // indirect
github.com/creack/pty v1.1.24 // indirect
Expand All @@ -46,6 +51,7 @@ require (
github.com/fvbommel/sortorder v1.1.0 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/go-sql-driver/mysql v1.6.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/google/uuid v1.6.0 // indirect
Expand All @@ -54,15 +60,19 @@ require (
github.com/grpc-ecosystem/grpc-gateway/v2 v2.25.1 // indirect
github.com/henvic/httpretty v0.1.4 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/jaypipes/ghw v0.16.0 // indirect
github.com/jaypipes/pcidb v1.0.1 // indirect
github.com/jinzhu/gorm v1.9.16 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 // indirect
github.com/klauspost/compress v1.18.0 // indirect
github.com/mattn/go-runewidth v0.0.16 // indirect
github.com/mattn/go-shellwords v1.0.12 // indirect
github.com/miekg/pkcs11 v1.1.1 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/mitchellh/mapstructure v1.5.1-0.20220423185008-bf980b35cac4 // indirect
github.com/moby/docker-image-spec v1.3.1 // indirect
github.com/moby/locker v1.0.1 // indirect
github.com/moby/sys/sequential v0.6.0 // indirect
github.com/moby/term v0.5.2 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
Expand Down Expand Up @@ -110,4 +120,5 @@ require (
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
gotest.tools/v3 v3.5.2 // indirect
howett.net/plist v1.0.0 // indirect
)
Loading