docker
diff --git a/cmd/cli/commands/compose.go b/cmd/cli/commands/compose.go
Lines changed: 8 additions & 12 deletions
diff --git a/cmd/cli/commands/configure.go b/cmd/cli/commands/configure.go
Lines changed: 35 additions & 19 deletions
diff --git a/cmd/cli/commands/configure_test.go b/cmd/cli/commands/configure_test.go
Lines changed: 165 additions & 0 deletions
diff --git a/cmd/cli/docs/reference/docker_model_compose_up.yaml b/cmd/cli/docs/reference/docker_model_compose_up.yaml
Lines changed: 0 additions & 9 deletions
diff --git a/cmd/cli/docs/reference/docker_model_configure.yaml b/cmd/cli/docs/reference/docker_model_configure.yaml
Lines changed: 20 additions & 1 deletion
diff --git a/go.mod b/go.mod
Lines changed: 0 additions & 1 deletion
diff --git a/go.sum b/go.sum
Lines changed: 0 additions & 2 deletions
diff --git a/go.work.sum b/go.work.sum
Lines changed: 2 additions & 3 deletions
@@ -37,7 +37,6 @@ func newComposeCmd() *cobra.Command {
 func newUpCommand() *cobra.Command {
 	var models []string
 	var ctxSize int64
-	var rawRuntimeFlags string
 	var backend string
 	var draftModel string
 	var numTokens int
@@ -70,9 +69,6 @@ func newUpCommand() *cobra.Command {
 			if ctxSize > 0 {
 				sendInfo(fmt.Sprintf("Setting context size to %d", ctxSize))
 			}
-			if rawRuntimeFlags != "" {
-				sendInfo("Setting raw runtime flags to " + rawRuntimeFlags)
-			}
 
 			// Build speculative config if any speculative flags are set
 			var speculativeConfig *inference.SpeculativeDecodingConfig
@@ -87,14 +83,15 @@ func newUpCommand() *cobra.Command {
 
 			for _, model := range models {
 				if err := desktopClient.ConfigureBackend(scheduling.ConfigureRequest{
-					Model:           model,
-					ContextSize:     ctxSize,
-					RawRuntimeFlags: rawRuntimeFlags,
-					Speculative:     speculativeConfig,
+					Model: model,
+					BackendConfiguration: inference.BackendConfiguration{
+						ContextSize: ctxSize,
+						Speculative: speculativeConfig,
+					},
 				}); err != nil {
-					configErrFmtString := "failed to configure backend for model %s with context-size %d and runtime-flags %s"
-					_ = sendErrorf(configErrFmtString+": %v", model, ctxSize, rawRuntimeFlags, err)
-					return fmt.Errorf(configErrFmtString+": %w", model, ctxSize, rawRuntimeFlags, err)
+					configErrFmtString := "failed to configure backend for model %s with context-size %d"
+					_ = sendErrorf(configErrFmtString+": %v", model, ctxSize, err)
+					return fmt.Errorf(configErrFmtString+": %w", model, ctxSize, err)
 				}
 				sendInfo("Successfully configured backend for model " + model)
 			}
@@ -116,7 +113,6 @@ func newUpCommand() *cobra.Command {
 	}
 	c.Flags().StringArrayVar(&models, "model", nil, "model to use")
 	c.Flags().Int64Var(&ctxSize, "context-size", -1, "context size for the model")
-	c.Flags().StringVar(&rawRuntimeFlags, "runtime-flags", "", "raw runtime flags to pass to the inference engine")
 	c.Flags().StringVar(&backend, "backend", llamacpp.Name, "inference backend to use")
 	c.Flags().StringVar(&draftModel, "speculative-draft-model", "", "draft model for speculative decoding")
 	c.Flags().IntVar(&numTokens, "speculative-num-tokens", 0, "number of tokens to predict speculatively")
 
@@ -1,6 +1,7 @@
 package commands
 
 import (
+	"encoding/json"
 	"fmt"
 
 	"github.com/docker/model-runner/cmd/cli/commands/completion"
@@ -15,32 +16,21 @@ func newConfigureCmd() *cobra.Command {
 	var draftModel string
 	var numTokens int
 	var minAcceptanceRate float64
+	var hfOverrides string
+	var reasoningBudget int64
 
 	c := &cobra.Command{
-		Use:    "configure [--context-size=<n>] [--speculative-draft-model=<model>] MODEL [-- <runtime-flags...>]",
+		Use:    "configure [--context-size=<n>] [--speculative-draft-model=<model>] [--hf_overrides=<json>] [--reasoning-budget=<n>] MODEL",
 		Short:  "Configure runtime options for a model",
 		Hidden: true,
 		Args: func(cmd *cobra.Command, args []string) error {
-			argsBeforeDash := cmd.ArgsLenAtDash()
-			if argsBeforeDash == -1 {
-				// No "--" used, so we need exactly 1 total argument.
-				if len(args) != 1 {
-					return fmt.Errorf(
-						"Exactly one model must be specified, got %d: %v\n\n"+
-							"See 'docker model configure --help' for more information",
-						len(args), args)
-				}
-			} else {
-				// Has "--", so we need exactly 1 argument before it.
-				if argsBeforeDash != 1 {
-					return fmt.Errorf(
-						"Exactly one model must be specified before --, got %d\n\n"+
-							"See 'docker model configure --help' for more information",
-						argsBeforeDash)
-				}
+			if len(args) != 1 {
+				return fmt.Errorf(
+					"Exactly one model must be specified, got %d: %v\n\n"+
+						"See 'docker model configure --help' for more information",
+					len(args), args)
 			}
 			opts.Model = args[0]
-			opts.RuntimeFlags = args[1:]
 			return nil
 		},
 		RunE: func(cmd *cobra.Command, args []string) error {
@@ -52,6 +42,30 @@ func newConfigureCmd() *cobra.Command {
 					MinAcceptanceRate: minAcceptanceRate,
 				}
 			}
+			// Parse and validate HuggingFace overrides if provided (vLLM-specific)
+			if hfOverrides != "" {
+				var hfo inference.HFOverrides
+				if err := json.Unmarshal([]byte(hfOverrides), &hfo); err != nil {
+					return fmt.Errorf("invalid --hf_overrides JSON: %w", err)
+				}
+				// Validate the overrides to prevent command injection
+				if err := hfo.Validate(); err != nil {
+					return err
+				}
+				if opts.VLLM == nil {
+					opts.VLLM = &inference.VLLMConfig{}
+				}
+				opts.VLLM.HFOverrides = hfo
+			}
+			// Set llama.cpp-specific reasoning budget if explicitly provided
+			// Note: We check if flag was changed rather than checking value > 0
+			// because 0 is a valid value (disables reasoning) and -1 means unlimited
+			if cmd.Flags().Changed("reasoning-budget") {
+				if opts.LlamaCpp == nil {
+					opts.LlamaCpp = &inference.LlamaCppConfig{}
+				}
+				opts.LlamaCpp.ReasoningBudget = &reasoningBudget
+			}
 			return desktopClient.ConfigureBackend(opts)
 		},
 		ValidArgsFunction: completion.ModelNames(getDesktopClient, -1),
@@ -61,5 +75,7 @@ func newConfigureCmd() *cobra.Command {
 	c.Flags().StringVar(&draftModel, "speculative-draft-model", "", "draft model for speculative decoding")
 	c.Flags().IntVar(&numTokens, "speculative-num-tokens", 0, "number of tokens to predict speculatively")
 	c.Flags().Float64Var(&minAcceptanceRate, "speculative-min-acceptance-rate", 0, "minimum acceptance rate for speculative decoding")
+	c.Flags().StringVar(&hfOverrides, "hf_overrides", "", "HuggingFace model config overrides (JSON) - vLLM only")
+	c.Flags().Int64Var(&reasoningBudget, "reasoning-budget", 0, "reasoning budget for reasoning models - llama.cpp only")
 	return c
 }
@@ -0,0 +1,165 @@
+package commands
+
+import (
+	"testing"
+)
+
+// TestConfigureCmdReasoningBudgetFlag checks that --reasoning-budget is registered on the configure command as an int64 flag defaulting to 0.
+func TestConfigureCmdReasoningBudgetFlag(t *testing.T) {
+	cmd := newConfigureCmd()
+
+	// Abort if the flag is missing: the remaining checks dereference it.
+	reasoningBudgetFlag := cmd.Flags().Lookup("reasoning-budget")
+	if reasoningBudgetFlag == nil {
+		t.Fatal("--reasoning-budget flag not found")
+	}
+
+	// DefValue holds the default in string form, hence the comparison against "0".
+	if reasoningBudgetFlag.DefValue != "0" {
+		t.Errorf("Expected default reasoning-budget value to be '0', got '%s'", reasoningBudgetFlag.DefValue)
+	}
+
+	// The flag's value type must report itself as int64.
+	if reasoningBudgetFlag.Value.Type() != "int64" {
+		t.Errorf("Expected reasoning-budget flag type to be 'int64', got '%s'", reasoningBudgetFlag.Value.Type())
+	}
+}
+
+// TestConfigureCmdReasoningBudgetFlagChanged checks that Changed() separates an unset --reasoning-budget from one set explicitly, including to 0.
+func TestConfigureCmdReasoningBudgetFlagChanged(t *testing.T) {
+	tests := []struct {
+		name          string
+		setValue      string
+		expectChanged bool
+		expectedValue int64
+	}{
+		{
+			name:          "flag not set - should not be changed",
+			setValue:      "",
+			expectChanged: false,
+			expectedValue: 0,
+		},
+		{
+			name:          "flag set to 0 (disable reasoning) - should be changed",
+			setValue:      "0",
+			expectChanged: true,
+			expectedValue: 0,
+		},
+		{
+			name:          "flag set to -1 (unlimited) - should be changed",
+			setValue:      "-1",
+			expectChanged: true,
+			expectedValue: -1,
+		},
+		{
+			name:          "flag set to positive value - should be changed",
+			setValue:      "1024",
+			expectChanged: true,
+			expectedValue: 1024,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// Build a new command per subtest so a flag set in one case cannot carry over into the next.
+			cmd := newConfigureCmd()
+
+			// An empty setValue is the table's marker for "leave the flag untouched".
+			if tt.setValue != "" {
+				err := cmd.Flags().Set("reasoning-budget", tt.setValue)
+				if err != nil {
+					t.Fatalf("Failed to set reasoning-budget flag: %v", err)
+				}
+			}
+
+			// Changed() must be true exactly for the cases that called Set, even when the value equals the default 0.
+			isChanged := cmd.Flags().Changed("reasoning-budget")
+			if isChanged != tt.expectChanged {
+				t.Errorf("Expected Changed() = %v, got %v", tt.expectChanged, isChanged)
+			}
+
+			value, err := cmd.Flags().GetInt64("reasoning-budget")
+			if err != nil {
+				t.Fatalf("Failed to get reasoning-budget flag value: %v", err)
+			}
+			if value != tt.expectedValue {
+				t.Errorf("Expected value = %d, got %d", tt.expectedValue, value)
+			}
+		})
+	}
+}
+
+// TestConfigureCmdHfOverridesFlag checks that --hf_overrides is registered on the configure command as a string flag with an empty default.
+func TestConfigureCmdHfOverridesFlag(t *testing.T) {
+	cmd := newConfigureCmd()
+
+	// Abort if the flag is missing: the remaining checks dereference it.
+	hfOverridesFlag := cmd.Flags().Lookup("hf_overrides")
+	if hfOverridesFlag == nil {
+		t.Fatal("--hf_overrides flag not found")
+	}
+
+	// The default must be the empty string.
+	if hfOverridesFlag.DefValue != "" {
+		t.Errorf("Expected default hf_overrides value to be empty, got '%s'", hfOverridesFlag.DefValue)
+	}
+
+	// The flag's value type must report itself as string.
+	if hfOverridesFlag.Value.Type() != "string" {
+		t.Errorf("Expected hf_overrides flag type to be 'string', got '%s'", hfOverridesFlag.Value.Type())
+	}
+}
+
+// TestConfigureCmdContextSizeFlag checks that --context-size is registered, defaults to -1, and returns a value set through the flag set.
+func TestConfigureCmdContextSizeFlag(t *testing.T) {
+	cmd := newConfigureCmd()
+
+	// Abort if the flag is missing: the remaining checks dereference it.
+	contextSizeFlag := cmd.Flags().Lookup("context-size")
+	if contextSizeFlag == nil {
+		t.Fatal("--context-size flag not found")
+	}
+
+	// Verify the default value is -1 (indicating not set)
+	if contextSizeFlag.DefValue != "-1" {
+		t.Errorf("Expected default context-size value to be '-1', got '%s'", contextSizeFlag.DefValue)
+	}
+
+	// Set the flag; stop on failure, otherwise the value check below only adds a misleading follow-on mismatch.
+	err := cmd.Flags().Set("context-size", "8192")
+	if err != nil {
+		t.Fatalf("Failed to set context-size flag: %v", err)
+	}
+
+	// Read the value back through the typed accessor; stop on failure for the same reason.
+	contextSizeValue, err := cmd.Flags().GetInt64("context-size")
+	if err != nil {
+		t.Fatalf("Failed to get context-size flag value: %v", err)
+	}
+
+	if contextSizeValue != 8192 {
+		t.Errorf("Expected context-size flag value to be 8192, got %d", contextSizeValue)
+	}
+}
+
+// TestConfigureCmdSpeculativeFlags checks that the three speculative-decoding flags are registered on the configure command.
+func TestConfigureCmdSpeculativeFlags(t *testing.T) {
+	cmd := newConfigureCmd()
+
+	draftModelFlag := cmd.Flags().Lookup("speculative-draft-model")
+	if draftModelFlag == nil {
+		t.Fatal("--speculative-draft-model flag not found")
+	}
+
+	// Only registration is checked here; defaults and types are not asserted.
+	numTokensFlag := cmd.Flags().Lookup("speculative-num-tokens")
+	if numTokensFlag == nil {
+		t.Fatal("--speculative-num-tokens flag not found")
+	}
+
+	// Each lookup uses Fatal, so the first missing flag ends the test.
+	minAcceptanceRateFlag := cmd.Flags().Lookup("speculative-min-acceptance-rate")
+	if minAcceptanceRateFlag == nil {
+		t.Fatal("--speculative-min-acceptance-rate flag not found")
+	}
+}
@@ -33,15 +33,6 @@ options:
       experimentalcli: false
       kubernetes: false
       swarm: false
-    - option: runtime-flags
-      value_type: string
-      description: raw runtime flags to pass to the inference engine
-      deprecated: false
-      hidden: false
-      experimental: false
-      experimentalcli: false
-      kubernetes: false
-      swarm: false
     - option: speculative-draft-model
       value_type: string
       description: draft model for speculative decoding
 
@@ -1,7 +1,7 @@
 command: docker model configure
 short: Configure runtime options for a model
 long: Configure runtime options for a model
-usage: docker model configure [--context-size=<n>] [--speculative-draft-model=<model>] MODEL [-- <runtime-flags...>]
+usage: docker model configure [--context-size=<n>] [--speculative-draft-model=<model>] [--hf_overrides=<json>] [--reasoning-budget=<n>] MODEL
 pname: docker model
 plink: docker_model.yaml
 options:
@@ -15,6 +15,25 @@ options:
       experimentalcli: false
       kubernetes: false
       swarm: false
+    - option: hf_overrides
+      value_type: string
+      description: HuggingFace model config overrides (JSON) - vLLM only
+      deprecated: false
+      hidden: false
+      experimental: false
+      experimentalcli: false
+      kubernetes: false
+      swarm: false
+    - option: reasoning-budget
+      value_type: int64
+      default_value: "0"
+      description: reasoning budget for reasoning models - llama.cpp only
+      deprecated: false
+      hidden: false
+      experimental: false
+      experimentalcli: false
+      kubernetes: false
+      swarm: false
     - option: speculative-draft-model
       value_type: string
       description: draft model for speculative decoding
 
@@ -11,7 +11,6 @@ require (
 	github.com/gpustack/gguf-parser-go v0.22.1
 	github.com/jaypipes/ghw v0.19.1
 	github.com/kolesnikovae/go-winjob v1.0.0
-	github.com/mattn/go-shellwords v1.0.12
 	github.com/opencontainers/go-digest v1.0.0
 	github.com/opencontainers/image-spec v1.1.1
 	github.com/prometheus/client_model v0.6.2
 
@@ -84,8 +84,6 @@ github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
 github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
 github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
 github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
-github.com/mattn/go-shellwords v1.0.12 h1:M2zGm7EW6UQJvDeQxo4T51eKPurbeFbe8WtebGE2xrk=
-github.com/mattn/go-shellwords v1.0.12/go.mod h1:EZzvwXDESEeg03EKmM+RmDnNOPKG4lLtQsUlTZDWQ8Y=
 github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
 github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
 github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg=
 
@@ -141,7 +141,6 @@ github.com/denisenkom/go-mssqldb v0.0.0-20191128021309-1d7a30a10f73 h1:OGNva6Whs
 github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c h1:+pKlWGMw7gf6bQ+oDZB4KHQFypsfjYlq/C4rfL7D3g8=
 github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c/go.mod h1:Uw6UezgYA44ePAFQYUehOuCzmy5zmg/+nl2ZfMWGkpA=
 github.com/docker/libtrust v0.0.0-20160708172513-aabc10ec26b7 h1:UhxFibDNY/bfvqU5CAUmr9zpesgbU6SWc8/B4mflAE4=
-github.com/docker/model-runner v1.0.3/go.mod h1:qRIuXMeZ5dnL4A9e/+BUtQOSZAS0PBxUkLN51XYpLOE=
 github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
 github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
 github.com/dvsekhvalnov/jose2go v0.0.0-20170216131308-f21a8cedbbae h1:UTOyRlLeWJrZx+ynml6q6qzZ1uDkJe/0Z5CMZRbEIJg=
@@ -238,8 +237,6 @@ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
 github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
-github.com/google/go-containerregistry v0.20.6 h1:cvWX87UxxLgaH76b4hIvya6Dzz9qHB31qAwjAohdSTU=
-github.com/google/go-containerregistry v0.20.6/go.mod h1:T0x8MuoAoKX/873bkeSfLD2FAkwCDf9/HZgsFJ02E2Y=
 github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
 github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
 github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8=
@@ -331,6 +328,8 @@ github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXq
 github.com/magiconair/properties v1.5.3 h1:C8fxWnhYyME3n0klPOhVM7PtYUB3eV1W3DeFmN3j53Y=
 github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
 github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
+github.com/mattn/go-shellwords v1.0.12 h1:M2zGm7EW6UQJvDeQxo4T51eKPurbeFbe8WtebGE2xrk=
+github.com/mattn/go-shellwords v1.0.12/go.mod h1:EZzvwXDESEeg03EKmM+RmDnNOPKG4lLtQsUlTZDWQ8Y=
 github.com/mattn/go-sqlite3 v1.6.0 h1:TDwTWbeII+88Qy55nWlof0DclgAtI4LqGujkYMzmQII=
 github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU=
 github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo=