Skip to content

Commit 0699420

Browse files
authored
Remove runtime flags (#482)
* add HuggingFace model configuration overrides support
* refactor: remove runtime flags and introduce backend-specific configurations for vLLM and llama.cpp
* refactor: remove runtime flags and update model configuration options for HuggingFace and reasoning budget
* refactor: remove go-shellwords dependency from go.mod and go.sum
* address PR feedback: add security validation and comprehensive tests
  - Add HFOverrides validation at scheduler/API boundary (security fix). Validates HFOverrides in ConfigureRunner to prevent injection attacks from non-CLI callers (e.g., API requests)
  - Update HFOverrides validation to allow integer types. Adds support for int, int8-64, uint, uint8-64 types for programmatic construction of HFOverrides maps
  - Add tests for vLLM HFOverrides argument generation. Tests simple/complex HFOverrides, nil/empty cases, and combined with context size configuration
  - Add tests for LlamaCpp reasoning budget configuration. Tests reasoning budget from backend config, nil cases, and combined with context size configuration
* ConfigureRequest embeds BackendConfiguration to improve readability
* chore: add go-shellwords dependency to go.mod and go.sum
1 parent f6b113a commit 0699420

21 files changed

+957
-134
lines changed

cmd/cli/commands/compose.go

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ func newComposeCmd() *cobra.Command {
3737
func newUpCommand() *cobra.Command {
3838
var models []string
3939
var ctxSize int64
40-
var rawRuntimeFlags string
4140
var backend string
4241
var draftModel string
4342
var numTokens int
@@ -70,9 +69,6 @@ func newUpCommand() *cobra.Command {
7069
if ctxSize > 0 {
7170
sendInfo(fmt.Sprintf("Setting context size to %d", ctxSize))
7271
}
73-
if rawRuntimeFlags != "" {
74-
sendInfo("Setting raw runtime flags to " + rawRuntimeFlags)
75-
}
7672

7773
// Build speculative config if any speculative flags are set
7874
var speculativeConfig *inference.SpeculativeDecodingConfig
@@ -87,14 +83,15 @@ func newUpCommand() *cobra.Command {
8783

8884
for _, model := range models {
8985
if err := desktopClient.ConfigureBackend(scheduling.ConfigureRequest{
90-
Model: model,
91-
ContextSize: ctxSize,
92-
RawRuntimeFlags: rawRuntimeFlags,
93-
Speculative: speculativeConfig,
86+
Model: model,
87+
BackendConfiguration: inference.BackendConfiguration{
88+
ContextSize: ctxSize,
89+
Speculative: speculativeConfig,
90+
},
9491
}); err != nil {
95-
configErrFmtString := "failed to configure backend for model %s with context-size %d and runtime-flags %s"
96-
_ = sendErrorf(configErrFmtString+": %v", model, ctxSize, rawRuntimeFlags, err)
97-
return fmt.Errorf(configErrFmtString+": %w", model, ctxSize, rawRuntimeFlags, err)
92+
configErrFmtString := "failed to configure backend for model %s with context-size %d"
93+
_ = sendErrorf(configErrFmtString+": %v", model, ctxSize, err)
94+
return fmt.Errorf(configErrFmtString+": %w", model, ctxSize, err)
9895
}
9996
sendInfo("Successfully configured backend for model " + model)
10097
}
@@ -116,7 +113,6 @@ func newUpCommand() *cobra.Command {
116113
}
117114
c.Flags().StringArrayVar(&models, "model", nil, "model to use")
118115
c.Flags().Int64Var(&ctxSize, "context-size", -1, "context size for the model")
119-
c.Flags().StringVar(&rawRuntimeFlags, "runtime-flags", "", "raw runtime flags to pass to the inference engine")
120116
c.Flags().StringVar(&backend, "backend", llamacpp.Name, "inference backend to use")
121117
c.Flags().StringVar(&draftModel, "speculative-draft-model", "", "draft model for speculative decoding")
122118
c.Flags().IntVar(&numTokens, "speculative-num-tokens", 0, "number of tokens to predict speculatively")

cmd/cli/commands/configure.go

Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package commands
22

33
import (
4+
"encoding/json"
45
"fmt"
56

67
"github.com/docker/model-runner/cmd/cli/commands/completion"
@@ -15,32 +16,21 @@ func newConfigureCmd() *cobra.Command {
1516
var draftModel string
1617
var numTokens int
1718
var minAcceptanceRate float64
19+
var hfOverrides string
20+
var reasoningBudget int64
1821

1922
c := &cobra.Command{
20-
Use: "configure [--context-size=<n>] [--speculative-draft-model=<model>] MODEL [-- <runtime-flags...>]",
23+
Use: "configure [--context-size=<n>] [--speculative-draft-model=<model>] [--hf_overrides=<json>] [--reasoning-budget=<n>] MODEL",
2124
Short: "Configure runtime options for a model",
2225
Hidden: true,
2326
Args: func(cmd *cobra.Command, args []string) error {
24-
argsBeforeDash := cmd.ArgsLenAtDash()
25-
if argsBeforeDash == -1 {
26-
// No "--" used, so we need exactly 1 total argument.
27-
if len(args) != 1 {
28-
return fmt.Errorf(
29-
"Exactly one model must be specified, got %d: %v\n\n"+
30-
"See 'docker model configure --help' for more information",
31-
len(args), args)
32-
}
33-
} else {
34-
// Has "--", so we need exactly 1 argument before it.
35-
if argsBeforeDash != 1 {
36-
return fmt.Errorf(
37-
"Exactly one model must be specified before --, got %d\n\n"+
38-
"See 'docker model configure --help' for more information",
39-
argsBeforeDash)
40-
}
27+
if len(args) != 1 {
28+
return fmt.Errorf(
29+
"Exactly one model must be specified, got %d: %v\n\n"+
30+
"See 'docker model configure --help' for more information",
31+
len(args), args)
4132
}
4233
opts.Model = args[0]
43-
opts.RuntimeFlags = args[1:]
4434
return nil
4535
},
4636
RunE: func(cmd *cobra.Command, args []string) error {
@@ -52,6 +42,30 @@ func newConfigureCmd() *cobra.Command {
5242
MinAcceptanceRate: minAcceptanceRate,
5343
}
5444
}
45+
// Parse and validate HuggingFace overrides if provided (vLLM-specific)
46+
if hfOverrides != "" {
47+
var hfo inference.HFOverrides
48+
if err := json.Unmarshal([]byte(hfOverrides), &hfo); err != nil {
49+
return fmt.Errorf("invalid --hf_overrides JSON: %w", err)
50+
}
51+
// Validate the overrides to prevent command injection
52+
if err := hfo.Validate(); err != nil {
53+
return err
54+
}
55+
if opts.VLLM == nil {
56+
opts.VLLM = &inference.VLLMConfig{}
57+
}
58+
opts.VLLM.HFOverrides = hfo
59+
}
60+
// Set llama.cpp-specific reasoning budget if explicitly provided
61+
// Note: We check if flag was changed rather than checking value > 0
62+
// because 0 is a valid value (disables reasoning) and -1 means unlimited
63+
if cmd.Flags().Changed("reasoning-budget") {
64+
if opts.LlamaCpp == nil {
65+
opts.LlamaCpp = &inference.LlamaCppConfig{}
66+
}
67+
opts.LlamaCpp.ReasoningBudget = &reasoningBudget
68+
}
5569
return desktopClient.ConfigureBackend(opts)
5670
},
5771
ValidArgsFunction: completion.ModelNames(getDesktopClient, -1),
@@ -61,5 +75,7 @@ func newConfigureCmd() *cobra.Command {
6175
c.Flags().StringVar(&draftModel, "speculative-draft-model", "", "draft model for speculative decoding")
6276
c.Flags().IntVar(&numTokens, "speculative-num-tokens", 0, "number of tokens to predict speculatively")
6377
c.Flags().Float64Var(&minAcceptanceRate, "speculative-min-acceptance-rate", 0, "minimum acceptance rate for speculative decoding")
78+
c.Flags().StringVar(&hfOverrides, "hf_overrides", "", "HuggingFace model config overrides (JSON) - vLLM only")
79+
c.Flags().Int64Var(&reasoningBudget, "reasoning-budget", 0, "reasoning budget for reasoning models - llama.cpp only")
6480
return c
6581
}

cmd/cli/commands/configure_test.go

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
package commands
2+
3+
import (
4+
"testing"
5+
)
6+
7+
func TestConfigureCmdReasoningBudgetFlag(t *testing.T) {
8+
// Create the configure command
9+
cmd := newConfigureCmd()
10+
11+
// Verify the --reasoning-budget flag exists
12+
reasoningBudgetFlag := cmd.Flags().Lookup("reasoning-budget")
13+
if reasoningBudgetFlag == nil {
14+
t.Fatal("--reasoning-budget flag not found")
15+
}
16+
17+
// Verify the default value is 0
18+
if reasoningBudgetFlag.DefValue != "0" {
19+
t.Errorf("Expected default reasoning-budget value to be '0', got '%s'", reasoningBudgetFlag.DefValue)
20+
}
21+
22+
// Verify the flag type
23+
if reasoningBudgetFlag.Value.Type() != "int64" {
24+
t.Errorf("Expected reasoning-budget flag type to be 'int64', got '%s'", reasoningBudgetFlag.Value.Type())
25+
}
26+
}
27+
28+
func TestConfigureCmdReasoningBudgetFlagChanged(t *testing.T) {
29+
tests := []struct {
30+
name string
31+
setValue string
32+
expectChanged bool
33+
expectedValue int64
34+
}{
35+
{
36+
name: "flag not set - should not be changed",
37+
setValue: "",
38+
expectChanged: false,
39+
expectedValue: 0,
40+
},
41+
{
42+
name: "flag set to 0 (disable reasoning) - should be changed",
43+
setValue: "0",
44+
expectChanged: true,
45+
expectedValue: 0,
46+
},
47+
{
48+
name: "flag set to -1 (unlimited) - should be changed",
49+
setValue: "-1",
50+
expectChanged: true,
51+
expectedValue: -1,
52+
},
53+
{
54+
name: "flag set to positive value - should be changed",
55+
setValue: "1024",
56+
expectChanged: true,
57+
expectedValue: 1024,
58+
},
59+
}
60+
61+
for _, tt := range tests {
62+
t.Run(tt.name, func(t *testing.T) {
63+
// Create a fresh configure command for each test
64+
cmd := newConfigureCmd()
65+
66+
// Only set the flag if setValue is not empty
67+
if tt.setValue != "" {
68+
err := cmd.Flags().Set("reasoning-budget", tt.setValue)
69+
if err != nil {
70+
t.Fatalf("Failed to set reasoning-budget flag: %v", err)
71+
}
72+
}
73+
74+
// Check if the flag was marked as changed
75+
isChanged := cmd.Flags().Changed("reasoning-budget")
76+
if isChanged != tt.expectChanged {
77+
t.Errorf("Expected Changed() = %v, got %v", tt.expectChanged, isChanged)
78+
}
79+
80+
// Verify the value
81+
value, err := cmd.Flags().GetInt64("reasoning-budget")
82+
if err != nil {
83+
t.Fatalf("Failed to get reasoning-budget flag value: %v", err)
84+
}
85+
if value != tt.expectedValue {
86+
t.Errorf("Expected value = %d, got %d", tt.expectedValue, value)
87+
}
88+
})
89+
}
90+
}
91+
92+
func TestConfigureCmdHfOverridesFlag(t *testing.T) {
93+
// Create the configure command
94+
cmd := newConfigureCmd()
95+
96+
// Verify the --hf_overrides flag exists
97+
hfOverridesFlag := cmd.Flags().Lookup("hf_overrides")
98+
if hfOverridesFlag == nil {
99+
t.Fatal("--hf_overrides flag not found")
100+
}
101+
102+
// Verify the default value is empty
103+
if hfOverridesFlag.DefValue != "" {
104+
t.Errorf("Expected default hf_overrides value to be empty, got '%s'", hfOverridesFlag.DefValue)
105+
}
106+
107+
// Verify the flag type
108+
if hfOverridesFlag.Value.Type() != "string" {
109+
t.Errorf("Expected hf_overrides flag type to be 'string', got '%s'", hfOverridesFlag.Value.Type())
110+
}
111+
}
112+
113+
func TestConfigureCmdContextSizeFlag(t *testing.T) {
114+
// Create the configure command
115+
cmd := newConfigureCmd()
116+
117+
// Verify the --context-size flag exists
118+
contextSizeFlag := cmd.Flags().Lookup("context-size")
119+
if contextSizeFlag == nil {
120+
t.Fatal("--context-size flag not found")
121+
}
122+
123+
// Verify the default value is -1 (indicating not set)
124+
if contextSizeFlag.DefValue != "-1" {
125+
t.Errorf("Expected default context-size value to be '-1', got '%s'", contextSizeFlag.DefValue)
126+
}
127+
128+
// Test setting the flag value
129+
err := cmd.Flags().Set("context-size", "8192")
130+
if err != nil {
131+
t.Errorf("Failed to set context-size flag: %v", err)
132+
}
133+
134+
// Verify the value was set
135+
contextSizeValue, err := cmd.Flags().GetInt64("context-size")
136+
if err != nil {
137+
t.Errorf("Failed to get context-size flag value: %v", err)
138+
}
139+
140+
if contextSizeValue != 8192 {
141+
t.Errorf("Expected context-size flag value to be 8192, got %d", contextSizeValue)
142+
}
143+
}
144+
145+
func TestConfigureCmdSpeculativeFlags(t *testing.T) {
146+
cmd := newConfigureCmd()
147+
148+
// Test speculative-draft-model flag
149+
draftModelFlag := cmd.Flags().Lookup("speculative-draft-model")
150+
if draftModelFlag == nil {
151+
t.Fatal("--speculative-draft-model flag not found")
152+
}
153+
154+
// Test speculative-num-tokens flag
155+
numTokensFlag := cmd.Flags().Lookup("speculative-num-tokens")
156+
if numTokensFlag == nil {
157+
t.Fatal("--speculative-num-tokens flag not found")
158+
}
159+
160+
// Test speculative-min-acceptance-rate flag
161+
minAcceptanceRateFlag := cmd.Flags().Lookup("speculative-min-acceptance-rate")
162+
if minAcceptanceRateFlag == nil {
163+
t.Fatal("--speculative-min-acceptance-rate flag not found")
164+
}
165+
}

cmd/cli/docs/reference/docker_model_compose_up.yaml

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,6 @@ options:
3333
experimentalcli: false
3434
kubernetes: false
3535
swarm: false
36-
- option: runtime-flags
37-
value_type: string
38-
description: raw runtime flags to pass to the inference engine
39-
deprecated: false
40-
hidden: false
41-
experimental: false
42-
experimentalcli: false
43-
kubernetes: false
44-
swarm: false
4536
- option: speculative-draft-model
4637
value_type: string
4738
description: draft model for speculative decoding

cmd/cli/docs/reference/docker_model_configure.yaml

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
command: docker model configure
22
short: Configure runtime options for a model
33
long: Configure runtime options for a model
4-
usage: docker model configure [--context-size=<n>] [--speculative-draft-model=<model>] MODEL [-- <runtime-flags...>]
4+
usage: docker model configure [--context-size=<n>] [--speculative-draft-model=<model>] [--hf_overrides=<json>] [--reasoning-budget=<n>] MODEL
55
pname: docker model
66
plink: docker_model.yaml
77
options:
@@ -15,6 +15,25 @@ options:
1515
experimentalcli: false
1616
kubernetes: false
1717
swarm: false
18+
- option: hf_overrides
19+
value_type: string
20+
description: HuggingFace model config overrides (JSON) - vLLM only
21+
deprecated: false
22+
hidden: false
23+
experimental: false
24+
experimentalcli: false
25+
kubernetes: false
26+
swarm: false
27+
- option: reasoning-budget
28+
value_type: int64
29+
default_value: "0"
30+
description: reasoning budget for reasoning models - llama.cpp only
31+
deprecated: false
32+
hidden: false
33+
experimental: false
34+
experimentalcli: false
35+
kubernetes: false
36+
swarm: false
1837
- option: speculative-draft-model
1938
value_type: string
2039
description: draft model for speculative decoding

go.mod

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ require (
1111
github.com/gpustack/gguf-parser-go v0.22.1
1212
github.com/jaypipes/ghw v0.19.1
1313
github.com/kolesnikovae/go-winjob v1.0.0
14-
github.com/mattn/go-shellwords v1.0.12
1514
github.com/opencontainers/go-digest v1.0.0
1615
github.com/opencontainers/image-spec v1.1.1
1716
github.com/prometheus/client_model v0.6.2

go.sum

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,6 @@ github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
8484
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
8585
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
8686
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
87-
github.com/mattn/go-shellwords v1.0.12 h1:M2zGm7EW6UQJvDeQxo4T51eKPurbeFbe8WtebGE2xrk=
88-
github.com/mattn/go-shellwords v1.0.12/go.mod h1:EZzvwXDESEeg03EKmM+RmDnNOPKG4lLtQsUlTZDWQ8Y=
8987
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
9088
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
9189
github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg=

go.work.sum

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,6 @@ github.com/denisenkom/go-mssqldb v0.0.0-20191128021309-1d7a30a10f73 h1:OGNva6Whs
141141
github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c h1:+pKlWGMw7gf6bQ+oDZB4KHQFypsfjYlq/C4rfL7D3g8=
142142
github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c/go.mod h1:Uw6UezgYA44ePAFQYUehOuCzmy5zmg/+nl2ZfMWGkpA=
143143
github.com/docker/libtrust v0.0.0-20160708172513-aabc10ec26b7 h1:UhxFibDNY/bfvqU5CAUmr9zpesgbU6SWc8/B4mflAE4=
144-
github.com/docker/model-runner v1.0.3/go.mod h1:qRIuXMeZ5dnL4A9e/+BUtQOSZAS0PBxUkLN51XYpLOE=
145144
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
146145
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
147146
github.com/dvsekhvalnov/jose2go v0.0.0-20170216131308-f21a8cedbbae h1:UTOyRlLeWJrZx+ynml6q6qzZ1uDkJe/0Z5CMZRbEIJg=
@@ -238,8 +237,6 @@ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
238237
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
239238
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
240239
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
241-
github.com/google/go-containerregistry v0.20.6 h1:cvWX87UxxLgaH76b4hIvya6Dzz9qHB31qAwjAohdSTU=
242-
github.com/google/go-containerregistry v0.20.6/go.mod h1:T0x8MuoAoKX/873bkeSfLD2FAkwCDf9/HZgsFJ02E2Y=
243240
github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
244241
github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
245242
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8=
@@ -331,6 +328,8 @@ github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXq
331328
github.com/magiconair/properties v1.5.3 h1:C8fxWnhYyME3n0klPOhVM7PtYUB3eV1W3DeFmN3j53Y=
332329
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
333330
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
331+
github.com/mattn/go-shellwords v1.0.12 h1:M2zGm7EW6UQJvDeQxo4T51eKPurbeFbe8WtebGE2xrk=
332+
github.com/mattn/go-shellwords v1.0.12/go.mod h1:EZzvwXDESEeg03EKmM+RmDnNOPKG4lLtQsUlTZDWQ8Y=
334333
github.com/mattn/go-sqlite3 v1.6.0 h1:TDwTWbeII+88Qy55nWlof0DclgAtI4LqGujkYMzmQII=
335334
github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU=
336335
github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo=

0 commit comments

Comments
 (0)