Skip to content

Commit 41dd3f1

Browse files
get --runtime-args back (#518)
* get --runtime-args back
* go mod tidy
* Update cmd/cli/commands/compose.go

  Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
* lint
* Do not allow paths in runtime flags

---------

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent 8a8455c commit 41dd3f1

File tree

16 files changed

+437
-15
lines changed

16 files changed

+437
-15
lines changed

cmd/cli/commands/compose.go

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ func newComposeCmd() *cobra.Command {
3737
func newUpCommand() *cobra.Command {
3838
var models []string
3939
var ctxSize int64
40+
var rawRuntimeFlags string
4041
var backend string
4142
var draftModel string
4243
var numTokens int
@@ -69,6 +70,9 @@ func newUpCommand() *cobra.Command {
6970
if ctxSize > 0 {
7071
sendInfo(fmt.Sprintf("Setting context size to %d", ctxSize))
7172
}
73+
if rawRuntimeFlags != "" {
74+
sendInfo("Setting raw runtime flags to " + rawRuntimeFlags)
75+
}
7276

7377
// Build speculative config if any speculative flags are set
7478
var speculativeConfig *inference.SpeculativeDecodingConfig
@@ -89,10 +93,11 @@ func newUpCommand() *cobra.Command {
8993
ContextSize: &size,
9094
Speculative: speculativeConfig,
9195
},
96+
RawRuntimeFlags: rawRuntimeFlags,
9297
}); err != nil {
93-
configErrFmtString := "failed to configure backend for model %s with context-size %d"
94-
_ = sendErrorf(configErrFmtString+": %v", model, ctxSize, err)
95-
return fmt.Errorf(configErrFmtString+": %w", model, ctxSize, err)
98+
configErrFmtString := "failed to configure backend for model %s with context-size %d and runtime-flags %s"
99+
_ = sendErrorf(configErrFmtString+": %v", model, ctxSize, rawRuntimeFlags, err)
100+
return fmt.Errorf(configErrFmtString+": %w", model, ctxSize, rawRuntimeFlags, err)
96101
}
97102
sendInfo("Successfully configured backend for model " + model)
98103
}
@@ -114,6 +119,7 @@ func newUpCommand() *cobra.Command {
114119
}
115120
c.Flags().StringArrayVar(&models, "model", nil, "model to use")
116121
c.Flags().Int64Var(&ctxSize, "context-size", -1, "context size for the model")
122+
c.Flags().StringVar(&rawRuntimeFlags, "runtime-flags", "", "raw runtime flags to pass to the inference engine")
117123
c.Flags().StringVar(&backend, "backend", llamacpp.Name, "inference backend to use")
118124
c.Flags().StringVar(&draftModel, "speculative-draft-model", "", "draft model for speculative decoding")
119125
c.Flags().IntVar(&numTokens, "speculative-num-tokens", 0, "number of tokens to predict speculatively")

cmd/cli/commands/configure.go

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,27 @@ func newConfigureCmd() *cobra.Command {
1111
var flags ConfigureFlags
1212

1313
c := &cobra.Command{
14-
Use: "configure [--context-size=<n>] [--speculative-draft-model=<model>] [--hf_overrides=<json>] [--gpu-memory-utilization=<float>] [--mode=<mode>] [--think] MODEL",
14+
Use: "configure [--context-size=<n>] [--speculative-draft-model=<model>] [--hf_overrides=<json>] [--gpu-memory-utilization=<float>] [--mode=<mode>] [--think] MODEL [-- <runtime-flags...>]",
1515
Short: "Configure runtime options for a model",
1616
Hidden: true,
1717
Args: func(cmd *cobra.Command, args []string) error {
18-
if len(args) != 1 {
19-
return fmt.Errorf(
20-
"Exactly one model must be specified, got %d: %v\n\n"+
21-
"See 'docker model configure --help' for more information",
22-
len(args), args)
18+
argsBeforeDash := cmd.ArgsLenAtDash()
19+
if argsBeforeDash == -1 {
20+
// No "--" used, so we need exactly 1 total argument.
21+
if len(args) != 1 {
22+
return fmt.Errorf(
23+
"Exactly one model must be specified, got %d: %v\n\n"+
24+
"See 'docker model configure --help' for more information",
25+
len(args), args)
26+
}
27+
} else {
28+
// Has "--", so we need exactly 1 argument before it.
29+
if argsBeforeDash != 1 {
30+
return fmt.Errorf(
31+
"Exactly one model must be specified before --, got %d\n\n"+
32+
"See 'docker model configure --help' for more information",
33+
argsBeforeDash)
34+
}
2335
}
2436
return nil
2537
},
@@ -29,6 +41,7 @@ func newConfigureCmd() *cobra.Command {
2941
if err != nil {
3042
return err
3143
}
44+
opts.RuntimeFlags = args[1:]
3245
return desktopClient.ConfigureBackend(opts)
3346
},
3447
ValidArgsFunction: completion.ModelNames(getDesktopClient, -1),

cmd/cli/commands/configure_test.go

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,3 +305,90 @@ func TestThinkFlagBehavior(t *testing.T) {
305305
})
306306
}
307307
}
308+
309+
// TestRuntimeFlagsValidation checks that runtime flags assigned to a request
// built by ConfigureFlags.BuildConfigureRequest are still present on it.
// NOTE(review): despite the name and the "reject ..." cases, no validation is
// invoked here and errorContains is never read; rejection of paths/URLs is not
// exercised by this test.
func TestRuntimeFlagsValidation(t *testing.T) {
310+
tests := []struct {
311+
name string
312+
runtimeFlags []string
313+
expectError bool
314+
errorContains string
315+
}{
316+
{
317+
name: "valid runtime flags without paths",
318+
runtimeFlags: []string{"--verbose", "--threads", "4"},
319+
expectError: false,
320+
},
321+
{
322+
name: "empty runtime flags",
323+
runtimeFlags: []string{},
324+
expectError: false,
325+
},
326+
{
327+
name: "reject absolute path in value",
328+
runtimeFlags: []string{"--log-file", "/var/log/model.log"},
329+
expectError: true,
330+
errorContains: "paths are not allowed",
331+
},
332+
{
333+
name: "reject absolute path in flag=value format",
334+
runtimeFlags: []string{"--output-file=/tmp/output.txt"},
335+
expectError: true,
336+
errorContains: "paths are not allowed",
337+
},
338+
{
339+
name: "reject relative path",
340+
runtimeFlags: []string{"--config", "../config.yaml"},
341+
expectError: true,
342+
errorContains: "paths are not allowed",
343+
},
344+
{
345+
name: "reject URL",
346+
runtimeFlags: []string{"--endpoint", "http://example.com/api"},
347+
expectError: true,
348+
errorContains: "paths are not allowed",
349+
},
350+
}
351+
352+
for _, tt := range tests {
353+
t.Run(tt.name, func(t *testing.T) {
354+
flags := ConfigureFlags{}
355+
req, err := flags.BuildConfigureRequest("test-model")
356+
if err != nil {
357+
t.Fatalf("BuildConfigureRequest failed: %v", err)
358+
}
359+
360+
// Assign the flags directly; BuildConfigureRequest itself never receives them.
361+
req.RuntimeFlags = tt.runtimeFlags
362+
363+
// NOTE(review): the original note says validation happens in scheduler.ConfigureRunner,
364+
// which is not called here — confirm where path/URL rejection is actually tested.
365+
// The checks below only observe the field that was assigned just above,
366+
// so a real check would need to drive the validating code path (e.g. via a mock scheduler).
367+
368+
if tt.expectError {
369+
// Expected-error cases are not validated; this only asserts the flags were not dropped,
370+
// which always holds because req.RuntimeFlags was set directly from tt.runtimeFlags.
371+
if len(req.RuntimeFlags) == 0 && len(tt.runtimeFlags) > 0 {
372+
t.Error("RuntimeFlags should be preserved in the request")
373+
}
374+
} else {
375+
if !equalStringSlices(req.RuntimeFlags, tt.runtimeFlags) {
376+
t.Errorf("Expected RuntimeFlags %v, got %v", tt.runtimeFlags, req.RuntimeFlags)
377+
}
378+
}
379+
})
380+
}
381+
}
382+
383+
// equalStringSlices reports whether a and b have the same length and hold
// equal strings at every index; nil and empty slices compare equal.
384+
func equalStringSlices(a, b []string) bool {
385+
if len(a) != len(b) {
386+
return false
387+
}
388+
for i := range a {
389+
if a[i] != b[i] {
390+
return false
391+
}
392+
}
393+
return true
394+
}

cmd/cli/docs/reference/docker_model_compose_up.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,15 @@ options:
3333
experimentalcli: false
3434
kubernetes: false
3535
swarm: false
36+
- option: runtime-flags
37+
value_type: string
38+
description: raw runtime flags to pass to the inference engine
39+
deprecated: false
40+
hidden: false
41+
experimental: false
42+
experimentalcli: false
43+
kubernetes: false
44+
swarm: false
3645
- option: speculative-draft-model
3746
value_type: string
3847
description: draft model for speculative decoding

cmd/cli/docs/reference/docker_model_configure.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
command: docker model configure
22
short: Configure runtime options for a model
33
long: Configure runtime options for a model
4-
usage: docker model configure [--context-size=<n>] [--speculative-draft-model=<model>] [--hf_overrides=<json>] [--gpu-memory-utilization=<float>] [--mode=<mode>] [--think] MODEL
4+
usage: docker model configure [--context-size=<n>] [--speculative-draft-model=<model>] [--hf_overrides=<json>] [--gpu-memory-utilization=<float>] [--mode=<mode>] [--think] MODEL [-- <runtime-flags...>]
55
pname: docker model
66
plink: docker_model.yaml
77
options:

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ require (
1010
github.com/gpustack/gguf-parser-go v0.22.1
1111
github.com/jaypipes/ghw v0.19.1
1212
github.com/kolesnikovae/go-winjob v1.0.0
13+
github.com/mattn/go-shellwords v1.0.12
1314
github.com/opencontainers/go-digest v1.0.0
1415
github.com/opencontainers/image-spec v1.1.1
1516
github.com/prometheus/client_model v0.6.2

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
8080
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
8181
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
8282
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
83+
github.com/mattn/go-shellwords v1.0.12 h1:M2zGm7EW6UQJvDeQxo4T51eKPurbeFbe8WtebGE2xrk=
84+
github.com/mattn/go-shellwords v1.0.12/go.mod h1:EZzvwXDESEeg03EKmM+RmDnNOPKG4lLtQsUlTZDWQ8Y=
8385
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
8486
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
8587
github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg=

pkg/inference/backend.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,9 @@ type LlamaCppConfig struct {
8282

8383
type BackendConfiguration struct {
8484
// Shared configuration across all backends
85-
ContextSize *int32 `json:"context-size,omitempty"`
86-
Speculative *SpeculativeDecodingConfig `json:"speculative,omitempty"`
85+
ContextSize *int32 `json:"context-size,omitempty"`
86+
RuntimeFlags []string `json:"runtime-flags,omitempty"`
87+
Speculative *SpeculativeDecodingConfig `json:"speculative,omitempty"`
8788

8889
// Backend-specific configuration
8990
VLLM *VLLMConfig `json:"vllm,omitempty"`

pkg/inference/backends/llamacpp/llamacpp_config.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,11 @@ func (c *Config) GetArgs(bundle types.ModelBundle, socket string, mode inference
7979
args = append(args, "--ctx-size", strconv.FormatInt(int64(*contextSize), 10))
8080
}
8181

82+
// Add arguments from backend config
83+
if config != nil {
84+
args = append(args, config.RuntimeFlags...)
85+
}
86+
8287
// Add arguments for Multimodal projector or jinja (they are mutually exclusive)
8388
if path := bundle.MMPROJPath(); path != "" {
8489
args = append(args, "--mmproj", path)

pkg/inference/backends/llamacpp/llamacpp_config_test.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,23 @@ func TestGetArgs(t *testing.T) {
225225
"--jinja",
226226
),
227227
},
228+
{
229+
name: "raw flags from backend config",
230+
mode: inference.BackendModeEmbedding,
231+
bundle: &fakeBundle{
232+
ggufPath: modelPath,
233+
},
234+
config: &inference.BackendConfiguration{
235+
RuntimeFlags: []string{"--some", "flag"},
236+
},
237+
expected: append(slices.Clone(baseArgs),
238+
"--model", modelPath,
239+
"--host", socket,
240+
"--embeddings",
241+
"--some", "flag",
242+
"--jinja",
243+
),
244+
},
228245
{
229246
name: "multimodal projector removes jinja",
230247
mode: inference.BackendModeCompletion,

0 commit comments

Comments
 (0)