diff --git a/cmd/cli/commands/compose.go b/cmd/cli/commands/compose.go index 5f3169a5..21386296 100644 --- a/cmd/cli/commands/compose.go +++ b/cmd/cli/commands/compose.go @@ -34,19 +34,6 @@ func newComposeCmd() *cobra.Command { return c } -// Reasoning budget constants for the think parameter conversion -const ( - reasoningBudgetUnlimited int32 = -1 - reasoningBudgetDisabled int32 = 0 - reasoningBudgetMedium int32 = 1024 - reasoningBudgetLow int32 = 256 -) - -// ptr is a helper function to create a pointer to int32 -func ptr(v int32) *int32 { - return &v -} - func newUpCommand() *cobra.Command { var models []string var ctxSize int64 @@ -54,8 +41,6 @@ func newUpCommand() *cobra.Command { var draftModel string var numTokens int var minAcceptanceRate float64 - var mode string - var think string c := &cobra.Command{ Use: "up", RunE: func(cmd *cobra.Command, args []string) error { @@ -81,7 +66,7 @@ func newUpCommand() *cobra.Command { return err } - if cmd.Flags().Changed("context-size") { + if ctxSize > 0 { sendInfo(fmt.Sprintf("Setting context size to %d", ctxSize)) } @@ -96,52 +81,14 @@ func newUpCommand() *cobra.Command { sendInfo(fmt.Sprintf("Enabling speculative decoding with draft model: %s", draftModel)) } - // Parse mode if provided - var backendMode *inference.BackendMode - if mode != "" { - parsedMode, err := parseBackendMode(mode) - if err != nil { - _ = sendError(err.Error()) - return err - } - backendMode = &parsedMode - sendInfo(fmt.Sprintf("Setting backend mode to %s", mode)) - } - - // Parse think parameter for reasoning budget - var reasoningBudget *int32 - if think != "" { - budget, err := parseThinkToReasoningBudget(think) - if err != nil { - _ = sendError(err.Error()) - return err - } - reasoningBudget = budget - sendInfo(fmt.Sprintf("Setting think mode to %s", think)) - } - for _, model := range models { - configuration := inference.BackendConfiguration{ - Speculative: speculativeConfig, - } - if cmd.Flags().Changed("context-size") { - // TODO is the context size the same for all models? - v := int32(ctxSize) - configuration.ContextSize = &v - } - - // Set llama.cpp-specific reasoning budget if provided - if reasoningBudget != nil { - if configuration.LlamaCpp == nil { - configuration.LlamaCpp = &inference.LlamaCppConfig{} - } - configuration.LlamaCpp.ReasoningBudget = reasoningBudget - } - + size := int32(ctxSize) if err := desktopClient.ConfigureBackend(scheduling.ConfigureRequest{ - Model: model, - Mode: backendMode, - BackendConfiguration: configuration, + Model: model, + BackendConfiguration: inference.BackendConfiguration{ + ContextSize: &size, + Speculative: speculativeConfig, + }, }); err != nil { configErrFmtString := "failed to configure backend for model %s with context-size %d" _ = sendErrorf(configErrFmtString+": %v", model, ctxSize, err) @@ -171,57 +118,10 @@ func newUpCommand() *cobra.Command { c.Flags().StringVar(&draftModel, "speculative-draft-model", "", "draft model for speculative decoding") c.Flags().IntVar(&numTokens, "speculative-num-tokens", 0, "number of tokens to predict speculatively") c.Flags().Float64Var(&minAcceptanceRate, "speculative-min-acceptance-rate", 0, "minimum acceptance rate for speculative decoding") - c.Flags().StringVar(&mode, "mode", "", "backend operation mode (completion, embedding, reranking)") - c.Flags().StringVar(&think, "think", "", "enable reasoning mode for thinking models (true/false/high/medium/low)") _ = c.MarkFlagRequired("model") return c } -// parseBackendMode parses a string mode value into an inference.BackendMode. -func parseBackendMode(mode string) (inference.BackendMode, error) { - switch strings.ToLower(mode) { - case "completion": - return inference.BackendModeCompletion, nil - case "embedding": - return inference.BackendModeEmbedding, nil - case "reranking": - return inference.BackendModeReranking, nil - default: - return inference.BackendModeCompletion, fmt.Errorf("invalid mode %q: must be one of completion, embedding, reranking", mode) - } -} - -// parseThinkToReasoningBudget converts the think parameter string to a reasoning budget value. -// Accepts: "true", "false", "high", "medium", "low" -// Returns: -// - nil for empty string or "true" (use server default, which is unlimited) -// - -1 for "high" (explicitly set unlimited) -// - 0 for "false" (disable thinking) -// - 1024 for "medium" -// - 256 for "low" -func parseThinkToReasoningBudget(think string) (*int32, error) { - if think == "" { - return nil, nil - } - - switch strings.ToLower(think) { - case "true": - // Use nil to let the server use its default (currently unlimited) - return nil, nil - case "high": - // Explicitly set unlimited reasoning budget - return ptr(reasoningBudgetUnlimited), nil - case "false": - return ptr(reasoningBudgetDisabled), nil - case "medium": - return ptr(reasoningBudgetMedium), nil - case "low": - return ptr(reasoningBudgetLow), nil - default: - return nil, fmt.Errorf("invalid think value %q: must be one of true, false, high, medium, low", think) - } -} - func newDownCommand() *cobra.Command { c := &cobra.Command{ Use: "down", diff --git a/cmd/cli/commands/compose_test.go b/cmd/cli/commands/compose_test.go index 5d2de01b..d4a8b59e 100644 --- a/cmd/cli/commands/compose_test.go +++ b/cmd/cli/commands/compose_test.go @@ -71,84 +71,3 @@ func TestParseBackendMode(t *testing.T) { }) } } - -func TestParseThinkToReasoningBudget(t *testing.T) { - tests := []struct { - name string - input string - expected *int32 - expectError bool - }{ - { - name: "empty string returns nil", - input: "", - expected: nil, - expectError: false, - }, - { - name: "true returns nil (use server default)", - input: "true", - expected: nil, - expectError: false, - }, - { - name: "TRUE returns nil (case insensitive)", - input: "TRUE", - expected: nil, - expectError: false, - }, - { - name: "false disables reasoning", - input: "false", - expected: ptr(reasoningBudgetDisabled), - expectError: false, - }, - { - name: "high explicitly sets unlimited (-1)", - input: "high", - expected: ptr(reasoningBudgetUnlimited), - expectError: false, - }, - { - name: "medium sets 1024 tokens", - input: "medium", - expected: ptr(reasoningBudgetMedium), - expectError: false, - }, - { - name: "low sets 256 tokens", - input: "low", - expected: ptr(reasoningBudgetLow), - expectError: false, - }, - { - name: "invalid value returns error", - input: "invalid", - expected: nil, - expectError: true, - }, - { - name: "numeric string returns error", - input: "1024", - expected: nil, - expectError: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result, err := parseThinkToReasoningBudget(tt.input) - if tt.expectError { - require.Error(t, err) - } else { - require.NoError(t, err) - if tt.expected == nil { - assert.Nil(t, result) - } else { - require.NotNil(t, result) - assert.Equal(t, *tt.expected, *result) - } - } - }) - } -} diff --git a/cmd/cli/commands/configure.go b/cmd/cli/commands/configure.go index debdd59e..6c5da30d 100644 --- a/cmd/cli/commands/configure.go +++ b/cmd/cli/commands/configure.go @@ -1,59 +1,17 @@ package commands import ( - "encoding/json" "fmt" - "strconv" "github.com/docker/model-runner/cmd/cli/commands/completion" - "github.com/docker/model-runner/pkg/inference" - - "github.com/docker/model-runner/pkg/inference/scheduling" "github.com/spf13/cobra" ) -// Int32PtrValue implements pflag.Value interface for *int32 pointers -// This allows flags to have a nil default value instead of 0 -type Int32PtrValue struct { - ptr **int32 -} - -func NewInt32PtrValue(p **int32) *Int32PtrValue { - return &Int32PtrValue{ptr: p} -} - -func (v *Int32PtrValue) String() string { - if v.ptr == nil || *v.ptr == nil { - return "" - } - return strconv.FormatInt(int64(**v.ptr), 10) -} - -func (v *Int32PtrValue) Set(s string) error { - val, err := strconv.ParseInt(s, 10, 32) - if err != nil { - return err - } - i32 := int32(val) - *v.ptr = &i32 - return nil -} - -func (v *Int32PtrValue) Type() string { - return "int32" -} - func newConfigureCmd() *cobra.Command { - var opts scheduling.ConfigureRequest - var draftModel string - var numTokens int - var minAcceptanceRate float64 - var hfOverrides string - var contextSize *int32 - var reasoningBudget *int32 + var flags ConfigureFlags c := &cobra.Command{ - Use: "configure [--context-size=] [--speculative-draft-model=] [--hf_overrides=] [--reasoning-budget=] MODEL", + Use: "configure [--context-size=] [--speculative-draft-model=] [--hf_overrides=] [--mode=] [--think] MODEL", Short: "Configure runtime options for a model", Hidden: true, Args: func(cmd *cobra.Command, args []string) error { @@ -63,53 +21,19 @@ func newConfigureCmd() *cobra.Command { "See 'docker model configure --help' for more information", len(args), args) } - opts.Model = args[0] return nil }, RunE: func(cmd *cobra.Command, args []string) error { - // contextSize is nil by default, only set if user provided the flag - opts.ContextSize = contextSize - // Build the speculative config if any speculative flags are set - if draftModel != "" || numTokens > 0 || minAcceptanceRate > 0 { - opts.Speculative = &inference.SpeculativeDecodingConfig{ - DraftModel: draftModel, - NumTokens: numTokens, - MinAcceptanceRate: minAcceptanceRate, - } - } - // Parse and validate HuggingFace overrides if provided (vLLM-specific) - if hfOverrides != "" { - var hfo inference.HFOverrides - if err := json.Unmarshal([]byte(hfOverrides), &hfo); err != nil { - return fmt.Errorf("invalid --hf_overrides JSON: %w", err) - } - // Validate the overrides to prevent command injection - if err := hfo.Validate(); err != nil { - return err - } - if opts.VLLM == nil { - opts.VLLM = &inference.VLLMConfig{} - } - opts.VLLM.HFOverrides = hfo - } - // Set llama.cpp-specific reasoning budget if provided - // reasoningBudget is nil by default, only set if user provided the flag - if reasoningBudget != nil { - if opts.LlamaCpp == nil { - opts.LlamaCpp = &inference.LlamaCppConfig{} - } - opts.LlamaCpp.ReasoningBudget = reasoningBudget + model := args[0] + opts, err := flags.BuildConfigureRequest(model) + if err != nil { + return err } return desktopClient.ConfigureBackend(opts) }, ValidArgsFunction: completion.ModelNames(getDesktopClient, -1), } - c.Flags().Var(NewInt32PtrValue(&contextSize), "context-size", "context size (in tokens)") - c.Flags().StringVar(&draftModel, "speculative-draft-model", "", "draft model for speculative decoding") - c.Flags().IntVar(&numTokens, "speculative-num-tokens", 0, "number of tokens to predict speculatively") - c.Flags().Float64Var(&minAcceptanceRate, "speculative-min-acceptance-rate", 0, "minimum acceptance rate for speculative decoding") - c.Flags().StringVar(&hfOverrides, "hf_overrides", "", "HuggingFace model config overrides (JSON) - vLLM only") - c.Flags().Var(NewInt32PtrValue(&reasoningBudget), "reasoning-budget", "reasoning budget for reasoning models - llama.cpp only") + flags.RegisterFlags(c) return c } diff --git a/cmd/cli/commands/configure_flags.go b/cmd/cli/commands/configure_flags.go new file mode 100644 index 00000000..abda4ffc --- /dev/null +++ b/cmd/cli/commands/configure_flags.go @@ -0,0 +1,204 @@ +package commands + +import ( + "encoding/json" + "fmt" + "strconv" + "strings" + + "github.com/docker/model-runner/pkg/inference" + "github.com/docker/model-runner/pkg/inference/scheduling" + "github.com/spf13/cobra" +) + +// Reasoning budget constants for the think parameter conversion +const ( + reasoningBudgetUnlimited int32 = -1 + reasoningBudgetDisabled int32 = 0 +) + +// Int32PtrValue implements pflag.Value interface for *int32 pointers +// This allows flags to have a nil default value instead of 0 +type Int32PtrValue struct { + ptr **int32 +} + +// NewInt32PtrValue creates a new Int32PtrValue for the given pointer +func NewInt32PtrValue(p **int32) *Int32PtrValue { + return &Int32PtrValue{ptr: p} +} + +func (v *Int32PtrValue) String() string { + if v.ptr == nil || *v.ptr == nil { + return "" + } + return strconv.FormatInt(int64(**v.ptr), 10) +} + +func (v *Int32PtrValue) Set(s string) error { + val, err := strconv.ParseInt(s, 10, 32) + if err != nil { + return err + } + i32 := int32(val) + *v.ptr = &i32 + return nil +} + +func (v *Int32PtrValue) Type() string { + return "int32" +} + +// BoolPtrValue implements pflag.Value interface for *bool pointers +// This allows flags to have a nil default value to detect if explicitly set +type BoolPtrValue struct { + ptr **bool +} + +// NewBoolPtrValue creates a new BoolPtrValue for the given pointer +func NewBoolPtrValue(p **bool) *BoolPtrValue { + return &BoolPtrValue{ptr: p} +} + +func (v *BoolPtrValue) String() string { + if v.ptr == nil || *v.ptr == nil { + return "" + } + return strconv.FormatBool(**v.ptr) +} + +func (v *BoolPtrValue) Set(s string) error { + val, err := strconv.ParseBool(s) + if err != nil { + return err + } + *v.ptr = &val + return nil +} + +func (v *BoolPtrValue) Type() string { + return "bool" +} + +func (v *BoolPtrValue) IsBoolFlag() bool { + return true +} + +// ptr is a helper function to create a pointer to int32 +func ptr(v int32) *int32 { + return &v +} + +// ConfigureFlags holds all the flags for configuring a model backend +type ConfigureFlags struct { + // Backend mode (completion, embedding, reranking) + Mode string + // ContextSize is the context size in tokens + ContextSize *int32 + // Speculative decoding flags + DraftModel string + NumTokens int + MinAcceptanceRate float64 + // vLLM-specific flags + HFOverrides string + // Think parameter for reasoning models + Think *bool +} + +// RegisterFlags registers all configuration flags on the given cobra command. +func (f *ConfigureFlags) RegisterFlags(cmd *cobra.Command) { + cmd.Flags().Var(NewInt32PtrValue(&f.ContextSize), "context-size", "context size (in tokens)") + cmd.Flags().StringVar(&f.DraftModel, "speculative-draft-model", "", "draft model for speculative decoding") + cmd.Flags().IntVar(&f.NumTokens, "speculative-num-tokens", 0, "number of tokens to predict speculatively") + cmd.Flags().Float64Var(&f.MinAcceptanceRate, "speculative-min-acceptance-rate", 0, "minimum acceptance rate for speculative decoding") + cmd.Flags().StringVar(&f.HFOverrides, "hf_overrides", "", "HuggingFace model config overrides (JSON) - vLLM only") + cmd.Flags().Var(NewBoolPtrValue(&f.Think), "think", "enable reasoning mode for thinking models") + cmd.Flags().StringVar(&f.Mode, "mode", "", "backend operation mode (completion, embedding, reranking)") +} + +// BuildConfigureRequest builds a scheduling.ConfigureRequest from the flags. +// The model parameter is the model name to configure. +func (f *ConfigureFlags) BuildConfigureRequest(model string) (scheduling.ConfigureRequest, error) { + req := scheduling.ConfigureRequest{ + Model: model, + } + + // Set context size + req.ContextSize = f.ContextSize + + // Build speculative config if any speculative flags are set + if f.DraftModel != "" || f.NumTokens > 0 || f.MinAcceptanceRate > 0 { + req.Speculative = &inference.SpeculativeDecodingConfig{ + DraftModel: f.DraftModel, + NumTokens: f.NumTokens, + MinAcceptanceRate: f.MinAcceptanceRate, + } + } + + // Parse and validate HuggingFace overrides if provided (vLLM-specific) + if f.HFOverrides != "" { + var hfo inference.HFOverrides + if err := json.Unmarshal([]byte(f.HFOverrides), &hfo); err != nil { + return req, fmt.Errorf("invalid --hf_overrides JSON: %w", err) + } + // Validate the overrides to prevent command injection + if err := hfo.Validate(); err != nil { + return req, err + } + if req.VLLM == nil { + req.VLLM = &inference.VLLMConfig{} + } + req.VLLM.HFOverrides = hfo + } + + // Set reasoning budget from --think flag + reasoningBudget := f.getReasoningBudget() + if reasoningBudget != nil { + if req.LlamaCpp == nil { + req.LlamaCpp = &inference.LlamaCppConfig{} + } + req.LlamaCpp.ReasoningBudget = reasoningBudget + } + + // Parse mode if provided + if f.Mode != "" { + parsedMode, err := parseBackendMode(f.Mode) + if err != nil { + return req, err + } + req.Mode = &parsedMode + } + + return req, nil +} + +// getReasoningBudget determines the reasoning budget from the --think flag. +// Returns nil if flag not set +// Returns -1 (unlimited) when --think or --think=true. +// Returns 0 (disabled) when --think=false. +func (f *ConfigureFlags) getReasoningBudget() *int32 { + // If Think is nil, flag was not set - don't configure + if f.Think == nil { + return nil + } + // If explicitly set to true, enable reasoning (unlimited) + if *f.Think { + return ptr(reasoningBudgetUnlimited) // -1: reasoning enabled (unlimited) + } + // If explicitly set to false, disable reasoning + return ptr(reasoningBudgetDisabled) // 0: reasoning disabled +} + +// parseBackendMode parses a string mode value into an inference.BackendMode. +func parseBackendMode(mode string) (inference.BackendMode, error) { + switch strings.ToLower(mode) { + case "completion": + return inference.BackendModeCompletion, nil + case "embedding": + return inference.BackendModeEmbedding, nil + case "reranking": + return inference.BackendModeReranking, nil + default: + return inference.BackendModeCompletion, fmt.Errorf("invalid mode %q: must be one of completion, embedding, reranking", mode) + } +} diff --git a/cmd/cli/commands/configure_test.go b/cmd/cli/commands/configure_test.go index c43e5dac..99c238e4 100644 --- a/cmd/cli/commands/configure_test.go +++ b/cmd/cli/commands/configure_test.go @@ -4,89 +4,6 @@ import ( "testing" ) -func TestConfigureCmdReasoningBudgetFlag(t *testing.T) { - // Create the configure command - cmd := newConfigureCmd() - - // Verify the --reasoning-budget flag exists - reasoningBudgetFlag := cmd.Flags().Lookup("reasoning-budget") - if reasoningBudgetFlag == nil { - t.Fatal("--reasoning-budget flag not found") - } - - // Verify the default value is empty (nil pointer) - if reasoningBudgetFlag.DefValue != "" { - t.Errorf("Expected default reasoning-budget value to be '' (nil), got '%s'", reasoningBudgetFlag.DefValue) - } - - // Verify the flag type - if reasoningBudgetFlag.Value.Type() != "int32" { - t.Errorf("Expected reasoning-budget flag type to be 'int32', got '%s'", reasoningBudgetFlag.Value.Type()) - } -} - -func TestConfigureCmdReasoningBudgetFlagChanged(t *testing.T) { - tests := []struct { - name string - setValue string - expectChanged bool - expectedValue string - }{ - { - name: "flag not set - should not be changed", - setValue: "", - expectChanged: false, - expectedValue: "", - }, - { - name: "flag set to 0 (disable reasoning) - should be changed", - setValue: "0", - expectChanged: true, - expectedValue: "0", - }, - { - name: "flag set to -1 (unlimited) - should be changed", - setValue: "-1", - expectChanged: true, - expectedValue: "-1", - }, - { - name: "flag set to positive value - should be changed", - setValue: "1024", - expectChanged: true, - expectedValue: "1024", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - // Create a fresh configure command for each test - cmd := newConfigureCmd() - - // Only set the flag if setValue is not empty - if tt.setValue != "" { - err := cmd.Flags().Set("reasoning-budget", tt.setValue) - if err != nil { - t.Fatalf("Failed to set reasoning-budget flag: %v", err) - } - } - - // Check if the flag was marked as changed - isChanged := cmd.Flags().Changed("reasoning-budget") - if isChanged != tt.expectChanged { - t.Errorf("Expected Changed() = %v, got %v", tt.expectChanged, isChanged) - } - - // Verify the value using String() method - flag := cmd.Flags().Lookup("reasoning-budget") - value := flag.Value.String() - if value != tt.expectedValue { - t.Errorf("Expected value = %s, got %s", tt.expectedValue, value) - } - }) - } -} - func TestConfigureCmdHfOverridesFlag(t *testing.T) { // Create the configure command cmd := newConfigureCmd() @@ -157,3 +74,114 @@ func TestConfigureCmdSpeculativeFlags(t *testing.T) { t.Fatal("--speculative-min-acceptance-rate flag not found") } } + +func TestConfigureCmdModeFlag(t *testing.T) { + // Create the configure command + cmd := newConfigureCmd() + + // Verify the --mode flag exists + modeFlag := cmd.Flags().Lookup("mode") + if modeFlag == nil { + t.Fatal("--mode flag not found") + } + + // Verify the default value is empty + if modeFlag.DefValue != "" { + t.Errorf("Expected default mode value to be empty, got '%s'", modeFlag.DefValue) + } + + // Verify the flag type + if modeFlag.Value.Type() != "string" { + t.Errorf("Expected mode flag type to be 'string', got '%s'", modeFlag.Value.Type()) + } +} + +func TestConfigureCmdThinkFlag(t *testing.T) { + // Create the configure command + cmd := newConfigureCmd() + + // Verify the --think flag exists + thinkFlag := cmd.Flags().Lookup("think") + if thinkFlag == nil { + t.Fatal("--think flag not found") + } + + // Verify the default value is empty + if thinkFlag.DefValue != "" { + t.Errorf("Expected default think value to be empty (nil), got '%s'", thinkFlag.DefValue) + } + + // Verify the flag type + if thinkFlag.Value.Type() != "bool" { + t.Errorf("Expected think flag type to be 'bool', got '%s'", thinkFlag.Value.Type()) + } + + // Test setting the flag to true + err := cmd.Flags().Set("think", "true") + if err != nil { + t.Errorf("Failed to set think flag to true: %v", err) + } + + // Verify the value was set + if thinkFlag.Value.String() != "true" { + t.Errorf("Expected think flag value to be 'true', got '%s'", thinkFlag.Value.String()) + } +} + +func TestThinkFlagBehavior(t *testing.T) { + // Helper to create bool pointer + boolPtr := func(b bool) *bool { return &b } + + tests := []struct { + name string + thinkValue *bool + expectBudget bool + expectedBudget int32 + }{ + { + name: "default - not set (nil)", + thinkValue: nil, + expectBudget: false, + }, + { + name: "explicitly set to true (--think)", + thinkValue: boolPtr(true), + expectBudget: true, + expectedBudget: -1, + }, + { + name: "explicitly set to false (--think=false)", + thinkValue: boolPtr(false), + expectBudget: true, + expectedBudget: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + flags := ConfigureFlags{ + Think: tt.thinkValue, + } + + req, err := flags.BuildConfigureRequest("test-model") + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + if tt.expectBudget { + // Reasoning budget should be set + if req.LlamaCpp == nil || req.LlamaCpp.ReasoningBudget == nil { + t.Fatal("Expected reasoning budget to be set") + } + if *req.LlamaCpp.ReasoningBudget != tt.expectedBudget { + t.Errorf("Expected reasoning budget to be %d, got %d", tt.expectedBudget, *req.LlamaCpp.ReasoningBudget) + } + } else { + // Reasoning budget should NOT be set + if req.LlamaCpp != nil && req.LlamaCpp.ReasoningBudget != nil { + t.Errorf("Expected reasoning budget to be nil when not set, got %d", *req.LlamaCpp.ReasoningBudget) + } + } + }) + } +} diff --git a/cmd/cli/docs/reference/docker_model_compose_up.yaml b/cmd/cli/docs/reference/docker_model_compose_up.yaml index 70f72e77..9a0bf1b3 100644 --- a/cmd/cli/docs/reference/docker_model_compose_up.yaml +++ b/cmd/cli/docs/reference/docker_model_compose_up.yaml @@ -23,15 +23,6 @@ options: experimentalcli: false kubernetes: false swarm: false - - option: mode - value_type: string - description: backend operation mode (completion, embedding, reranking) - deprecated: false - hidden: false - experimental: false - experimentalcli: false - kubernetes: false - swarm: false - option: model value_type: stringArray default_value: '[]' @@ -71,16 +62,6 @@ options: experimentalcli: false kubernetes: false swarm: false - - option: think - value_type: string - description: | - enable reasoning mode for thinking models (true/false/high/medium/low) - deprecated: false - hidden: false - experimental: false - experimentalcli: false - kubernetes: false - swarm: false inherited_options: - option: project-name value_type: string diff --git a/cmd/cli/docs/reference/docker_model_configure.yaml b/cmd/cli/docs/reference/docker_model_configure.yaml index 728af82f..f82527c9 100644 --- a/cmd/cli/docs/reference/docker_model_configure.yaml +++ b/cmd/cli/docs/reference/docker_model_configure.yaml @@ -1,7 +1,7 @@ command: docker model configure short: Configure runtime options for a model long: Configure runtime options for a model -usage: docker model configure [--context-size=] [--speculative-draft-model=] [--hf_overrides=] [--reasoning-budget=] MODEL +usage: docker model configure [--context-size=] [--speculative-draft-model=] [--hf_overrides=] [--mode=] [--think] MODEL pname: docker model plink: docker_model.yaml options: @@ -23,9 +23,9 @@ options: experimentalcli: false kubernetes: false swarm: false - - option: reasoning-budget - value_type: int32 - description: reasoning budget for reasoning models - llama.cpp only + - option: mode + value_type: string + description: backend operation mode (completion, embedding, reranking) deprecated: false hidden: false experimental: false @@ -61,6 +61,15 @@ options: experimentalcli: false kubernetes: false swarm: false + - option: think + value_type: bool + description: enable reasoning mode for thinking models + deprecated: false + hidden: false + experimental: false + experimentalcli: false + kubernetes: false + swarm: false deprecated: false hidden: true experimental: false diff --git a/pkg/inference/backends/llamacpp/llamacpp_config_test.go b/pkg/inference/backends/llamacpp/llamacpp_config_test.go index 5d04ad3e..b67939df 100644 --- a/pkg/inference/backends/llamacpp/llamacpp_config_test.go +++ b/pkg/inference/backends/llamacpp/llamacpp_config_test.go @@ -239,38 +239,38 @@ func TestGetArgs(t *testing.T) { ), }, { - name: "reasoning budget from backend config", + name: "reasoning budget enabled (-1 unlimited)", mode: inference.BackendModeCompletion, bundle: &fakeBundle{ ggufPath: modelPath, }, config: &inference.BackendConfiguration{ LlamaCpp: &inference.LlamaCppConfig{ - ReasoningBudget: int32ptr(1024), + ReasoningBudget: int32ptr(-1), }, }, expected: append(slices.Clone(baseArgs), "--model", modelPath, "--host", socket, - "--reasoning-budget", "1024", + "--reasoning-budget", "-1", "--jinja", ), }, { - name: "reasoning budget with negative value (unlimited)", + name: "reasoning budget disabled (0)", mode: inference.BackendModeCompletion, bundle: &fakeBundle{ ggufPath: modelPath, }, config: &inference.BackendConfiguration{ LlamaCpp: &inference.LlamaCppConfig{ - ReasoningBudget: int32ptr(-1), + ReasoningBudget: int32ptr(0), }, }, expected: append(slices.Clone(baseArgs), "--model", modelPath, "--host", socket, - "--reasoning-budget", "-1", + "--reasoning-budget", "0", "--jinja", ), }, diff --git a/pkg/ollama/http_handler.go b/pkg/ollama/http_handler.go index 3e4a6ab6..3dd31040 100644 --- a/pkg/ollama/http_handler.go +++ b/pkg/ollama/http_handler.go @@ -24,17 +24,6 @@ const ( reasoningBudgetUnlimited int32 = -1 // reasoningBudgetDisabled disables reasoning (0 tokens) reasoningBudgetDisabled int32 = 0 - // reasoningBudgetMedium represents a medium reasoning budget (1024 tokens) - reasoningBudgetMedium int32 = 1024 - // reasoningBudgetLow represents a low reasoning budget (256 tokens) - reasoningBudgetLow int32 = 256 -) - -// Reasoning level string constants for the think parameter -const ( - reasoningLevelHigh = "high" - reasoningLevelMedium = "medium" - reasoningLevelLow = "low" ) // HTTPHandler implements the Ollama API compatibility layer @@ -775,10 +764,10 @@ func convertMessages(messages []Message) []map[string]interface{} { } // convertThinkToReasoningBudget converts the Ollama 'think' parameter to llama.cpp's 'reasoning_budget'. -// The think parameter can be: -// - bool: true (unlimited reasoning, -1) or false (no reasoning, 0) -// - string: "high" (-1, unlimited), "medium" (1024 tokens), "low" (256 tokens) -// Returns nil if think is nil or invalid, otherwise returns a pointer to the reasoning_budget value. +// The think parameter must be a boolean: +// - true: unlimited reasoning (-1) +// - false: reasoning disabled (0) +// Returns nil if think is nil or not a boolean. func convertThinkToReasoningBudget(think interface{}) *int32 { if think == nil { return nil @@ -787,26 +776,16 @@ func convertThinkToReasoningBudget(think interface{}) *int32 { // Helper to create a pointer to an int32 value ptr := func(v int32) *int32 { return &v } - switch v := think.(type) { - case bool: + // Only accept boolean values + if v, ok := think.(bool); ok { if v { return ptr(reasoningBudgetUnlimited) } return ptr(reasoningBudgetDisabled) - case string: - switch strings.ToLower(v) { - case reasoningLevelHigh: - return ptr(reasoningBudgetUnlimited) - case reasoningLevelMedium: - return ptr(reasoningBudgetMedium) - case reasoningLevelLow: - return ptr(reasoningBudgetLow) - default: - return nil // Invalid string value - } - default: - return nil // Invalid type } + + // Invalid type - return nil + return nil } // convertToInt32 converts various numeric types to int32