diff --git a/cmd/cli/commands/compose.go b/cmd/cli/commands/compose.go
index 5f3169a5..21386296 100644
--- a/cmd/cli/commands/compose.go
+++ b/cmd/cli/commands/compose.go
@@ -34,19 +34,6 @@ func newComposeCmd() *cobra.Command {
 	return c
 }
 
-// Reasoning budget constants for the think parameter conversion
-const (
-	reasoningBudgetUnlimited int32 = -1
-	reasoningBudgetDisabled  int32 = 0
-	reasoningBudgetMedium    int32 = 1024
-	reasoningBudgetLow       int32 = 256
-)
-
-// ptr is a helper function to create a pointer to int32
-func ptr(v int32) *int32 {
-	return &v
-}
-
 func newUpCommand() *cobra.Command {
 	var models []string
 	var ctxSize int64
@@ -54,8 +41,6 @@ func newUpCommand() *cobra.Command {
 	var draftModel string
 	var numTokens int
 	var minAcceptanceRate float64
-	var mode string
-	var think string
 	c := &cobra.Command{
 		Use: "up",
 		RunE: func(cmd *cobra.Command, args []string) error {
@@ -81,7 +66,7 @@ func newUpCommand() *cobra.Command {
 				return err
 			}
 
-			if cmd.Flags().Changed("context-size") {
+			if ctxSize > 0 {
 				sendInfo(fmt.Sprintf("Setting context size to %d", ctxSize))
 			}
 
@@ -96,52 +81,14 @@ func newUpCommand() *cobra.Command {
 				sendInfo(fmt.Sprintf("Enabling speculative decoding with draft model: %s", draftModel))
 			}
 
-			// Parse mode if provided
-			var backendMode *inference.BackendMode
-			if mode != "" {
-				parsedMode, err := parseBackendMode(mode)
-				if err != nil {
-					_ = sendError(err.Error())
-					return err
-				}
-				backendMode = &parsedMode
-				sendInfo(fmt.Sprintf("Setting backend mode to %s", mode))
-			}
-
-			// Parse think parameter for reasoning budget
-			var reasoningBudget *int32
-			if think != "" {
-				budget, err := parseThinkToReasoningBudget(think)
-				if err != nil {
-					_ = sendError(err.Error())
-					return err
-				}
-				reasoningBudget = budget
-				sendInfo(fmt.Sprintf("Setting think mode to %s", think))
-			}
-
 			for _, model := range models {
-				configuration := inference.BackendConfiguration{
-					Speculative: speculativeConfig,
-				}
-				if cmd.Flags().Changed("context-size") {
-					// TODO is the context size the same for all models?
-					v := int32(ctxSize)
-					configuration.ContextSize = &v
-				}
-
-				// Set llama.cpp-specific reasoning budget if provided
-				if reasoningBudget != nil {
-					if configuration.LlamaCpp == nil {
-						configuration.LlamaCpp = &inference.LlamaCppConfig{}
-					}
-					configuration.LlamaCpp.ReasoningBudget = reasoningBudget
-				}
-
+				size := int32(ctxSize)
 				if err := desktopClient.ConfigureBackend(scheduling.ConfigureRequest{
-					Model:                model,
-					Mode:                 backendMode,
-					BackendConfiguration: configuration,
+					Model: model,
+					BackendConfiguration: inference.BackendConfiguration{
+						ContextSize: &size,
+						Speculative: speculativeConfig,
+					},
 				}); err != nil {
 					configErrFmtString := "failed to configure backend for model %s with context-size %d"
 					_ = sendErrorf(configErrFmtString+": %v", model, ctxSize, err)
@@ -171,57 +118,10 @@ func newUpCommand() *cobra.Command {
 	c.Flags().StringVar(&draftModel, "speculative-draft-model", "", "draft model for speculative decoding")
 	c.Flags().IntVar(&numTokens, "speculative-num-tokens", 0, "number of tokens to predict speculatively")
 	c.Flags().Float64Var(&minAcceptanceRate, "speculative-min-acceptance-rate", 0, "minimum acceptance rate for speculative decoding")
-	c.Flags().StringVar(&mode, "mode", "", "backend operation mode (completion, embedding, reranking)")
-	c.Flags().StringVar(&think, "think", "", "enable reasoning mode for thinking models (true/false/high/medium/low)")
 	_ = c.MarkFlagRequired("model")
 	return c
 }
 
-// parseBackendMode parses a string mode value into an inference.BackendMode.
-func parseBackendMode(mode string) (inference.BackendMode, error) {
-	switch strings.ToLower(mode) {
-	case "completion":
-		return inference.BackendModeCompletion, nil
-	case "embedding":
-		return inference.BackendModeEmbedding, nil
-	case "reranking":
-		return inference.BackendModeReranking, nil
-	default:
-		return inference.BackendModeCompletion, fmt.Errorf("invalid mode %q: must be one of completion, embedding, reranking", mode)
-	}
-}
-
-// parseThinkToReasoningBudget converts the think parameter string to a reasoning budget value.
-// Accepts: "true", "false", "high", "medium", "low"
-// Returns:
-//   - nil for empty string or "true" (use server default, which is unlimited)
-//   - -1 for "high" (explicitly set unlimited)
-//   - 0 for "false" (disable thinking)
-//   - 1024 for "medium"
-//   - 256 for "low"
-func parseThinkToReasoningBudget(think string) (*int32, error) {
-	if think == "" {
-		return nil, nil
-	}
-
-	switch strings.ToLower(think) {
-	case "true":
-		// Use nil to let the server use its default (currently unlimited)
-		return nil, nil
-	case "high":
-		// Explicitly set unlimited reasoning budget
-		return ptr(reasoningBudgetUnlimited), nil
-	case "false":
-		return ptr(reasoningBudgetDisabled), nil
-	case "medium":
-		return ptr(reasoningBudgetMedium), nil
-	case "low":
-		return ptr(reasoningBudgetLow), nil
-	default:
-		return nil, fmt.Errorf("invalid think value %q: must be one of true, false, high, medium, low", think)
-	}
-}
-
 func newDownCommand() *cobra.Command {
 	c := &cobra.Command{
 		Use: "down",
diff --git a/cmd/cli/commands/compose_test.go b/cmd/cli/commands/compose_test.go
index 5d2de01b..d4a8b59e 100644
--- a/cmd/cli/commands/compose_test.go
+++ b/cmd/cli/commands/compose_test.go
@@ -71,84 +71,3 @@ func TestParseBackendMode(t *testing.T) {
 		})
 	}
 }
-
-func TestParseThinkToReasoningBudget(t *testing.T) {
-	tests := []struct {
-		name        string
-		input       string
-		expected    *int32
-		expectError bool
-	}{
-		{
-			name:        "empty string returns nil",
-			input:       "",
-			expected:    nil,
-			expectError: false,
-		},
-		{
-			name:        "true returns nil (use server default)",
-			input:       "true",
-			expected:    nil,
-			expectError: false,
-		},
-		{
-			name:        "TRUE returns nil (case insensitive)",
-			input:       "TRUE",
-			expected:    nil,
-			expectError: false,
-		},
-		{
-			name:        "false disables reasoning",
-			input:       "false",
-			expected:    ptr(reasoningBudgetDisabled),
-			expectError: false,
-		},
-		{
-			name:        "high explicitly sets unlimited (-1)",
-			input:       "high",
-			expected:    ptr(reasoningBudgetUnlimited),
-			expectError: false,
-		},
-		{
-			name:        "medium sets 1024 tokens",
-			input:       "medium",
-			expected:    ptr(reasoningBudgetMedium),
-			expectError: false,
-		},
-		{
-			name:        "low sets 256 tokens",
-			input:       "low",
-			expected:    ptr(reasoningBudgetLow),
-			expectError: false,
-		},
-		{
-			name:        "invalid value returns error",
-			input:       "invalid",
-			expected:    nil,
-			expectError: true,
-		},
-		{
-			name:        "numeric string returns error",
-			input:       "1024",
-			expected:    nil,
-			expectError: true,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			result, err := parseThinkToReasoningBudget(tt.input)
-			if tt.expectError {
-				require.Error(t, err)
-			} else {
-				require.NoError(t, err)
-				if tt.expected == nil {
-					assert.Nil(t, result)
-				} else {
-					require.NotNil(t, result)
-					assert.Equal(t, *tt.expected, *result)
-				}
-			}
-		})
-	}
-}
diff --git a/cmd/cli/commands/configure.go b/cmd/cli/commands/configure.go
index debdd59e..6c5da30d 100644
--- a/cmd/cli/commands/configure.go
+++ b/cmd/cli/commands/configure.go
@@ -1,59 +1,17 @@
 package commands
 
 import (
-	"encoding/json"
 	"fmt"
-	"strconv"
 
 	"github.com/docker/model-runner/cmd/cli/commands/completion"
-	"github.com/docker/model-runner/pkg/inference"
-
-	"github.com/docker/model-runner/pkg/inference/scheduling"
 	"github.com/spf13/cobra"
 )
 
-// Int32PtrValue implements pflag.Value interface for *int32 pointers
-// This allows flags to have a nil default value instead of 0
-type Int32PtrValue struct {
-	ptr **int32
-}
-
-func NewInt32PtrValue(p **int32) *Int32PtrValue {
-	return &Int32PtrValue{ptr: p}
-}
-
-func (v *Int32PtrValue) String() string {
-	if v.ptr == nil || *v.ptr == nil {
-		return ""
-	}
-	return strconv.FormatInt(int64(**v.ptr), 10)
-}
-
-func (v *Int32PtrValue) Set(s string) error {
-	val, err := strconv.ParseInt(s, 10, 32)
-	if err != nil {
-		return err
-	}
-	i32 := int32(val)
-	*v.ptr = &i32
-	return nil
-}
-
-func (v *Int32PtrValue) Type() string {
-	return "int32"
-}
-
 func newConfigureCmd() *cobra.Command {
-	var opts scheduling.ConfigureRequest
-	var draftModel string
-	var numTokens int
-	var minAcceptanceRate float64
-	var hfOverrides string
-	var contextSize *int32
-	var reasoningBudget *int32
+	var flags ConfigureFlags
 
 	c := &cobra.Command{
-		Use:    "configure [--context-size=<n>] [--speculative-draft-model=<model>] [--hf_overrides=<json>] [--reasoning-budget=<n>] MODEL",
+		Use:    "configure [--context-size=<n>] [--speculative-draft-model=<model>] [--hf_overrides=<json>] [--mode=<mode>] [--think] MODEL",
 		Short:  "Configure runtime options for a model",
 		Hidden: true,
 		Args: func(cmd *cobra.Command, args []string) error {
@@ -63,53 +21,19 @@ func newConfigureCmd() *cobra.Command {
 						"See 'docker model configure --help' for more information",
 					len(args), args)
 			}
-			opts.Model = args[0]
 			return nil
 		},
 		RunE: func(cmd *cobra.Command, args []string) error {
-			// contextSize is nil by default, only set if user provided the flag
-			opts.ContextSize = contextSize
-			// Build the speculative config if any speculative flags are set
-			if draftModel != "" || numTokens > 0 || minAcceptanceRate > 0 {
-				opts.Speculative = &inference.SpeculativeDecodingConfig{
-					DraftModel:        draftModel,
-					NumTokens:         numTokens,
-					MinAcceptanceRate: minAcceptanceRate,
-				}
-			}
-			// Parse and validate HuggingFace overrides if provided (vLLM-specific)
-			if hfOverrides != "" {
-				var hfo inference.HFOverrides
-				if err := json.Unmarshal([]byte(hfOverrides), &hfo); err != nil {
-					return fmt.Errorf("invalid --hf_overrides JSON: %w", err)
-				}
-				// Validate the overrides to prevent command injection
-				if err := hfo.Validate(); err != nil {
-					return err
-				}
-				if opts.VLLM == nil {
-					opts.VLLM = &inference.VLLMConfig{}
-				}
-				opts.VLLM.HFOverrides = hfo
-			}
-			// Set llama.cpp-specific reasoning budget if provided
-			// reasoningBudget is nil by default, only set if user provided the flag
-			if reasoningBudget != nil {
-				if opts.LlamaCpp == nil {
-					opts.LlamaCpp = &inference.LlamaCppConfig{}
-				}
-				opts.LlamaCpp.ReasoningBudget = reasoningBudget
+			model := args[0]
+			opts, err := flags.BuildConfigureRequest(model)
+			if err != nil {
+				return err
 			}
 			return desktopClient.ConfigureBackend(opts)
 		},
 		ValidArgsFunction: completion.ModelNames(getDesktopClient, -1),
 	}
 
-	c.Flags().Var(NewInt32PtrValue(&contextSize), "context-size", "context size (in tokens)")
-	c.Flags().StringVar(&draftModel, "speculative-draft-model", "", "draft model for speculative decoding")
-	c.Flags().IntVar(&numTokens, "speculative-num-tokens", 0, "number of tokens to predict speculatively")
-	c.Flags().Float64Var(&minAcceptanceRate, "speculative-min-acceptance-rate", 0, "minimum acceptance rate for speculative decoding")
-	c.Flags().StringVar(&hfOverrides, "hf_overrides", "", "HuggingFace model config overrides (JSON) - vLLM only")
-	c.Flags().Var(NewInt32PtrValue(&reasoningBudget), "reasoning-budget", "reasoning budget for reasoning models - llama.cpp only")
+	flags.RegisterFlags(c)
 	return c
 }
diff --git a/cmd/cli/commands/configure_flags.go b/cmd/cli/commands/configure_flags.go
new file mode 100644
index 00000000..abda4ffc
--- /dev/null
+++ b/cmd/cli/commands/configure_flags.go
@@ -0,0 +1,204 @@
+package commands
+
+import (
+	"encoding/json"
+	"fmt"
+	"strconv"
+	"strings"
+
+	"github.com/docker/model-runner/pkg/inference"
+	"github.com/docker/model-runner/pkg/inference/scheduling"
+	"github.com/spf13/cobra"
+)
+
+// Reasoning budget values that getReasoningBudget derives from the --think flag.
+const (
+	reasoningBudgetUnlimited int32 = -1 // Think == true: reasoning enabled (unlimited)
+	reasoningBudgetDisabled  int32 = 0  // Think == false: reasoning disabled
+)
+
+// Int32PtrValue implements the pflag.Value interface (String, Set, Type) for *int32 targets.
+// The target stays nil until Set succeeds, so "flag not given" is distinguishable from an explicit 0.
+type Int32PtrValue struct {
+	ptr **int32 // address of the caller's *int32; Set stores a freshly allocated value through it
+}
+
+// NewInt32PtrValue returns an Int32PtrValue that reads and writes the *int32 located at p.
+func NewInt32PtrValue(p **int32) *Int32PtrValue {
+	return &Int32PtrValue{ptr: p}
+}
+
+func (v *Int32PtrValue) String() string { // String renders the stored value in base 10.
+	if v.ptr == nil || *v.ptr == nil {
+		return "" // nothing stored yet: the unset state prints as the empty string
+	}
+	return strconv.FormatInt(int64(**v.ptr), 10)
+}
+
+func (v *Int32PtrValue) Set(s string) error { // Set parses s as a base-10 int32 and stores it through ptr (ptr is not nil-checked here).
+	val, err := strconv.ParseInt(s, 10, 32)
+	if err != nil {
+		return err
+	}
+	i32 := int32(val) // lossless: ParseInt with bitSize 32 already rejected out-of-range input
+	*v.ptr = &i32
+	return nil
+}
+
+func (v *Int32PtrValue) Type() string { // Type returns the fixed type name "int32".
+	return "int32"
+}
+
+// BoolPtrValue implements the pflag.Value interface (String, Set, Type) for *bool targets.
+// The target stays nil until Set succeeds, so "flag not given" is distinguishable from an explicit false.
+type BoolPtrValue struct {
+	ptr **bool // address of the caller's *bool; Set stores a freshly allocated value through it
+}
+
+// NewBoolPtrValue returns a BoolPtrValue that reads and writes the *bool located at p.
+func NewBoolPtrValue(p **bool) *BoolPtrValue {
+	return &BoolPtrValue{ptr: p}
+}
+
+func (v *BoolPtrValue) String() string { // String renders the stored value as "true" or "false".
+	if v.ptr == nil || *v.ptr == nil {
+		return "" // nothing stored yet: the unset state prints as the empty string
+	}
+	return strconv.FormatBool(**v.ptr)
+}
+
+func (v *BoolPtrValue) Set(s string) error { // Set parses s with strconv.ParseBool and stores it through ptr (ptr is not nil-checked here).
+	val, err := strconv.ParseBool(s)
+	if err != nil {
+		return err
+	}
+	*v.ptr = &val
+	return nil
+}
+
+func (v *BoolPtrValue) Type() string { // Type returns the fixed type name "bool".
+	return "bool"
+}
+
+func (v *BoolPtrValue) IsBoolFlag() bool { // NOTE(review): pflag accepts a bare --flag only when Flag.NoOptDefVal is set; this method alone does not do that - confirm at registration.
+	return true
+}
+
+// ptr returns a pointer to a copy of v, so optional int32 fields can be filled from constants.
+func ptr(v int32) *int32 {
+	return &v
+}
+
+// ConfigureFlags holds the raw flag values that BuildConfigureRequest turns into a scheduling.ConfigureRequest.
+type ConfigureFlags struct {
+	// Mode is the backend mode name (completion, embedding, reranking); empty leaves the request's Mode unset.
+	Mode string
+	// ContextSize is the context size in tokens; it is copied to the request as-is, nil included.
+	ContextSize *int32
+	// Speculative decoding flags; a config is built only if DraftModel is non-empty or either number is positive.
+	DraftModel        string
+	NumTokens         int
+	MinAcceptanceRate float64
+	// HFOverrides is the raw --hf_overrides JSON (vLLM-specific); empty means no overrides are sent.
+	HFOverrides string
+	// Think is nil when --think was not given; otherwise true enables and false disables reasoning.
+	Think *bool
+}
+
+// RegisterFlags registers every configuration flag on cmd, binding each one to the matching field of f; a bare --think parses as true.
+func (f *ConfigureFlags) RegisterFlags(cmd *cobra.Command) {
+	cmd.Flags().Var(NewInt32PtrValue(&f.ContextSize), "context-size", "context size (in tokens)")
+	cmd.Flags().StringVar(&f.DraftModel, "speculative-draft-model", "", "draft model for speculative decoding")
+	cmd.Flags().IntVar(&f.NumTokens, "speculative-num-tokens", 0, "number of tokens to predict speculatively")
+	cmd.Flags().Float64Var(&f.MinAcceptanceRate, "speculative-min-acceptance-rate", 0, "minimum acceptance rate for speculative decoding")
+	cmd.Flags().StringVar(&f.HFOverrides, "hf_overrides", "", "HuggingFace model config overrides (JSON) - vLLM only")
+	cmd.Flags().VarPF(NewBoolPtrValue(&f.Think), "think", "", "enable reasoning mode for thinking models").NoOptDefVal = "true" // without NoOptDefVal pflag would consume the next argument (MODEL) as the flag's value
+	cmd.Flags().StringVar(&f.Mode, "mode", "", "backend operation mode (completion, embedding, reranking)")
+}
+
+// BuildConfigureRequest converts the flag values in f into a scheduling.ConfigureRequest for model.
+// It fails on malformed or invalid --hf_overrides and on an unknown --mode; on error the partially filled request is returned with it.
+func (f *ConfigureFlags) BuildConfigureRequest(model string) (scheduling.ConfigureRequest, error) {
+	req := scheduling.ConfigureRequest{
+		Model: model,
+	}
+
+	// Copy the context size pointer as-is; nil is passed through unchanged
+	req.ContextSize = f.ContextSize
+
+	// Build the speculative config only if a draft model is named or either numeric flag is positive
+	if f.DraftModel != "" || f.NumTokens > 0 || f.MinAcceptanceRate > 0 {
+		req.Speculative = &inference.SpeculativeDecodingConfig{
+			DraftModel:        f.DraftModel,
+			NumTokens:         f.NumTokens,
+			MinAcceptanceRate: f.MinAcceptanceRate,
+		}
+	}
+
+	// Decode and validate the HuggingFace overrides JSON if provided (vLLM-specific)
+	if f.HFOverrides != "" {
+		var hfo inference.HFOverrides
+		if err := json.Unmarshal([]byte(f.HFOverrides), &hfo); err != nil {
+			return req, fmt.Errorf("invalid --hf_overrides JSON: %w", err)
+		}
+		// Validate the overrides to prevent command injection
+		if err := hfo.Validate(); err != nil {
+			return req, err
+		}
+		if req.VLLM == nil {
+			req.VLLM = &inference.VLLMConfig{}
+		}
+		req.VLLM.HFOverrides = hfo
+	}
+
+	// Translate --think into a llama.cpp reasoning budget; nil means the flag was not given, so LlamaCpp stays untouched
+	reasoningBudget := f.getReasoningBudget()
+	if reasoningBudget != nil {
+		if req.LlamaCpp == nil {
+			req.LlamaCpp = &inference.LlamaCppConfig{}
+		}
+		req.LlamaCpp.ReasoningBudget = reasoningBudget
+	}
+
+	// Parse the backend mode if provided; an empty Mode leaves req.Mode nil
+	if f.Mode != "" {
+		parsedMode, err := parseBackendMode(f.Mode)
+		if err != nil {
+			return req, err
+		}
+		req.Mode = &parsedMode
+	}
+
+	return req, nil
+}
+
+// getReasoningBudget maps the tri-state Think field onto a reasoning budget.
+// Returns nil when Think is nil (flag not given).
+// Returns a pointer to -1 (unlimited) when Think is true.
+// Returns a pointer to 0 (disabled) when Think is false.
+func (f *ConfigureFlags) getReasoningBudget() *int32 {
+	// Think is nil: the flag was not set, so the caller should not configure a budget
+	if f.Think == nil {
+		return nil
+	}
+	// Think is true: enable reasoning with no limit
+	if *f.Think {
+		return ptr(reasoningBudgetUnlimited) // -1: reasoning enabled (unlimited)
+	}
+	// Think is false: turn reasoning off
+	return ptr(reasoningBudgetDisabled) // 0: reasoning disabled
+}
+
+// parseBackendMode maps a mode name, compared case-insensitively, to its inference.BackendMode; unknown names yield an error.
+func parseBackendMode(mode string) (inference.BackendMode, error) {
+	switch strings.ToLower(mode) {
+	case "completion":
+		return inference.BackendModeCompletion, nil
+	case "embedding":
+		return inference.BackendModeEmbedding, nil
+	case "reranking":
+		return inference.BackendModeReranking, nil
+	default: // the mode value returned with the error is only a placeholder
+		return inference.BackendModeCompletion, fmt.Errorf("invalid mode %q: must be one of completion, embedding, reranking", mode)
+	}
+}
diff --git a/cmd/cli/commands/configure_test.go b/cmd/cli/commands/configure_test.go
index c43e5dac..99c238e4 100644
--- a/cmd/cli/commands/configure_test.go
+++ b/cmd/cli/commands/configure_test.go
@@ -4,89 +4,6 @@ import (
 	"testing"
 )
 
-func TestConfigureCmdReasoningBudgetFlag(t *testing.T) {
-	// Create the configure command
-	cmd := newConfigureCmd()
-
-	// Verify the --reasoning-budget flag exists
-	reasoningBudgetFlag := cmd.Flags().Lookup("reasoning-budget")
-	if reasoningBudgetFlag == nil {
-		t.Fatal("--reasoning-budget flag not found")
-	}
-
-	// Verify the default value is empty (nil pointer)
-	if reasoningBudgetFlag.DefValue != "" {
-		t.Errorf("Expected default reasoning-budget value to be '' (nil), got '%s'", reasoningBudgetFlag.DefValue)
-	}
-
-	// Verify the flag type
-	if reasoningBudgetFlag.Value.Type() != "int32" {
-		t.Errorf("Expected reasoning-budget flag type to be 'int32', got '%s'", reasoningBudgetFlag.Value.Type())
-	}
-}
-
-func TestConfigureCmdReasoningBudgetFlagChanged(t *testing.T) {
-	tests := []struct {
-		name          string
-		setValue      string
-		expectChanged bool
-		expectedValue string
-	}{
-		{
-			name:          "flag not set - should not be changed",
-			setValue:      "",
-			expectChanged: false,
-			expectedValue: "",
-		},
-		{
-			name:          "flag set to 0 (disable reasoning) - should be changed",
-			setValue:      "0",
-			expectChanged: true,
-			expectedValue: "0",
-		},
-		{
-			name:          "flag set to -1 (unlimited) - should be changed",
-			setValue:      "-1",
-			expectChanged: true,
-			expectedValue: "-1",
-		},
-		{
-			name:          "flag set to positive value - should be changed",
-			setValue:      "1024",
-			expectChanged: true,
-			expectedValue: "1024",
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			// Create a fresh configure command for each test
-			cmd := newConfigureCmd()
-
-			// Only set the flag if setValue is not empty
-			if tt.setValue != "" {
-				err := cmd.Flags().Set("reasoning-budget", tt.setValue)
-				if err != nil {
-					t.Fatalf("Failed to set reasoning-budget flag: %v", err)
-				}
-			}
-
-			// Check if the flag was marked as changed
-			isChanged := cmd.Flags().Changed("reasoning-budget")
-			if isChanged != tt.expectChanged {
-				t.Errorf("Expected Changed() = %v, got %v", tt.expectChanged, isChanged)
-			}
-
-			// Verify the value using String() method
-			flag := cmd.Flags().Lookup("reasoning-budget")
-			value := flag.Value.String()
-			if value != tt.expectedValue {
-				t.Errorf("Expected value = %s, got %s", tt.expectedValue, value)
-			}
-		})
-	}
-}
-
 func TestConfigureCmdHfOverridesFlag(t *testing.T) {
 	// Create the configure command
 	cmd := newConfigureCmd()
@@ -157,3 +74,114 @@ func TestConfigureCmdSpeculativeFlags(t *testing.T) {
 		t.Fatal("--speculative-min-acceptance-rate flag not found")
 	}
 }
+
+func TestConfigureCmdModeFlag(t *testing.T) { // checks that configure registers --mode as a string flag with an empty default
+	// Build a fresh configure command
+	cmd := newConfigureCmd()
+
+	// The --mode flag must be registered on the command
+	modeFlag := cmd.Flags().Lookup("mode")
+	if modeFlag == nil {
+		t.Fatal("--mode flag not found")
+	}
+
+	// The default must be the empty string
+	if modeFlag.DefValue != "" {
+		t.Errorf("Expected default mode value to be empty, got '%s'", modeFlag.DefValue)
+	}
+
+	// The flag value must report the type name "string"
+	if modeFlag.Value.Type() != "string" {
+		t.Errorf("Expected mode flag type to be 'string', got '%s'", modeFlag.Value.Type())
+	}
+}
+
+func TestConfigureCmdThinkFlag(t *testing.T) { // checks registration, default, type and explicit Set of --think
+	// Build a fresh configure command
+	cmd := newConfigureCmd()
+
+	// The --think flag must be registered on the command
+	thinkFlag := cmd.Flags().Lookup("think")
+	if thinkFlag == nil {
+		t.Fatal("--think flag not found")
+	}
+
+	// The default must be empty, i.e. the underlying *bool is still nil
+	if thinkFlag.DefValue != "" {
+		t.Errorf("Expected default think value to be empty (nil), got '%s'", thinkFlag.DefValue)
+	}
+
+	// The flag value must report the type name "bool"
+	if thinkFlag.Value.Type() != "bool" {
+		t.Errorf("Expected think flag type to be 'bool', got '%s'", thinkFlag.Value.Type())
+	}
+
+	// Set the flag programmatically; this bypasses argument parsing, so a bare "--think" on the command line is not exercised here
+	err := cmd.Flags().Set("think", "true")
+	if err != nil {
+		t.Errorf("Failed to set think flag to true: %v", err)
+	}
+
+	// The stored value must now read back as "true"
+	if thinkFlag.Value.String() != "true" {
+		t.Errorf("Expected think flag value to be 'true', got '%s'", thinkFlag.Value.String())
+	}
+}
+
+func TestThinkFlagBehavior(t *testing.T) { // table test: Think (nil/true/false) -> LlamaCpp.ReasoningBudget in the built request
+	// boolPtr returns a pointer to a copy of b, for filling the table below
+	boolPtr := func(b bool) *bool { return &b }
+
+	tests := []struct {
+		name           string
+		thinkValue     *bool
+		expectBudget   bool
+		expectedBudget int32
+	}{
+		{
+			name:         "default - not set (nil)",
+			thinkValue:   nil,
+			expectBudget: false,
+		},
+		{
+			name:           "explicitly set to true (--think)",
+			thinkValue:     boolPtr(true),
+			expectBudget:   true,
+			expectedBudget: -1,
+		},
+		{
+			name:           "explicitly set to false (--think=false)",
+			thinkValue:     boolPtr(false),
+			expectBudget:   true,
+			expectedBudget: 0,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			flags := ConfigureFlags{
+				Think: tt.thinkValue,
+			}
+
+			req, err := flags.BuildConfigureRequest("test-model")
+			if err != nil {
+				t.Fatalf("Unexpected error: %v", err)
+			}
+
+			if tt.expectBudget {
+				// A budget is expected: both the LlamaCpp section and its ReasoningBudget must be non-nil
+				if req.LlamaCpp == nil || req.LlamaCpp.ReasoningBudget == nil {
+					t.Fatal("Expected reasoning budget to be set")
+				}
+				if *req.LlamaCpp.ReasoningBudget != tt.expectedBudget {
+					t.Errorf("Expected reasoning budget to be %d, got %d", tt.expectedBudget, *req.LlamaCpp.ReasoningBudget)
+				}
+			} else {
+				// No budget is expected: a nil LlamaCpp section is fine, but a populated ReasoningBudget is a failure
+				if req.LlamaCpp != nil && req.LlamaCpp.ReasoningBudget != nil {
+					t.Errorf("Expected reasoning budget to be nil when not set, got %d", *req.LlamaCpp.ReasoningBudget)
+				}
+			}
+		})
+	}
+}
diff --git a/cmd/cli/docs/reference/docker_model_compose_up.yaml b/cmd/cli/docs/reference/docker_model_compose_up.yaml
index 70f72e77..9a0bf1b3 100644
--- a/cmd/cli/docs/reference/docker_model_compose_up.yaml
+++ b/cmd/cli/docs/reference/docker_model_compose_up.yaml
@@ -23,15 +23,6 @@ options:
       experimentalcli: false
       kubernetes: false
       swarm: false
-    - option: mode
-      value_type: string
-      description: backend operation mode (completion, embedding, reranking)
-      deprecated: false
-      hidden: false
-      experimental: false
-      experimentalcli: false
-      kubernetes: false
-      swarm: false
     - option: model
       value_type: stringArray
       default_value: '[]'
@@ -71,16 +62,6 @@ options:
       experimentalcli: false
       kubernetes: false
       swarm: false
-    - option: think
-      value_type: string
-      description: |
-        enable reasoning mode for thinking models (true/false/high/medium/low)
-      deprecated: false
-      hidden: false
-      experimental: false
-      experimentalcli: false
-      kubernetes: false
-      swarm: false
 inherited_options:
     - option: project-name
       value_type: string
diff --git a/cmd/cli/docs/reference/docker_model_configure.yaml b/cmd/cli/docs/reference/docker_model_configure.yaml
index 728af82f..f82527c9 100644
--- a/cmd/cli/docs/reference/docker_model_configure.yaml
+++ b/cmd/cli/docs/reference/docker_model_configure.yaml
@@ -1,7 +1,7 @@
 command: docker model configure
 short: Configure runtime options for a model
 long: Configure runtime options for a model
-usage: docker model configure [--context-size=<n>] [--speculative-draft-model=<model>] [--hf_overrides=<json>] [--reasoning-budget=<n>] MODEL
+usage: docker model configure [--context-size=<n>] [--speculative-draft-model=<model>] [--hf_overrides=<json>] [--mode=<mode>] [--think] MODEL
 pname: docker model
 plink: docker_model.yaml
 options:
@@ -23,9 +23,9 @@ options:
       experimentalcli: false
       kubernetes: false
       swarm: false
-    - option: reasoning-budget
-      value_type: int32
-      description: reasoning budget for reasoning models - llama.cpp only
+    - option: mode
+      value_type: string
+      description: backend operation mode (completion, embedding, reranking)
       deprecated: false
       hidden: false
       experimental: false
@@ -61,6 +61,15 @@ options:
       experimentalcli: false
       kubernetes: false
       swarm: false
+    - option: think
+      value_type: bool
+      description: enable reasoning mode for thinking models
+      deprecated: false
+      hidden: false
+      experimental: false
+      experimentalcli: false
+      kubernetes: false
+      swarm: false
 deprecated: false
 hidden: true
 experimental: false
diff --git a/pkg/inference/backends/llamacpp/llamacpp_config_test.go b/pkg/inference/backends/llamacpp/llamacpp_config_test.go
index 5d04ad3e..b67939df 100644
--- a/pkg/inference/backends/llamacpp/llamacpp_config_test.go
+++ b/pkg/inference/backends/llamacpp/llamacpp_config_test.go
@@ -239,38 +239,38 @@ func TestGetArgs(t *testing.T) {
 			),
 		},
 		{
-			name: "reasoning budget from backend config",
+			name: "reasoning budget enabled (-1 unlimited)",
 			mode: inference.BackendModeCompletion,
 			bundle: &fakeBundle{
 				ggufPath: modelPath,
 			},
 			config: &inference.BackendConfiguration{
 				LlamaCpp: &inference.LlamaCppConfig{
-					ReasoningBudget: int32ptr(1024),
+					ReasoningBudget: int32ptr(-1),
 				},
 			},
 			expected: append(slices.Clone(baseArgs),
 				"--model", modelPath,
 				"--host", socket,
-				"--reasoning-budget", "1024",
+				"--reasoning-budget", "-1",
 				"--jinja",
 			),
 		},
 		{
-			name: "reasoning budget with negative value (unlimited)",
+			name: "reasoning budget disabled (0)",
 			mode: inference.BackendModeCompletion,
 			bundle: &fakeBundle{
 				ggufPath: modelPath,
 			},
 			config: &inference.BackendConfiguration{
 				LlamaCpp: &inference.LlamaCppConfig{
-					ReasoningBudget: int32ptr(-1),
+					ReasoningBudget: int32ptr(0),
 				},
 			},
 			expected: append(slices.Clone(baseArgs),
 				"--model", modelPath,
 				"--host", socket,
-				"--reasoning-budget", "-1",
+				"--reasoning-budget", "0",
 				"--jinja",
 			),
 		},
diff --git a/pkg/ollama/http_handler.go b/pkg/ollama/http_handler.go
index 3e4a6ab6..3dd31040 100644
--- a/pkg/ollama/http_handler.go
+++ b/pkg/ollama/http_handler.go
@@ -24,17 +24,6 @@ const (
 	reasoningBudgetUnlimited int32 = -1
 	// reasoningBudgetDisabled disables reasoning (0 tokens)
 	reasoningBudgetDisabled int32 = 0
-	// reasoningBudgetMedium represents a medium reasoning budget (1024 tokens)
-	reasoningBudgetMedium int32 = 1024
-	// reasoningBudgetLow represents a low reasoning budget (256 tokens)
-	reasoningBudgetLow int32 = 256
-)
-
-// Reasoning level string constants for the think parameter
-const (
-	reasoningLevelHigh   = "high"
-	reasoningLevelMedium = "medium"
-	reasoningLevelLow    = "low"
 )
 
 // HTTPHandler implements the Ollama API compatibility layer
@@ -775,10 +764,10 @@ func convertMessages(messages []Message) []map[string]interface{} {
 }
 
 // convertThinkToReasoningBudget converts the Ollama 'think' parameter to llama.cpp's 'reasoning_budget'.
-// The think parameter can be:
-// - bool: true (unlimited reasoning, -1) or false (no reasoning, 0)
-// - string: "high" (-1, unlimited), "medium" (1024 tokens), "low" (256 tokens)
-// Returns nil if think is nil or invalid, otherwise returns a pointer to the reasoning_budget value.
+// The think parameter must be a boolean:
+// - true: unlimited reasoning (-1)
+// - false: reasoning disabled (0)
+// Returns nil if think is nil or not a boolean.
 func convertThinkToReasoningBudget(think interface{}) *int32 {
 	if think == nil {
 		return nil
@@ -787,26 +776,16 @@ func convertThinkToReasoningBudget(think interface{}) *int32 {
 	// Helper to create a pointer to an int32 value
 	ptr := func(v int32) *int32 { return &v }
 
-	switch v := think.(type) {
-	case bool:
+	// Only accept boolean values
+	if v, ok := think.(bool); ok {
 		if v {
 			return ptr(reasoningBudgetUnlimited)
 		}
 		return ptr(reasoningBudgetDisabled)
-	case string:
-		switch strings.ToLower(v) {
-		case reasoningLevelHigh:
-			return ptr(reasoningBudgetUnlimited)
-		case reasoningLevelMedium:
-			return ptr(reasoningBudgetMedium)
-		case reasoningLevelLow:
-			return ptr(reasoningBudgetLow)
-		default:
-			return nil // Invalid string value
-		}
-	default:
-		return nil // Invalid type
 	}
+
+	// Invalid type - return nil
+	return nil
 }
 
 // convertToInt32 converts various numeric types to int32