44 changes: 0 additions & 44 deletions cmd/cli/commands/backend.go

This file was deleted.

28 changes: 6 additions & 22 deletions cmd/cli/commands/list.go
@@ -19,33 +19,19 @@ import (
 
 func newListCmd() *cobra.Command {
 	var jsonFormat, openai, quiet bool
-	var backend string
 	c := &cobra.Command{
 		Use: "list [OPTIONS]",
 		Aliases: []string{"ls"},
 		Short: "List the models pulled to your local environment",
 		RunE: func(cmd *cobra.Command, args []string) error {
-			// Validate backend if specified
-			if backend != "" {
-				if err := validateBackend(backend); err != nil {
-					return err
-				}
-			}
-
-			if (backend == "openai" || openai) && quiet {
+			if openai && quiet {
 				return fmt.Errorf("--quiet flag cannot be used with --openai flag or OpenAI backend")
 			}
 
-			// Validate API key for OpenAI backend
-			apiKey, err := ensureAPIKey(backend)
-			if err != nil {
-				return err
-			}
-
 			// If we're doing an automatic install, only show the installation
 			// status if it won't corrupt machine-readable output.
 			var standaloneInstallPrinter standalone.StatusPrinter
-			if !jsonFormat && !openai && !quiet && backend == "" {
+			if !jsonFormat && !openai && !quiet {
 				standaloneInstallPrinter = cmd
 			}
 			if _, err := ensureStandaloneRunnerAvailable(cmd.Context(), standaloneInstallPrinter); err != nil {
@@ -55,7 +41,7 @@ func newListCmd() *cobra.Command {
 			if len(args) > 0 {
 				modelFilter = args[0]
 			}
-			models, err := listModels(openai, backend, desktopClient, quiet, jsonFormat, apiKey, modelFilter)
+			models, err := listModels(openai, desktopClient, quiet, jsonFormat, modelFilter)
 			if err != nil {
 				return err
 			}
@@ -67,14 +53,12 @@ func newListCmd() *cobra.Command {
 	c.Flags().BoolVar(&jsonFormat, "json", false, "List models in a JSON format")
 	c.Flags().BoolVar(&openai, "openai", false, "List models in an OpenAI format")
 	c.Flags().BoolVarP(&quiet, "quiet", "q", false, "Only show model IDs")
-	c.Flags().StringVar(&backend, "backend", "", fmt.Sprintf("Specify the backend to use (%s)", ValidBackendsKeys()))
-	c.Flags().MarkHidden("backend")
 	return c
 }
 
-func listModels(openai bool, backend string, desktopClient *desktop.Client, quiet bool, jsonFormat bool, apiKey string, modelFilter string) (string, error) {
-	if openai || backend == "openai" {
-		models, err := desktopClient.ListOpenAI(backend, apiKey)
+func listModels(openai bool, desktopClient *desktop.Client, quiet bool, jsonFormat bool, modelFilter string) (string, error) {
+	if openai {
+		models, err := desktopClient.ListOpenAI()
 		if err != nil {
 			err = handleClientError(err, "Failed to list models")
 			return "", handleNotRunningError(err)
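
Note: with the per-backend routing gone, `ListOpenAI` reduces to a single unauthenticated GET against the runner's OpenAI-compatible models route. A self-contained sketch of that request shape, assuming a hypothetical runner address and route prefix (stand-ins for the real model-runner URL and `inference.InferencePrefix`, not values from this PR):

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

// openAIModelList mirrors the minimal shape of an OpenAI /v1/models reply.
type openAIModelList struct {
	Data []struct {
		ID string `json:"id"`
	} `json:"data"`
}

func main() {
	// Hypothetical runner address and prefix; the real values come from the
	// model-runner configuration and inference.InferencePrefix.
	const baseURL = "http://localhost:12434"
	const inferencePrefix = "/engines"

	resp, err := http.Get(baseURL + inferencePrefix + "/v1/models")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var models openAIModelList
	if err := json.NewDecoder(resp.Body).Decode(&models); err != nil {
		panic(err)
	}
	for _, m := range models.Data {
		fmt.Println(m.ID)
	}
}
```

Listing via this route needs no Authorization header anymore, which is what lets `ensureAPIKey` and the `apiKey` parameter disappear from the call chain above.
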
63 changes: 22 additions & 41 deletions cmd/cli/commands/run.go
@@ -87,7 +87,7 @@ func readMultilineInput(cmd *cobra.Command, scanner *bufio.Scanner) (string, err
 }
 
 // generateInteractiveWithReadline provides an enhanced interactive mode with readline support
-func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.Client, backend, model, apiKey string) error {
+func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.Client, model string) error {
 	usage := func() {
 		fmt.Fprintln(os.Stderr, "Available Commands:")
 		fmt.Fprintln(os.Stderr, "  /bye  Exit")
@@ -122,7 +122,7 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 	})
 	if err != nil {
 		// Fall back to basic input mode if readline initialization fails
-		return generateInteractiveBasic(cmd, desktopClient, backend, model, apiKey)
+		return generateInteractiveBasic(cmd, desktopClient, model)
 	}
 
 	// Disable history if the environment variable is set
@@ -221,7 +221,7 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 			}
 		}()
 
-		err := chatWithMarkdownContext(chatCtx, cmd, desktopClient, backend, model, userInput, apiKey)
+		err := chatWithMarkdownContext(chatCtx, cmd, desktopClient, model, userInput)
 
 		// Clean up signal handler
 		signal.Stop(sigChan)
@@ -246,7 +246,7 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 }
 
 // generateInteractiveBasic provides a basic interactive mode (fallback)
-func generateInteractiveBasic(cmd *cobra.Command, desktopClient *desktop.Client, backend, model, apiKey string) error {
+func generateInteractiveBasic(cmd *cobra.Command, desktopClient *desktop.Client, model string) error {
 	scanner := bufio.NewScanner(os.Stdin)
 	for {
 		userInput, err := readMultilineInput(cmd, scanner)
@@ -282,7 +282,7 @@ func generateInteractiveBasic(cmd *cobra.Command, desktopClient *desktop.Client,
 			}
 		}()
 
-		err = chatWithMarkdownContext(chatCtx, cmd, desktopClient, backend, model, userInput, apiKey)
+		err = chatWithMarkdownContext(chatCtx, cmd, desktopClient, model, userInput)
 
 		cancelChat()
 		signal.Stop(sigChan)
@@ -484,12 +484,12 @@ func renderMarkdown(content string) (string, error) {
 }
 
 // chatWithMarkdown performs chat and streams the response with selective markdown rendering.
-func chatWithMarkdown(cmd *cobra.Command, client *desktop.Client, backend, model, prompt, apiKey string) error {
-	return chatWithMarkdownContext(cmd.Context(), cmd, client, backend, model, prompt, apiKey)
+func chatWithMarkdown(cmd *cobra.Command, client *desktop.Client, model, prompt string) error {
+	return chatWithMarkdownContext(cmd.Context(), cmd, client, model, prompt)
 }
 
 // chatWithMarkdownContext performs chat with context support and streams the response with selective markdown rendering.
-func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *desktop.Client, backend, model, prompt, apiKey string) error {
+func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *desktop.Client, model, prompt string) error {
 	colorMode, _ := cmd.Flags().GetString("color")
 	useMarkdown := shouldUseMarkdown(colorMode)
 	debug, _ := cmd.Flags().GetBool("debug")
@@ -504,15 +504,15 @@ func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *de
 
 	if !useMarkdown {
 		// Simple case: just stream as plain text
-		return client.ChatWithContext(ctx, backend, model, prompt, apiKey, imageURLs, func(content string) {
+		return client.ChatWithContext(ctx, model, prompt, imageURLs, func(content string) {
 			cmd.Print(content)
 		}, false)
 	}
 
 	// For markdown: use streaming buffer to render code blocks as they complete
 	markdownBuffer := NewStreamingMarkdownBuffer()
 
-	err = client.ChatWithContext(ctx, backend, model, prompt, apiKey, imageURLs, func(content string) {
+	err = client.ChatWithContext(ctx, model, prompt, imageURLs, func(content string) {
 		// Use the streaming markdown buffer to intelligently render content
 		rendered, err := markdownBuffer.AddContent(content, true)
 		if err != nil {
@@ -539,7 +539,6 @@ func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *de
 
 func newRunCmd() *cobra.Command {
 	var debug bool
-	var backend string
 	var ignoreRuntimeMemoryCheck bool
 	var colorMode string
 	var detach bool
@@ -557,19 +556,6 @@ func newRunCmd() *cobra.Command {
 			}
 		},
 		RunE: func(cmd *cobra.Command, args []string) error {
-			// Validate backend if specified
-			if backend != "" {
-				if err := validateBackend(backend); err != nil {
-					return err
-				}
-			}
-
-			// Validate API key for OpenAI backend
-			apiKey, err := ensureAPIKey(backend)
-			if err != nil {
-				return err
-			}
-
 			// Normalize model name to add default org and tag if missing
 			model := models.NormalizeModelName(args[0])
 			prompt := ""
@@ -607,24 +593,21 @@ func newRunCmd() *cobra.Command {
 				return fmt.Errorf("unable to initialize standalone model runner: %w", err)
 			}
 
-			// Do not validate the model in case of using OpenAI's backend, let OpenAI handle it
-			if backend != "openai" {
-				_, err := desktopClient.Inspect(model, false)
-				if err != nil {
-					if !errors.Is(err, desktop.ErrNotFound) {
-						return handleNotRunningError(handleClientError(err, "Failed to inspect model"))
-					}
-					cmd.Println("Unable to find model '" + model + "' locally. Pulling from the server.")
-					if err := pullModel(cmd, desktopClient, model, ignoreRuntimeMemoryCheck); err != nil {
-						return err
-					}
-				}
+			_, err := desktopClient.Inspect(model, false)
+			if err != nil {
+				if !errors.Is(err, desktop.ErrNotFound) {
+					return handleNotRunningError(handleClientError(err, "Failed to inspect model"))
+				}
+				cmd.Println("Unable to find model '" + model + "' locally. Pulling from the server.")
+				if err := pullModel(cmd, desktopClient, model, ignoreRuntimeMemoryCheck); err != nil {
+					return err
+				}
 			}
 
 			// Handle --detach flag: just load the model without interaction
 			if detach {
 				// Make a minimal request to load the model into memory
-				err := desktopClient.Chat(backend, model, "", apiKey, nil, func(content string) {
+				err := desktopClient.Chat(model, "", nil, func(content string) {
 					// Silently discard output in detach mode
 				}, false)
 				if err != nil {
@@ -637,7 +620,7 @@ func newRunCmd() *cobra.Command {
 			}
 
 			if prompt != "" {
-				if err := chatWithMarkdown(cmd, desktopClient, backend, model, prompt, apiKey); err != nil {
+				if err := chatWithMarkdown(cmd, desktopClient, model, prompt); err != nil {
 					return handleClientError(err, "Failed to generate a response")
 				}
 				cmd.Println()
@@ -646,11 +629,11 @@ func newRunCmd() *cobra.Command {
 
 			// Use enhanced readline-based interactive mode when terminal is available
 			if term.IsTerminal(int(os.Stdin.Fd())) {
-				return generateInteractiveWithReadline(cmd, desktopClient, backend, model, apiKey)
+				return generateInteractiveWithReadline(cmd, desktopClient, model)
 			}
 
 			// Fall back to basic mode if not a terminal
-			return generateInteractiveBasic(cmd, desktopClient, backend, model, apiKey)
+			return generateInteractiveBasic(cmd, desktopClient, model)
 		},
 		ValidArgsFunction: completion.ModelNames(getDesktopClient, 1),
 	}
@@ -667,8 +650,6 @@ func newRunCmd() *cobra.Command {
 	}
 
 	c.Flags().BoolVar(&debug, "debug", false, "Enable debug logging")
-	c.Flags().StringVar(&backend, "backend", "", fmt.Sprintf("Specify the backend to use (%s)", ValidBackendsKeys()))
-	c.Flags().MarkHidden("backend")
 	c.Flags().BoolVar(&ignoreRuntimeMemoryCheck, "ignore-runtime-memory-check", false, "Do not block pull if estimated runtime memory for model exceeds system resources.")
 	c.Flags().StringVar(&colorMode, "color", "auto", "Use colored output (auto|yes|no)")
 	c.Flags().BoolVarP(&detach, "detach", "d", false, "Load the model in the background without interaction")
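
Note: the run loop's per-turn interrupt handling is unchanged by this PR and is worth keeping in mind when reviewing the signature changes: each turn gets a fresh cancellable context plus a SIGINT handler that is stopped afterwards, so Ctrl-C aborts only the in-flight response, not the REPL. A self-contained sketch of that pattern, with `chatOnce` as an illustrative stand-in for the streaming chat call (names here are invented, not the PR's code):

```go
package main

import (
	"context"
	"fmt"
	"os"
	"os/signal"
	"time"
)

// chatOnce stands in for one streaming chat request; it finishes after a
// delay unless the context is cancelled first.
func chatOnce(ctx context.Context) error {
	select {
	case <-time.After(3 * time.Second):
		fmt.Println("response complete")
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

func main() {
	// One cancellable context per chat turn.
	chatCtx, cancelChat := context.WithCancel(context.Background())
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, os.Interrupt)
	go func() {
		<-sigChan
		cancelChat() // Ctrl-C aborts only the in-flight response
	}()

	err := chatOnce(chatCtx)

	// Remove the handler so the next turn can install a fresh one.
	signal.Stop(sigChan)
	cancelChat()
	if err != nil {
		fmt.Println("chat aborted:", err)
	}
}
```
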
54 changes: 13 additions & 41 deletions cmd/cli/desktop/desktop.go
@@ -24,8 +24,6 @@ import (
 	"go.opentelemetry.io/otel"
 )
 
-const DefaultBackend = "llama.cpp"
-
 var (
 	ErrNotFound = errors.New("model not found")
 	ErrServiceUnavailable = errors.New("service unavailable")
@@ -233,32 +231,18 @@ func (c *Client) List() ([]dmrm.Model, error) {
 	return modelsJson, nil
 }
 
-func (c *Client) ListOpenAI(backend, apiKey string) (dmrm.OpenAIModelList, error) {
-	if backend == "" {
-		backend = DefaultBackend
-	}
-	modelsRoute := fmt.Sprintf("%s/%s/v1/models", inference.InferencePrefix, backend)
-
-	// Use doRequestWithAuth to support API key authentication
-	resp, err := c.doRequestWithAuth(http.MethodGet, modelsRoute, nil, "openai", apiKey)
-	if err != nil {
-		return dmrm.OpenAIModelList{}, c.handleQueryError(err, modelsRoute)
-	}
-	defer resp.Body.Close()
-
-	if resp.StatusCode != http.StatusOK {
-		return dmrm.OpenAIModelList{}, fmt.Errorf("failed to list models: %s", resp.Status)
-	}
-
-	body, err := io.ReadAll(resp.Body)
+func (c *Client) ListOpenAI() (dmrm.OpenAIModelList, error) {
+	modelsRoute := inference.InferencePrefix + "/v1/models"
+	body, err := c.listRaw(modelsRoute, "")
 	if err != nil {
-		return dmrm.OpenAIModelList{}, fmt.Errorf("failed to read response body: %w", err)
+		return dmrm.OpenAIModelList{}, err
 	}
 
 	var modelsJson dmrm.OpenAIModelList
 	if err := json.Unmarshal(body, &modelsJson); err != nil {
 		return modelsJson, fmt.Errorf("failed to unmarshal response body: %w", err)
 	}
 
 	return modelsJson, nil
 }
@@ -357,12 +341,12 @@ func (c *Client) fullModelID(id string) (string, error) {
 }
 
 // Chat performs a chat request and streams the response content with selective markdown rendering.
-func (c *Client) Chat(backend, model, prompt, apiKey string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) error {
-	return c.ChatWithContext(context.Background(), backend, model, prompt, apiKey, imageURLs, outputFunc, shouldUseMarkdown)
+func (c *Client) Chat(model, prompt string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) error {
+	return c.ChatWithContext(context.Background(), model, prompt, imageURLs, outputFunc, shouldUseMarkdown)
 }
 
 // ChatWithContext performs a chat request with context support for cancellation and streams the response content with selective markdown rendering.
-func (c *Client) ChatWithContext(ctx context.Context, backend, model, prompt, apiKey string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) error {
+func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) error {
 	model = dmrm.NormalizeModelName(model)
 	if !strings.Contains(strings.Trim(model, "/"), "/") {
 		// Do an extra API call to check if the model parameter isn't a model ID.
@@ -417,20 +401,13 @@ func (c *Client) ChatWithContext(ctx context.Context, backend, model, prompt, ap
 		return fmt.Errorf("error marshaling request: %w", err)
 	}
 
-	var completionsPath string
-	if backend != "" {
-		completionsPath = inference.InferencePrefix + "/" + backend + "/v1/chat/completions"
-	} else {
-		completionsPath = inference.InferencePrefix + "/v1/chat/completions"
-	}
+	completionsPath := inference.InferencePrefix + "/v1/chat/completions"
 
 	resp, err := c.doRequestWithAuthContext(
 		ctx,
 		http.MethodPost,
 		completionsPath,
 		bytes.NewReader(jsonData),
-		backend,
-		apiKey,
 	)
 	if err != nil {
 		return c.handleQueryError(err, completionsPath)
@@ -785,15 +762,15 @@ func (c *Client) Requests(modelFilter string, streaming bool, includeExisting bo
 
 // doRequest is a helper function that performs HTTP requests and handles 503 responses
 func (c *Client) doRequest(method, path string, body io.Reader) (*http.Response, error) {
-	return c.doRequestWithAuth(method, path, body, "", "")
+	return c.doRequestWithAuth(method, path, body)
 }
 
 // doRequestWithAuth is a helper function that performs HTTP requests with optional authentication
-func (c *Client) doRequestWithAuth(method, path string, body io.Reader, backend, apiKey string) (*http.Response, error) {
-	return c.doRequestWithAuthContext(context.Background(), method, path, body, backend, apiKey)
+func (c *Client) doRequestWithAuth(method, path string, body io.Reader) (*http.Response, error) {
+	return c.doRequestWithAuthContext(context.Background(), method, path, body)
 }
 
-func (c *Client) doRequestWithAuthContext(ctx context.Context, method, path string, body io.Reader, backend, apiKey string) (*http.Response, error) {
+func (c *Client) doRequestWithAuthContext(ctx context.Context, method, path string, body io.Reader) (*http.Response, error) {
 	req, err := http.NewRequestWithContext(ctx, method, c.modelRunner.URL(path), body)
 	if err != nil {
 		return nil, fmt.Errorf("error creating request: %w", err)
@@ -804,11 +781,6 @@ func (c *Client) doRequestWithAuthContext(ctx context.Context, method, path stri
 
 	req.Header.Set("User-Agent", "docker-model-cli/"+Version)
 
-	// Add Authorization header for OpenAI backend
-	if apiKey != "" {
-		req.Header.Set("Authorization", "Bearer "+apiKey)
-	}
-
 	resp, err := c.modelRunner.Client().Do(req)
 	if err != nil {
 		return nil, err
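
Note: with the Authorization branch removed, the three request helpers collapse into one context-aware builder whose only fixed header is the User-Agent. A minimal sketch of the resulting shape, with an invented `client` struct standing in for `desktop.Client` and its `modelRunner` wiring (the base URL is hypothetical):

```go
package main

import (
	"context"
	"fmt"
	"io"
	"net/http"
)

const version = "dev" // stand-in for the CLI's Version value

// client is an invented stand-in for desktop.Client's transport wiring.
type client struct {
	baseURL string
	httpc   *http.Client
}

func (c *client) doRequestWithContext(ctx context.Context, method, path string, body io.Reader) (*http.Response, error) {
	req, err := http.NewRequestWithContext(ctx, method, c.baseURL+path, body)
	if err != nil {
		return nil, fmt.Errorf("error creating request: %w", err)
	}
	// The only header the simplified helper still sets.
	req.Header.Set("User-Agent", "docker-model-cli/"+version)
	return c.httpc.Do(req)
}

func main() {
	c := &client{baseURL: "http://localhost:12434", httpc: http.DefaultClient}
	resp, err := c.doRequestWithContext(context.Background(), http.MethodGet, "/v1/models", nil)
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}
```

Keeping `doRequest` and `doRequestWithAuth` as thin wrappers preserves existing call sites while the authentication surface goes away.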