diff --git a/cmd/cli/commands/backend.go b/cmd/cli/commands/backend.go
deleted file mode 100644
index a6cbeb5ae..000000000
--- a/cmd/cli/commands/backend.go
+++ /dev/null
@@ -1,44 +0,0 @@
-package commands
-
-import (
-	"errors"
-	"fmt"
-	"maps"
-	"os"
-	"slices"
-	"strings"
-)
-
-// ValidBackends is a map of valid backends
-var ValidBackends = map[string]bool{
-	"llama.cpp": true,
-	"openai":    true,
-	"vllm":      true,
-}
-
-// validateBackend checks if the provided backend is valid
-func validateBackend(backend string) error {
-	if !ValidBackends[backend] {
-		return fmt.Errorf("invalid backend '%s'. Valid backends are: %s",
-			backend, ValidBackendsKeys())
-	}
-	return nil
-}
-
-// ensureAPIKey retrieves the API key if needed
-func ensureAPIKey(backend string) (string, error) {
-	if backend == "openai" {
-		apiKey := os.Getenv("OPENAI_API_KEY")
-		if apiKey == "" {
-			return "", errors.New("OPENAI_API_KEY environment variable is required when using --backend=openai")
-		}
-		return apiKey, nil
-	}
-	return "", nil
-}
-
-func ValidBackendsKeys() string {
-	keys := slices.Collect(maps.Keys(ValidBackends))
-	slices.Sort(keys)
-	return strings.Join(keys, ", ")
-}
diff --git a/cmd/cli/commands/list.go b/cmd/cli/commands/list.go
index b4dfa9e44..fac3b09aa 100644
--- a/cmd/cli/commands/list.go
+++ b/cmd/cli/commands/list.go
@@ -19,33 +19,19 @@ import (
 )
 
 func newListCmd() *cobra.Command {
 	var jsonFormat, openai, quiet bool
-	var backend string
 	c := &cobra.Command{
 		Use:     "list [OPTIONS]",
 		Aliases: []string{"ls"},
 		Short:   "List the models pulled to your local environment",
 		RunE: func(cmd *cobra.Command, args []string) error {
-			// Validate backend if specified
-			if backend != "" {
-				if err := validateBackend(backend); err != nil {
-					return err
-				}
-			}
-
-			if (backend == "openai" || openai) && quiet {
+			if openai && quiet {
 				return fmt.Errorf("--quiet flag cannot be used with --openai flag or OpenAI backend")
 			}
-			// Validate API key for OpenAI backend
-			apiKey, err := ensureAPIKey(backend)
-			if err != nil {
-				return err
-			}
-
 			// If we're doing an automatic install, only show the installation
 			// status if it won't corrupt machine-readable output.
 			var standaloneInstallPrinter standalone.StatusPrinter
-			if !jsonFormat && !openai && !quiet && backend == "" {
+			if !jsonFormat && !openai && !quiet {
 				standaloneInstallPrinter = cmd
 			}
 			if _, err := ensureStandaloneRunnerAvailable(cmd.Context(), standaloneInstallPrinter); err != nil {
@@ -55,7 +41,7 @@ func newListCmd() *cobra.Command {
 			if len(args) > 0 {
 				modelFilter = args[0]
 			}
-			models, err := listModels(openai, backend, desktopClient, quiet, jsonFormat, apiKey, modelFilter)
+			models, err := listModels(openai, desktopClient, quiet, jsonFormat, modelFilter)
 			if err != nil {
 				return err
 			}
@@ -67,14 +53,12 @@ func newListCmd() *cobra.Command {
 	c.Flags().BoolVar(&jsonFormat, "json", false, "List models in a JSON format")
 	c.Flags().BoolVar(&openai, "openai", false, "List models in an OpenAI format")
 	c.Flags().BoolVarP(&quiet, "quiet", "q", false, "Only show model IDs")
-	c.Flags().StringVar(&backend, "backend", "", fmt.Sprintf("Specify the backend to use (%s)", ValidBackendsKeys()))
-	c.Flags().MarkHidden("backend")
 	return c
 }
 
-func listModels(openai bool, backend string, desktopClient *desktop.Client, quiet bool, jsonFormat bool, apiKey string, modelFilter string) (string, error) {
-	if openai || backend == "openai" {
-		models, err := desktopClient.ListOpenAI(backend, apiKey)
+func listModels(openai bool, desktopClient *desktop.Client, quiet bool, jsonFormat bool, modelFilter string) (string, error) {
+	if openai {
+		models, err := desktopClient.ListOpenAI()
 		if err != nil {
 			err = handleClientError(err, "Failed to list models")
 			return "", handleNotRunningError(err)
diff --git a/cmd/cli/commands/run.go b/cmd/cli/commands/run.go
index aee86dc0a..0ab2e3c71 100644
--- a/cmd/cli/commands/run.go
+++ b/cmd/cli/commands/run.go
@@ -87,7 +87,7 @@ func readMultilineInput(cmd *cobra.Command, scanner *bufio.Scanner) (string, err
 }
 
 // generateInteractiveWithReadline provides an enhanced interactive mode with readline support
-func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.Client, backend, model, apiKey string) error {
+func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.Client, model string) error {
 	usage := func() {
 		fmt.Fprintln(os.Stderr, "Available Commands:")
 		fmt.Fprintln(os.Stderr, "  /bye    Exit")
@@ -122,7 +122,7 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 	})
 	if err != nil {
 		// Fall back to basic input mode if readline initialization fails
-		return generateInteractiveBasic(cmd, desktopClient, backend, model, apiKey)
+		return generateInteractiveBasic(cmd, desktopClient, model)
 	}
 
 	// Disable history if the environment variable is set
@@ -221,7 +221,7 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 			}
 		}()
 
-		err := chatWithMarkdownContext(chatCtx, cmd, desktopClient, backend, model, userInput, apiKey)
+		err := chatWithMarkdownContext(chatCtx, cmd, desktopClient, model, userInput)
 
 		// Clean up signal handler
 		signal.Stop(sigChan)
@@ -246,7 +246,7 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 }
 
 // generateInteractiveBasic provides a basic interactive mode (fallback)
-func generateInteractiveBasic(cmd *cobra.Command, desktopClient *desktop.Client, backend, model, apiKey string) error {
+func generateInteractiveBasic(cmd *cobra.Command, desktopClient *desktop.Client, model string) error {
 	scanner := bufio.NewScanner(os.Stdin)
 	for {
 		userInput, err := readMultilineInput(cmd, scanner)
@@ -282,7 +282,7 @@ func generateInteractiveBasic(cmd *cobra.Command, desktopClient *desktop.Client,
 			}
 		}()
 
-		err = chatWithMarkdownContext(chatCtx, cmd, desktopClient, backend, model, userInput, apiKey)
+		err = chatWithMarkdownContext(chatCtx, cmd, desktopClient, model, userInput)
 
 		cancelChat()
 		signal.Stop(sigChan)
@@ -484,12 +484,12 @@ func renderMarkdown(content string) (string, error) {
 }
 
 // chatWithMarkdown performs chat and streams the response with selective markdown rendering.
-func chatWithMarkdown(cmd *cobra.Command, client *desktop.Client, backend, model, prompt, apiKey string) error {
-	return chatWithMarkdownContext(cmd.Context(), cmd, client, backend, model, prompt, apiKey)
+func chatWithMarkdown(cmd *cobra.Command, client *desktop.Client, model, prompt string) error {
+	return chatWithMarkdownContext(cmd.Context(), cmd, client, model, prompt)
 }
 
 // chatWithMarkdownContext performs chat with context support and streams the response with selective markdown rendering.
-func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *desktop.Client, backend, model, prompt, apiKey string) error {
+func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *desktop.Client, model, prompt string) error {
 	colorMode, _ := cmd.Flags().GetString("color")
 	useMarkdown := shouldUseMarkdown(colorMode)
 	debug, _ := cmd.Flags().GetBool("debug")
@@ -504,7 +504,7 @@ func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *de
 
 	if !useMarkdown {
 		// Simple case: just stream as plain text
-		return client.ChatWithContext(ctx, backend, model, prompt, apiKey, imageURLs, func(content string) {
+		return client.ChatWithContext(ctx, model, prompt, imageURLs, func(content string) {
 			cmd.Print(content)
 		}, false)
 	}
@@ -512,7 +512,7 @@ func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *de
 	// For markdown: use streaming buffer to render code blocks as they complete
 	markdownBuffer := NewStreamingMarkdownBuffer()
 
-	err = client.ChatWithContext(ctx, backend, model, prompt, apiKey, imageURLs, func(content string) {
+	err = client.ChatWithContext(ctx, model, prompt, imageURLs, func(content string) {
 		// Use the streaming markdown buffer to intelligently render content
 		rendered, err := markdownBuffer.AddContent(content, true)
 		if err != nil {
@@ -539,7 +539,6 @@ func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *de
 
 func newRunCmd() *cobra.Command {
 	var debug bool
-	var backend string
 	var ignoreRuntimeMemoryCheck bool
 	var colorMode string
 	var detach bool
@@ -557,19 +556,6 @@ func newRunCmd() *cobra.Command {
 			}
 		},
 		RunE: func(cmd *cobra.Command, args []string) error {
-			// Validate backend if specified
-			if backend != "" {
-				if err := validateBackend(backend); err != nil {
-					return err
-				}
-			}
-
-			// Validate API key for OpenAI backend
-			apiKey, err := ensureAPIKey(backend)
-			if err != nil {
-				return err
-			}
-
 			// Normalize model name to add default org and tag if missing
 			model := models.NormalizeModelName(args[0])
 			prompt := ""
@@ -607,24 +593,21 @@ func newRunCmd() *cobra.Command {
 				return fmt.Errorf("unable to initialize standalone model runner: %w", err)
 			}
 
-			// Do not validate the model in case of using OpenAI's backend, let OpenAI handle it
-			if backend != "openai" {
-				_, err := desktopClient.Inspect(model, false)
-				if err != nil {
-					if !errors.Is(err, desktop.ErrNotFound) {
-						return handleNotRunningError(handleClientError(err, "Failed to inspect model"))
-					}
-					cmd.Println("Unable to find model '" + model + "' locally. Pulling from the server.")
-					if err := pullModel(cmd, desktopClient, model, ignoreRuntimeMemoryCheck); err != nil {
-						return err
-					}
+			_, err := desktopClient.Inspect(model, false)
+			if err != nil {
+				if !errors.Is(err, desktop.ErrNotFound) {
+					return handleNotRunningError(handleClientError(err, "Failed to inspect model"))
+				}
+				cmd.Println("Unable to find model '" + model + "' locally. Pulling from the server.")
+				if err := pullModel(cmd, desktopClient, model, ignoreRuntimeMemoryCheck); err != nil {
+					return err
 				}
 			}
 
 			// Handle --detach flag: just load the model without interaction
 			if detach {
 				// Make a minimal request to load the model into memory
-				err := desktopClient.Chat(backend, model, "", apiKey, nil, func(content string) {
+				err := desktopClient.Chat(model, "", nil, func(content string) {
 					// Silently discard output in detach mode
 				}, false)
 				if err != nil {
@@ -637,7 +620,7 @@ func newRunCmd() *cobra.Command {
 			}
 
 			if prompt != "" {
-				if err := chatWithMarkdown(cmd, desktopClient, backend, model, prompt, apiKey); err != nil {
+				if err := chatWithMarkdown(cmd, desktopClient, model, prompt); err != nil {
 					return handleClientError(err, "Failed to generate a response")
 				}
 				cmd.Println()
@@ -646,11 +629,11 @@ func newRunCmd() *cobra.Command {
 
 			// Use enhanced readline-based interactive mode when terminal is available
 			if term.IsTerminal(int(os.Stdin.Fd())) {
-				return generateInteractiveWithReadline(cmd, desktopClient, backend, model, apiKey)
+				return generateInteractiveWithReadline(cmd, desktopClient, model)
 			}
 
 			// Fall back to basic mode if not a terminal
-			return generateInteractiveBasic(cmd, desktopClient, backend, model, apiKey)
+			return generateInteractiveBasic(cmd, desktopClient, model)
 		},
 		ValidArgsFunction: completion.ModelNames(getDesktopClient, 1),
 	}
@@ -667,8 +650,6 @@ func newRunCmd() *cobra.Command {
 	}
 
 	c.Flags().BoolVar(&debug, "debug", false, "Enable debug logging")
-	c.Flags().StringVar(&backend, "backend", "", fmt.Sprintf("Specify the backend to use (%s)", ValidBackendsKeys()))
-	c.Flags().MarkHidden("backend")
 	c.Flags().BoolVar(&ignoreRuntimeMemoryCheck, "ignore-runtime-memory-check", false, "Do not block pull if estimated runtime memory for model exceeds system resources.")
 	c.Flags().StringVar(&colorMode, "color", "auto", "Use colored output (auto|yes|no)")
 	c.Flags().BoolVarP(&detach, "detach", "d", false, "Load the model in the background without interaction")
diff --git a/cmd/cli/desktop/desktop.go b/cmd/cli/desktop/desktop.go
index a80f28e3b..069bbfe97 100644
--- a/cmd/cli/desktop/desktop.go
+++ b/cmd/cli/desktop/desktop.go
@@ -24,8 +24,6 @@ import (
 	"go.opentelemetry.io/otel"
 )
 
-const DefaultBackend = "llama.cpp"
-
 var (
 	ErrNotFound           = errors.New("model not found")
 	ErrServiceUnavailable = errors.New("service unavailable")
@@ -233,32 +231,18 @@ func (c *Client) List() ([]dmrm.Model, error) {
 	return modelsJson, nil
 }
 
-func (c *Client) ListOpenAI(backend, apiKey string) (dmrm.OpenAIModelList, error) {
-	if backend == "" {
-		backend = DefaultBackend
-	}
-	modelsRoute := fmt.Sprintf("%s/%s/v1/models", inference.InferencePrefix, backend)
-
-	// Use doRequestWithAuth to support API key authentication
-	resp, err := c.doRequestWithAuth(http.MethodGet, modelsRoute, nil, "openai", apiKey)
-	if err != nil {
-		return dmrm.OpenAIModelList{}, c.handleQueryError(err, modelsRoute)
-	}
-	defer resp.Body.Close()
-
-	if resp.StatusCode != http.StatusOK {
-		return dmrm.OpenAIModelList{}, fmt.Errorf("failed to list models: %s", resp.Status)
-	}
-
-	body, err := io.ReadAll(resp.Body)
+func (c *Client) ListOpenAI() (dmrm.OpenAIModelList, error) {
+	modelsRoute := inference.InferencePrefix + "/v1/models"
+	body, err := c.listRaw(modelsRoute, "")
 	if err != nil {
-		return dmrm.OpenAIModelList{}, fmt.Errorf("failed to read response body: %w", err)
+		return dmrm.OpenAIModelList{}, err
 	}
 
 	var modelsJson dmrm.OpenAIModelList
 	if err := json.Unmarshal(body, &modelsJson); err != nil {
 		return modelsJson, fmt.Errorf("failed to unmarshal response body: %w", err)
 	}
+
 	return modelsJson, nil
 }
@@ -357,12 +341,12 @@ func (c *Client) fullModelID(id string) (string, error) {
 }
 
 // Chat performs a chat request and streams the response content with selective markdown rendering.
-func (c *Client) Chat(backend, model, prompt, apiKey string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) error {
-	return c.ChatWithContext(context.Background(), backend, model, prompt, apiKey, imageURLs, outputFunc, shouldUseMarkdown)
+func (c *Client) Chat(model, prompt string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) error {
+	return c.ChatWithContext(context.Background(), model, prompt, imageURLs, outputFunc, shouldUseMarkdown)
 }
 
 // ChatWithContext performs a chat request with context support for cancellation and streams the response content with selective markdown rendering.
-func (c *Client) ChatWithContext(ctx context.Context, backend, model, prompt, apiKey string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) error {
+func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) error {
 	model = dmrm.NormalizeModelName(model)
 	if !strings.Contains(strings.Trim(model, "/"), "/") {
 		// Do an extra API call to check if the model parameter isn't a model ID.
@@ -417,20 +401,13 @@ func (c *Client) ChatWithContext(ctx context.Context, backend, model, prompt, ap
 		return fmt.Errorf("error marshaling request: %w", err)
 	}
 
-	var completionsPath string
-	if backend != "" {
-		completionsPath = inference.InferencePrefix + "/" + backend + "/v1/chat/completions"
-	} else {
-		completionsPath = inference.InferencePrefix + "/v1/chat/completions"
-	}
+	completionsPath := inference.InferencePrefix + "/v1/chat/completions"
 
 	resp, err := c.doRequestWithAuthContext(
 		ctx,
 		http.MethodPost,
 		completionsPath,
 		bytes.NewReader(jsonData),
-		backend,
-		apiKey,
 	)
 	if err != nil {
 		return c.handleQueryError(err, completionsPath)
@@ -785,15 +762,15 @@ func (c *Client) Requests(modelFilter string, streaming bool, includeExisting bo
 
 // doRequest is a helper function that performs HTTP requests and handles 503 responses
 func (c *Client) doRequest(method, path string, body io.Reader) (*http.Response, error) {
-	return c.doRequestWithAuth(method, path, body, "", "")
+	return c.doRequestWithAuth(method, path, body)
 }
 
 // doRequestWithAuth is a helper function that performs HTTP requests with optional authentication
-func (c *Client) doRequestWithAuth(method, path string, body io.Reader, backend, apiKey string) (*http.Response, error) {
-	return c.doRequestWithAuthContext(context.Background(), method, path, body, backend, apiKey)
+func (c *Client) doRequestWithAuth(method, path string, body io.Reader) (*http.Response, error) {
+	return c.doRequestWithAuthContext(context.Background(), method, path, body)
 }
 
-func (c *Client) doRequestWithAuthContext(ctx context.Context, method, path string, body io.Reader, backend, apiKey string) (*http.Response, error) {
+func (c *Client) doRequestWithAuthContext(ctx context.Context, method, path string, body io.Reader) (*http.Response, error) {
 	req, err := http.NewRequestWithContext(ctx, method, c.modelRunner.URL(path), body)
 	if err != nil {
 		return nil, fmt.Errorf("error creating request: %w", err)
@@ -804,11 +781,6 @@ func (c *Client) doRequestWithAuthContext(ctx context.Context, method, path stri
 
 	req.Header.Set("User-Agent", "docker-model-cli/"+Version)
 
-	// Add Authorization header for OpenAI backend
-	if apiKey != "" {
-		req.Header.Set("Authorization", "Bearer "+apiKey)
-	}
-
 	resp, err := c.modelRunner.Client().Do(req)
 	if err != nil {
 		return nil, err
diff --git a/cmd/cli/desktop/desktop_test.go b/cmd/cli/desktop/desktop_test.go
index e4f5d881d..02e0032bf 100644
--- a/cmd/cli/desktop/desktop_test.go
+++ b/cmd/cli/desktop/desktop_test.go
@@ -63,7 +63,7 @@ func TestChatHuggingFaceModel(t *testing.T) {
 		Body: io.NopCloser(bytes.NewBufferString("data: {\"choices\":[{\"delta\":{\"content\":\"Hello there!\"}}]}\n")),
 	}, nil)
 
-	err := client.Chat("", modelName, prompt, "", nil, func(s string) {}, false)
+	err := client.Chat(modelName, prompt, nil, func(s string) {}, false)
 	assert.NoError(t, err)
 }
diff --git a/cmd/cli/docs/reference/docker_model_list.yaml b/cmd/cli/docs/reference/docker_model_list.yaml
index 778cd4de5..ee713d525 100644
--- a/cmd/cli/docs/reference/docker_model_list.yaml
+++ b/cmd/cli/docs/reference/docker_model_list.yaml
@@ -6,15 +6,6 @@ usage: docker model list [OPTIONS]
 pname: docker model
 plink: docker_model.yaml
 options:
-    - option: backend
-      value_type: string
-      description: Specify the backend to use (llama.cpp, openai, vllm)
-      deprecated: false
-      hidden: true
-      experimental: false
-      experimentalcli: false
-      kubernetes: false
-      swarm: false
     - option: json
       value_type: bool
      default_value: "false"
diff --git a/cmd/cli/docs/reference/docker_model_run.yaml b/cmd/cli/docs/reference/docker_model_run.yaml
index 13ad91ff3..e8dbc9fc0 100644
--- a/cmd/cli/docs/reference/docker_model_run.yaml
+++ b/cmd/cli/docs/reference/docker_model_run.yaml
@@ -10,15 +10,6 @@ usage: docker model run MODEL [PROMPT]
 pname: docker model
 plink: docker_model.yaml
 options:
-    - option: backend
-      value_type: string
-      description: Specify the backend to use (llama.cpp, openai, vllm)
-      deprecated: false
-      hidden: true
-      experimental: false
-      experimentalcli: false
-      kubernetes: false
-      swarm: false
     - option: color
      value_type: string
      default_value: auto