44 changes: 0 additions & 44 deletions cmd/cli/commands/backend.go

This file was deleted.

28 changes: 6 additions & 22 deletions cmd/cli/commands/list.go
@@ -19,33 +19,19 @@ import (
 
 func newListCmd() *cobra.Command {
 	var jsonFormat, openai, quiet bool
-	var backend string
 	c := &cobra.Command{
 		Use: "list [OPTIONS]",
 		Aliases: []string{"ls"},
 		Short: "List the models pulled to your local environment",
 		RunE: func(cmd *cobra.Command, args []string) error {
-			// Validate backend if specified
-			if backend != "" {
-				if err := validateBackend(backend); err != nil {
-					return err
-				}
-			}
-
-			if (backend == "openai" || openai) && quiet {
+			if openai && quiet {
 				return fmt.Errorf("--quiet flag cannot be used with --openai flag or OpenAI backend")
 			}
 
-			// Validate API key for OpenAI backend
-			apiKey, err := ensureAPIKey(backend)
-			if err != nil {
-				return err
-			}
-
 			// If we're doing an automatic install, only show the installation
 			// status if it won't corrupt machine-readable output.
 			var standaloneInstallPrinter standalone.StatusPrinter
-			if !jsonFormat && !openai && !quiet && backend == "" {
+			if !jsonFormat && !openai && !quiet {
 				standaloneInstallPrinter = cmd
 			}
 			if _, err := ensureStandaloneRunnerAvailable(cmd.Context(), standaloneInstallPrinter); err != nil {
@@ -55,7 +41,7 @@ func newListCmd() *cobra.Command {
 			if len(args) > 0 {
 				modelFilter = args[0]
 			}
-			models, err := listModels(openai, backend, desktopClient, quiet, jsonFormat, apiKey, modelFilter)
+			models, err := listModels(openai, desktopClient, quiet, jsonFormat, modelFilter)
 			if err != nil {
 				return err
 			}
@@ -67,14 +53,12 @@ func newListCmd() *cobra.Command {
 	c.Flags().BoolVar(&jsonFormat, "json", false, "List models in a JSON format")
 	c.Flags().BoolVar(&openai, "openai", false, "List models in an OpenAI format")
 	c.Flags().BoolVarP(&quiet, "quiet", "q", false, "Only show model IDs")
-	c.Flags().StringVar(&backend, "backend", "", fmt.Sprintf("Specify the backend to use (%s)", ValidBackendsKeys()))
-	c.Flags().MarkHidden("backend")
 	return c
 }
 
-func listModels(openai bool, backend string, desktopClient *desktop.Client, quiet bool, jsonFormat bool, apiKey string, modelFilter string) (string, error) {
-	if openai || backend == "openai" {
-		models, err := desktopClient.ListOpenAI(backend, apiKey)
+func listModels(openai bool, desktopClient *desktop.Client, quiet bool, jsonFormat bool, modelFilter string) (string, error) {
+	if openai {
+		models, err := desktopClient.ListOpenAI()
 		if err != nil {
 			err = handleClientError(err, "Failed to list models")
 			return "", handleNotRunningError(err)
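
Note: with the per-backend routing gone, `ListOpenAI` reduces to a single unauthenticated GET against the runner's OpenAI-compatible models route. A self-contained sketch of that request shape, assuming a hypothetical runner address and route prefix (stand-ins for the real model-runner URL and `inference.InferencePrefix`, not values from this PR):

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

// openAIModelList mirrors the minimal shape of an OpenAI /v1/models reply.
type openAIModelList struct {
	Data []struct {
		ID string `json:"id"`
	} `json:"data"`
}

func main() {
	// Hypothetical runner address and prefix; the real values come from the
	// model-runner configuration and inference.InferencePrefix.
	const baseURL = "http://localhost:12434"
	const inferencePrefix = "/engines"

	resp, err := http.Get(baseURL + inferencePrefix + "/v1/models")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var models openAIModelList
	if err := json.NewDecoder(resp.Body).Decode(&models); err != nil {
		panic(err)
	}
	for _, m := range models.Data {
		fmt.Println(m.ID)
	}
}
```

Listing via this route needs no Authorization header anymore, which is what lets `ensureAPIKey` and the `apiKey` parameter disappear from the call chain above.
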
63 changes: 22 additions & 41 deletions cmd/cli/commands/run.go
@@ -87,7 +87,7 @@ func readMultilineInput(cmd *cobra.Command, scanner *bufio.Scanner) (string, err
 }
 
 // generateInteractiveWithReadline provides an enhanced interactive mode with readline support
-func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.Client, backend, model, apiKey string) error {
+func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.Client, model string) error {
 	usage := func() {
 		fmt.Fprintln(os.Stderr, "Available Commands:")
 		fmt.Fprintln(os.Stderr, "  /bye  Exit")
@@ -122,7 +122,7 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 	})
 	if err != nil {
 		// Fall back to basic input mode if readline initialization fails
-		return generateInteractiveBasic(cmd, desktopClient, backend, model, apiKey)
+		return generateInteractiveBasic(cmd, desktopClient, model)
 	}
 
 	// Disable history if the environment variable is set
@@ -221,7 +221,7 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 			}
 		}()
 
-		err := chatWithMarkdownContext(chatCtx, cmd, desktopClient, backend, model, userInput, apiKey)
+		err := chatWithMarkdownContext(chatCtx, cmd, desktopClient, model, userInput)
 
 		// Clean up signal handler
 		signal.Stop(sigChan)
@@ -246,7 +246,7 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 }
 
 // generateInteractiveBasic provides a basic interactive mode (fallback)
-func generateInteractiveBasic(cmd *cobra.Command, desktopClient *desktop.Client, backend, model, apiKey string) error {
+func generateInteractiveBasic(cmd *cobra.Command, desktopClient *desktop.Client, model string) error {
 	scanner := bufio.NewScanner(os.Stdin)
 	for {
 		userInput, err := readMultilineInput(cmd, scanner)
@@ -282,7 +282,7 @@ func generateInteractiveBasic(cmd *cobra.Command, desktopClient *desktop.Client,
 			}
 		}()
 
-		err = chatWithMarkdownContext(chatCtx, cmd, desktopClient, backend, model, userInput, apiKey)
+		err = chatWithMarkdownContext(chatCtx, cmd, desktopClient, model, userInput)
 
 		cancelChat()
 		signal.Stop(sigChan)
@@ -484,12 +484,12 @@ func renderMarkdown(content string) (string, error) {
 }
 
 // chatWithMarkdown performs chat and streams the response with selective markdown rendering.
-func chatWithMarkdown(cmd *cobra.Command, client *desktop.Client, backend, model, prompt, apiKey string) error {
-	return chatWithMarkdownContext(cmd.Context(), cmd, client, backend, model, prompt, apiKey)
+func chatWithMarkdown(cmd *cobra.Command, client *desktop.Client, model, prompt string) error {
+	return chatWithMarkdownContext(cmd.Context(), cmd, client, model, prompt)
 }
 
 // chatWithMarkdownContext performs chat with context support and streams the response with selective markdown rendering.
-func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *desktop.Client, backend, model, prompt, apiKey string) error {
+func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *desktop.Client, model, prompt string) error {
 	colorMode, _ := cmd.Flags().GetString("color")
 	useMarkdown := shouldUseMarkdown(colorMode)
 	debug, _ := cmd.Flags().GetBool("debug")
@@ -504,15 +504,15 @@ func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *de
 
 	if !useMarkdown {
 		// Simple case: just stream as plain text
-		return client.ChatWithContext(ctx, backend, model, prompt, apiKey, imageURLs, func(content string) {
+		return client.ChatWithContext(ctx, model, prompt, imageURLs, func(content string) {
 			cmd.Print(content)
 		}, false)
 	}
 
 	// For markdown: use streaming buffer to render code blocks as they complete
 	markdownBuffer := NewStreamingMarkdownBuffer()
 
-	err = client.ChatWithContext(ctx, backend, model, prompt, apiKey, imageURLs, func(content string) {
+	err = client.ChatWithContext(ctx, model, prompt, imageURLs, func(content string) {
 		// Use the streaming markdown buffer to intelligently render content
 		rendered, err := markdownBuffer.AddContent(content, true)
 		if err != nil {
@@ -539,7 +539,6 @@ func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *de
 
 func newRunCmd() *cobra.Command {
 	var debug bool
-	var backend string
 	var ignoreRuntimeMemoryCheck bool
 	var colorMode string
 	var detach bool
@@ -557,19 +556,6 @@ func newRunCmd() *cobra.Command {
 			}
 		},
 		RunE: func(cmd *cobra.Command, args []string) error {
-			// Validate backend if specified
-			if backend != "" {
-				if err := validateBackend(backend); err != nil {
-					return err
-				}
-			}
-
-			// Validate API key for OpenAI backend
-			apiKey, err := ensureAPIKey(backend)
-			if err != nil {
-				return err
-			}
-
 			// Normalize model name to add default org and tag if missing
 			model := models.NormalizeModelName(args[0])
 			prompt := ""
@@ -607,24 +593,21 @@ func newRunCmd() *cobra.Command {
 				return fmt.Errorf("unable to initialize standalone model runner: %w", err)
 			}
 
-			// Do not validate the model in case of using OpenAI's backend, let OpenAI handle it
-			if backend != "openai" {
-				_, err := desktopClient.Inspect(model, false)
-				if err != nil {
-					if !errors.Is(err, desktop.ErrNotFound) {
-						return handleNotRunningError(handleClientError(err, "Failed to inspect model"))
-					}
-					cmd.Println("Unable to find model '" + model + "' locally. Pulling from the server.")
-					if err := pullModel(cmd, desktopClient, model, ignoreRuntimeMemoryCheck); err != nil {
-						return err
-					}
-				}
+			_, err := desktopClient.Inspect(model, false)
+			if err != nil {
+				if !errors.Is(err, desktop.ErrNotFound) {
+					return handleNotRunningError(handleClientError(err, "Failed to inspect model"))
+				}
+				cmd.Println("Unable to find model '" + model + "' locally. Pulling from the server.")
+				if err := pullModel(cmd, desktopClient, model, ignoreRuntimeMemoryCheck); err != nil {
+					return err
+				}
 			}
 
 			// Handle --detach flag: just load the model without interaction
 			if detach {
 				// Make a minimal request to load the model into memory
-				err := desktopClient.Chat(backend, model, "", apiKey, nil, func(content string) {
+				err := desktopClient.Chat(model, "", nil, func(content string) {
 					// Silently discard output in detach mode
 				}, false)
 				if err != nil {
@@ -637,7 +620,7 @@ func newRunCmd() *cobra.Command {
 			}
 
 			if prompt != "" {
-				if err := chatWithMarkdown(cmd, desktopClient, backend, model, prompt, apiKey); err != nil {
+				if err := chatWithMarkdown(cmd, desktopClient, model, prompt); err != nil {
 					return handleClientError(err, "Failed to generate a response")
 				}
 				cmd.Println()
@@ -646,11 +629,11 @@ func newRunCmd() *cobra.Command {
 
 			// Use enhanced readline-based interactive mode when terminal is available
 			if term.IsTerminal(int(os.Stdin.Fd())) {
-				return generateInteractiveWithReadline(cmd, desktopClient, backend, model, apiKey)
+				return generateInteractiveWithReadline(cmd, desktopClient, model)
 			}
 
 			// Fall back to basic mode if not a terminal
-			return generateInteractiveBasic(cmd, desktopClient, backend, model, apiKey)
+			return generateInteractiveBasic(cmd, desktopClient, model)
 		},
 		ValidArgsFunction: completion.ModelNames(getDesktopClient, 1),
 	}
@@ -667,8 +650,6 @@ func newRunCmd() *cobra.Command {
 	}
 
 	c.Flags().BoolVar(&debug, "debug", false, "Enable debug logging")
-	c.Flags().StringVar(&backend, "backend", "", fmt.Sprintf("Specify the backend to use (%s)", ValidBackendsKeys()))
-	c.Flags().MarkHidden("backend")
 	c.Flags().BoolVar(&ignoreRuntimeMemoryCheck, "ignore-runtime-memory-check", false, "Do not block pull if estimated runtime memory for model exceeds system resources.")
 	c.Flags().StringVar(&colorMode, "color", "auto", "Use colored output (auto|yes|no)")
 	c.Flags().BoolVarP(&detach, "detach", "d", false, "Load the model in the background without interaction")
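
Note: the run loop's per-turn interrupt handling is unchanged by this PR and is worth keeping in mind when reviewing the signature changes: each turn gets a fresh cancellable context plus a SIGINT handler that is stopped afterwards, so Ctrl-C aborts only the in-flight response, not the REPL. A self-contained sketch of that pattern, with `chatOnce` as an illustrative stand-in for the streaming chat call (names here are invented, not the PR's code):

```go
package main

import (
	"context"
	"fmt"
	"os"
	"os/signal"
	"time"
)

// chatOnce stands in for one streaming chat request; it finishes after a
// delay unless the context is cancelled first.
func chatOnce(ctx context.Context) error {
	select {
	case <-time.After(3 * time.Second):
		fmt.Println("response complete")
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

func main() {
	// One cancellable context per chat turn.
	chatCtx, cancelChat := context.WithCancel(context.Background())
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, os.Interrupt)
	go func() {
		<-sigChan
		cancelChat() // Ctrl-C aborts only the in-flight response
	}()

	err := chatOnce(chatCtx)

	// Remove the handler so the next turn can install a fresh one.
	signal.Stop(sigChan)
	cancelChat()
	if err != nil {
		fmt.Println("chat aborted:", err)
	}
}
```
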
54 changes: 13 additions & 41 deletions cmd/cli/desktop/desktop.go
@@ -24,8 +24,6 @@ import (
 	"go.opentelemetry.io/otel"
 )
 
-const DefaultBackend = "llama.cpp"
-
 var (
 	ErrNotFound = errors.New("model not found")
 	ErrServiceUnavailable = errors.New("service unavailable")
@@ -233,32 +231,18 @@ func (c *Client) List() ([]dmrm.Model, error) {
 	return modelsJson, nil
 }
 
-func (c *Client) ListOpenAI(backend, apiKey string) (dmrm.OpenAIModelList, error) {
-	if backend == "" {
-		backend = DefaultBackend
-	}
-	modelsRoute := fmt.Sprintf("%s/%s/v1/models", inference.InferencePrefix, backend)
-
-	// Use doRequestWithAuth to support API key authentication
-	resp, err := c.doRequestWithAuth(http.MethodGet, modelsRoute, nil, "openai", apiKey)
-	if err != nil {
-		return dmrm.OpenAIModelList{}, c.handleQueryError(err, modelsRoute)
-	}
-	defer resp.Body.Close()
-
-	if resp.StatusCode != http.StatusOK {
-		return dmrm.OpenAIModelList{}, fmt.Errorf("failed to list models: %s", resp.Status)
-	}
-
-	body, err := io.ReadAll(resp.Body)
+func (c *Client) ListOpenAI() (dmrm.OpenAIModelList, error) {
+	modelsRoute := inference.InferencePrefix + "/v1/models"
+	body, err := c.listRaw(modelsRoute, "")
 	if err != nil {
-		return dmrm.OpenAIModelList{}, fmt.Errorf("failed to read response body: %w", err)
+		return dmrm.OpenAIModelList{}, err
 	}
 
 	var modelsJson dmrm.OpenAIModelList
 	if err := json.Unmarshal(body, &modelsJson); err != nil {
 		return modelsJson, fmt.Errorf("failed to unmarshal response body: %w", err)
 	}
 
 	return modelsJson, nil
 }
@@ -357,12 +341,12 @@ func (c *Client) fullModelID(id string) (string, error) {
 }
 
 // Chat performs a chat request and streams the response content with selective markdown rendering.
-func (c *Client) Chat(backend, model, prompt, apiKey string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) error {
-	return c.ChatWithContext(context.Background(), backend, model, prompt, apiKey, imageURLs, outputFunc, shouldUseMarkdown)
+func (c *Client) Chat(model, prompt string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) error {
+	return c.ChatWithContext(context.Background(), model, prompt, imageURLs, outputFunc, shouldUseMarkdown)
 }
 
 // ChatWithContext performs a chat request with context support for cancellation and streams the response content with selective markdown rendering.
-func (c *Client) ChatWithContext(ctx context.Context, backend, model, prompt, apiKey string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) error {
+func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) error {
 	model = dmrm.NormalizeModelName(model)
 	if !strings.Contains(strings.Trim(model, "/"), "/") {
 		// Do an extra API call to check if the model parameter isn't a model ID.
@@ -417,20 +401,13 @@ func (c *Client) ChatWithContext(ctx context.Context, backend, model, prompt, ap
 		return fmt.Errorf("error marshaling request: %w", err)
 	}
 
-	var completionsPath string
-	if backend != "" {
-		completionsPath = inference.InferencePrefix + "/" + backend + "/v1/chat/completions"
-	} else {
-		completionsPath = inference.InferencePrefix + "/v1/chat/completions"
-	}
+	completionsPath := inference.InferencePrefix + "/v1/chat/completions"
 
 	resp, err := c.doRequestWithAuthContext(
 		ctx,
 		http.MethodPost,
 		completionsPath,
 		bytes.NewReader(jsonData),
-		backend,
-		apiKey,
 	)
 	if err != nil {
 		return c.handleQueryError(err, completionsPath)
@@ -785,15 +762,15 @@ func (c *Client) Requests(modelFilter string, streaming bool, includeExisting bo
 
 // doRequest is a helper function that performs HTTP requests and handles 503 responses
 func (c *Client) doRequest(method, path string, body io.Reader) (*http.Response, error) {
-	return c.doRequestWithAuth(method, path, body, "", "")
+	return c.doRequestWithAuth(method, path, body)
 }
 
 // doRequestWithAuth is a helper function that performs HTTP requests with optional authentication
-func (c *Client) doRequestWithAuth(method, path string, body io.Reader, backend, apiKey string) (*http.Response, error) {
-	return c.doRequestWithAuthContext(context.Background(), method, path, body, backend, apiKey)
+func (c *Client) doRequestWithAuth(method, path string, body io.Reader) (*http.Response, error) {
+	return c.doRequestWithAuthContext(context.Background(), method, path, body)
 }
 
-func (c *Client) doRequestWithAuthContext(ctx context.Context, method, path string, body io.Reader, backend, apiKey string) (*http.Response, error) {
+func (c *Client) doRequestWithAuthContext(ctx context.Context, method, path string, body io.Reader) (*http.Response, error) {
 	req, err := http.NewRequestWithContext(ctx, method, c.modelRunner.URL(path), body)
 	if err != nil {
 		return nil, fmt.Errorf("error creating request: %w", err)
@@ -804,11 +781,6 @@ func (c *Client) doRequestWithAuthContext(ctx context.Context, method, path stri
 
 	req.Header.Set("User-Agent", "docker-model-cli/"+Version)
 
-	// Add Authorization header for OpenAI backend
-	if apiKey != "" {
-		req.Header.Set("Authorization", "Bearer "+apiKey)
-	}
-
 	resp, err := c.modelRunner.Client().Do(req)
 	if err != nil {
 		return nil, err
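
Note: with the Authorization branch removed, the three request helpers collapse into one context-aware builder whose only fixed header is the User-Agent. A minimal sketch of the resulting shape, with an invented `client` struct standing in for `desktop.Client` and its `modelRunner` wiring (the base URL is hypothetical):

```go
package main

import (
	"context"
	"fmt"
	"io"
	"net/http"
)

const version = "dev" // stand-in for the CLI's Version value

// client is an invented stand-in for desktop.Client's transport wiring.
type client struct {
	baseURL string
	httpc   *http.Client
}

func (c *client) doRequestWithContext(ctx context.Context, method, path string, body io.Reader) (*http.Response, error) {
	req, err := http.NewRequestWithContext(ctx, method, c.baseURL+path, body)
	if err != nil {
		return nil, fmt.Errorf("error creating request: %w", err)
	}
	// The only header the simplified helper still sets.
	req.Header.Set("User-Agent", "docker-model-cli/"+version)
	return c.httpc.Do(req)
}

func main() {
	c := &client{baseURL: "http://localhost:12434", httpc: http.DefaultClient}
	resp, err := c.doRequestWithContext(context.Background(), http.MethodGet, "/v1/models", nil)
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}
```

Keeping `doRequest` and `doRequestWithAuth` as thin wrappers preserves existing call sites while the authentication surface goes away.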