Commit 515d22a
Add conversation history support to chat modes
The interactive chat modes now maintain conversation context across exchanges by tracking user inputs and assistant responses. This allows for more natural multi-turn conversations where the assistant can reference previous messages in the current session.

The changes introduce a new API method, ChatWithMessagesContext, that accepts conversation history and returns the assistant's response for history tracking. Both the readline-based and basic interactive modes now use this enhanced functionality.

Signed-off-by: Eric Curtin <[email protected]>
1 parent fb80c6d commit 515d22a
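
To make the change concrete, here is a minimal sketch of a multi-turn loop built on the new API. The ChatWithMessagesContext signature and the OpenAIChatMessage struct are taken from the diff below; the chatLoop helper, the nil imageURLs, and plain-text streaming (shouldUseMarkdown = false) are illustrative choices, not part of the commit.

// Sketch only: a multi-turn chat loop over the new API. The method
// signature and struct fields come from this commit's diff; chatLoop
// itself is a hypothetical caller.
func chatLoop(ctx context.Context, client *desktop.Client, model string, prompts []string) error {
	var history []desktop.OpenAIChatMessage
	for _, prompt := range prompts {
		// Stream the reply as plain text (no images, no markdown) and
		// capture the full assistant response for history tracking.
		reply, err := client.ChatWithMessagesContext(ctx, model, history, prompt, nil,
			func(chunk string) { fmt.Print(chunk) }, false)
		if err != nil {
			return err
		}
		// Record both sides of the exchange so the next request
		// carries the conversation context.
		history = append(history,
			desktop.OpenAIChatMessage{Role: "user", Content: prompt},
			desktop.OpenAIChatMessage{Role: "assistant", Content: reply},
		)
	}
	return nil
}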

File tree: 2 files changed, +66 -108 lines


cmd/cli/commands/run.go

Lines changed: 35 additions & 89 deletions
@@ -140,8 +140,7 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 		AltPlaceholder: `Use """ to end multi-line input`,
 	})
 	if err != nil {
-		// Fall back to basic input mode if readline initialization fails
-		return generateInteractiveBasic(cmd, desktopClient, model)
+		return err
 	}
 
 	// Disable history if the environment variable is set
@@ -154,6 +153,7 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 
 	var sb strings.Builder
 	var multiline bool
+	var conversationHistory []desktop.OpenAIChatMessage
 
 	// Add a helper function to handle file inclusion when @ is pressed
 	// We'll implement a basic version here that shows a message when @ is pressed
@@ -245,7 +245,7 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 				}
 			}()
 
-			err := chatWithMarkdownContext(chatCtx, cmd, desktopClient, model, userInput)
+			assistantResponse, err := chatWithMarkdownContext(chatCtx, cmd, desktopClient, model, userInput, conversationHistory)
 
 			// Clean up signal handler
 			signal.Stop(sigChan)
@@ -263,70 +263,22 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 				continue
 			}
 
+			// Add the user message and assistant response to conversation history
+			conversationHistory = append(conversationHistory, desktop.OpenAIChatMessage{
+				Role:    "user",
+				Content: userInput,
+			})
+			conversationHistory = append(conversationHistory, desktop.OpenAIChatMessage{
+				Role:    "assistant",
+				Content: assistantResponse,
+			})
+
 			cmd.Println()
 			sb.Reset()
 		}
 	}
 }
 
-// generateInteractiveBasic provides a basic interactive mode (fallback)
-func generateInteractiveBasic(cmd *cobra.Command, desktopClient *desktop.Client, model string) error {
-	scanner := bufio.NewScanner(os.Stdin)
-	for {
-		userInput, err := readMultilineInput(cmd, scanner)
-		if err != nil {
-			if err.Error() == "EOF" {
-				break
-			}
-			return fmt.Errorf("Error reading input: %w", err)
-		}
-
-		if strings.ToLower(strings.TrimSpace(userInput)) == "/bye" {
-			break
-		}
-
-		if strings.TrimSpace(userInput) == "" {
-			continue
-		}
-
-		// Create a cancellable context for the chat request
-		// This allows us to cancel the request if the user presses Ctrl+C during response generation
-		chatCtx, cancelChat := context.WithCancel(cmd.Context())
-
-		// Set up signal handler to cancel the context on Ctrl+C
-		sigChan := make(chan os.Signal, 1)
-		signal.Notify(sigChan, syscall.SIGINT)
-		go func() {
-			select {
-			case <-sigChan:
-				cancelChat()
-			case <-chatCtx.Done():
-				// Context cancelled, exit goroutine
-				// Context cancelled, exit goroutine
-			}
-		}()
-
-		err = chatWithMarkdownContext(chatCtx, cmd, desktopClient, model, userInput)
-
-		cancelChat()
-		signal.Stop(sigChan)
-		cancelChat()
-
-		if err != nil {
-			// Check if the error is due to context cancellation (Ctrl+C during response)
-			if errors.Is(err, context.Canceled) {
-				fmt.Println("\nUse Ctrl + d or /bye to exit.")
-			} else {
-				cmd.PrintErrln(handleClientError(err, "Failed to generate a response"))
-			}
-			continue
-		}
-
-		cmd.Println()
-	}
-	return nil
-}
-
 var (
 	markdownRenderer *glamour.TermRenderer
 	lastWidth        int
@@ -509,40 +461,42 @@ func renderMarkdown(content string) (string, error) {
 
 // chatWithMarkdown performs chat and streams the response with selective markdown rendering.
 func chatWithMarkdown(cmd *cobra.Command, client *desktop.Client, model, prompt string) error {
-	return chatWithMarkdownContext(cmd.Context(), cmd, client, model, prompt)
+	_, err := chatWithMarkdownContext(cmd.Context(), cmd, client, model, prompt, nil)
+	return err
 }
 
 // chatWithMarkdownContext performs chat with context support and streams the response with selective markdown rendering.
-func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *desktop.Client, model, prompt string) error {
+// It accepts an optional conversation history and returns the assistant's response for history tracking.
+func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *desktop.Client, model, prompt string, conversationHistory []desktop.OpenAIChatMessage) (string, error) {
 	colorMode, _ := cmd.Flags().GetString("color")
 	useMarkdown := shouldUseMarkdown(colorMode)
 	debug, _ := cmd.Flags().GetBool("debug")
 
 	// Process file inclusions first (files referenced with @ symbol)
 	prompt, err := processFileInclusions(prompt)
 	if err != nil {
-		return fmt.Errorf("failed to process file inclusions: %w", err)
+		return "", fmt.Errorf("failed to process file inclusions: %w", err)
 	}
 
 	var imageURLs []string
 	cleanedPrompt, imgs, err := processImagesInPrompt(prompt)
 	if err != nil {
-		return fmt.Errorf("failed to process images: %w", err)
+		return "", fmt.Errorf("failed to process images: %w", err)
 	}
 	prompt = cleanedPrompt
 	imageURLs = imgs
 
 	if !useMarkdown {
 		// Simple case: just stream as plain text
-		return client.ChatWithContext(ctx, model, prompt, imageURLs, func(content string) {
+		return client.ChatWithMessagesContext(ctx, model, conversationHistory, prompt, imageURLs, func(content string) {
 			cmd.Print(content)
 		}, false)
 	}
 
 	// For markdown: use streaming buffer to render code blocks as they complete
 	markdownBuffer := NewStreamingMarkdownBuffer()
 
-	err = client.ChatWithContext(ctx, model, prompt, imageURLs, func(content string) {
+	assistantResponse, err := client.ChatWithMessagesContext(ctx, model, conversationHistory, prompt, imageURLs, func(content string) {
 		// Use the streaming markdown buffer to intelligently render content
 		rendered, err := markdownBuffer.AddContent(content, true)
 		if err != nil {
@@ -556,15 +510,15 @@ func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *de
 		}
 	}, true)
 	if err != nil {
-		return err
+		return assistantResponse, err
 	}
 
 	// Flush any remaining content from the markdown buffer
 	if remaining, flushErr := markdownBuffer.Flush(true); flushErr == nil && remaining != "" {
 		cmd.Print(remaining)
 	}
 
-	return nil
+	return assistantResponse, nil
 }
 
 func newRunCmd() *cobra.Command {
@@ -641,14 +595,10 @@ func newRunCmd() *cobra.Command {
 			return nil
 		}
 
-		// Interactive mode for external OpenAI endpoint
-		if term.IsTerminal(int(os.Stdin.Fd())) {
-			termenv.SetDefaultOutput(
-				termenv.NewOutput(asPrinter(cmd), termenv.WithColorCache(true)),
-			)
-			return generateInteractiveWithReadline(cmd, openaiClient, model)
-		}
-		return generateInteractiveBasic(cmd, openaiClient, model)
+		termenv.SetDefaultOutput(
+			termenv.NewOutput(asPrinter(cmd), termenv.WithColorCache(true)),
+		)
+		return generateInteractiveWithReadline(cmd, openaiClient, model)
 	}
 
 	if _, err := ensureStandaloneRunnerAvailable(cmd.Context(), asPrinter(cmd), debug); err != nil {
@@ -746,19 +696,15 @@ func newRunCmd() *cobra.Command {
 			return nil
 		}
 
-		// Use enhanced readline-based interactive mode when terminal is available
-		if term.IsTerminal(int(os.Stdin.Fd())) {
-			// Initialize termenv with color caching before starting interactive session.
-			// This queries the terminal background color once and caches it, preventing
-			// OSC response sequences from appearing in stdin during the interactive loop.
-			termenv.SetDefaultOutput(
-				termenv.NewOutput(asPrinter(cmd), termenv.WithColorCache(true)),
-			)
-			return generateInteractiveWithReadline(cmd, desktopClient, model)
-		}
+		// Initialize termenv with color caching before starting interactive session.
+		// This queries the terminal background color once and caches it, preventing
+		// OSC response sequences from appearing in stdin during the interactive loop.
+		termenv.SetDefaultOutput(
+			termenv.NewOutput(asPrinter(cmd), termenv.WithColorCache(true)),
+		)
+
+		return generateInteractiveWithReadline(cmd, desktopClient, model)
 
-		// Fall back to basic mode if not a terminal
-		return generateInteractiveBasic(cmd, desktopClient, model)
 	},
 	ValidArgsFunction: completion.ModelNames(getDesktopClient, 1),
 }

cmd/cli/desktop/desktop.go

Lines changed: 31 additions & 19 deletions
@@ -350,13 +350,14 @@ func (c *Client) Chat(model, prompt string, imageURLs []string, outputFunc func(
 	return c.ChatWithContext(context.Background(), model, prompt, imageURLs, outputFunc, shouldUseMarkdown)
 }
 
-// ChatWithContext performs a chat request with context support for cancellation and streams the response content with selective markdown rendering.
-func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) error {
-	// Build the message content - either simple string or multimodal array
+// ChatWithMessagesContext performs a chat request with conversation history and returns the assistant's response.
+// This allows maintaining conversation context across multiple exchanges.
+func (c *Client) ChatWithMessagesContext(ctx context.Context, model string, conversationHistory []OpenAIChatMessage, prompt string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) (string, error) {
+	// Build the current user message content - either simple string or multimodal array
 	var messageContent interface{}
 	if len(imageURLs) > 0 {
 		// Multimodal message with images
-		contentParts := make([]ContentPart, 0, len(imageURLs))
+		contentParts := make([]ContentPart, 0, len(imageURLs)+1)
 
 		// Add all images first
 		for _, imageURL := range imageURLs {
@@ -382,20 +383,23 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag
 		messageContent = prompt
 	}
 
+	// Build messages array with conversation history plus current message
+	messages := make([]OpenAIChatMessage, 0, len(conversationHistory)+1)
+	messages = append(messages, conversationHistory...)
+	messages = append(messages, OpenAIChatMessage{
+		Role:    "user",
+		Content: messageContent,
+	})
+
 	reqBody := OpenAIChatRequest{
-		Model: model,
-		Messages: []OpenAIChatMessage{
-			{
-				Role:    "user",
-				Content: messageContent,
-			},
-		},
-		Stream: true,
+		Model:    model,
+		Messages: messages,
+		Stream:   true,
 	}
 
 	jsonData, err := json.Marshal(reqBody)
 	if err != nil {
-		return fmt.Errorf("error marshaling request: %w", err)
+		return "", fmt.Errorf("error marshaling request: %w", err)
 	}
 
 	completionsPath := c.modelRunner.OpenAIPathPrefix() + "/chat/completions"
@@ -407,13 +411,13 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag
 		bytes.NewReader(jsonData),
 	)
 	if err != nil {
-		return c.handleQueryError(err, completionsPath)
+		return "", c.handleQueryError(err, completionsPath)
 	}
 	defer resp.Body.Close()
 
 	if resp.StatusCode != http.StatusOK {
 		body, _ := io.ReadAll(resp.Body)
-		return fmt.Errorf("error response: status=%d body=%s", resp.StatusCode, body)
+		return "", fmt.Errorf("error response: status=%d body=%s", resp.StatusCode, body)
 	}
 
 	type chatPrinterState int
@@ -426,6 +430,7 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag
 	printerState := chatPrinterNone
 	reasoningFmt := color.New().Add(color.Italic)
 
+	var assistantResponse strings.Builder
 	var finalUsage *struct {
 		CompletionTokens int `json:"completion_tokens"`
 		PromptTokens     int `json:"prompt_tokens"`
@@ -437,7 +442,7 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag
 		// Check if context was cancelled
 		select {
 		case <-ctx.Done():
-			return ctx.Err()
+			return assistantResponse.String(), ctx.Err()
 		default:
 		}
 
@@ -458,7 +463,7 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag
 
 		var streamResp OpenAIChatResponse
 		if err := json.Unmarshal([]byte(data), &streamResp); err != nil {
-			return fmt.Errorf("error parsing stream response: %w", err)
+			return assistantResponse.String(), fmt.Errorf("error parsing stream response: %w", err)
 		}
 
 		if streamResp.Usage != nil {
@@ -493,12 +498,13 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag
 				}
 				printerState = chatPrinterContent
 				outputFunc(chunk)
+				assistantResponse.WriteString(chunk)
 			}
 		}
 	}
 
 	if err := scanner.Err(); err != nil {
-		return fmt.Errorf("error reading response stream: %w", err)
+		return assistantResponse.String(), fmt.Errorf("error reading response stream: %w", err)
 	}
 
 	if finalUsage != nil {
@@ -514,7 +520,13 @@ func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imag
 		outputFunc(usageFmt.Sprint(usageInfo))
 	}
 
-	return nil
+	return assistantResponse.String(), nil
+}
+
+// ChatWithContext performs a chat request with context support for cancellation and streams the response content with selective markdown rendering.
+func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) error {
+	_, err := c.ChatWithMessagesContext(ctx, model, nil, prompt, imageURLs, outputFunc, shouldUseMarkdown)
+	return err
 }
 
 func (c *Client) Remove(modelArgs []string, force bool) (string, error) {