
Commit 03d7d68

Add conversation history support to chat modes
The interactive chat modes now maintain conversation context across exchanges by tracking user inputs and assistant responses. This allows more natural multi-turn conversations in which the assistant can reference previous messages from the current session. The change introduces a new API method, ChatWithMessagesContext, which accepts conversation history and returns the assistant's response for history tracking. Both the readline-based and basic interactive modes now use this enhanced functionality.

Signed-off-by: Eric Curtin <[email protected]>
1 parent fb80c6d commit 03d7d68
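
For orientation, here is a minimal sketch of how a caller might drive the new method across two turns. The identifiers come from this commit; the context, client construction, and model name are illustrative assumptions, not part of the change.

// Sketch only: assumes an initialized *desktop.Client and a model name,
// neither of which is specified by this commit.
func twoTurnChat(ctx context.Context, client *desktop.Client, model string) error {
	var history []desktop.OpenAIChatMessage

	// Turn 1: no prior history; the method appends the current prompt itself.
	reply, err := client.ChatWithMessagesContext(ctx, model, history, "What is Go?", nil,
		func(chunk string) { fmt.Print(chunk) }, false)
	if err != nil {
		return err
	}

	// Record both sides of the completed exchange, mirroring the run.go changes below.
	history = append(history,
		desktop.OpenAIChatMessage{Role: "user", Content: "What is Go?"},
		desktop.OpenAIChatMessage{Role: "assistant", Content: reply},
	)

	// Turn 2: the model can now resolve "it" from the history.
	_, err = client.ChatWithMessagesContext(ctx, model, history, "Who created it?", nil,
		func(chunk string) { fmt.Print(chunk) }, false)
	return err
}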

2 files changed: 208 additions, 10 deletions

cmd/cli/commands/run.go

Lines changed: 35 additions & 10 deletions
@@ -154,6 +154,7 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 
 	var sb strings.Builder
 	var multiline bool
+	var conversationHistory []desktop.OpenAIChatMessage
 
 	// Add a helper function to handle file inclusion when @ is pressed
 	// We'll implement a basic version here that shows a message when @ is pressed
@@ -245,7 +246,7 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 			}
 		}()
 
-		err := chatWithMarkdownContext(chatCtx, cmd, desktopClient, model, userInput)
+		assistantResponse, err := chatWithMarkdownContext(chatCtx, cmd, desktopClient, model, userInput, conversationHistory)
 
 		// Clean up signal handler
 		signal.Stop(sigChan)
@@ -263,6 +264,16 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 			continue
 		}
 
+		// Add the user message and assistant response to conversation history
+		conversationHistory = append(conversationHistory, desktop.OpenAIChatMessage{
+			Role:    "user",
+			Content: userInput,
+		})
+		conversationHistory = append(conversationHistory, desktop.OpenAIChatMessage{
+			Role:    "assistant",
+			Content: assistantResponse,
+		})
+
 		cmd.Println()
 		sb.Reset()
 	}
@@ -272,6 +283,8 @@ func generateInteractiveWithReadline(cmd *cobra.Command, desktopClient *desktop.
 // generateInteractiveBasic provides a basic interactive mode (fallback)
 func generateInteractiveBasic(cmd *cobra.Command, desktopClient *desktop.Client, model string) error {
 	scanner := bufio.NewScanner(os.Stdin)
+	var conversationHistory []desktop.OpenAIChatMessage
+
 	for {
 		userInput, err := readMultilineInput(cmd, scanner)
 		if err != nil {
@@ -306,7 +319,7 @@ func generateInteractiveBasic(cmd *cobra.Command, desktopClient *desktop.Client,
 			}
 		}()
 
-		err = chatWithMarkdownContext(chatCtx, cmd, desktopClient, model, userInput)
+		assistantResponse, err := chatWithMarkdownContext(chatCtx, cmd, desktopClient, model, userInput, conversationHistory)
 
 		cancelChat()
 		signal.Stop(sigChan)
@@ -322,6 +335,16 @@ func generateInteractiveBasic(cmd *cobra.Command, desktopClient *desktop.Client,
 			continue
 		}
 
+		// Add the user message and assistant response to conversation history
+		conversationHistory = append(conversationHistory, desktop.OpenAIChatMessage{
+			Role:    "user",
+			Content: userInput,
+		})
+		conversationHistory = append(conversationHistory, desktop.OpenAIChatMessage{
+			Role:    "assistant",
+			Content: assistantResponse,
+		})
+
 		cmd.Println()
 	}
 	return nil
@@ -509,40 +532,42 @@ func renderMarkdown(content string) (string, error) {
 
 // chatWithMarkdown performs chat and streams the response with selective markdown rendering.
 func chatWithMarkdown(cmd *cobra.Command, client *desktop.Client, model, prompt string) error {
-	return chatWithMarkdownContext(cmd.Context(), cmd, client, model, prompt)
+	_, err := chatWithMarkdownContext(cmd.Context(), cmd, client, model, prompt, nil)
+	return err
 }
 
 // chatWithMarkdownContext performs chat with context support and streams the response with selective markdown rendering.
-func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *desktop.Client, model, prompt string) error {
+// It accepts an optional conversation history and returns the assistant's response for history tracking.
+func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *desktop.Client, model, prompt string, conversationHistory []desktop.OpenAIChatMessage) (string, error) {
 	colorMode, _ := cmd.Flags().GetString("color")
 	useMarkdown := shouldUseMarkdown(colorMode)
 	debug, _ := cmd.Flags().GetBool("debug")
 
 	// Process file inclusions first (files referenced with @ symbol)
 	prompt, err := processFileInclusions(prompt)
 	if err != nil {
-		return fmt.Errorf("failed to process file inclusions: %w", err)
+		return "", fmt.Errorf("failed to process file inclusions: %w", err)
 	}
 
 	var imageURLs []string
 	cleanedPrompt, imgs, err := processImagesInPrompt(prompt)
 	if err != nil {
-		return fmt.Errorf("failed to process images: %w", err)
+		return "", fmt.Errorf("failed to process images: %w", err)
 	}
 	prompt = cleanedPrompt
 	imageURLs = imgs
 
 	if !useMarkdown {
 		// Simple case: just stream as plain text
-		return client.ChatWithContext(ctx, model, prompt, imageURLs, func(content string) {
+		return client.ChatWithMessagesContext(ctx, model, conversationHistory, prompt, imageURLs, func(content string) {
 			cmd.Print(content)
 		}, false)
 	}
 
 	// For markdown: use streaming buffer to render code blocks as they complete
 	markdownBuffer := NewStreamingMarkdownBuffer()
 
-	err = client.ChatWithContext(ctx, model, prompt, imageURLs, func(content string) {
+	assistantResponse, err := client.ChatWithMessagesContext(ctx, model, conversationHistory, prompt, imageURLs, func(content string) {
		// Use the streaming markdown buffer to intelligently render content
 		rendered, err := markdownBuffer.AddContent(content, true)
 		if err != nil {
@@ -556,15 +581,15 @@ func chatWithMarkdownContext(ctx context.Context, cmd *cobra.Command, client *de
 		}
 	}, true)
 	if err != nil {
-		return err
+		return assistantResponse, err
 	}
 
 	// Flush any remaining content from the markdown buffer
 	if remaining, flushErr := markdownBuffer.Flush(true); flushErr == nil && remaining != "" {
 		cmd.Print(remaining)
 	}
 
-	return nil
+	return assistantResponse, nil
 }
 
 func newRunCmd() *cobra.Command {
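
Net effect of the run.go changes: the interactive loops keep only completed exchanges in conversationHistory, while the in-flight prompt is appended inside ChatWithMessagesContext (see desktop.go below). After one exchange, the slice passed on the next request looks roughly like this (illustrative content, not captured data):

// Illustrative history after one completed exchange.
conversationHistory := []desktop.OpenAIChatMessage{
	{Role: "user", Content: "What is Go?"},
	{Role: "assistant", Content: "Go is a statically typed, compiled language ..."},
}
// The next call passes only this slice; the new prompt is appended to the
// messages array inside ChatWithMessagesContext, not by the caller.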

cmd/cli/desktop/desktop.go

Lines changed: 173 additions & 0 deletions
@@ -350,6 +350,179 @@ func (c *Client) Chat(model, prompt string, imageURLs []string, outputFunc func(
 	return c.ChatWithContext(context.Background(), model, prompt, imageURLs, outputFunc, shouldUseMarkdown)
 }
 
+// ChatWithMessagesContext performs a chat request with conversation history and returns the assistant's response.
+// This allows maintaining conversation context across multiple exchanges.
+func (c *Client) ChatWithMessagesContext(ctx context.Context, model string, conversationHistory []OpenAIChatMessage, prompt string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) (string, error) {
+	// Build the current user message content - either simple string or multimodal array
+	var messageContent interface{}
+	if len(imageURLs) > 0 {
+		// Multimodal message with images
+		contentParts := make([]ContentPart, 0, len(imageURLs))
+
+		// Add all images first
+		for _, imageURL := range imageURLs {
+			contentParts = append(contentParts, ContentPart{
+				Type: "image_url",
+				ImageURL: &ImageURL{
+					URL: imageURL,
+				},
+			})
+		}
+
+		// Add text prompt if present
+		if prompt != "" {
+			contentParts = append(contentParts, ContentPart{
+				Type: "text",
+				Text: prompt,
+			})
+		}
+
+		messageContent = contentParts
+	} else {
+		// Simple text-only message
+		messageContent = prompt
+	}
+
+	// Build messages array with conversation history plus current message
+	messages := make([]OpenAIChatMessage, 0, len(conversationHistory)+1)
+	messages = append(messages, conversationHistory...)
+	messages = append(messages, OpenAIChatMessage{
+		Role:    "user",
+		Content: messageContent,
+	})
+
+	reqBody := OpenAIChatRequest{
+		Model:    model,
+		Messages: messages,
+		Stream:   true,
+	}
+
+	jsonData, err := json.Marshal(reqBody)
+	if err != nil {
+		return "", fmt.Errorf("error marshaling request: %w", err)
+	}
+
+	completionsPath := c.modelRunner.OpenAIPathPrefix() + "/chat/completions"
+
+	resp, err := c.doRequestWithAuthContext(
+		ctx,
+		http.MethodPost,
+		completionsPath,
+		bytes.NewReader(jsonData),
+	)
+	if err != nil {
+		return "", c.handleQueryError(err, completionsPath)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		body, _ := io.ReadAll(resp.Body)
+		return "", fmt.Errorf("error response: status=%d body=%s", resp.StatusCode, body)
+	}
+
+	type chatPrinterState int
+	const (
+		chatPrinterNone chatPrinterState = iota
+		chatPrinterReasoning
+		chatPrinterContent
+	)
+
+	printerState := chatPrinterNone
+	reasoningFmt := color.New().Add(color.Italic)
+
+	var assistantResponse strings.Builder
+	var finalUsage *struct {
+		CompletionTokens int `json:"completion_tokens"`
+		PromptTokens     int `json:"prompt_tokens"`
+		TotalTokens      int `json:"total_tokens"`
+	}
+
+	scanner := bufio.NewScanner(resp.Body)
+	for scanner.Scan() {
+		// Check if context was cancelled
+		select {
+		case <-ctx.Done():
+			return assistantResponse.String(), ctx.Err()
+		default:
+		}
+
+		line := scanner.Text()
+		if line == "" {
+			continue
+		}
+
+		if !strings.HasPrefix(line, "data: ") {
+			continue
+		}
+
+		data := strings.TrimPrefix(line, "data: ")
+
+		if data == "[DONE]" {
+			break
+		}
+
+		var streamResp OpenAIChatResponse
+		if err := json.Unmarshal([]byte(data), &streamResp); err != nil {
+			return assistantResponse.String(), fmt.Errorf("error parsing stream response: %w", err)
+		}
+
+		if streamResp.Usage != nil {
+			finalUsage = streamResp.Usage
+		}
+
+		if len(streamResp.Choices) > 0 {
+			if streamResp.Choices[0].Delta.ReasoningContent != "" {
+				chunk := streamResp.Choices[0].Delta.ReasoningContent
+				if printerState == chatPrinterContent {
+					outputFunc("\n\n")
+				}
+				if printerState != chatPrinterReasoning {
+					const thinkingHeader = "Thinking:\n"
+					if reasoningFmt != nil {
+						reasoningFmt.Print(thinkingHeader)
+					} else {
+						outputFunc(thinkingHeader)
+					}
+				}
+				printerState = chatPrinterReasoning
+				if reasoningFmt != nil {
+					reasoningFmt.Print(chunk)
+				} else {
+					outputFunc(chunk)
+				}
+			}
+			if streamResp.Choices[0].Delta.Content != "" {
+				chunk := streamResp.Choices[0].Delta.Content
+				if printerState == chatPrinterReasoning {
+					outputFunc("\n\n--\n\n")
+				}
+				printerState = chatPrinterContent
+				outputFunc(chunk)
+				assistantResponse.WriteString(chunk)
+			}
+		}
+	}
+
+	if err := scanner.Err(); err != nil {
+		return assistantResponse.String(), fmt.Errorf("error reading response stream: %w", err)
+	}
+
+	if finalUsage != nil {
+		usageInfo := fmt.Sprintf("\n\nToken usage: %d prompt + %d completion = %d total",
+			finalUsage.PromptTokens,
+			finalUsage.CompletionTokens,
+			finalUsage.TotalTokens)
+
+		usageFmt := color.New(color.FgHiBlack)
+		if !shouldUseMarkdown {
+			usageFmt.DisableColor()
+		}
+		outputFunc(usageFmt.Sprint(usageInfo))
+	}
+
+	return assistantResponse.String(), nil
+}
+
 // ChatWithContext performs a chat request with context support for cancellation and streams the response content with selective markdown rendering.
 func (c *Client) ChatWithContext(ctx context.Context, model, prompt string, imageURLs []string, outputFunc func(string), shouldUseMarkdown bool) error {
 	// Build the message content - either simple string or multimodal array
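
The streaming loop in ChatWithMessagesContext consumes OpenAI-style server-sent events. The following self-contained sketch exercises the same framing logic ("data: " prefix, "[DONE]" sentinel) against a canned stream; the payloads are illustrative, and the anonymous struct is a pared-down stand-in for OpenAIChatResponse:

package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"strings"
)

func main() {
	// Two content deltas followed by the end-of-stream sentinel.
	stream := "data: {\"choices\":[{\"delta\":{\"content\":\"Hel\"}}]}\n" +
		"data: {\"choices\":[{\"delta\":{\"content\":\"lo\"}}]}\n" +
		"data: [DONE]\n"

	scanner := bufio.NewScanner(strings.NewReader(stream))
	for scanner.Scan() {
		line := scanner.Text()
		if !strings.HasPrefix(line, "data: ") {
			continue // skip blank lines and non-data frames
		}
		data := strings.TrimPrefix(line, "data: ")
		if data == "[DONE]" {
			break
		}
		var resp struct {
			Choices []struct {
				Delta struct {
					Content string `json:"content"`
				} `json:"delta"`
			} `json:"choices"`
		}
		if err := json.Unmarshal([]byte(data), &resp); err != nil {
			continue
		}
		if len(resp.Choices) > 0 {
			fmt.Print(resp.Choices[0].Delta.Content)
		}
	}
	fmt.Println() // prints "Hello"
}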
