cagent/pkg/cli/runner.go at 363438529a5d1b38cb68ace6b832b7eaba7335c5 · docker/cagent · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
package cli

import (
	"cmp"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"log/slog"
	"os"
	"path/filepath"
	"strings"

	"github.com/docker/cagent/pkg/chat"
	"github.com/docker/cagent/pkg/input"
	"github.com/docker/cagent/pkg/runtime"
	"github.com/docker/cagent/pkg/session"
	"github.com/docker/cagent/pkg/telemetry"
)

// RuntimeError wraps an error reported while the agent was running, so that
// callers can tell it apart from a usage error (for example to avoid printing
// the message twice or showing usage help).
type RuntimeError struct {
	// Err is the underlying runtime error. It may be nil for a zero value.
	Err error
}

// Error returns the message of the wrapped error. A RuntimeError without a
// wrapped error yields a fixed fallback message instead of panicking on a nil
// dereference.
func (e RuntimeError) Error() string {
	if e.Err == nil {
		return "runtime error"
	}
	return e.Err.Error()
}

// Unwrap returns the wrapped error so errors.Is and errors.As can inspect it.
func (e RuntimeError) Unwrap() error {
	return e.Err
}

// maxAutoExtensions caps how often --yolo mode may auto-continue after the
// iteration limit is hit, so an agent cannot keep extending itself forever.
const maxAutoExtensions = 5

// maxIterAction tells the caller how to react to a MaxIterationsReachedEvent.
type maxIterAction int

const (
	maxIterContinue maxIterAction = iota // extension auto-approved: resume the run
	maxIterStop                          // auto-extension cap exceeded: stop the run
	maxIterPrompt                        // auto-approve is off: ask the user
)

// handleMaxIterationsAutoApprove decides what to do when the iteration limit
// is reached.
//
// Without autoApprove it returns maxIterPrompt and leaves the counter alone.
// With autoApprove it increments the caller-owned counter behind
// autoExtensions and returns maxIterContinue while the counter is at most
// maxAutoExtensions, or maxIterStop once it has gone past that cap. maxIter is
// only used for logging.
func handleMaxIterationsAutoApprove(autoApprove bool, autoExtensions *int, maxIter int) maxIterAction {
	if !autoApprove {
		return maxIterPrompt
	}

	*autoExtensions++
	used := *autoExtensions

	if used > maxAutoExtensions {
		slog.Warn("Max auto-extensions reached in yolo mode, stopping",
			"total_extensions", used)
		return maxIterStop
	}

	slog.Info("Auto-extending iterations in yolo mode",
		"extension", used,
		"max_extensions", maxAutoExtensions,
		"current_max", maxIter)
	return maxIterContinue
}

// Config holds configuration for running an agent in CLI mode.
//
// Fields, as consumed by Run:
//   - AppName: passed to the welcome message printed before the interactive
//     prompt loop starts.
//   - AttachmentPath: attachment applied to a user message that does not
//     carry its own /attach directive (can be empty).
//   - AutoApprove: when set, reaching the iteration limit auto-extends the
//     run (up to maxAutoExtensions times per message) instead of prompting;
//     in JSON mode, tool-call confirmations are rejected when it is not set.
//   - HideToolCalls: suppresses printing of tool calls and tool call
//     responses in text mode.
//   - OutputJSON: emits every runtime event as one JSON document per line
//     instead of the formatted text output.
type Config struct {
	AppName        string
	AttachmentPath string
	AutoApprove    bool
	HideToolCalls  bool
	OutputJSON     bool
}

// Run executes an agent in non-TUI mode, handling user input and runtime events.
//
// userMessages selects the input mode:
//   - a single "-" reads all of stdin and sends it as one message;
//   - one or more messages are sent in order as a multi-turn conversation;
//   - no messages starts an interactive prompt loop, which only ends when
//     reading a line or processing a turn returns an error.
//
// With cfg.OutputJSON set, each runtime event is written to out as one JSON
// document per line and no interactive prompt is shown. Otherwise events are
// rendered as text and the user is asked to confirm tool calls, iteration
// extensions and OAuth authorizations.
//
// In text mode an error reported by the runtime is printed and returned
// wrapped in RuntimeError. In JSON mode a runtime ErrorEvent is returned as a
// plain (unwrapped) error.
func Run(ctx context.Context, out *Printer, cfg Config, rt runtime.Runtime, sess *session.Session, userMessages []string) error {
	// Create a cancellable context for this agentic loop. cancel is invoked
	// when the user aborts a tool-call confirmation.
	// NOTE(review): no signal handling is installed here; Ctrl+C is presumably
	// wired to the parent context by the caller — confirm.
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	// Ensure telemetry is initialized and add to context so runtime can access it
	telemetry.EnsureGlobalTelemetryInitialized()
	if telemetryClient := telemetry.GetGlobalTelemetryClient(); telemetryClient != nil {
		ctx = telemetry.WithClient(ctx, telemetryClient)
	}

	sess.Title = "Running agent"
	// If the last received event was an error, return it. That way the exit code
	// will be non-zero if the agent failed.
	// lastErr is shared by every turn and is only cleared by a cancellation
	// ErrorEvent.
	var lastErr error

	// oneLoop sends a single user message and consumes the resulting event
	// stream. rd is the reader handed to the tool-call confirmation prompt.
	oneLoop := func(text string, rd io.Reader) error {
		// Number of auto-approved iteration extensions for this message only.
		autoExtensions := 0

		// Blank input is ignored without contacting the runtime.
		userInput := strings.TrimSpace(text)
		if userInput == "" {
			return nil
		}

		sess.AddMessage(PrepareUserMessage(ctx, rt, userInput, cfg.AttachmentPath))

		// JSON mode: never prompt; decide automatically and print each event.
		if cfg.OutputJSON {
			for event := range rt.RunStream(ctx, sess) {
				switch e := event.(type) {
				case *runtime.ToolCallConfirmationEvent:
					// NOTE(review): with AutoApprove set nothing is resumed here;
					// presumably the runtime does not wait for a confirmation in
					// that case — confirm.
					if !cfg.AutoApprove {
						rt.Resume(ctx, runtime.ResumeReject(""))
					}
				case *runtime.MaxIterationsReachedEvent:
					switch handleMaxIterationsAutoApprove(cfg.AutoApprove, &autoExtensions, e.MaxIterations) {
					case maxIterContinue:
						rt.Resume(ctx, runtime.ResumeApprove())
					default: // maxIterStop or maxIterPrompt (no interactive prompt in JSON mode)
						// Returns before the marshal below, so this event is not printed.
						rt.Resume(ctx, runtime.ResumeReject(""))
						return nil
					}
				case *runtime.ErrorEvent:
					// Returns before the marshal below, so the error event itself is
					// not printed, and the error is not wrapped in RuntimeError.
					return fmt.Errorf("%s", e.Error)
				}

				buf, err := json.Marshal(event)
				if err != nil {
					return err
				}
				out.Println(string(buf))
			}

			return nil
		}

		// Text mode: render events and prompt the user where a decision is needed.
		firstLoop := true
		lastAgent := rt.CurrentAgentName()
		// ID of the tool call already shown by the confirmation prompt.
		var lastConfirmedToolCallID string
		for event := range rt.RunStream(ctx, sess) {
			// Print the agent name for the first named event and whenever the
			// emitting agent changes, separated by a blank line after the first.
			agentName := event.GetAgentName()
			if agentName != "" && (firstLoop || lastAgent != agentName) {
				if !firstLoop {
					out.Println()
				}
				out.PrintAgentName(agentName)
				firstLoop = false
				lastAgent = agentName
			}
			switch e := event.(type) {
			case *runtime.AgentChoiceEvent:
				out.Print(e.Content)
			case *runtime.AgentChoiceReasoningEvent:
				out.Print(e.Content)
			case *runtime.ToolCallConfirmationEvent:
				result := out.PrintToolCallWithConfirmation(ctx, e.ToolCall, rd)
				// If interrupted, skip resuming; the runtime will notice context cancellation and stop
				if ctx.Err() != nil {
					continue
				}
				lastConfirmedToolCallID = e.ToolCall.ID // Store the ID to avoid duplicate printing
				switch result {
				case ConfirmationApprove:
					rt.Resume(ctx, runtime.ResumeApprove())
				case ConfirmationApproveSession:
					// Record the session-wide approval on the session as well.
					sess.ToolsApproved = true
					rt.Resume(ctx, runtime.ResumeApproveSession())
				case ConfirmationReject:
					rt.Resume(ctx, runtime.ResumeReject(""))
					lastConfirmedToolCallID = "" // Clear on reject since tool won't execute
				case ConfirmationAbort:
					// Stop the agent loop immediately
					cancel()
					continue
				}
			case *runtime.ToolCallEvent:
				if cfg.HideToolCalls {
					continue
				}
				// Only print if this wasn't already shown during confirmation
				if e.ToolCall.ID != lastConfirmedToolCallID {
					out.PrintToolCall(e.ToolCall)
				}
			case *runtime.ToolCallResponseEvent:
				if cfg.HideToolCalls {
					continue
				}
				out.PrintToolCallResponse(e.ToolCall, e.Response)
				// Clear the confirmed ID after the tool completes
				if e.ToolCall.ID == lastConfirmedToolCallID {
					lastConfirmedToolCallID = ""
				}
			case *runtime.ErrorEvent:
				// Errors are recorded and printed but do not end the loop; the
				// stream keeps being drained.
				lowerErr := strings.ToLower(e.Error)
				if strings.Contains(lowerErr, "context cancel") && ctx.Err() != nil { // treat Ctrl+C cancellations as non-errors
					lastErr = nil
				} else {
					lastErr = fmt.Errorf("%s", e.Error)
					out.PrintError(lastErr)
				}
			case *runtime.MaxIterationsReachedEvent:
				switch handleMaxIterationsAutoApprove(cfg.AutoApprove, &autoExtensions, e.MaxIterations) {
				case maxIterContinue:
					rt.Resume(ctx, runtime.ResumeApprove())
				case maxIterStop:
					rt.Resume(ctx, runtime.ResumeReject(""))
					return nil
				case maxIterPrompt:
					// Reject and abort are handled identically: decline and end the turn.
					result := out.PromptMaxIterationsContinue(ctx, e.MaxIterations)
					switch result {
					case ConfirmationApprove:
						rt.Resume(ctx, runtime.ResumeApprove())
					case ConfirmationReject:
						rt.Resume(ctx, runtime.ResumeReject(""))
						return nil
					case ConfirmationAbort:
						rt.Resume(ctx, runtime.ResumeReject(""))
						return nil
					}
				}
			case *runtime.ElicitationRequestEvent:
				// Only elicitations that carry a server URL in their metadata are
				// prompted for; anything else is declined and ends the turn.
				serverURL, ok := e.Meta["cagent/server_url"].(string)
				if !ok || serverURL == "" {
					slog.Warn("Skipping elicitation: missing or invalid server_url (non-interactive session?)")
					_ = rt.ResumeElicitation(ctx, "decline", nil)
					return nil
				}

				result := out.PromptOAuthAuthorization(ctx, serverURL)

				if ctx.Err() != nil {
					return ctx.Err()
				}

				// NOTE(review): results other than approve/reject send no
				// response to the runtime here — confirm that is intended.
				switch result {
				case ConfirmationApprove:
					_ = rt.ResumeElicitation(ctx, "accept", nil)
				case ConfirmationReject:
					_ = rt.ResumeElicitation(ctx, "decline", nil)
					return fmt.Errorf("OAuth authorization rejected by user")
				}
			}
		}

		// Wrap runtime errors to prevent duplicate error messages and usage display
		if lastErr != nil {
			return RuntimeError{Err: lastErr}
		}
		return nil
	}

	switch {
	case len(userMessages) == 1 && userMessages[0] == "-":
		// Single "-" argument: read from stdin
		buf, err := io.ReadAll(os.Stdin)
		if err != nil {
			return fmt.Errorf("failed to read from stdin: %w", err)
		}

		// NOTE(review): stdin has just been read to EOF, yet it is also passed
		// as the confirmation reader — confirm that prompts can still be
		// answered in this mode.
		if err := oneLoop(string(buf), os.Stdin); err != nil {
			return err
		}
	case len(userMessages) > 0:
		// One or more messages: multi-turn conversation
		for _, msg := range userMessages {
			if err := oneLoop(msg, os.Stdin); err != nil {
				return err
			}
		}
	default:
		// No messages: interactive prompt loop
		out.PrintWelcomeMessage(cfg.AppName)
		firstQuestion := true
		for {
			// Separate consecutive turns with two blank lines before the prompt.
			if !firstQuestion {
				out.Println()
				out.Println()
			}
			out.Print("> ")
			firstQuestion = false

			line, err := input.ReadLine(ctx, os.Stdin)
			if err != nil {
				return err
			}

			if err := oneLoop(line, os.Stdin); err != nil {
				return err
			}
		}
	}

	// Wrap runtime errors to prevent duplicate error messages and usage display
	// Only the stdin and multi-message modes reach this point (the interactive
	// loop exits solely via return); this covers an error recorded during a
	// turn that ended through one of oneLoop's early `return nil` paths.
	if lastErr != nil {
		return RuntimeError{Err: lastErr}
	}
	return nil
}

// PrepareUserMessage turns raw user input into a session message.
//
// The input is first passed through runtime.ResolveCommand, then scanned for
// /attach directives with ParseAttachCommand. The attachment path found in the
// message takes precedence; globalAttachPath (from the --attach flag, may be
// empty) is used only when the message supplies none. The message itself is
// built by CreateUserMessageWithAttachment.
//
// Parameters:
//   - ctx: context for command resolution
//   - rt: runtime for command resolution
//   - userInput: the raw user input (may contain /commands and /attach directives)
//   - globalAttachPath: attachment path from --attach flag (can be empty)
//
// Returns the prepared session.Message ready to be added to the session.
func PrepareUserMessage(ctx context.Context, rt runtime.Runtime, userInput, globalAttachPath string) *session.Message {
	text, perMessagePath := ParseAttachCommand(runtime.ResolveCommand(ctx, rt, userInput))

	// A path given inside the message overrides the global one.
	return CreateUserMessageWithAttachment(text, cmp.Or(perMessagePath, globalAttachPath))
}

// ParseAttachCommand extracts /attach directives from user input.
//
// The input is processed line by line. On a line containing "/attach ", the
// first whitespace-separated token after the directive becomes the attachment
// path, and the line is rebuilt from the trimmed text before the directive
// plus any text after the path (whitespace runs after the path collapse to
// single spaces). A rebuilt line that ends up empty is dropped. Lines without
// a directive are kept untouched. When several lines supply a path, the last
// one wins.
//
// A directive that is not followed by a path is removed as well, but the text
// in front of it is kept instead of being discarded along with the directive.
//
// Returns the remaining message text, trimmed of surrounding whitespace, and
// the attachment path ("" when no directive supplied one).
func ParseAttachCommand(userInput string) (messageText, attachPath string) {
	const directive = "/attach "

	var messageLines []string
	for _, line := range strings.Split(userInput, "\n") {
		before, after, found := strings.Cut(line, directive)
		if !found {
			// Keep lines without /attach commands verbatim.
			messageLines = append(messageLines, line)
			continue
		}

		// First token is the path; whatever follows is ordinary message text.
		rest := strings.Fields(after)
		if len(rest) > 0 {
			attachPath, rest = rest[0], rest[1:]
		}

		// Rebuild the line without the directive and the path.
		kept := make([]string, 0, 2)
		if lead := strings.TrimSpace(before); lead != "" {
			kept = append(kept, lead)
		}
		if len(rest) > 0 {
			kept = append(kept, strings.Join(rest, " "))
		}
		if len(kept) > 0 {
			messageLines = append(messageLines, strings.Join(kept, " "))
		}
	}

	return strings.TrimSpace(strings.Join(messageLines, "\n")), attachPath
}

// CreateUserMessageWithAttachment builds a user message, optionally carrying a
// file attachment.
//
// With an empty attachmentPath the result is a plain text message. Otherwise
// the path is made absolute and stat'ed, and the file is attached in one of
// two ways:
//   - files recognised by chat.IsTextFile are read and inlined as an extra
//     text part (for cross-provider compatibility), provided they do not
//     exceed chat.MaxInlineFileSize;
//   - all other files (e.g. images, PDFs) are attached as a file reference
//     with their detected MIME type, provided that type is supported.
//
// Any failure along the way (unresolvable or inaccessible path, oversized or
// unreadable text file, unsupported MIME type) is logged as a warning and the
// function falls back to a plain text message with the original userContent.
//
// When a file is attached and userContent is blank, a default prompt is used
// as the text so the message never consists of an attachment alone.
func CreateUserMessageWithAttachment(userContent, attachmentPath string) *session.Message {
	if attachmentPath == "" {
		return session.UserMessage(userContent)
	}

	// Resolve the path and make sure the file can be reached.
	resolved, err := filepath.Abs(attachmentPath)
	if err != nil {
		slog.Warn("Failed to get absolute path for attachment", "path", attachmentPath, "error", err)
		return session.UserMessage(userContent)
	}

	info, err := os.Stat(resolved)
	if err != nil {
		slog.Warn("Attachment file not accessible", "path", resolved, "error", err)
		return session.UserMessage(userContent)
	}

	// Build the attachment part first; bail out to a text-only message on any problem.
	var attachment chat.MessagePart
	if chat.IsTextFile(resolved) {
		// Text files are inlined directly as text content.
		if size := info.Size(); size > chat.MaxInlineFileSize {
			slog.Warn("Attachment text file too large to inline", "path", resolved, "size", size)
			return session.UserMessage(userContent)
		}
		inlined, err := chat.ReadFileForInline(resolved)
		if err != nil {
			slog.Warn("Failed to read attachment file", "path", resolved, "error", err)
			return session.UserMessage(userContent)
		}
		attachment = chat.MessagePart{
			Type: chat.MessagePartTypeText,
			Text: inlined,
		}
	} else {
		// Binary files (images, PDFs) are kept as file references.
		mimeType := chat.DetectMimeType(resolved)
		if !chat.IsSupportedMimeType(mimeType) {
			slog.Warn("Unsupported attachment file type", "path", resolved, "mime_type", mimeType)
			return session.UserMessage(userContent)
		}
		attachment = chat.MessagePart{
			Type: chat.MessagePartTypeFile,
			File: &chat.MessageFile{
				Path:     resolved,
				MimeType: mimeType,
			},
		}
	}

	// Ensure we have some text content when attaching a file.
	prompt := cmp.Or(strings.TrimSpace(userContent), "Please analyze this attached file.")

	parts := []chat.MessagePart{
		{
			Type: chat.MessagePartTypeText,
			Text: prompt,
		},
		attachment,
	}
	return session.UserMessage(prompt, parts...)
}