diff --git a/e2e/README.md b/e2e/README.md new file mode 100644 index 0000000..c7fe86b --- /dev/null +++ b/e2e/README.md @@ -0,0 +1,29 @@ +# End-to-End Testing Framework + +This directory contains the end-to-end (E2E) testing framework for the AgentAPI project. The framework simulates realistic agent interactions using a script-based approach with JSON configuration files. + +## TL;DR + +```shell +go test ./e2e +``` + +## How it Works + +The testing framework (`echo_test.go`) does the following: +- Reads the JSON script in `testdata/` whose file name matches the name of the test case. +- Starts the AgentAPI server with a fake agent (`echo.go`). This fake agent reads the scripted conversation from the same JSON file. +- The testing framework then sends messages to the fake agent through the AgentAPI server. +- The fake agent checks each message against the one it expects and replies with the predefined response. It exits with an error if an unexpected message arrives. +- The testing framework validates the actual responses against the expected outcomes. + +## Adding a new test + +1. Create a new JSON file in `testdata/` with a unique name. +2. Define the scripted conversation in the JSON file as an array of entries. Each entry supports the following fields: + - `expectMessage`: The message from the user that the fake agent expects. Leave it empty (`""`) to make the fake agent respond without waiting for input, e.g. for an initial greeting or a follow-up message. + - `thinkDurationMS`: How long, in milliseconds, the fake agent should 'think' before responding. Optional; defaults to `0` (respond immediately). + - `responseMessage`: The message the fake agent should respond with. +3. Add a new test case in `echo_test.go` that references the newly created JSON file. + > Be sure that the name of the test case exactly matches the name of the JSON file (without the `.json` extension). +4. Run the E2E tests to verify the new test case. 
diff --git a/e2e/echo.go b/e2e/echo.go new file mode 100644 index 0000000..7388d5e --- /dev/null +++ b/e2e/echo.go @@ -0,0 +1,210 @@ +package main + +import ( + "bufio" + "context" + "encoding/json" + "fmt" + "os" + "os/signal" + "regexp" + "strings" + "time" + + "github.com/acarl005/stripansi" + st "github.com/coder/agentapi/lib/screentracker" +) + +// ScriptEntry is one step of the scripted conversation: the user message to wait for +// (empty to respond without waiting), how long to "think", and the response to print. +type ScriptEntry struct { + ExpectMessage string `json:"expectMessage"` + ThinkDurationMS int64 `json:"thinkDurationMS"` + ResponseMessage string `json:"responseMessage"` +} + +func main() { + if len(os.Args) != 2 { + fmt.Println("Usage: echo <script.json>") + os.Exit(1) + } + + runEchoAgent(os.Args[1]) +} + +// loadScript reads the file at scriptPath and decodes it as a JSON array of ScriptEntry. +func loadScript(scriptPath string) ([]ScriptEntry, error) { + data, err := os.ReadFile(scriptPath) + if err != nil { + return nil, fmt.Errorf("failed to read script file: %w", err) + } + + var script []ScriptEntry + if err := json.Unmarshal(data, &script); err != nil { + return nil, fmt.Errorf("failed to parse script JSON: %w", err) + } + + return script, nil +} + +// runEchoAgent plays back the script at scriptPath as a fake terminal agent: for each +// entry it optionally waits for the expected line on stdin, optionally shows a "thinking" +// spinner, and then prints the scripted response. It exits with status 1 if the script +// cannot be loaded, is empty, stdin cannot be read, or an unexpected message is received. +// Once the script is exhausted (or stdin reaches EOF) it idles until interrupted. +func runEchoAgent(scriptPath string) { + script, err := loadScript(scriptPath) + if err != nil { + fmt.Printf("Error loading script: %v\n", err) + os.Exit(1) + } + + if len(script) == 0 { + fmt.Println("Script is empty") + os.Exit(1) + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, os.Interrupt) + go func() { + for { + select { + case <-sigCh: + cancel() + fmt.Println("Exiting...") + os.Exit(0) + case <-ctx.Done(): + return + } + } + }() + + var messages []st.ConversationMessage + redrawTerminal(messages, false) + + scriptIndex := 0 + scanner := bufio.NewScanner(os.Stdin) + + for scriptIndex < len(script) { + entry := script[scriptIndex] + expectedMsg := strings.TrimSpace(entry.ExpectMessage) + + // An empty ExpectMessage marks an initial/follow-up message that is sent without + // waiting for user input; otherwise the next non-empty input line must match. + if expectedMsg != "" { + if !scanner.Scan() { + break + } + + input := cleanTerminalInput(scanner.Text()) + if input == "" { + continue + } + + if input != expectedMsg { + fmt.Printf("Error: Expected message '%s' but received '%s'\n", expectedMsg, input) + os.Exit(1) + } + + messages = append(messages, st.ConversationMessage{ + Role: st.ConversationRoleUser, + Message: entry.ExpectMessage, + Time: time.Now(), + }) + redrawTerminal(messages, false) + } + + // Show thinking state if there's a delay + if entry.ThinkDurationMS > 0 { + think(ctx, messages, time.Duration(entry.ThinkDurationMS)*time.Millisecond) + } + + messages = append(messages, st.ConversationMessage{ + Role: st.ConversationRoleAgent, + Message: entry.ResponseMessage, + Time: time.Now(), + }) + redrawTerminal(messages, false) + scriptIndex++ + } + + // A false Scan is either EOF (fine: fall through and idle) or a read error (report it). + if err := scanner.Err(); err != nil { + fmt.Printf("Error reading input: %v\n", err) + os.Exit(1) + } + + // Now just do nothing. + <-make(chan struct{}) +} + 
+// think shows the thinking state for d with an animated spinner. It waits for the spinner +// goroutine to exit before returning, so the spinner's final line-clear cannot land after +// (and erase part of) the caller's next redraw. +func think(ctx context.Context, messages []st.ConversationMessage, d time.Duration) { + redrawTerminal(messages, true) + spinnerCtx, stopSpinner := context.WithCancel(ctx) + spinnerDone := make(chan struct{}) + go func() { + defer close(spinnerDone) + runSpinner(spinnerCtx) + }() + time.Sleep(d) + stopSpinner() + <-spinnerDone +} + +func redrawTerminal(messages []st.ConversationMessage, thinking bool) { + fmt.Print("\033[2J\033[H") // Clear screen and move cursor to home + + // Show conversation history + for _, msg := range messages { + if msg.Role == st.ConversationRoleUser { + fmt.Printf("> %s\n", msg.Message) + } else { + fmt.Printf("%s\n", msg.Message) + } + } + + if thinking { + fmt.Print("Thinking... ") + } else { + fmt.Print("> ") + } +} + +// bracketedPasteRe matches bracketed paste mode markers (^[[200~ and ^[[201~). +// It is compiled once at package scope rather than on every input line. +var bracketedPasteRe = regexp.MustCompile(`\x1b\[\d+~`) + +// cleanTerminalInput normalizes a raw line read from the terminal so it can be compared +// with a scripted message: ANSI escape sequences and bracketed paste markers are removed, +// backspaces are applied, stray control characters are dropped, and surrounding +// whitespace is trimmed. +func cleanTerminalInput(input string) string { + // Strip ANSI escape sequences + input = stripansi.Strip(input) + + // Remove bracketed paste mode sequences (^[[200~ and ^[[201~) + input = bracketedPasteRe.ReplaceAllString(input, "") + + // Apply each backspace (^H) to the character before it. Keeping the cleaned runes on + // a stack also handles runs such as "ab\x08\x08", where every ^H must erase one more + // character; a single pass replacing character+^H pairs would leave the "a" behind. + cleaned := make([]rune, 0, len(input)) + for _, r := range input { + switch r { + case '\x08': // backspace + if len(cleaned) > 0 { + cleaned = cleaned[:len(cleaned)-1] + } + case '\x7f', '\x1b': // delete, escape (if any remain) + // Dropped without erasing anything. + default: + cleaned = append(cleaned, r) + } + } + + return strings.TrimSpace(string(cleaned)) +} + +func runSpinner(ctx context.Context) { + spinnerChars := []string{"|", "/", "-", "\\"} + ticker := time.NewTicker(200 * time.Millisecond) + defer ticker.Stop() + i := 0 + + for { + select { + case <-ticker.C: + fmt.Printf("\rThinking %s", spinnerChars[i%len(spinnerChars)]) + i++ + case <-ctx.Done(): + // Clear spinner on cancellation + fmt.Print("\r" + strings.Repeat(" ", 20) + "\r") + return + } + } +} 
diff --git a/e2e/echo_test.go b/e2e/echo_test.go new file mode 100644 index 0000000..5784027 --- /dev/null +++ b/e2e/echo_test.go @@ -0,0 +1,237 @@ +package main_test + +import ( + "bufio" + "context" + "encoding/json" + "fmt" + "io" + "net" + "net/http" + "os" + "os/exec" + "path/filepath" + "strings" + "sync" + "testing" + "time" + + agentapisdk "github.com/coder/agentapi-sdk-go" + "github.com/stretchr/testify/require" +) + +const ( + testTimeout = 30 * time.Second + operationTimeout = 5 * time.Second + healthCheckTimeout = 10 * time.Second +) + +type ScriptEntry struct { + ExpectMessage string `json:"expectMessage"` + ThinkDurationMS int64 `json:"thinkDurationMS"` + ResponseMessage string `json:"responseMessage"` +} + +func TestE2E(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in 
short mode") + } + + t.Run("basic", func(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), testTimeout) + defer cancel() + script, apiClient := setup(ctx, t) + require.NoError(t, waitAgentAPIStable(ctx, apiClient, operationTimeout)) + messageReq := agentapisdk.PostMessageParams{ + Content: "This is a test message.", + Type: agentapisdk.MessageTypeUser, + } + _, err := apiClient.PostMessage(ctx, messageReq) + require.NoError(t, err, "Failed to send message via SDK") + require.NoError(t, waitAgentAPIStable(ctx, apiClient, operationTimeout)) + msgResp, err := apiClient.GetMessages(ctx) + require.NoError(t, err, "Failed to get messages via SDK") + require.Len(t, msgResp.Messages, 3) + require.Equal(t, script[0].ResponseMessage, strings.TrimSpace(msgResp.Messages[0].Content)) + require.Equal(t, script[1].ExpectMessage, strings.TrimSpace(msgResp.Messages[1].Content)) + require.Equal(t, script[1].ResponseMessage, strings.TrimSpace(msgResp.Messages[2].Content)) + }) + + t.Run("thinking", func(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), testTimeout) + defer cancel() + + script, apiClient := setup(ctx, t) + messageReq := agentapisdk.PostMessageParams{ + Content: "What is the answer to life, the universe, and everything?", + Type: agentapisdk.MessageTypeUser, + } + _, err := apiClient.PostMessage(ctx, messageReq) + require.NoError(t, err, "Failed to send message via SDK") + // The script answers immediately and then "thinks" before its follow-up message, + // so the agent is expected to still be running right after the message is posted. + statusResp, err := apiClient.GetStatus(ctx) + require.NoError(t, err) + require.Equal(t, agentapisdk.StatusRunning, statusResp.Status) + // Use the shared operationTimeout constant like every other wait in this file. + require.NoError(t, waitAgentAPIStable(ctx, apiClient, operationTimeout)) + msgResp, err := apiClient.GetMessages(ctx) + require.NoError(t, err, "Failed to get messages via SDK") + require.Len(t, msgResp.Messages, 3) + require.Equal(t, script[0].ResponseMessage, strings.TrimSpace(msgResp.Messages[0].Content)) + require.Equal(t, script[1].ExpectMessage, strings.TrimSpace(msgResp.Messages[1].Content)) + parts := 
strings.Split(msgResp.Messages[2].Content, "\n") + require.Len(t, parts, 2) + require.Equal(t, script[1].ResponseMessage, strings.TrimSpace(parts[0])) + require.Equal(t, script[2].ResponseMessage, strings.TrimSpace(parts[1])) + }) +} + +// setup loads the script in testdata/ named after the running (sub)test, builds the agentapi +// binary if AGENTAPI_BINARY_PATH is unset and ../out/agentapi does not exist, starts the +// server on a free port with echo.go as the agent, and returns the parsed script together +// with an SDK client for the server. Any failure aborts the test. +func setup(ctx context.Context, t testing.TB) ([]ScriptEntry, *agentapisdk.Client) { + t.Helper() + + scriptFilePath := filepath.Join("testdata", filepath.Base(t.Name())+".json") + data, err := os.ReadFile(scriptFilePath) + require.NoError(t, err, "Failed to read test script file: %s", scriptFilePath) + + var script []ScriptEntry + err = json.Unmarshal(data, &script) + require.NoError(t, err, "Failed to unmarshal script from %s", scriptFilePath) + + // Resolved once: used both to locate/build the binary and to locate echo.go. + cwd, err := os.Getwd() + require.NoError(t, err, "Failed to get current working directory") + + binaryPath := os.Getenv("AGENTAPI_BINARY_PATH") + if binaryPath == "" { + binaryPath = filepath.Join(cwd, "..", "out", "agentapi") + if _, err := os.Stat(binaryPath); err != nil { + t.Logf("Building binary at %s", binaryPath) + buildCmd := exec.CommandContext(ctx, "go", "build", "-o", binaryPath, ".") + buildCmd.Dir = filepath.Join(cwd, "..") + t.Logf("run: %s", buildCmd.String()) + // Capture the compiler output so a failed build is diagnosable from the test log. + out, err := buildCmd.CombinedOutput() + require.NoError(t, err, "Failed to build binary: %s", out) + } + } + + serverPort, err := getFreePort() + require.NoError(t, err, "Failed to get free port for server") + + cmd := exec.CommandContext(ctx, binaryPath, "server", + fmt.Sprintf("--port=%d", serverPort), + "--", + "go", "run", filepath.Join(cwd, "echo.go"), scriptFilePath) + + // Capture output for debugging + stdout, err := cmd.StdoutPipe() + require.NoError(t, err, "Failed to create stdout pipe") + + stderr, err := cmd.StderrPipe() + require.NoError(t, err, "Failed to create stderr pipe") + + // Start process + err = cmd.Start() + require.NoError(t, err, "Failed to start agentapi server") + + // Log output in background + var wg sync.WaitGroup + wg.Add(2) + 
+ go func() { + defer wg.Done() + logOutput(t, "SERVER-STDOUT", stdout) + }() + + go func() { + defer wg.Done() + logOutput(t, "SERVER-STDERR", stderr) + }() + + // Clean up process + t.Cleanup(func() { + if cmd.Process != nil { + _ = cmd.Process.Kill() + _ = cmd.Wait() + } + wg.Wait() + }) + + serverURL := fmt.Sprintf("http://localhost:%d", serverPort) + require.NoError(t, waitForServer(ctx, t, serverURL, healthCheckTimeout), "Server not ready") + apiClient, err := agentapisdk.NewClient(serverURL) + require.NoError(t, err, "Failed to create agentapi SDK client") + + require.NoError(t, waitAgentAPIStable(ctx, apiClient, operationTimeout)) + return script, apiClient +} + +// logOutput logs process output with prefix +func logOutput(t testing.TB, prefix string, r io.Reader) { + t.Helper() + scanner := bufio.NewScanner(r) + for scanner.Scan() { + t.Logf("[%s] %s", prefix, scanner.Text()) + } +} + +// waitForServer polls url every 100ms until an HTTP GET succeeds (any status code counts) +// and then returns nil. If ctx is cancelled or timeout elapses first, it returns an error +// wrapping the context error so the caller decides how to fail the test. +func waitForServer(ctx context.Context, t testing.TB, url string, timeout time.Duration) error { + t.Helper() + client := &http.Client{Timeout: time.Second} + healthCtx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + ticker := time.NewTicker(100 * time.Millisecond) + defer ticker.Stop() + + for { + select { + case <-healthCtx.Done(): + // %w is only understood by fmt.Errorf, so build a real error and return it + // instead of formatting it through require.Failf. + return fmt.Errorf("server at %s not ready within timeout: %w", url, healthCtx.Err()) + case <-ticker.C: + resp, err := client.Get(url) + if err == nil { + _ = resp.Body.Close() + return nil + } + t.Logf("Server not ready yet: %s", err) + } + } +} + +func waitAgentAPIStable(ctx context.Context, apiClient *agentapisdk.Client, waitFor time.Duration) error { + waitCtx, waitCancel := context.WithTimeout(ctx, waitFor) + defer waitCancel() + + tick := time.NewTicker(100 * time.Millisecond) + defer tick.Stop() + for { + select { + case <-waitCtx.Done(): + return waitCtx.Err() + case <-tick.C: + // Use waitCtx so a slow status request cannot outlive the waitFor deadline. + sr, err := apiClient.GetStatus(waitCtx) + if err != nil { + continue + 
} + if sr.Status == agentapisdk.StatusStable { + return nil + } + } + } +} + +// getFreePort opens a TCP listener on localhost:0, reads back the port number the +// listener was bound to, closes the listener, and returns that port. The port is not +// reserved for the caller once this function has returned. +func getFreePort() (int, error) { + loopback, err := net.ResolveTCPAddr("tcp", "localhost:0") + if err != nil { + return 0, err + } + + listener, err := net.ListenTCP("tcp", loopback) + if err != nil { + return 0, err + } + + port := listener.Addr().(*net.TCPAddr).Port + _ = listener.Close() + return port, nil +} diff --git a/e2e/testdata/basic.json b/e2e/testdata/basic.json new file mode 100644 index 0000000..35fe5b6 --- /dev/null +++ b/e2e/testdata/basic.json @@ -0,0 +1,10 @@ +[ + { + "expectMessage": "", + "responseMessage": "Hello! I'm ready to help you. Please send me a message to echo back." + }, + { + "expectMessage": "This is a test message.", + "responseMessage": "Echo: This is a test message." + } +] diff --git a/e2e/testdata/thinking.json b/e2e/testdata/thinking.json new file mode 100644 index 0000000..29ac044 --- /dev/null +++ b/e2e/testdata/thinking.json @@ -0,0 +1,17 @@ +[ + { + "expectMessage": "", + "thinkDurationMS": 1000, + "responseMessage": "Welcome! I'm an AI that enjoys pondering deep questions. Ask me anything!" + }, + { + "expectMessage": "What is the answer to life, the universe, and everything?", + "thinkDurationMS": 0, + "responseMessage": "Gosh, I'll need some time to think about that..." 
+ }, + { + "expectMessage": "", + "thinkDurationMS": 2000, + "responseMessage": "42" + } +] diff --git a/go.mod b/go.mod index 5250f4f..7cddbe6 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,9 @@ go 1.23.2 require ( github.com/ActiveState/termtest/xpty v0.6.0 + github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d github.com/charmbracelet/bubbletea v1.3.4 + github.com/coder/agentapi-sdk-go v0.0.0-20250505131810-560d1d88d225 github.com/danielgtaylor/huma/v2 v2.32.0 github.com/go-chi/chi/v5 v5.2.2 github.com/go-chi/cors v1.2.1 diff --git a/go.sum b/go.sum index 3406c1a..fbc2ed7 100644 --- a/go.sum +++ b/go.sum @@ -9,6 +9,8 @@ github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX github.com/Netflix/go-expect v0.0.0-20180615182759-c93bf25de8e8/go.mod h1:oX5x61PbNXchhh0oikYAH+4Pcfw5LKv21+Jnpr6r6Pc= github.com/Netflix/go-expect v0.0.0-20200312175327-da48e75238e2 h1:y2avNRjCeJT8b7svzjhKZjsvW5Jki/iAqTBEPJURaUg= github.com/Netflix/go-expect v0.0.0-20200312175327-da48e75238e2/go.mod h1:oX5x61PbNXchhh0oikYAH+4Pcfw5LKv21+Jnpr6r6Pc= +github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8= +github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo= github.com/autarch/testify v1.2.2 h1:9Q9V6zqhP7R6dv+zRUddv6kXKLo6ecQhnFRFWM71i1c= github.com/autarch/testify v1.2.2/go.mod h1:oDbHKfFv2/D5UtVrxkk90OKcb6P4/AqF1Pcf6ZbvDQo= github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k= @@ -21,6 +23,8 @@ github.com/charmbracelet/x/ansi v0.8.0 h1:9GTq3xq9caJW8ZrBTe0LIe2fvfLR/bYXKTx2ll github.com/charmbracelet/x/ansi v0.8.0/go.mod h1:wdYl/ONOLHLIVmQaxbIYEC/cRKOQyjTkowiI4blgS9Q= github.com/charmbracelet/x/term v0.2.1 h1:AQeHeLZ1OqSXhrAWpYUtZyX1T3zVxfpZuEQMIQaGIAQ= github.com/charmbracelet/x/term v0.2.1/go.mod h1:oQ4enTYFV7QN4m0i9mzHrViD7TQKvNEEkHUMCmsxdUg= +github.com/coder/agentapi-sdk-go 
v0.0.0-20250505131810-560d1d88d225 h1:tRIViZ5JRmzdOEo5wUWngaGEFBG8OaE1o2GIHN5ujJ8= +github.com/coder/agentapi-sdk-go v0.0.0-20250505131810-560d1d88d225/go.mod h1:rNLVpYgEVeu1Zk29K64z6Od8RBP9DwqCu9OfCzh8MR4= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=