diff --git a/src/lib/__tests__/agent-interface.test.ts b/src/lib/__tests__/agent-interface.test.ts
new file mode 100644
index 0000000..35a2283
--- /dev/null
+++ b/src/lib/__tests__/agent-interface.test.ts
@@ -0,0 +1,185 @@
+import { runAgent } from '../agent-interface';
+import type { WizardOptions } from '../../utils/types';
+
+// Mock dependencies
+jest.mock('../../utils/clack');
+jest.mock('../../utils/analytics');
+jest.mock('../../utils/debug');
+
+// Mock the SDK module
+const mockQuery = jest.fn();
+jest.mock('@anthropic-ai/claude-agent-sdk', () => ({
+  query: (...args: unknown[]) => mockQuery(...args),
+}));
+
+// Get mocked clack for spinner
+import clack from '../../utils/clack';
+const mockClack = clack as jest.Mocked<typeof clack>;
+
+describe('runAgent', () => {
+  let mockSpinner: {
+    start: jest.Mock;
+    stop: jest.Mock;
+    message: string;
+  };
+
+  const defaultOptions: WizardOptions = {
+    debug: false,
+    installDir: '/test/dir',
+    forceInstall: false,
+    default: false,
+    signup: false,
+    localMcp: false,
+    ci: false,
+  };
+
+  const defaultAgentConfig = {
+    workingDirectory: '/test/dir',
+    mcpServers: {},
+    model: 'claude-opus-4-5-20251101',
+  };
+
+  beforeEach(() => {
+    jest.clearAllMocks();
+
+    mockSpinner = {
+      start: jest.fn(),
+      stop: jest.fn(),
+      message: '',
+    };
+
+    mockClack.spinner = jest.fn().mockReturnValue(mockSpinner);
+    mockClack.log = {
+      step: jest.fn(),
+      success: jest.fn(),
+      error: jest.fn(),
+      warn: jest.fn(),
+      warning: jest.fn(),
+      info: jest.fn(),
+      message: jest.fn(),
+    };
+  });
+
+  describe('race condition handling', () => {
+    it('should return success when agent completes successfully then SDK cleanup fails', async () => {
+      // This simulates the race condition:
+      // 1. Agent completes with success result
+      // 2. signalDone() is called, completing the prompt generator
+      // 3. SDK tries to send cleanup command while streaming is active
+      // 4. SDK throws an error
+      // The fix should recognize we already got a success and return success anyway
+
+      function* mockGeneratorWithCleanupError() {
+        yield {
+          type: 'system',
+          subtype: 'init',
+          model: 'claude-opus-4-5-20251101',
+          tools: [],
+          mcp_servers: [],
+        };
+
+        yield {
+          type: 'result',
+          subtype: 'success',
+          is_error: false,
+          result: 'Agent completed successfully',
+        };
+
+        // Simulate the SDK cleanup error that occurs after success
+        throw new Error('only prompt commands are supported in streaming mode');
+      }
+
+      mockQuery.mockReturnValue(mockGeneratorWithCleanupError());
+
+      const result = await runAgent(
+        defaultAgentConfig,
+        'test prompt',
+        defaultOptions,
+        mockSpinner as unknown as ReturnType<typeof clack.spinner>,
+        {
+          successMessage: 'Test success',
+          errorMessage: 'Test error',
+        },
+      );
+
+      // Should return success (empty object), not throw
+      expect(result).toEqual({});
+      expect(mockSpinner.stop).toHaveBeenCalledWith('Test success');
+    });
+
+    it('should still throw when no success result was received before error', async () => {
+      // If we never got a success result, errors should propagate normally
+
+      function* mockGeneratorWithOnlyError() {
+        yield {
+          type: 'system',
+          subtype: 'init',
+          model: 'claude-opus-4-5-20251101',
+          tools: [],
+          mcp_servers: [],
+        };
+
+        // No success result, just an error
+        throw new Error('Actual SDK error');
+      }
+
+      mockQuery.mockReturnValue(mockGeneratorWithOnlyError());
+
+      await expect(
+        runAgent(
+          defaultAgentConfig,
+          'test prompt',
+          defaultOptions,
+          mockSpinner as unknown as ReturnType<typeof clack.spinner>,
+          {
+            successMessage: 'Test success',
+            errorMessage: 'Test error',
+          },
+        ),
+      ).rejects.toThrow('Actual SDK error');
+
+      expect(mockSpinner.stop).toHaveBeenCalledWith('Test error');
+    });
+
+    it('should not treat error results as success', async () => {
+      // A result with is_error: true should not count as success
+      // Even if subtype is 'success', the is_error flag takes precedence
+
+      function* mockGeneratorWithErrorResult() {
+        yield {
+          type: 'system',
+          subtype: 'init',
+          model: 'claude-opus-4-5-20251101',
+          tools: [],
+          mcp_servers: [],
+        };
+
+        yield {
+          type: 'result',
+          subtype: 'success', // subtype can be success but is_error true
+          is_error: true,
+          result: 'API Error: 500 Internal Server Error',
+        };
+
+        throw new Error('Process exited with code 1');
+      }
+
+      mockQuery.mockReturnValue(mockGeneratorWithErrorResult());
+
+      const result = await runAgent(
+        defaultAgentConfig,
+        'test prompt',
+        defaultOptions,
+        mockSpinner as unknown as ReturnType<typeof clack.spinner>,
+        {
+          successMessage: 'Test success',
+          errorMessage: 'Test error',
+        },
+      );
+
+      // Should return API error, not success
+      expect(result.error).toBe('WIZARD_API_ERROR');
+      expect(result.message).toContain('API Error');
+    });
+  });
+});
diff --git a/src/lib/agent-interface.ts b/src/lib/agent-interface.ts
index 0e43d0d..c67ec25 100644
--- a/src/lib/agent-interface.ts
+++ b/src/lib/agent-interface.ts
@@ -288,6 +288,8 @@ export function initializeAgent(
     const gatewayUrl = getLlmGatewayUrlFromHost(config.posthogApiHost);
     process.env.ANTHROPIC_BASE_URL = gatewayUrl;
     process.env.ANTHROPIC_AUTH_TOKEN = config.posthogApiKey;
+    // Use CLAUDE_CODE_OAUTH_TOKEN to override any stored /login credentials
+    process.env.CLAUDE_CODE_OAUTH_TOKEN = config.posthogApiKey;
     // Disable experimental betas (like input_examples) that the LLM gateway doesn't support
     process.env.CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS = 'true';
 
@@ -377,6 +379,8 @@ export async function runAgent(
 
   const startTime = Date.now();
   const collectedText: string[] = [];
+  // Track if we received a successful result (before any cleanup errors)
+  let receivedSuccessResult = false;
 
   // Workaround for SDK bug: stdin closes before canUseTool responses can be sent.
   // The fix is to use an async generator for the prompt that stays open until
@@ -398,6 +402,31 @@ export async function runAgent(
     await resultReceived;
   };
 
+  // Helper to handle successful completion (used in normal path and race condition recovery)
+  const completeWithSuccess = (
+    suppressedError?: Error,
+  ): { error?: AgentErrorType; message?: string } => {
+    const durationMs = Date.now() - startTime;
+    const durationSeconds = Math.round(durationMs / 1000);
+
+    if (suppressedError) {
+      logToFile(
+        `Ignoring post-completion error, agent completed successfully in ${durationSeconds}s`,
+      );
+      logToFile('Suppressed error:', suppressedError.message);
+    } else {
+      logToFile(`Agent run completed in ${durationSeconds}s`);
+    }
+
+    analytics.capture(WIZARD_INTERACTION_EVENT_NAME, {
+      action: 'agent integration completed',
+      duration_ms: durationMs,
+      duration_seconds: durationSeconds,
+    });
+    spinner.stop(successMessage);
+    return {};
+  };
+
   try {
     // Tools needed for the wizard:
     // - File operations: Read, Write, Edit
@@ -428,7 +457,11 @@ export async function runAgent(
         settingSources: ['project'],
         // Explicitly enable required tools including Skill
         allowedTools,
-        env: { ...process.env },
+        env: {
+          ...process.env,
+          // Prevent user's Anthropic API key from overriding the wizard's OAuth token
+          ANTHROPIC_API_KEY: undefined,
+        },
         canUseTool: (toolName: string, input: unknown) => {
           logToFile('canUseTool called:', { toolName, input });
           const result = wizardCanUseTool(
@@ -454,11 +487,15 @@ export async function runAgent(
       handleSDKMessage(message, options, spinner, collectedText);
       // Signal completion when result received
       if (message.type === 'result') {
+        // Track successful results before any potential cleanup errors
+        // The SDK may emit a second error result during cleanup due to a race condition
+        if (message.subtype === 'success' && !message.is_error) {
+          receivedSuccessResult = true;
+        }
         signalDone!();
       }
     }
 
-    const durationMs = Date.now() - startTime;
     const outputText = collectedText.join('\n');
 
     // Check for error markers in the agent's output
@@ -487,19 +524,19 @@ export async function runAgent(
       return { error: AgentErrorType.API_ERROR, message: outputText };
     }
 
-    logToFile(`Agent run completed in ${Math.round(durationMs / 1000)}s`);
-    analytics.capture(WIZARD_INTERACTION_EVENT_NAME, {
-      action: 'agent integration completed',
-      duration_ms: durationMs,
-      duration_seconds: Math.round(durationMs / 1000),
-    });
-
-    spinner.stop(successMessage);
-    return {};
+    return completeWithSuccess();
   } catch (error) {
     // Signal done to unblock the async generator
     signalDone!();
 
+    // If we already received a successful result, the error is from SDK cleanup
+    // This happens due to a race condition: the SDK tries to send a cleanup command
+    // after the prompt stream closes, but streaming mode is still active.
+    // See: https://github.com/anthropics/claude-agent-sdk-typescript/issues/41
+    if (receivedSuccessResult) {
+      return completeWithSuccess(error as Error);
+    }
+
     // Check if we collected an API error before the exception was thrown
     const outputText = collectedText.join('\n');