fix: Resilience to Agent SDK cleanup burps (#231)

daniloc · claude · web-flow · commit 757203855422 · 2026-01-20T11:17:51.000-05:00
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
diff --git a/src/lib/__tests__/agent-interface.test.ts b/src/lib/__tests__/agent-interface.test.ts
@@ -0,0 +1,185 @@
+import { runAgent } from '../agent-interface';
+import type { WizardOptions } from '../../utils/types';
+
+// Mock dependencies
+jest.mock('../../utils/clack');
+jest.mock('../../utils/analytics');
+jest.mock('../../utils/debug');
+
+// Mock the SDK module; `query` forwards to mockQuery so each test can choose the stream it returns
+const mockQuery = jest.fn();
+jest.mock('@anthropic-ai/claude-agent-sdk', () => ({
+  query: (...args: unknown[]) => mockQuery(...args), // lazy lookup: jest.mock() is hoisted above mockQuery
+}));
+
+// Get mocked clack for spinner
+import clack from '../../utils/clack';
+const mockClack = clack as jest.Mocked<typeof clack>;
+
+describe('runAgent', () => {
+  let mockSpinner: { // stand-in for the spinner handed to runAgent; rebuilt before every test
+    start: jest.Mock;
+    stop: jest.Mock;
+    message: jest.Mock; // the clack spinner exposes message() as a method, not a string
+  };
+
+  const defaultOptions: WizardOptions = { // baseline wizard options shared by all tests
+    debug: false,
+    installDir: '/test/dir',
+    forceInstall: false,
+    default: false,
+    signup: false,
+    localMcp: false,
+    ci: false,
+  };
+
+  const defaultAgentConfig = { // agent config passed as runAgent's first argument
+    workingDirectory: '/test/dir',
+    mcpServers: {},
+    model: 'claude-opus-4-5-20251101',
+  };
+
+  beforeEach(() => {
+    jest.clearAllMocks(); // drop call history left over from the previous test
+
+    mockSpinner = {
+      start: jest.fn(),
+      stop: jest.fn(),
+      message: jest.fn(), // callable, so a spinner.message(...) call cannot throw
+    };
+
+    mockClack.spinner = jest.fn().mockReturnValue(mockSpinner);
+    mockClack.log = { // replace the clack log methods with fresh mocks
+      step: jest.fn(),
+      success: jest.fn(),
+      error: jest.fn(),
+      warn: jest.fn(),
+      warning: jest.fn(),
+      info: jest.fn(),
+      message: jest.fn(),
+    };
+  });
+
+  describe('race condition handling', () => {
+    it('should return success when agent completes successfully then SDK cleanup fails', async () => {
+      // This simulates the race condition:
+      // 1. Agent completes with success result
+      // 2. signalDone() is called, completing the prompt generator
+      // 3. SDK tries to send cleanup command while streaming is active
+      // 4. SDK throws an error
+      // runAgent should recognize the earlier success result and return success anyway
+
+      function* mockGeneratorWithCleanupError() {
+        yield {
+          type: 'system',
+          subtype: 'init',
+          model: 'claude-opus-4-5-20251101',
+          tools: [],
+          mcp_servers: [],
+        };
+
+        yield {
+          type: 'result',
+          subtype: 'success',
+          is_error: false,
+          result: 'Agent completed successfully',
+        };
+
+        // Simulate the SDK cleanup error that is thrown after the success result was yielded
+        throw new Error('only prompt commands are supported in streaming mode');
+      }
+
+      mockQuery.mockReturnValue(mockGeneratorWithCleanupError()); // the mocked query() returns this stream
+
+      const result = await runAgent(
+        defaultAgentConfig,
+        'test prompt',
+        defaultOptions,
+        mockSpinner as unknown as ReturnType<typeof clack.spinner>,
+        {
+          successMessage: 'Test success',
+          errorMessage: 'Test error',
+        },
+      );
+
+      // Should resolve with the success value (empty object) instead of throwing
+      expect(result).toEqual({});
+      expect(mockSpinner.stop).toHaveBeenCalledWith('Test success'); // spinner stopped with the success message
+    });
+
+    it('should still throw when no success result was received before error', async () => {
+      // Without a prior success result, an error from the stream must propagate to the caller
+
+      function* mockGeneratorWithOnlyError() {
+        yield {
+          type: 'system',
+          subtype: 'init',
+          model: 'claude-opus-4-5-20251101',
+          tools: [],
+          mcp_servers: [],
+        };
+
+        // No result message is ever yielded; the stream fails right after init
+        throw new Error('Actual SDK error');
+      }
+
+      mockQuery.mockReturnValue(mockGeneratorWithOnlyError()); // the mocked query() returns this stream
+
+      await expect(
+        runAgent(
+          defaultAgentConfig,
+          'test prompt',
+          defaultOptions,
+          mockSpinner as unknown as ReturnType<typeof clack.spinner>,
+          {
+            successMessage: 'Test success',
+            errorMessage: 'Test error',
+          },
+        ),
+      ).rejects.toThrow('Actual SDK error');
+
+      expect(mockSpinner.stop).toHaveBeenCalledWith('Test error'); // spinner stopped with the error message
+    });
+
+    it('should not treat error results as success', async () => {
+      // A result with is_error: true must not count as success,
+      // even when its subtype is 'success': the is_error flag takes precedence
+
+      function* mockGeneratorWithErrorResult() {
+        yield {
+          type: 'system',
+          subtype: 'init',
+          model: 'claude-opus-4-5-20251101',
+          tools: [],
+          mcp_servers: [],
+        };
+
+        yield {
+          type: 'result',
+          subtype: 'success', // subtype can be success but is_error true
+          is_error: true,
+          result: 'API Error: 500 Internal Server Error',
+        };
+
+        throw new Error('Process exited with code 1'); // stream fails after the error result was yielded
+      }
+
+      mockQuery.mockReturnValue(mockGeneratorWithErrorResult()); // the mocked query() returns this stream
+
+      const result = await runAgent(
+        defaultAgentConfig,
+        'test prompt',
+        defaultOptions,
+        mockSpinner as unknown as ReturnType<typeof clack.spinner>,
+        {
+          successMessage: 'Test success',
+          errorMessage: 'Test error',
+        },
+      );
+
+      // Should resolve with an API error result, not the empty success object
+      expect(result.error).toBe('WIZARD_API_ERROR'); // presumably the value of AgentErrorType.API_ERROR; confirm
+      expect(result.message).toContain('API Error');
+    });
+  });
+});
diff --git a/src/lib/agent-interface.ts b/src/lib/agent-interface.ts
@@ -288,6 +288,8 @@ export function initializeAgent(
     const gatewayUrl = getLlmGatewayUrlFromHost(config.posthogApiHost);
     process.env.ANTHROPIC_BASE_URL = gatewayUrl;
     process.env.ANTHROPIC_AUTH_TOKEN = config.posthogApiKey;
+    // Use CLAUDE_CODE_OAUTH_TOKEN to override any stored /login credentials
+    process.env.CLAUDE_CODE_OAUTH_TOKEN = config.posthogApiKey;
     // Disable experimental betas (like input_examples) that the LLM gateway doesn't support
     process.env.CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS = 'true';
 
@@ -377,6 +379,8 @@ export async function runAgent(
 
   const startTime = Date.now();
   const collectedText: string[] = [];
+  // Track if we received a successful result (before any cleanup errors)
+  let receivedSuccessResult = false;
 
   // Workaround for SDK bug: stdin closes before canUseTool responses can be sent.
   // The fix is to use an async generator for the prompt that stays open until
@@ -398,6 +402,31 @@ export async function runAgent(
     await resultReceived;
   };
 
+  // Shared success exit, used on the normal path and when recovering from an error thrown after a success result
+  const completeWithSuccess = (
+    suppressedError?: Error, // the post-success error being ignored, if any; only its message is logged
+  ): { error?: AgentErrorType; message?: string } => {
+    const durationMs = Date.now() - startTime;
+    const durationSeconds = Math.round(durationMs / 1000); // whole seconds, used in the log line and the event
+
+    if (suppressedError) {
+      logToFile(
+        `Ignoring post-completion error, agent completed successfully in ${durationSeconds}s`,
+      );
+      logToFile('Suppressed error:', suppressedError.message);
+    } else {
+      logToFile(`Agent run completed in ${durationSeconds}s`);
+    }
+
+    analytics.capture(WIZARD_INTERACTION_EVENT_NAME, { // same completion event on both paths
+      action: 'agent integration completed',
+      duration_ms: durationMs,
+      duration_seconds: durationSeconds,
+    });
+    spinner.stop(successMessage);
+    return {}; // empty object: neither error nor message is set
+  };
+
   try {
     // Tools needed for the wizard:
     // - File operations: Read, Write, Edit
@@ -428,7 +457,11 @@ export async function runAgent(
         settingSources: ['project'],
         // Explicitly enable required tools including Skill
         allowedTools,
-        env: { ...process.env },
+        env: {
+          ...process.env,
+          // Prevent user's Anthropic API key from overriding the wizard's OAuth token
+          ANTHROPIC_API_KEY: undefined,
+        },
         canUseTool: (toolName: string, input: unknown) => {
           logToFile('canUseTool called:', { toolName, input });
           const result = wizardCanUseTool(
@@ -454,11 +487,15 @@ export async function runAgent(
       handleSDKMessage(message, options, spinner, collectedText);
       // Signal completion when result received
       if (message.type === 'result') {
+        // Track successful results before any potential cleanup errors
+        // The SDK may emit a second error result during cleanup due to a race condition
+        if (message.subtype === 'success' && !message.is_error) {
+          receivedSuccessResult = true;
+        }
         signalDone!();
       }
     }
 
-    const durationMs = Date.now() - startTime;
     const outputText = collectedText.join('\n');
 
     // Check for error markers in the agent's output
@@ -487,19 +524,19 @@ export async function runAgent(
       return { error: AgentErrorType.API_ERROR, message: outputText };
     }
 
-    logToFile(`Agent run completed in ${Math.round(durationMs / 1000)}s`);
-    analytics.capture(WIZARD_INTERACTION_EVENT_NAME, {
-      action: 'agent integration completed',
-      duration_ms: durationMs,
-      duration_seconds: Math.round(durationMs / 1000),
-    });
-
-    spinner.stop(successMessage);
-    return {};
+    return completeWithSuccess();
   } catch (error) {
     // Signal done to unblock the async generator
     signalDone!();
 
+    // If we already received a successful result, the error is from SDK cleanup
+    // This happens due to a race condition: the SDK tries to send a cleanup command
+    // after the prompt stream closes, but streaming mode is still active.
+    // See: https://github.com/anthropics/claude-agent-sdk-typescript/issues/41
+    if (receivedSuccessResult) {
+      return completeWithSuccess(error as Error);
+    }
+
     // Check if we collected an API error before the exception was thrown
     const outputText = collectedText.join('\n');