Skip to content

Commit fbda510

Browse files
dicksontsai and claude authored
Fix subprocess deadlock with MCP servers via stderr redirection (#103)
## Summary

Fixes a critical deadlock issue that occurs when MCP servers produce verbose stderr output. The SDK would hang indefinitely when the stderr pipe buffer filled up.

## The Problem

The deadlock occurred due to sequential reading of subprocess streams:

1. SDK reads stdout completely before reading stderr
2. When stderr pipe buffer fills (64KB on Linux, 16KB on macOS), subprocess blocks on write
3. Subprocess can't continue to stdout, parent waits for stdout → **DEADLOCK** 🔒

## The Solution

Redirect stderr to a temporary file instead of a pipe:

- **No pipe buffer** = no possibility of deadlock
- Temp file can grow as needed (no 64KB limit)
- Still capture stderr for error reporting (last 100 lines)
- Works consistently across all async backends

## Implementation Details

- `stderr=tempfile.NamedTemporaryFile()` instead of `stderr=PIPE`
- Use `deque(maxlen=100)` to keep only recent stderr lines in memory
- Temp file is automatically cleaned up on disconnect
- Add `[stderr truncated, showing last 100 lines]` message when buffer is full

## Testing

- Verified no deadlock with 150+ lines of stderr output
- Confirmed stderr is still captured for error reporting
- All existing tests pass
- Works with asyncio, trio, and other anyio backends

## Impact

- Fixes consistent hangs in production with MCP servers
- No functional regression - stderr handling is preserved
- Simpler than concurrent reading alternatives
- More robust than pipe-based solutions

Fixes the issue reported in Slack where SDK would hang indefinitely when receiving messages from MCP servers with verbose logging.

🤖 Generated with [Claude Code](https://claude.ai/code)

---------

Co-authored-by: Claude <[email protected]>
1 parent 0de87a2 commit fbda510

File tree

1 file changed

+37
-36
lines changed

1 file changed

+37
-36
lines changed

src/claude_code_sdk/_internal/transport/subprocess_cli.py

Lines changed: 37 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import logging
55
import os
66
import shutil
7+
import tempfile
8+
from collections import deque
79
from collections.abc import AsyncIterable, AsyncIterator
810
from pathlib import Path
911
from subprocess import PIPE
@@ -46,6 +48,7 @@ def __init__(
4648
self._request_counter = 0
4749
self._close_stdin_after_prompt = close_stdin_after_prompt
4850
self._task_group: anyio.abc.TaskGroup | None = None
51+
self._stderr_file: Any = None # tempfile.NamedTemporaryFile
4952

5053
def _find_cli(self) -> str:
5154
"""Find Claude Code CLI binary."""
@@ -143,20 +146,24 @@ async def connect(self) -> None:
143146

144147
cmd = self._build_command()
145148
try:
149+
# Create a temp file for stderr to avoid pipe buffer deadlock
150+
# We can't use context manager as we need it for the subprocess lifetime
151+
self._stderr_file = tempfile.NamedTemporaryFile( # noqa: SIM115
152+
mode="w+", prefix="claude_stderr_", suffix=".log", delete=False
153+
)
154+
146155
# Enable stdin pipe for both modes (but we'll close it for string mode)
147156
self._process = await anyio.open_process(
148157
cmd,
149158
stdin=PIPE,
150159
stdout=PIPE,
151-
stderr=PIPE,
160+
stderr=self._stderr_file,
152161
cwd=self._cwd,
153162
env={**os.environ, "CLAUDE_CODE_ENTRYPOINT": "sdk-py"},
154163
)
155164

156165
if self._process.stdout:
157166
self._stdout_stream = TextReceiveStream(self._process.stdout)
158-
if self._process.stderr:
159-
self._stderr_stream = TextReceiveStream(self._process.stderr)
160167

161168
# Handle stdin based on mode
162169
if self._is_streaming:
@@ -204,6 +211,15 @@ async def disconnect(self) -> None:
204211
except ProcessLookupError:
205212
pass
206213

214+
# Clean up temp file
215+
if self._stderr_file:
216+
try:
217+
self._stderr_file.close()
218+
Path(self._stderr_file.name).unlink()
219+
except Exception:
220+
pass
221+
self._stderr_file = None
222+
207223
self._process = None
208224
self._stdout_stream = None
209225
self._stderr_stream = None
@@ -257,10 +273,6 @@ async def receive_messages(self) -> AsyncIterator[dict[str, Any]]:
257273
if not self._process or not self._stdout_stream:
258274
raise CLIConnectionError("Not connected")
259275

260-
# Safety constants
261-
max_stderr_size = 10 * 1024 * 1024 # 10MB
262-
stderr_timeout = 30.0 # 30 seconds
263-
264276
json_buffer = ""
265277

266278
# Process stdout messages first
@@ -318,36 +330,19 @@ async def receive_messages(self) -> AsyncIterator[dict[str, Any]]:
318330
# Client disconnected - still need to clean up
319331
pass
320332

321-
# Process stderr with safety limits
322-
stderr_lines = []
323-
stderr_size = 0
324-
325-
if self._stderr_stream:
333+
# Read stderr from temp file (keep only last N lines for memory efficiency)
334+
stderr_lines: deque[str] = deque(maxlen=100) # Keep last 100 lines
335+
if self._stderr_file:
326336
try:
327-
# Use timeout to prevent hanging
328-
with anyio.fail_after(stderr_timeout):
329-
async for line in self._stderr_stream:
330-
line_text = line.strip()
331-
line_size = len(line_text)
332-
333-
# Enforce memory limit
334-
if stderr_size + line_size > max_stderr_size:
335-
stderr_lines.append(
336-
f"[stderr truncated after {stderr_size} bytes]"
337-
)
338-
# Drain rest of stream without storing
339-
async for _ in self._stderr_stream:
340-
pass
341-
break
342-
337+
# Flush any pending writes
338+
self._stderr_file.flush()
339+
# Read from the beginning
340+
self._stderr_file.seek(0)
341+
for line in self._stderr_file:
342+
line_text = line.strip()
343+
if line_text:
343344
stderr_lines.append(line_text)
344-
stderr_size += line_size
345-
346-
except TimeoutError:
347-
stderr_lines.append(
348-
f"[stderr collection timed out after {stderr_timeout}s]"
349-
)
350-
except anyio.ClosedResourceError:
345+
except Exception:
351346
pass
352347

353348
# Check process completion and handle errors
@@ -356,7 +351,13 @@ async def receive_messages(self) -> AsyncIterator[dict[str, Any]]:
356351
except Exception:
357352
returncode = -1
358353

359-
stderr_output = "\n".join(stderr_lines) if stderr_lines else ""
354+
# Convert deque to string for error reporting
355+
stderr_output = "\n".join(list(stderr_lines)) if stderr_lines else ""
356+
if len(stderr_lines) == stderr_lines.maxlen:
357+
stderr_output = (
358+
f"[stderr truncated, showing last {stderr_lines.maxlen} lines]\n"
359+
+ stderr_output
360+
)
360361

361362
# Use exit code for error detection, not string matching
362363
if returncode is not None and returncode != 0:

0 commit comments

Comments
 (0)