Skip to content

Commit e1b6555

Browse files
committed
🤖 fix: prevent mux-server crash on task timeout + undici termination
When a tool call (notably `task`) times out waiting for `agent_report`, the parent stream can hit an undici `BodyTimeoutError`, which surfaces as `TypeError: terminated`. That error could bubble up as an unhandled rejection or uncaught exception and take down mux-server.

Fixes:
1. Attach no-op `.catch()` handlers to the `streamText()` side promises (usage, steps, providerMetadata, etc.) so they cannot produce unhandled rejections when the stream is aborted or terminated.
2. Treat abort-induced errors as cancellation, not stream errors: if the stream was aborted or is stopping, skip the error state and the error partial write.
3. Make tool-error formatting non-throwing by using a safe `JSON.stringify`.
4. Make `task` tool timeouts return `{status:"running"}` instead of throwing (consistent with `task_await` behavior).
5. Improve error categorization so undici terminations/timeouts map to "network" instead of "unknown".
6. Add safety-net `process.on("uncaughtException")` / `process.on("unhandledRejection")` handlers in the CLI server entrypoint that suppress benign network errors and keep the server running.
1 parent d3928b5 commit e1b6555

File tree

3 files changed

+158
-23
lines changed

3 files changed

+158
-23
lines changed

src/cli/server.ts

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,72 @@ import {
1919
import * as os from "os";
2020
import { getParseOptions } from "./argv";
2121

22+
// -----------------------------------------------------------------------------
23+
// Safety net: prevent uncaught exceptions / unhandled rejections from crashing
24+
// the server process. This is a fallback; the real fixes are in StreamManager
25+
// and tool code, but this ensures resilience against unknown edge cases.
26+
// -----------------------------------------------------------------------------
27+
/**
 * Decide whether an error is a known-benign network/cancellation failure that
 * should be logged quietly instead of being reported as a real server fault.
 *
 * An error is benign when it, or any error in its `cause` chain, is one of:
 *  - `TypeError` with message `"terminated"` (undici stream termination),
 *  - an object whose `code` is `"UND_ERR_BODY_TIMEOUT"` (undici body timeout),
 *  - an `Error` whose `name` is `"AbortError"` (expected on stream cancel).
 *
 * The whole cause chain is inspected because wrappers may nest the undici
 * error more than one level deep.
 *
 * @param err - The thrown value or rejection reason; may be anything.
 * @returns `true` when a benign signature is found, otherwise `false`.
 */
const isBenignNetworkError = (err: unknown): boolean => {
  // Bound the walk so a pathological chain cannot stall the crash handlers.
  const maxCauseDepth = 8;
  const visited = new Set<unknown>();
  let current: unknown = err;

  for (let depth = 0; depth < maxCauseDepth; depth++) {
    // Primitives carry neither `code` nor `cause`; a revisit means a cycle.
    if (typeof current !== "object" || current === null || visited.has(current)) {
      return false;
    }
    visited.add(current);

    // TypeError: terminated - undici stream termination
    if (current instanceof TypeError && current.message === "terminated") return true;

    // AbortError - expected when streams are cancelled
    if (current instanceof Error && current.name === "AbortError") return true;

    // UND_ERR_BODY_TIMEOUT - undici body timeout
    if ((current as { code?: unknown }).code === "UND_ERR_BODY_TIMEOUT") return true;

    current = (current as { cause?: unknown }).cause;
  }

  return false;
};
56+
57+
/**
 * Render an error and its `cause` chain as a single log-friendly string.
 *
 * Each `Error` contributes its `stack` (or `message` when no stack exists);
 * successive causes are joined with `"\n [cause] "`. A non-Error value is
 * stringified and ends the chain.
 *
 * This runs inside the process-level crash handlers, so it must never throw:
 * the walk is iterative, guarded against cyclic `cause` references, and
 * capped in depth.
 *
 * @param err - The thrown value or rejection reason; may be anything.
 * @returns The formatted text, one segment per link in the cause chain.
 */
const formatErrorWithCause = (err: unknown): string => {
  const maxCauseDepth = 10;
  const separator = "\n [cause] ";
  const seen = new Set<unknown>();
  const segments: string[] = [];
  let current: unknown = err;

  for (;;) {
    if (!(current instanceof Error)) {
      // String() throws for values with no usable toString (e.g. null-prototype objects).
      try {
        segments.push(String(current));
      } catch {
        segments.push("[unprintable value]");
      }
      break;
    }
    if (seen.has(current)) {
      // err.cause pointing back into the chain would otherwise never terminate.
      segments.push("[circular cause]");
      break;
    }
    if (segments.length >= maxCauseDepth) {
      segments.push("[cause chain truncated]");
      break;
    }
    seen.add(current);
    segments.push(current.stack ?? current.message);

    // Read `cause` structurally so this compiles against pre-ES2022 lib typings too.
    const cause = (current as { cause?: unknown }).cause;
    if (!cause) break;
    current = cause;
  }

  return segments.join(separator);
};
65+
66+
process.on("uncaughtException", (err) => {
67+
if (isBenignNetworkError(err)) {
68+
console.warn("[mux-server] Suppressed benign uncaughtException:", err.message ?? err);
69+
return;
70+
}
71+
console.error("[mux-server] Uncaught exception (server continuing):", formatErrorWithCause(err));
72+
// Do NOT exit - keep server running
73+
});
74+
75+
process.on("unhandledRejection", (reason) => {
76+
if (isBenignNetworkError(reason)) {
77+
const msg = reason instanceof Error ? reason.message : String(reason);
78+
console.warn("[mux-server] Suppressed benign unhandledRejection:", msg);
79+
return;
80+
}
81+
console.error(
82+
"[mux-server] Unhandled rejection (server continuing):",
83+
formatErrorWithCause(reason)
84+
);
85+
// Do NOT exit - keep server running
86+
});
87+
2288
const program = new Command();
2389
program
2490
.name("mux server")

src/node/services/streamManager.ts

Lines changed: 57 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -771,6 +771,22 @@ export class StreamManager extends EventEmitter {
771771
throw error;
772772
}
773773

774+
// Attach no-op catch handlers to side promises that we may not always await.
775+
// This prevents unhandled rejection crashes if the stream is aborted/terminated
776+
// and these promises reject (e.g., undici TypeError: terminated).
777+
/**
 * Attach a no-op rejection handler to a promise-like value so that a later
 * rejection is not reported as an unhandled rejection.
 *
 * Values with a `catch` method get `catch(noop)`. Thenables that expose only
 * `then` get `then(undefined, noop)`, which registers the same kind of
 * handler. Anything else (undefined, null, primitives, plain objects) is
 * ignored.
 *
 * @param p - A promise, thenable, or any other value.
 */
const suppressUnhandledRejection = (p: unknown): void => {
  if (p === null || (typeof p !== "object" && typeof p !== "function")) return;

  const swallow = (): undefined => undefined;
  const candidate = p as { catch?: unknown; then?: unknown };

  if (typeof candidate.catch === "function") {
    void (p as Promise<unknown>).catch(swallow);
  } else if (typeof candidate.then === "function") {
    // PromiseLike has no `catch`; a rejection handler passed to `then` is equivalent.
    void (p as PromiseLike<unknown>).then(undefined, swallow);
  }
};
782+
suppressUnhandledRejection(streamResult.usage);
783+
suppressUnhandledRejection(streamResult.totalUsage);
784+
suppressUnhandledRejection(streamResult.steps);
785+
suppressUnhandledRejection(streamResult.providerMetadata);
786+
suppressUnhandledRejection(streamResult.text);
787+
suppressUnhandledRejection(streamResult.finishReason);
788+
suppressUnhandledRejection(streamResult.warnings);
789+
774790
const streamInfo: WorkspaceStreamInfo = {
775791
state: StreamState.STARTING,
776792
streamResult,
@@ -1162,15 +1178,20 @@ export class StreamManager extends EventEmitter {
11621178
error: toolErrorPart.error,
11631179
});
11641180

1165-
// Format error output
1181+
// Format error output - use safe stringify to avoid secondary exceptions
1182+
// (e.g., circular refs, BigInt) turning a handled tool failure into a crash
1183+
/**
 * Convert an arbitrary tool-error value to a string without ever throwing.
 *
 * Strings are returned as-is and `Error` instances yield their `message`.
 * Other values are JSON-encoded; when JSON encoding throws (circular
 * references, BigInt) or yields no text (`undefined`, functions, symbols),
 * `String(value)` is used instead. If that also fails or is empty, the
 * placeholder `"[unserializable error]"` is returned.
 *
 * @param err - The error payload reported by the tool call.
 * @returns A string describing the error; never `undefined`.
 */
const formatToolError = (err: unknown): string => {
  if (typeof err === "string") return err;
  if (err instanceof Error) return err.message;

  try {
    // JSON.stringify is typed as `string` but returns undefined for
    // undefined/function/symbol inputs, so widen the type and check.
    const encoded: string | undefined = JSON.stringify(err);
    if (encoded !== undefined) return encoded;
  } catch {
    // Circular structure, BigInt, or a throwing toJSON: fall through to String().
  }

  try {
    return String(err) || "[unserializable error]";
  } catch {
    // String() throws for values with no usable toString (e.g. null-prototype objects).
    return "[unserializable error]";
  }
};
11661192
const errorOutput = {
11671193
success: false,
1168-
error:
1169-
typeof toolErrorPart.error === "string"
1170-
? toolErrorPart.error
1171-
: toolErrorPart.error instanceof Error
1172-
? toolErrorPart.error.message
1173-
: JSON.stringify(toolErrorPart.error),
1194+
error: formatToolError(toolErrorPart.error),
11741195
};
11751196

11761197
// Use shared completion logic (await to ensure partial is flushed before event)
@@ -1378,6 +1399,16 @@ export class StreamManager extends EventEmitter {
13781399
this.emit("stream-end", streamEndEvent);
13791400
}
13801401
} catch (error) {
1402+
// If stream was aborted or is stopping, treat termination errors as expected cancellation
1403+
// rather than fatal errors. This prevents TypeError: terminated (undici) from corrupting
1404+
// stream state or showing scary error messages when user simply cancelled the stream.
1405+
const isAbortedOrStopping =
1406+
streamInfo.abortController.signal.aborted || streamInfo.state === StreamState.STOPPING;
1407+
if (isAbortedOrStopping) {
1408+
log.debug("Stream ended after abort/stop (not an error):", { error });
1409+
return;
1410+
}
1411+
13811412
streamInfo.state = StreamState.ERROR;
13821413

13831414
// Log the actual error for debugging
@@ -1582,6 +1613,25 @@ export class StreamManager extends EventEmitter {
15821613
}
15831614
}
15841615

1616+
// Detect undici/fetch termination errors (e.g., TypeError: terminated, UND_ERR_BODY_TIMEOUT)
1617+
// These occur when the underlying HTTP connection is severed or times out
1618+
const isUndiciTermination =
1619+
(error instanceof TypeError && error.message === "terminated") ||
1620+
(typeof error === "object" &&
1621+
error !== null &&
1622+
"code" in error &&
1623+
(error as { code?: string }).code === "UND_ERR_BODY_TIMEOUT") ||
1624+
(typeof error === "object" &&
1625+
error !== null &&
1626+
"cause" in error &&
1627+
typeof (error as { cause?: unknown }).cause === "object" &&
1628+
(error as { cause?: { code?: string } }).cause !== null &&
1629+
(error as { cause: { code?: string } }).cause.code === "UND_ERR_BODY_TIMEOUT");
1630+
1631+
if (isUndiciTermination) {
1632+
return "network";
1633+
}
1634+
15851635
// Fall back to string matching for other errors
15861636
if (error instanceof Error) {
15871637
const message = error.message.toLowerCase();

src/node/services/tools/task.ts

Lines changed: 35 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -83,23 +83,42 @@ export const createTaskTool: ToolFactory = (config: ToolConfiguration) => {
8383
);
8484
}
8585

86-
const report = await taskService.waitForAgentReport(created.data.taskId, {
87-
abortSignal,
88-
requestingWorkspaceId: workspaceId,
89-
});
86+
try {
87+
const report = await taskService.waitForAgentReport(created.data.taskId, {
88+
abortSignal,
89+
requestingWorkspaceId: workspaceId,
90+
});
9091

91-
return parseToolResult(
92-
TaskToolResultSchema,
93-
{
94-
status: "completed" as const,
95-
taskId: created.data.taskId,
96-
reportMarkdown: report.reportMarkdown,
97-
title: report.title,
98-
agentId: requestedAgentId,
99-
agentType: requestedAgentId,
100-
},
101-
"task"
102-
);
92+
return parseToolResult(
93+
TaskToolResultSchema,
94+
{
95+
status: "completed" as const,
96+
taskId: created.data.taskId,
97+
reportMarkdown: report.reportMarkdown,
98+
title: report.title,
99+
agentId: requestedAgentId,
100+
agentType: requestedAgentId,
101+
},
102+
"task"
103+
);
104+
} catch (waitError) {
105+
// If wait timed out, return a valid result indicating task is still running
106+
// (consistent with task_await behavior). This prevents timeouts from becoming
107+
// tool execution errors that can destabilize the parent stream.
108+
const message = waitError instanceof Error ? waitError.message : String(waitError);
109+
if (/timed out waiting for agent_report/i.test(message)) {
110+
log.debug("Task wait timed out, returning running status", {
111+
taskId: created.data.taskId,
112+
});
113+
return parseToolResult(
114+
TaskToolResultSchema,
115+
{ status: "running" as const, taskId: created.data.taskId },
116+
"task"
117+
);
118+
}
119+
// Re-throw other errors (e.g., "Interrupted", "Task terminated")
120+
throw waitError;
121+
}
103122
},
104123
});
105124
};

0 commit comments

Comments
 (0)