Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions src/lib/server/textGeneration/mcp/runMcpFlow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -696,15 +696,23 @@ export async function* runMcpFlow({
if (event.type === "update") {
yield event.update;
} else {
messagesOpenAI = [
...messagesOpenAI,
const followupMessages: ChatCompletionMessageParam[] = [
assistantToolMessage,
...(event.summary.toolMessages ?? []),
];
// Inject tool-returned images as a user message so the LLM can see them
const toolImageCount = event.summary.toolImages?.length ?? 0;
if (mmEnabled && toolImageCount > 0) {
followupMessages.push({
role: "user",
content: event.summary.toolImages,
});
}
messagesOpenAI = [...messagesOpenAI, ...followupMessages];
toolMsgCount = event.summary.toolMessages?.length ?? 0;
toolRunCount = event.summary.toolRuns?.length ?? 0;
logger.info(
{ toolMsgCount, toolRunCount },
{ toolMsgCount, toolRunCount, toolImageCount },
"[mcp] tools executed; continuing loop for follow-up completion"
);
}
Expand Down
29 changes: 27 additions & 2 deletions src/lib/server/textGeneration/mcp/toolInvocation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ export type ToolRun = {
output: string;
};

/**
 * Image content part produced from an image block returned by an MCP tool.
 *
 * The shape is what `toToolImagePart` builds: `image_url.url` carries a
 * `data:<mimeType>;base64,<data>` URL and `detail` is always `"auto"`.
 * `runMcpFlow` places an array of these as the `content` of a follow-up
 * `user` message so the model can see tool-returned images.
 */
export type ToolImagePart = {
type: "image_url";
image_url: { url: string; detail: "auto" };
};

export interface NormalizedToolCall {
id: string;
name: string;
Expand All @@ -47,6 +52,7 @@ export interface ExecuteToolCallsParams {
/**
 * Summary yielded by `executeToolCalls` in its final `{ type: "complete" }` event.
 */
export interface ToolCallExecutionResult {
// `role: "tool"` messages (one per successful call, keyed by `tool_call_id`) holding the textual output.
toolMessages: ChatCompletionMessageParam[];
// Record of each executed tool: name, cleaned parameters and textual output.
toolRuns: ToolRun[];
// Image parts extracted from the content blocks of successful tool results.
toolImages: ToolImagePart[];
// NOTE(review): not populated in the visible code — presumably set when a tool short-circuits the flow; confirm against the producer.
finalAnswer?: { text: string; interrupted: boolean };
}

Expand All @@ -64,6 +70,17 @@ const serverMap = (servers: McpServerConfig[]): Map<string, McpServerConfig> =>
return map;
};

/**
 * Bare image media type (e.g. `image/png`, `image/svg+xml`) with no parameters.
 * Rejecting anything else keeps stray `,` / `;` / whitespace out of the data URL.
 */
const TOOL_IMAGE_MIME_PATTERN = /^image\/[a-z0-9][a-z0-9.+-]*$/;

/**
 * Converts one MCP tool content block into an `image_url` part for the
 * follow-up chat completion, or returns `undefined` when the block is not a
 * usable image.
 *
 * A block is accepted only when it is an object with `type === "image"`, a
 * string `data` (base64) and a string `mimeType`. On top of that shape check:
 * - the MIME type is trimmed, lower-cased and stripped of any `;parameters`,
 *   and must be an `image/*` media type;
 * - whitespace (e.g. line-wrapped base64) is removed from the payload, and an
 *   empty payload is rejected.
 *
 * NOTE(review): the payload size is NOT bounded here. A reviewer flagged (P2)
 * that large or unsupported tool images are forwarded verbatim and can make
 * the follow-up `chat.completions.create` call fail, whereas user-uploaded
 * images are constrained via `makeImageProcessor` (maxSizeInMB, width/height).
 * Tool-returned images need equivalent checks or preprocessing before being
 * appended to `toolImages`.
 *
 * @param block - Raw content block from an MCP tool result (untrusted).
 * @returns The image part, or `undefined` if the block is not a valid image.
 */
function toToolImagePart(block: unknown): ToolImagePart | undefined {
  if (typeof block !== "object" || block === null) return undefined;

  const { type, data, mimeType } = block as Record<string, unknown>;
  if (type !== "image" || typeof data !== "string" || typeof mimeType !== "string") {
    return undefined;
  }

  // Drop media-type parameters ("image/png; foo=bar") and normalize case.
  const mediaType = (mimeType.split(";", 1)[0] ?? "").trim().toLowerCase();
  if (!TOOL_IMAGE_MIME_PATTERN.test(mediaType)) return undefined;

  // Base64 may arrive line-wrapped; whitespace is not part of the payload.
  const payload = data.replace(/\s+/g, "");
  if (payload === "") return undefined;

  return {
    type: "image_url",
    image_url: { url: `data:${mediaType};base64,${payload}`, detail: "auto" },
  };
}

export async function* executeToolCalls({
calls,
mapping,
Expand All @@ -78,6 +95,7 @@ export async function* executeToolCalls({
const effectiveTimeoutMs = toolTimeoutMs ?? getMcpToolTimeoutMs();
const toolMessages: ChatCompletionMessageParam[] = [];
const toolRuns: ToolRun[] = [];
const toolImages: ToolImagePart[] = [];
const serverLookup = serverMap(servers);
// Pre-emit call + ETA updates and prepare tasks
type TaskResult = {
Expand Down Expand Up @@ -335,7 +353,14 @@ export async function* executeToolCalls({
const name = prepared[r.index].call.name;
const id = prepared[r.index].call.id;
if (!r.error) {
const output = r.output ?? "";
let output = r.output ?? "";
// Extract any image content blocks returned by the MCP tool
const imageParts = (r.blocks ?? []).map(toToolImagePart).filter(Boolean) as ToolImagePart[];
toolImages.push(...imageParts);
// If output is empty but images were returned, provide placeholder text
if (output === "" && imageParts.length > 0) {
output = "Tool returned image(s).";
}
toolRuns.push({ name, parameters: r.paramsClean, output });
// For the LLM follow-up call, we keep only the textual output
toolMessages.push({ role: "tool", tool_call_id: id, content: output });
Expand All @@ -345,5 +370,5 @@ export async function* executeToolCalls({
}
}

yield { type: "complete", summary: { toolMessages, toolRuns } };
yield { type: "complete", summary: { toolMessages, toolRuns, toolImages } };
}
Loading