Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/lib/components/chat/ChatWindow.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@
providerOverride && providerOverride !== "auto" && !currentModel.isRouter
);

// Always allow common text-like files; add images only when model is multimodal
// Always allow common text-like files; add images when multimodal; add declared file types
import { TEXT_MIME_ALLOWLIST, IMAGE_MIME_ALLOWLIST_DEFAULT } from "$lib/constants/mime";

let activeMimeTypes = $derived(
Expand All @@ -319,6 +319,7 @@
...(modelIsMultimodal
? (currentModel.multimodalAcceptedMimetypes ?? [...IMAGE_MIME_ALLOWLIST_DEFAULT])
: []),
...(currentModel.acceptedFileMimetypes ?? []),
])
)
);
Expand Down
1 change: 1 addition & 0 deletions src/lib/server/api/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ export type GETModelsResponse = Array<{
preprompt?: string;
multimodal: boolean;
multimodalAcceptedMimetypes?: string[];
acceptedFileMimetypes?: string[];
supportsTools?: boolean;
unlisted: boolean;
hasInferenceAPI: boolean;
Expand Down
7 changes: 6 additions & 1 deletion src/lib/server/endpoints/openai/endpointOai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,12 @@ export async function endpointOai(
}) => {
// Format messages for the chat API, handling multimodal content if supported
let messagesOpenAI: OpenAI.Chat.Completions.ChatCompletionMessageParam[] =
await prepareMessagesWithFiles(messages, imageProcessor, isMultimodal ?? model.multimodal);
await prepareMessagesWithFiles(
messages,
imageProcessor,
isMultimodal ?? model.multimodal,
model.acceptedFileMimetypes
);

// Normalize preprompt and handle empty values
const normalizedPreprompt = typeof preprompt === "string" ? preprompt.trim() : "";
Expand Down
2 changes: 2 additions & 0 deletions src/lib/server/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ const modelConfig = z.object({
.optional(),
multimodal: z.boolean().default(false),
multimodalAcceptedMimetypes: z.array(z.string()).optional(),
/** MIME types the model accepts as file attachments (e.g. ["application/pdf", "image/*"]) */
acceptedFileMimetypes: z.array(z.string()).optional(),
// Aggregated tool-calling capability across providers (HF router)
supportsTools: z.boolean().default(false),
unlisted: z.boolean().default(false),
Expand Down
3 changes: 2 additions & 1 deletion src/lib/server/textGeneration/mcp/runMcpFlow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,8 @@ export async function* runMcpFlow({
let messagesOpenAI: ChatCompletionMessageParam[] = await prepareMessagesWithFiles(
messages,
imageProcessor,
mmEnabled
mmEnabled,
model.acceptedFileMimetypes

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Use target model file allowlist in MCP message prep

After resolveRouterTarget picks targetModel, the MCP request is built against that routed model, but prepareMessagesWithFiles still receives model.acceptedFileMimetypes from the original model. In routed conversations where these differ, file parts are prepared with the wrong MIME policy: PDFs can be omitted when the routed model supports them, or sent when the routed model does not, which can produce incorrect behavior or upstream 4xx errors during tool-calling flows.

Useful? React with 👍 / 👎.

);
const toolPreprompt = buildToolPreprompt(oaTools);
const prepromptPieces: string[] = [];
Expand Down
66 changes: 49 additions & 17 deletions src/lib/server/textGeneration/utils/prepareFiles.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,51 @@ import type { OpenAI } from "openai";
import { TEXT_MIME_ALLOWLIST } from "$lib/constants/mime";
import type { makeImageProcessor } from "$lib/server/endpoints/images";

/** MIME types that OpenAI handles natively as file content parts */
const NATIVE_FILE_MIMETYPES = ["application/pdf"] as const;

/**
 * Check whether a MIME type matches any entry of an allowlist.
 *
 * Allowlist entries are "type/subtype" patterns where either segment may be
 * the wildcard `*` (e.g. "image/*", "*&#47;*"). Matching is case-insensitive.
 * Any parameters on the input MIME string (e.g. "text/plain; charset=utf-8")
 * are stripped before comparison, so parameterized MIME types still match
 * their base "type/subtype" entry.
 *
 * @param mime - MIME type of the file; may be empty or carry parameters.
 * @param allowlist - Allowed MIME patterns ("type/subtype", wildcards OK).
 * @returns true when `mime` matches at least one allowlist entry.
 */
function matchesMimeAllowlist(mime: string, allowlist: readonly string[]): boolean {
	// Drop any ";param=..." tail and lowercase so "Text/Plain; charset=utf-8"
	// compares equal to an allowlist entry of "text/plain".
	const normalizedMime = (mime || "").split(";")[0].trim().toLowerCase();
	const [fileType, fileSubtype] = normalizedMime.split("/");
	return allowlist.some((allowed) => {
		const [type, subtype] = allowed.toLowerCase().split("/");
		const typeOk = type === "*" || type === fileType;
		const subOk = subtype === "*" || subtype === fileSubtype;
		return typeOk && subOk;
	});
}

/**
* Prepare chat messages for OpenAI-compatible multimodal payloads.
* - Processes images via the provided imageProcessor (resize/convert) when multimodal is enabled.
* - Sends PDFs as native file content parts when the model accepts them.
* - Injects text-file content into the user message text.
* - Leaves messages untouched when no files or multimodal disabled.
*/
export async function prepareMessagesWithFiles(
messages: EndpointMessage[],
imageProcessor: ReturnType<typeof makeImageProcessor>,
isMultimodal: boolean
isMultimodal: boolean,
acceptedFileMimetypes?: string[]
): Promise<OpenAI.Chat.Completions.ChatCompletionMessageParam[]> {
return Promise.all(
messages.map(async (message) => {
if (message.from === "user" && message.files && message.files.length > 0) {
const { imageParts, textContent } = await prepareFiles(
const { imageParts, fileParts, textContent } = await prepareFiles(
imageProcessor,
message.files,
isMultimodal
isMultimodal,
acceptedFileMimetypes
);

let messageText = message.content;
if (textContent.length > 0) {
messageText = textContent + "\n\n" + message.content;
}

if (imageParts.length > 0 && isMultimodal) {
const parts = [{ type: "text" as const, text: messageText }, ...imageParts];
const multimodalParts = [...imageParts, ...fileParts];
if (multimodalParts.length > 0) {
const parts = [{ type: "text" as const, text: messageText }, ...multimodalParts];
return { role: message.from, content: parts };
}

Expand All @@ -44,22 +62,25 @@ export async function prepareMessagesWithFiles(
async function prepareFiles(
imageProcessor: ReturnType<typeof makeImageProcessor>,
files: MessageFile[],
isMultimodal: boolean
isMultimodal: boolean,
acceptedFileMimetypes?: string[]
): Promise<{
imageParts: OpenAI.Chat.Completions.ChatCompletionContentPartImage[];
fileParts: OpenAI.Chat.Completions.ChatCompletionContentPart.File[];
textContent: string;
}> {
const imageFiles = files.filter((file) => file.mime.startsWith("image/"));
const textFiles = files.filter((file) => {
const mime = (file.mime || "").toLowerCase();
const [fileType, fileSubtype] = mime.split("/");
return TEXT_MIME_ALLOWLIST.some((allowed) => {
const [type, subtype] = allowed.toLowerCase().split("/");
const typeOk = type === "*" || type === fileType;
const subOk = subtype === "*" || subtype === fileSubtype;
return typeOk && subOk;
});
});
const textFiles = files.filter((file) => matchesMimeAllowlist(file.mime, TEXT_MIME_ALLOWLIST));

// Files that the model accepts natively (e.g. PDFs via OpenAI's file content part)
const nativeFiles = files.filter(
(file) =>
!file.mime.startsWith("image/") &&
!matchesMimeAllowlist(file.mime, TEXT_MIME_ALLOWLIST) &&
acceptedFileMimetypes &&
matchesMimeAllowlist(file.mime, acceptedFileMimetypes) &&
matchesMimeAllowlist(file.mime, NATIVE_FILE_MIMETYPES)
);

let imageParts: OpenAI.Chat.Completions.ChatCompletionContentPartImage[] = [];
if (isMultimodal && imageFiles.length > 0) {
Expand All @@ -73,6 +94,17 @@ async function prepareFiles(
}));
}

// Send natively-supported files as OpenAI file content parts
const fileParts: OpenAI.Chat.Completions.ChatCompletionContentPart.File[] = nativeFiles.map(
(file) => ({
type: "file" as const,
file: {
filename: file.name,
file_data: `data:${file.mime};base64,${file.value}`,
},
})
);

let textContent = "";
if (textFiles.length > 0) {
const textParts = await Promise.all(
Expand All @@ -84,5 +116,5 @@ async function prepareFiles(
textContent = textParts.join("\n\n");
}

return { imageParts, textContent };
return { imageParts, fileParts, textContent };
}
1 change: 1 addition & 0 deletions src/lib/types/Model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ export type Model = Pick<
| "preprompt"
| "multimodal"
| "multimodalAcceptedMimetypes"
| "acceptedFileMimetypes"
| "unlisted"
| "hasInferenceAPI"
| "providers"
Expand Down
1 change: 1 addition & 0 deletions src/routes/api/v2/models/+server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ export const GET: RequestHandler = async () => {
preprompt: model.preprompt,
multimodal: model.multimodal,
multimodalAcceptedMimetypes: model.multimodalAcceptedMimetypes,
acceptedFileMimetypes: model.acceptedFileMimetypes,
supportsTools: (model as unknown as { supportsTools?: boolean }).supportsTools ?? false,
unlisted: model.unlisted,
hasInferenceAPI: model.hasInferenceAPI,
Expand Down