Skip to content

Commit 4cad2b6

Browse files
committed
feat: Adds support for audio file uploads
Allows users to upload and process audio files (mp3, wav). This commit introduces the ability to upload audio files, process them, and include them in chat messages. It reads audio files as base64 data and includes the data and format in the API payload.
1 parent 2e0f1d7 commit 4cad2b6

File tree

4 files changed

+73
-5
lines changed

4 files changed

+73
-5
lines changed

tools/server/webui/src/lib/services/chat.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,7 @@ export class ChatService {
282282

283283
// Add image files
284284
const imageFiles = message.extra.filter((extra: DatabaseMessageExtra): extra is DatabaseMessageExtraImageFile => extra.type === 'imageFile');
285+
285286
for (const image of imageFiles) {
286287
contentParts.push({
287288
type: 'image_url',
@@ -291,15 +292,30 @@ export class ChatService {
291292

292293
// Add text files as additional text content
293294
const textFiles = message.extra.filter((extra: DatabaseMessageExtra): extra is DatabaseMessageExtraTextFile => extra.type === 'textFile');
295+
294296
for (const textFile of textFiles) {
295297
contentParts.push({
296298
type: 'text',
297299
text: `\n\n--- File: ${textFile.name} ---\n${textFile.content}`
298300
});
299301
}
300302

303+
// Add audio files
304+
const audioFiles = message.extra.filter((extra: DatabaseMessageExtra): extra is DatabaseMessageExtraAudioFile => extra.type === 'audioFile');
305+
306+
for (const audio of audioFiles) {
307+
contentParts.push({
308+
type: 'input_audio',
309+
input_audio: {
310+
data: audio.base64Data,
311+
format: audio.mimeType.includes('wav') ? 'wav' : 'mp3'
312+
}
313+
});
314+
}
315+
301316
// Add PDF files as text content
302317
const pdfFiles = message.extra.filter((extra: DatabaseMessageExtra): extra is DatabaseMessageExtraPdfFile => extra.type === 'pdfFile');
318+
303319
for (const pdfFile of pdfFiles) {
304320
if (pdfFile.processedAsImages && pdfFile.images) {
305321
// If PDF was processed as images, add each page as an image

tools/server/webui/src/lib/types/api.d.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
export interface ApiChatMessageContentPart {
2-
type: 'text' | 'image_url';
2+
type: 'text' | 'image_url' | 'input_audio';
33
text?: string;
44
image_url?: {
55
url: string;
66
};
7+
input_audio?: {
8+
data: string;
9+
format: 'wav' | 'mp3';
10+
};
711
}
812

913
export interface ApiChatMessageData {

tools/server/webui/src/lib/utils/convert-files-to-extra.ts

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,27 @@ import { isWebpMimeType, webpBase64UrlToPngDataURL } from "./webp-to-png";
44
import { config } from '$lib/stores/settings.svelte';
55
import { isLikelyTextFile, readFileAsText } from "./text-files";
66

7+
/**
 * Reads a file and resolves with its contents encoded as a base64 string.
 *
 * The file is read through `FileReader.readAsDataURL`, and the
 * `data:<mediatype>;base64,` prefix is stripped so only the payload remains.
 *
 * @param file - The file to read.
 * @returns A promise resolving to the base64 payload; an empty string when
 *   the data URL carries no payload.
 * @throws Rejects with the reader's error when reading fails, or with an
 *   `Error` when the reader does not produce a string result.
 */
function readFileAsBase64(file: File): Promise<string> {
	return new Promise((resolve, reject) => {
		const reader = new FileReader();

		reader.onload = () => {
			const dataUrl = reader.result;

			// `result` is typed `string | ArrayBuffer | null`; narrow it instead of asserting.
			if (typeof dataUrl !== 'string') {
				reject(new Error(`Unexpected FileReader result while reading ${file.name}`));
				return;
			}

			// Keep everything after the first comma (the base64 alphabet contains no commas).
			// Without a comma there is no payload, so resolve an empty string rather than `undefined`.
			const separatorIndex = dataUrl.indexOf(',');
			resolve(separatorIndex === -1 ? '' : dataUrl.slice(separatorIndex + 1));
		};

		reader.onerror = () => reject(reader.error);

		reader.readAsDataURL(file);
	});
}
23+
24+
/**
 * MIME types treated as uploadable audio (MP3 and WAV).
 *
 * WAV is reported under several aliases depending on the browser/OS, so all
 * common spellings are listed. Every WAV alias contains the substring `wav`.
 */
const SUPPORTED_AUDIO_MIME_TYPES: ReadonlySet<string> = new Set([
	'audio/mpeg',
	'audio/mp3',
	'audio/wav',
	'audio/wave',
	'audio/x-wav',
	'audio/x-pn-wav'
]);

/**
 * Tells whether a MIME type denotes a supported audio format (MP3 or WAV).
 *
 * @param mimeType - The MIME type to check, e.g. `File.type`.
 * @returns `true` for MP3 and WAV MIME types (including WAV aliases), `false` otherwise.
 */
function isAudioMimeType(mimeType: string): boolean {
	return SUPPORTED_AUDIO_MIME_TYPES.has(mimeType);
}
27+
728
export async function parseFilesToMessageExtras(
829
files: ChatUploadedFile[]
930
): Promise<DatabaseMessageExtra[]> {
@@ -40,41 +61,64 @@ export async function parseFilesToMessageExtras(
4061
base64Url
4162
});
4263
}
64+
} else if (isAudioMimeType(file.type)) {
65+
// Process audio files (MP3 and WAV)
66+
try {
67+
const base64Data = await readFileAsBase64(file.file);
68+
69+
extras.push({
70+
type: 'audioFile',
71+
name: file.name,
72+
base64Data: base64Data,
73+
mimeType: file.type
74+
});
75+
} catch (error) {
76+
console.error(`Failed to process audio file ${file.name}:`, error);
77+
}
4378
} else if (isPdfMimeType(file.type)) {
4479
try {
80+
// Always get base64 data for preview functionality
81+
const base64Data = await readFileAsBase64(file.file);
4582
const currentConfig = config();
4683
const shouldProcessAsImages = Boolean(currentConfig.pdfAsImage);
4784

4885
if (shouldProcessAsImages) {
4986
// Process PDF as images
5087
try {
5188
const images = await convertPDFToImage(file.file);
89+
5290
extras.push({
5391
type: 'pdfFile',
5492
name: file.name,
5593
content: `PDF file with ${images.length} pages`,
5694
images: images,
57-
processedAsImages: true
95+
processedAsImages: true,
96+
base64Data: base64Data
5897
});
5998
} catch (imageError) {
6099
console.warn(`Failed to process PDF ${file.name} as images, falling back to text:`, imageError);
100+
61101
// Fallback to text processing
62102
const content = await convertPDFToText(file.file);
103+
63104
extras.push({
64105
type: 'pdfFile',
65106
name: file.name,
66107
content: content,
67-
processedAsImages: false
108+
processedAsImages: false,
109+
base64Data: base64Data
68110
});
69111
}
70112
} else {
71113
// Process PDF as text (default)
72114
const content = await convertPDFToText(file.file);
115+
73116
extras.push({
74117
type: 'pdfFile',
75118
name: file.name,
76119
content: content,
77-
processedAsImages: false
120+
processedAsImages: false,
121+
base64Data: base64Data
78122
});
79123
}
80124
} catch (error) {

tools/server/webui/src/lib/utils/process-uploaded-files.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,12 @@ export async function processFilesToChatUploaded(files: File[]): Promise<ChatUpl
6565
} else if (isPdfMimeType(file.type)) {
6666
// PDFs handled later when building extras; keep metadata only
6767
results.push(base);
68+
} else if (file.type.startsWith('audio/')) {
69+
// Generate preview URL for audio files
70+
const preview = await readFileAsDataURL(file);
71+
results.push({ ...base, preview });
6872
} else {
69-
// Other files: add as-is (audio, etc.)
73+
// Other files: add as-is
7074
results.push(base);
7175
}
7276
} catch (error) {

0 commit comments

Comments
 (0)