Skip to content

Commit 7e2ab2a

Browse files
committed
feat: Improves file upload handling with modality support
Refactors the file upload logic to support model modalities. Files are now filtered dynamically based on the capabilities of the active model (vision, audio), and a detailed error message is displayed when incompatible files are selected, preventing unexpected behavior and improving the user experience.
- Updates the file input to dynamically accept file types based on model capabilities.
- Introduces an alert dialog that informs users about unsupported file types and the reasons, with specific messages for both generally unsupported files and modality-related issues.
- Modifies PDF processing so that PDF files are processed as text for non-vision models and may be processed as images only for vision models, updating the PDF-as-image setting when necessary.
1 parent 47ec32d commit 7e2ab2a

File tree

6 files changed

+343
-32
lines changed

6 files changed

+343
-32
lines changed

tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActionButtons.svelte

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@
2525
isRecording = false,
2626
class: className = ''
2727
}: Props = $props();
28+
29+
const fileUploadDisabled = disabled || isLoading;
30+
const fileUploadTooltipText = !supportsVision()
31+
? 'Text files and PDFs supported. Images, audio, and video require vision models.'
32+
: 'Attach files';
2833
</script>
2934

3035
<div class="flex items-center justify-between gap-1 {className}">
@@ -33,21 +38,17 @@
3338
<Tooltip.Trigger>
3439
<Button
3540
type="button"
36-
class="text-muted-foreground bg-transparent hover:bg-foreground/10 hover:text-foreground h-8 w-8 rounded-full p-0 {!supportsVision()
37-
? 'opacity-50 cursor-not-allowed'
38-
: ''}"
39-
disabled={disabled || isLoading || !supportsVision()}
41+
class="text-muted-foreground bg-transparent hover:bg-foreground/10 hover:text-foreground h-8 w-8 rounded-full p-0"
42+
disabled={fileUploadDisabled}
4043
onclick={onFileUpload}
4144
>
4245
<span class="sr-only">Attach files</span>
4346
<Paperclip class="h-4 w-4" />
4447
</Button>
4548
</Tooltip.Trigger>
46-
{#if !supportsVision()}
47-
<Tooltip.Content>
48-
<p>Current model does not support vision</p>
49-
</Tooltip.Content>
50-
{/if}
49+
<Tooltip.Content>
50+
<p>{fileUploadTooltipText}</p>
51+
</Tooltip.Content>
5152
</Tooltip.Root>
5253
</div>
5354

tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormFileInputInvisible.svelte

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
<script lang="ts">
22
import { ALL_SUPPORTED_EXTENSIONS, ALL_SUPPORTED_MIME_TYPES } from '$lib/constants/supported-file-types';
3+
import { generateModalityAwareAcceptString } from '$lib/utils/modality-file-validation';
34
45
interface Props {
56
accept?: string;
@@ -15,12 +16,15 @@
1516
].join(',');
1617
1718
let {
18-
accept = defaultAccept,
19+
accept,
1920
multiple = true,
2021
onFileSelect,
2122
class: className = ''
2223
}: Props = $props();
2324
25+
// Use modality-aware accept string by default, but allow override
26+
const finalAccept = $derived(accept ?? generateModalityAwareAcceptString());
27+
2428
let fileInputElement: HTMLInputElement | undefined;
2529
2630
export function click() {
@@ -39,7 +43,7 @@
3943
bind:this={fileInputElement}
4044
type="file"
4145
{multiple}
42-
{accept}
46+
accept={finalAccept}
4347
onchange={handleFileSelect}
4448
class="hidden {className}"
4549
/>

tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte

Lines changed: 103 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import { processFilesToChatUploaded } from '$lib/utils/process-uploaded-files';
44
import { serverStore } from '$lib/stores/server.svelte';
55
import { isFileTypeSupported } from '$lib/constants/supported-file-types';
6+
import { filterFilesByModalities, generateModalityErrorMessage } from '$lib/utils/modality-file-validation';
67
import { ChatForm, ChatScreenHeader, ChatMessages, ServerInfo } from '$lib/components/app';
78
import {
89
activeMessages,
@@ -17,6 +18,7 @@
1718
import { AUTO_SCROLL_THRESHOLD } from '$lib/constants/auto-scroll';
1819
import { navigating } from '$app/state';
1920
import ChatScreenDragOverlay from './ChatScreenDragOverlay.svelte';
21+
import * as AlertDialog from '$lib/components/ui/alert-dialog';
2022
2123
let { showCenteredEmpty = false } = $props();
2224
let chatScrollContainer: HTMLDivElement | undefined = $state();
@@ -25,6 +27,20 @@
2527
let uploadedFiles = $state<ChatUploadedFile[]>([]);
2628
let isDragOver = $state(false);
2729
let dragCounter = $state(0);
30+
31+
// Alert Dialog state for file upload errors
32+
let showFileErrorDialog = $state(false);
33+
let fileErrorData = $state<{
34+
generallyUnsupported: File[];
35+
modalityUnsupported: File[];
36+
modalityReasons: Record<string, string>;
37+
supportedTypes: string[];
38+
}>({
39+
generallyUnsupported: [],
40+
modalityUnsupported: [],
41+
modalityReasons: {},
42+
supportedTypes: []
43+
});
2844
2945
const isEmpty = $derived(
3046
showCenteredEmpty && !activeConversation() && activeMessages().length === 0 && !isLoading()
@@ -117,24 +133,38 @@
117133
}
118134
119135
async function processFiles(files: File[]) {
120-
const supportedFiles: File[] = [];
121-
const unsupportedFiles: File[] = [];
136+
// First filter by general file type support
137+
const generallySupported: File[] = [];
138+
const generallyUnsupported: File[] = [];
122139
123140
for (const file of files) {
124141
if (isFileTypeSupported(file.name, file.type)) {
125-
supportedFiles.push(file);
142+
generallySupported.push(file);
126143
} else {
127-
unsupportedFiles.push(file);
144+
generallyUnsupported.push(file);
128145
}
129146
}
130147
131-
if (unsupportedFiles.length > 0) {
132-
const fileNames = unsupportedFiles.map(f => f.name).join(', ');
133-
const message = unsupportedFiles.length === 1
134-
? `The file "${fileNames}" is not supported. Please upload images (JPG, PNG, GIF, WebP, SVG), audio files (MP3, WAV), PDFs, or text files.`
135-
: `The following files are not supported: ${fileNames}. Please upload images (JPG, PNG, GIF, WebP, SVG), audio files (MP3, WAV), PDFs, or text files.`;
148+
// Then filter by model modalities
149+
const { supportedFiles, unsupportedFiles, modalityReasons } = filterFilesByModalities(generallySupported);
150+
151+
// Combine all unsupported files
152+
const allUnsupportedFiles = [...generallyUnsupported, ...unsupportedFiles];
153+
154+
if (allUnsupportedFiles.length > 0) {
155+
// Determine supported types for current model
156+
const supportedTypes: string[] = ['text files', 'PDFs'];
157+
if (supportsVision()) supportedTypes.push('images');
158+
if (supportsAudio()) supportedTypes.push('audio files');
136159
137-
alert(message);
160+
// Structure error data for better presentation
161+
fileErrorData = {
162+
generallyUnsupported,
163+
modalityUnsupported: unsupportedFiles,
164+
modalityReasons,
165+
supportedTypes
166+
};
167+
showFileErrorDialog = true;
138168
}
139169
140170
if (supportedFiles.length > 0) {
@@ -242,6 +272,69 @@
242272
</div>
243273
{/if}
244274

275+
<!-- File Upload Error Alert Dialog -->
276+
<AlertDialog.Root bind:open={showFileErrorDialog}>
277+
<AlertDialog.Portal>
278+
<AlertDialog.Overlay />
279+
<AlertDialog.Content class="max-w-md">
280+
<AlertDialog.Header>
281+
<AlertDialog.Title>File Upload Error</AlertDialog.Title>
282+
<AlertDialog.Description class="text-sm text-muted-foreground">
283+
Some files cannot be uploaded with the current model.
284+
</AlertDialog.Description>
285+
</AlertDialog.Header>
286+
287+
<div class="space-y-4">
288+
<!-- Generally unsupported files -->
289+
{#if fileErrorData.generallyUnsupported.length > 0}
290+
<div class="space-y-2">
291+
<h4 class="text-sm font-medium text-destructive">Unsupported File Types</h4>
292+
<div class="space-y-1">
293+
{#each fileErrorData.generallyUnsupported as file}
294+
<div class="rounded-md bg-destructive/10 px-3 py-2">
295+
<p class="text-sm font-mono text-destructive break-all">{file.name}</p>
296+
<p class="text-xs text-muted-foreground mt-1">File type not supported</p>
297+
</div>
298+
{/each}
299+
</div>
300+
</div>
301+
{/if}
302+
303+
<!-- Modality-restricted files -->
304+
{#if fileErrorData.modalityUnsupported.length > 0}
305+
<div class="space-y-2">
306+
<h4 class="text-sm font-medium text-destructive">Model Compatibility Issues</h4>
307+
<div class="space-y-1">
308+
{#each fileErrorData.modalityUnsupported as file}
309+
<div class="rounded-md bg-destructive/10 px-3 py-2">
310+
<p class="text-sm font-mono text-destructive break-all">{file.name}</p>
311+
<p class="text-xs text-muted-foreground mt-1">
312+
{fileErrorData.modalityReasons[file.name] || 'Not supported by current model'}
313+
</p>
314+
</div>
315+
{/each}
316+
</div>
317+
</div>
318+
{/if}
319+
320+
<!-- Supported file types -->
321+
<div class="rounded-md bg-muted/50 p-3">
322+
<h4 class="text-sm font-medium mb-2">This model supports:</h4>
323+
<p class="text-sm text-muted-foreground">
324+
{fileErrorData.supportedTypes.join(', ')}
325+
</p>
326+
</div>
327+
</div>
328+
329+
<AlertDialog.Footer>
330+
<AlertDialog.Action onclick={() => showFileErrorDialog = false}>
331+
Got it
332+
</AlertDialog.Action>
333+
</AlertDialog.Footer>
334+
</AlertDialog.Content>
335+
</AlertDialog.Portal>
336+
</AlertDialog.Root>
337+
245338
<style>
246339
.conversation-chat-form {
247340
position: relative;

tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
updateMultipleConfig,
1313
resetConfig
1414
} from '$lib/stores/settings.svelte';
15+
import { supportsVision } from '$lib/stores/server.svelte';
1516
1617
interface Props {
1718
onOpenChange?: (open: boolean) => void;
@@ -205,10 +206,12 @@
205206
</p>
206207
{/if}
207208
{:else if field.type === 'checkbox'}
209+
{@const isDisabled = field.key === 'pdfAsImage' && !supportsVision()}
208210
<div class="flex items-start space-x-3">
209211
<Checkbox
210212
id={field.key}
211213
checked={Boolean(localConfig[field.key])}
214+
disabled={isDisabled}
212215
onCheckedChange={(checked) =>
213216
(localConfig[field.key] = checked)}
214217
class="mt-1"
@@ -217,7 +220,7 @@
217220
<div class="space-y-1">
218221
<label
219222
for={field.key}
220-
class="cursor-pointer text-sm font-medium leading-none"
223+
class="cursor-pointer text-sm font-medium leading-none {isDisabled ? 'text-muted-foreground' : ''}"
221224
>
222225
{field.label}
223226
</label>
@@ -226,6 +229,10 @@
226229
<p class="text-muted-foreground text-xs">
227230
{field.help}
228231
</p>
232+
{:else if field.key === 'pdfAsImage' && !supportsVision()}
233+
<p class="text-muted-foreground text-xs">
234+
PDF-to-image processing requires a vision-capable model. PDFs will be processed as text.
235+
</p>
229236
{/if}
230237
</div>
231238
</div>

tools/server/webui/src/lib/utils/convert-files-to-extra.ts

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
import { convertPDFToImage, convertPDFToText, isPdfMimeType } from "./pdf-processing";
22
import { isSvgMimeType, svgBase64UrlToPngDataURL } from "./svg-to-png";
33
import { isWebpMimeType, webpBase64UrlToPngDataURL } from "./webp-to-png";
4-
import { config } from '$lib/stores/settings.svelte';
5-
import { isLikelyTextFile, readFileAsText } from "./text-files";
6-
import {
7-
FileTypeCategory,
8-
AudioMimeType,
9-
getFileTypeCategory
10-
} from '$lib/constants/supported-file-types';
4+
import { config, settingsStore } from '$lib/stores/settings.svelte';
5+
import { supportsVision } from '$lib/stores/server.svelte';
6+
import { FileTypeCategory, getFileTypeCategory } from '$lib/constants/supported-file-types';
7+
import { readFileAsText, isLikelyTextFile } from './text-files';
8+
import { toast } from 'svelte-sonner';
119

1210
function readFileAsBase64(file: File): Promise<string> {
1311
return new Promise((resolve, reject) => {
@@ -87,10 +85,28 @@ export async function parseFilesToMessageExtras(
8785
// Always get base64 data for preview functionality
8886
const base64Data = await readFileAsBase64(file.file);
8987
const currentConfig = config();
90-
const shouldProcessAsImages = Boolean(currentConfig.pdfAsImage);
88+
const hasVisionSupport = supportsVision();
89+
90+
// Force PDF-to-text for non-vision models
91+
let shouldProcessAsImages = Boolean(currentConfig.pdfAsImage) && hasVisionSupport;
92+
93+
// If user had pdfAsImage enabled but model doesn't support vision, update setting and notify
94+
if (currentConfig.pdfAsImage && !hasVisionSupport) {
95+
console.log('Non-vision model detected: forcing PDF-to-text mode and updating settings');
96+
97+
// Update the setting in localStorage
98+
settingsStore.updateConfig('pdfAsImage', false);
99+
100+
// Show toast notification to user
101+
toast.warning('PDF setting changed: Non-vision model detected, PDFs will be processed as text instead of images.', {
102+
duration: 5000
103+
});
104+
105+
shouldProcessAsImages = false;
106+
}
91107

92108
if (shouldProcessAsImages) {
93-
// Process PDF as images
109+
// Process PDF as images (only for vision models)
94110
try {
95111
const images = await convertPDFToImage(file.file);
96112

@@ -117,7 +133,7 @@ export async function parseFilesToMessageExtras(
117133
});
118134
}
119135
} else {
120-
// Process PDF as text (default)
136+
// Process PDF as text (default or forced for non-vision models)
121137
const content = await convertPDFToText(file.file);
122138

123139
extras.push({

0 commit comments

Comments
 (0)