11import { useState } from 'react' ;
22import { MessageExtra } from '../utils/types' ;
33import toast from 'react-hot-toast' ;
4+ import { useAppContext } from '../utils/app.context' ;
45
56// Interface describing the API returned by the hook
67export interface ChatExtraContextApi {
@@ -12,6 +13,7 @@ export interface ChatExtraContextApi {
1213}
1314
1415export function useChatExtraContext ( ) : ChatExtraContextApi {
16+ const { serverProps } = useAppContext ( ) ;
1517 const [ items , setItems ] = useState < MessageExtra [ ] > ( [ ] ) ;
1618
1719 const addItems = ( newItems : MessageExtra [ ] ) => {
@@ -34,38 +36,55 @@ export function useChatExtraContext(): ChatExtraContextApi {
3436 toast . error ( 'File is too large. Maximum size is 10MB.' ) ;
3537 break ;
3638 }
37- if ( mimeType . startsWith ( 'text/' ) ) {
39+
40+ if ( mimeType . startsWith ( 'image/' ) && mimeType !== 'image/svg+xml' ) {
41+ if ( ! serverProps ?. has_multimodal ) {
42+ toast . error ( 'Multimodal is not supported by this server or model.' ) ;
43+ break ;
44+ }
3845 const reader = new FileReader ( ) ;
3946 reader . onload = ( event ) => {
4047 if ( event . target ?. result ) {
4148 addItems ( [
4249 {
43- type : 'textFile ' ,
50+ type : 'imageFile ' ,
4451 name : file . name ,
45- content : event . target . result as string ,
52+ base64Url : event . target . result as string ,
4653 } ,
4754 ] ) ;
4855 }
4956 } ;
50- reader . readAsText ( file ) ;
51- } else if ( mimeType . startsWith ( 'image/' ) ) {
52- // TODO @ngxson : throw an error if the server does not support image input
57+ reader . readAsDataURL ( file ) ;
58+ } else if (
59+ mimeType . startsWith ( 'video/' ) ||
60+ mimeType . startsWith ( 'audio/' )
61+ ) {
62+ toast . error ( 'Video files are not supported yet.' ) ;
63+ break ;
64+ } else if ( mimeType . startsWith ( 'application/pdf' ) ) {
65+ toast . error ( 'PDF files are not supported yet.' ) ;
66+ break ;
67+ } else {
68+ // Because there can be many text file types (like code file), we will not check the mime type
69+ // and will just check if the file is not binary.
5370 const reader = new FileReader ( ) ;
5471 reader . onload = ( event ) => {
5572 if ( event . target ?. result ) {
73+ const content = event . target . result as string ;
74+ if ( ! isLikelyNotBinary ( content ) ) {
75+ toast . error ( 'File is binary. Please upload a text file.' ) ;
76+ return ;
77+ }
5678 addItems ( [
5779 {
58- type : 'imageFile ' ,
80+ type : 'textFile ' ,
5981 name : file . name ,
60- base64Url : event . target . result as string ,
82+ content ,
6183 } ,
6284 ] ) ;
6385 }
6486 } ;
65- reader . readAsDataURL ( file ) ;
66- } else {
67- // TODO @ngxson : support all other file formats like .pdf, .py, .bat, .c, etc
68- toast . error ( 'Unsupported file type.' ) ;
87+ reader . readAsText ( file ) ;
6988 }
7089 }
7190 } ;
@@ -78,3 +97,76 @@ export function useChatExtraContext(): ChatExtraContextApi {
7897 onFileAdded,
7998 } ;
8099}
100+
// WARN: vibe code below
/**
 * Heuristically decides whether a decoded string looks like text rather than
 * binary data.
 *
 * Only the first 10 * 1024 UTF-16 code units of `str` are inspected. Within
 * that sample a code unit is counted as "suspicious" when it is:
 *  - U+FFFD (replacement character),
 *  - U+0000 (NUL), or
 *  - a C0 control character (< 32) other than BEL (7), BS (8), TAB (9),
 *    LF (10) and CR (13).
 *
 * DEL (127) and every code unit >= 128 other than U+FFFD are not counted.
 *
 * @param str - The text to inspect.
 * @returns `false` when the sample contains more than 2 NUL characters, or
 *   when more than 15% of the sampled code units are suspicious; `true`
 *   otherwise, including for an empty string.
 */
export function isLikelyNotBinary(str: string): boolean {
  const prefixLength = 1024 * 10; // inspect at most the first 10K code units
  const suspiciousCharThresholdRatio = 0.15; // allow up to 15% suspicious chars
  const maxAbsoluteNullBytes = 2; // more NULs than this => binary

  // An empty string is trivially treated as text. This guard also guarantees
  // that sampleLength below is at least 1, so the final division is safe.
  if (!str) {
    return true;
  }

  const sampleLength = Math.min(str.length, prefixLength);

  let suspiciousCharCount = 0;
  let nullByteCount = 0;

  for (let i = 0; i < sampleLength; i++) {
    const charCode = str.charCodeAt(i);

    if (charCode === 0x0000) {
      nullByteCount++;
      // Exceeding the absolute NUL limit decides the outcome regardless of
      // the ratio, so there is no need to scan the rest of the sample.
      if (nullByteCount > maxAbsoluteNullBytes) {
        return false;
      }
      // NULs also count towards the general suspicious-character ratio.
      suspiciousCharCount++;
    } else if (charCode === 0xfffd) {
      // U+FFFD typically appears where bytes could not be decoded as text.
      suspiciousCharCount++;
    } else if (charCode < 32) {
      // C0 controls are suspicious, except BEL (7), BS (8), TAB (9), LF (10)
      // and CR (13).
      const isToleratedControl =
        (charCode >= 7 && charCode <= 10) || charCode === 13;
      if (!isToleratedControl) {
        suspiciousCharCount++;
      }
    }
    // Everything else (printable ASCII, DEL, and code units >= 128 that are
    // not U+FFFD) is accepted as-is.
  }

  return suspiciousCharCount / sampleLength <= suspiciousCharThresholdRatio;
}
0 commit comments