stricter upload file check, only allow image if server has mtmd

ngxson · ngxson · commit 9d076e475764 · 2025-05-08T01:44:06.000+02:00
diff --git a/tools/server/webui/src/components/ChatScreen.tsx b/tools/server/webui/src/components/ChatScreen.tsx
@@ -11,8 +11,6 @@ import {
   ArrowUpIcon,
   StopIcon,
   PaperClipIcon,
-  DocumentTextIcon,
-  XMarkIcon,
 } from '@heroicons/react/24/solid';
 import {
   ChatExtraContextApi,
@@ -249,9 +247,17 @@ export default function ChatScreen() {
       >
         {/* chat messages */}
         <div id="messages-list" className="grow">
-          <div className="mt-auto flex justify-center">
+          <div className="mt-auto flex flex-col items-center">
             {/* placeholder to shift the message to the bottom */}
-            {viewingChat ? '' : 'Send a message to start'}
+            {viewingChat ? (
+              ''
+            ) : (
+              <>
+                <ServerInfo />
+                <br />
+                Send a message to start
+              </>
+            )}
           </div>
           {[...messages, ...pendingMsgDisplay].map((msg) => (
             <ChatMessage
@@ -285,6 +291,23 @@ export default function ChatScreen() {
   );
 }
 
+// Card rendered when no chat is being viewed: model file name (last path segment) and build info.
+function ServerInfo() {
+  const { serverProps: props } = useAppContext();
+  const modelFile = props?.model_path?.split(/(\\|\/)/).pop();
+  return (
+    <div className="card card-sm shadow-sm border-1 border-base-content/20 text-base-content/70 mb-6">
+      <div className="card-body">
+        <b>Server Info</b>
+        <p>
+          <b>Model</b>: {modelFile}<br />
+          <b>Build</b>: {props?.build_info}<br />
+        </p>
+      </div>
+    </div>
+  );
+}
+
 function ChatInput({
   textarea,
   extraContext,
diff --git a/tools/server/webui/src/components/Header.tsx b/tools/server/webui/src/components/Header.tsx
@@ -4,7 +4,11 @@ import { useAppContext } from '../utils/app.context';
 import { classNames } from '../utils/misc';
 import daisyuiThemes from 'daisyui/theme/object';
 import { THEMES } from '../Config';
-import { Cog8ToothIcon, MoonIcon, Bars3Icon } from '@heroicons/react/24/outline';
+import {
+  Cog8ToothIcon,
+  MoonIcon,
+  Bars3Icon,
+} from '@heroicons/react/24/outline';
 
 export default function Header() {
   const [selectedTheme, setSelectedTheme] = useState(StorageUtils.getTheme());
diff --git a/tools/server/webui/src/components/Sidebar.tsx b/tools/server/webui/src/components/Sidebar.tsx
@@ -146,9 +146,12 @@ function ConversationItem({
       >
         {conv.name}
       </div>
-      <div className="dropdown dropdown-end h-5 opacity-0 group-hover:opacity-100">
+      <div className="dropdown dropdown-end h-5">
         <BtnWithTooltips
-          className="cursor-pointer block group-hover:block"
+          // on mobile, we always show the ellipsis icon
+          // on desktop, we only show it when the user hovers over the conversation item
+          // we use opacity instead of hidden to avoid layout shift
+          className="cursor-pointer opacity-100 md:opacity-0 group-hover:opacity-100"
           onClick={() => {}}
           tooltipsContent="More"
         >
diff --git a/tools/server/webui/src/components/useChatExtraContext.tsx b/tools/server/webui/src/components/useChatExtraContext.tsx
@@ -1,6 +1,7 @@
 import { useState } from 'react';
 import { MessageExtra } from '../utils/types';
 import toast from 'react-hot-toast';
+import { useAppContext } from '../utils/app.context';
 
 // Interface describing the API returned by the hook
 export interface ChatExtraContextApi {
@@ -12,6 +13,7 @@ export interface ChatExtraContextApi {
 }
 
 export function useChatExtraContext(): ChatExtraContextApi {
+  const { serverProps } = useAppContext();
   const [items, setItems] = useState<MessageExtra[]>([]);
 
   const addItems = (newItems: MessageExtra[]) => {
@@ -34,38 +36,55 @@ export function useChatExtraContext(): ChatExtraContextApi {
         toast.error('File is too large. Maximum size is 10MB.');
         break;
       }
-      if (mimeType.startsWith('text/')) {
+
+      if (mimeType.startsWith('image/') && mimeType !== 'image/svg+xml') {
+        if (!serverProps?.has_multimodal) {
+          toast.error('Multimodal is not supported by this server or model.');
+          break;
+        }
         const reader = new FileReader();
         reader.onload = (event) => {
           if (event.target?.result) {
             addItems([
               {
-                type: 'textFile',
+                type: 'imageFile',
                 name: file.name,
-                content: event.target.result as string,
+                base64Url: event.target.result as string,
               },
             ]);
           }
         };
-        reader.readAsText(file);
-      } else if (mimeType.startsWith('image/')) {
-        // TODO @ngxson : throw an error if the server does not support image input
+        reader.readAsDataURL(file);
+      } else if (
+        mimeType.startsWith('video/') ||
+        mimeType.startsWith('audio/')
+      ) {
+        toast.error('Video and audio files are not supported yet.');
+        break;
+      } else if (mimeType.startsWith('application/pdf')) {
+        toast.error('PDF files are not supported yet.');
+        break;
+      } else {
+        // Because there can be many text file types (like code file), we will not check the mime type
+        // and will just check if the file is not binary.
         const reader = new FileReader();
         reader.onload = (event) => {
           if (event.target?.result) {
+            const content = event.target.result as string;
+            if (!isLikelyNotBinary(content)) {
+              toast.error('File is binary. Please upload a text file.');
+              return;
+            }
             addItems([
               {
-                type: 'imageFile',
+                type: 'textFile',
                 name: file.name,
-                base64Url: event.target.result as string,
+                content,
               },
             ]);
           }
         };
-        reader.readAsDataURL(file);
-      } else {
-        // TODO @ngxson : support all other file formats like .pdf, .py, .bat, .c, etc
-        toast.error('Unsupported file type.');
+        reader.readAsText(file);
       }
     }
   };
@@ -78,3 +97,76 @@ export function useChatExtraContext(): ChatExtraContextApi {
     onFileAdded,
   };
 }
+
+/**
+ * Heuristic check that a decoded string is likely text rather than binary data.
+ *
+ * WARN: this started out as "vibe code"; it is a best-effort guess, not a
+ * guarantee, so treat surprising results with suspicion.
+ *
+ * Only the first 10 * 1024 UTF-16 code units of `str` are sampled. The sample is
+ * classified as binary when either:
+ *  - it contains more than 2 NUL characters, or
+ *  - more than 15% of its code units are "suspicious": U+FFFD (emitted by a
+ *    UTF-8 decoder for invalid bytes), NUL, DEL, or a C0 control character
+ *    other than BEL, BS, TAB, LF and CR.
+ *
+ * Code units >= 128 (other than U+FFFD) are accepted as-is: when bytes are
+ * mis-decoded as UTF-8 they normally surface as U+FFFD, which is counted above.
+ *
+ * @param str - Content already decoded to a string (e.g. by `FileReader.readAsText`).
+ * @returns `true` if the sample looks like text (always `true` for an empty
+ *   string), `false` if it looks binary.
+ */
+export function isLikelyNotBinary(str: string): boolean {
+  const options = {
+    prefixLength: 1024 * 10, // Check the first 10KB of the string
+    suspiciousCharThresholdRatio: 0.15, // Allow up to 15% suspicious chars
+    maxAbsoluteNullBytes: 2,
+  };
+
+  if (!str) {
+    return true; // Empty string is considered "not binary" or trivially text.
+  }
+
+  // str is non-empty and prefixLength is positive, so sampleLength >= 1 and the
+  // ratio below never divides by zero.
+  const sampleLength = Math.min(str.length, options.prefixLength);
+
+  let suspiciousCharCount = 0;
+  let nullByteCount = 0;
+
+  for (let i = 0; i < sampleLength; i++) {
+    const charCode = str.charCodeAt(i);
+
+    if (charCode === 0x0000) {
+      nullByteCount++;
+      if (nullByteCount > options.maxAbsoluteNullBytes) {
+        // Too many null bytes is a strong binary indicator; stop scanning.
+        return false;
+      }
+      // Nulls also count towards the general suspicious character ratio.
+      suspiciousCharCount++;
+      continue;
+    }
+
+    // Control characters that do show up in real text files and logs.
+    const isToleratedControl =
+      charCode === 7 || // BEL (Bell) - sometimes in logs
+      charCode === 8 || // BS (Backspace) - less common, but possible
+      charCode === 9 || // TAB
+      charCode === 10 || // LF
+      charCode === 13; // CR
+
+    if (
+      charCode === 0xfffd || // Unicode replacement character
+      charCode === 0x7f || // DEL: a control character outside the C0 range
+      (charCode < 32 && !isToleratedControl)
+    ) {
+      suspiciousCharCount++;
+    }
+  }
+
+  const ratio = suspiciousCharCount / sampleLength;
+  return ratio <= options.suspiciousCharThresholdRatio;
+}
diff --git a/tools/server/webui/src/utils/app.context.tsx b/tools/server/webui/src/utils/app.context.tsx
@@ -3,6 +3,7 @@ import {
   APIMessage,
   CanvasData,
   Conversation,
+  LlamaCppServerProps,
   Message,
   PendingMessage,
   ViewingChat,
@@ -12,6 +13,7 @@ import {
   filterThoughtFromMsgs,
   normalizeMsgsForAPI,
   getSSEStreamAsync,
+  getServerProps,
 } from './misc';
 import { BASE_URL, CONFIG_DEFAULT, isDev } from '../Config';
 import { matchPath, useLocation, useNavigate } from 'react-router';
@@ -47,6 +49,9 @@ interface AppContextValue {
   saveConfig: (config: typeof CONFIG_DEFAULT) => void;
   showSettings: boolean;
   setShowSettings: (show: boolean) => void;
+
+  // props
+  serverProps: LlamaCppServerProps | null;
 }
 
 // this callback is used for scrolling to the bottom of the chat and switching to the last node
@@ -75,6 +80,9 @@ export const AppContextProvider = ({
   const params = matchPath('/chat/:convId', pathname);
   const convId = params?.params?.convId;
 
+  const [serverProps, setServerProps] = useState<LlamaCppServerProps | null>(
+    null
+  );
   const [viewingChat, setViewingChat] = useState<ViewingChat | null>(null);
   const [pendingMessages, setPendingMessages] = useState<
     Record<Conversation['id'], PendingMessage>
@@ -86,6 +94,20 @@ export const AppContextProvider = ({
   const [canvasData, setCanvasData] = useState<CanvasData | null>(null);
   const [showSettings, setShowSettings] = useState(false);
 
+  // get server props
+  useEffect(() => {
+    getServerProps(BASE_URL, config.apiKey)
+      .then((props) => {
+        console.debug('Server props:', props);
+        setServerProps(props);
+      })
+      .catch((err) => {
+        console.error(err);
+        toast.error('Failed to fetch server props');
+      });
+    // eslint-disable-next-line
+  }, []);
+
   // handle change when the convId from URL is changed
   useEffect(() => {
     // also reset the canvas data
@@ -378,6 +400,7 @@ export const AppContextProvider = ({
         saveConfig,
         showSettings,
         setShowSettings,
+        serverProps,
       }}
     >
       {children}
diff --git a/tools/server/webui/src/utils/misc.ts b/tools/server/webui/src/utils/misc.ts
@@ -1,6 +1,11 @@
 // @ts-expect-error this package does not have typing
 import TextLineStream from 'textlinestream';
-import { APIMessage, APIMessageContentPart, Message } from './types';
+import {
+  APIMessage,
+  APIMessageContentPart,
+  LlamaCppServerProps,
+  Message,
+} from './types';
 
 // ponyfill for missing ReadableStream asyncIterator on Safari
 import { asyncIterator } from '@sec-ant/readable-stream/ponyfill/asyncIterator';
@@ -157,3 +162,25 @@ export const cleanCurrentUrl = (removeQueryParams: string[]) => {
   });
   window.history.replaceState({}, '', url.toString());
 };
+
+/**
+ * Fetches `${baseUrl}/props` (Bearer auth only when `apiKey` is truthy) and returns the
+ * JSON body cast, unvalidated, to LlamaCppServerProps. Failures are logged and rethrown.
+ */
+export const getServerProps = async (
+  baseUrl: string,
+  apiKey?: string
+): Promise<LlamaCppServerProps> => {
+  const headers: Record<string, string> = {
+    'Content-Type': 'application/json',
+  };
+  if (apiKey) headers.Authorization = `Bearer ${apiKey}`;
+  try {
+    const res = await fetch(`${baseUrl}/props`, { headers });
+    if (res.ok) return (await res.json()) as LlamaCppServerProps;
+    throw new Error('Failed to fetch server props');
+  } catch (err) {
+    console.error('Error fetching server props:', err);
+    throw err;
+  }
+};
diff --git a/tools/server/webui/src/utils/types.ts b/tools/server/webui/src/utils/types.ts
@@ -112,3 +112,12 @@ export interface CanvasPyInterpreter {
 }
 
 export type CanvasData = CanvasPyInterpreter;
+
+// a non-complete list of the props returned by the server's /props endpoint; only contains the ones we need
+export interface LlamaCppServerProps {
+  build_info: string; // displayed as "Build" in the ServerInfo card
+  model_path: string; // its last path segment is displayed as "Model"
+  n_ctx: number; // presumably the context size in tokens -- TODO confirm
+  has_multimodal: boolean; // image uploads are rejected unless this is true
+  // TODO: support params
+}
diff --git a/tools/server/webui/vite.config.ts b/tools/server/webui/vite.config.ts
@@ -71,6 +71,7 @@ export default defineConfig({
   server: {
     proxy: {
       '/v1': 'http://localhost:8080',
+      '/props': 'http://localhost:8080',
     },
     headers: {
       'Cross-Origin-Embedder-Policy': 'require-corp',