
Commit 7030ebf

Merge branch 'upstream' into concedo_experimental
# Conflicts:
#	docs/backend/SYCL.md
#	ggml/src/CMakeLists.txt
#	ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp
#	ggml/src/ggml-sycl/CMakeLists.txt
#	tests/test-backend-ops.cpp

2 parents: c1e5841 + af04481

24 files changed: +1881 additions, -396 deletions

convert_hf_to_gguf.py (4 additions & 2 deletions)

```diff
@@ -529,6 +529,8 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
         reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()}
         added_vocab = tokenizer.get_added_vocab()
 
+        added_tokens_decoder = tokenizer.added_tokens_decoder
+
         for i in range(vocab_size):
             if i not in reverse_vocab:
                 tokens.append(f"[PAD{i}]")
@@ -538,13 +540,13 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
                 if token in added_vocab:
                     # The tokenizer in llama.cpp assumes the CONTROL and USER_DEFINED tokens are pre-normalized.
                     # To avoid unexpected issues - we make sure to normalize non-normalized tokens
-                    if not tokenizer.added_tokens_decoder[i].normalized:
+                    if not added_tokens_decoder[i].normalized:
                         previous_token = token
                         token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False))
                         if previous_token != token:
                             logger.info(f"{repr(previous_token)} is encoded and decoded back to {repr(token)} using AutoTokenizer")
 
-                    if tokenizer.added_tokens_decoder[i].special or self.does_token_look_special(token):
+                    if added_tokens_decoder[i].special or self.does_token_look_special(token):
                         toktypes.append(gguf.TokenType.CONTROL)
                     else:
                         # NOTE: this was added for Gemma.
```
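Hoisting `added_tokens_decoder` out of the loop is likely a performance fix: in Hugging Face `transformers`, `added_tokens_decoder` is a computed property, so reading it once before the loop avoids rebuilding the mapping on every iteration, while the two `if` checks behave exactly as before.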
(binary file changed, 90 Bytes, contents not shown)

examples/server/webui/src/components/ChatScreen.tsx (53 additions & 25 deletions)

```diff
@@ -99,13 +99,9 @@ export default function ChatScreen() {
     canvasData,
     replaceMessageAndGenerate,
   } = useAppContext();
-  const [inputMsg, setInputMsg] = useState(prefilledMsg.content());
-  const inputRef = useRef<HTMLTextAreaElement>(null);
+  const textarea = useOptimizedTextarea(prefilledMsg.content());
 
-  const { extraContext, clearExtraContext } = useVSCodeContext(
-    inputRef,
-    setInputMsg
-  );
+  const { extraContext, clearExtraContext } = useVSCodeContext(textarea);
   // TODO: improve this when we have "upload file" feature
   const currExtra: Message['extra'] = extraContext ? [extraContext] : undefined;
 
@@ -135,9 +131,10 @@
   };
 
   const sendNewMessage = async () => {
-    if (inputMsg.trim().length === 0 || isGenerating(currConvId ?? '')) return;
-    const lastInpMsg = inputMsg;
-    setInputMsg('');
+    const lastInpMsg = textarea.value();
+    if (lastInpMsg.trim().length === 0 || isGenerating(currConvId ?? ''))
+      return;
+    textarea.setValue('');
     scrollToBottom(false);
     setCurrNodeId(-1);
     // get the last message node
@@ -146,13 +143,13 @@
       !(await sendMessage(
         currConvId,
         lastMsgNodeId,
-        inputMsg,
+        lastInpMsg,
         currExtra,
         onChunk
       ))
     ) {
       // restore the input message if failed
-      setInputMsg(lastInpMsg);
+      textarea.setValue(lastInpMsg);
     }
     // OK
     clearExtraContext();
@@ -195,16 +192,13 @@
       // send the prefilled message if needed
       sendNewMessage();
     } else {
-      // otherwise, focus on the input and move the cursor to the end
-      if (inputRef.current) {
-        inputRef.current.focus();
-        inputRef.current.selectionStart = inputRef.current.value.length;
-      }
+      // otherwise, focus on the input
+      textarea.focus();
     }
     prefilledMsg.clear();
     // no need to keep track of sendNewMessage
     // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [inputRef]);
+  }, [textarea.ref]);
 
   // due to some timing issues of StorageUtils.appendMsg(), we need to make sure the pendingMsg is not duplicated upon rendering (i.e. appears once in the saved conversation and once in the pendingMsg)
   const pendingMsgDisplay: MessageDisplay[] =
@@ -258,9 +252,7 @@
           <textarea
             className="textarea textarea-bordered w-full"
             placeholder="Type a message (Shift+Enter to add a new line)"
-            ref={inputRef}
-            value={inputMsg}
-            onChange={(e) => setInputMsg(e.target.value)}
+            ref={textarea.ref}
            onKeyDown={(e) => {
              if (e.nativeEvent.isComposing || e.keyCode === 229) return;
              if (e.key === 'Enter' && e.shiftKey) return;
@@ -280,11 +272,7 @@
               Stop
             </button>
           ) : (
-            <button
-              className="btn btn-primary ml-2"
-              onClick={sendNewMessage}
-              disabled={inputMsg.trim().length === 0}
-            >
+            <button className="btn btn-primary ml-2" onClick={sendNewMessage}>
               Send
             </button>
           )}
@@ -298,3 +286,43 @@
     </div>
   );
 }
+
+export interface OptimizedTextareaValue {
+  value: () => string;
+  setValue: (value: string) => void;
+  focus: () => void;
+  ref: React.RefObject<HTMLTextAreaElement>;
+}
+
+// This is a workaround to prevent the textarea from re-rendering when the inner content changes
+// See https://github.com/ggml-org/llama.cpp/pull/12299
+function useOptimizedTextarea(initValue: string): OptimizedTextareaValue {
+  const [savedInitValue, setSavedInitValue] = useState<string>(initValue);
+  const textareaRef = useRef<HTMLTextAreaElement>(null);
+
+  useEffect(() => {
+    if (textareaRef.current && savedInitValue) {
+      textareaRef.current.value = savedInitValue;
+      setSavedInitValue('');
+    }
+  }, [textareaRef, savedInitValue, setSavedInitValue]);
+
+  return {
+    value: () => {
+      return textareaRef.current?.value ?? savedInitValue;
+    },
+    setValue: (value: string) => {
+      if (textareaRef.current) {
+        textareaRef.current.value = value;
+      }
+    },
+    focus: () => {
+      if (textareaRef.current) {
+        // focus and move the cursor to the end
+        textareaRef.current.focus();
+        textareaRef.current.selectionStart = textareaRef.current.value.length;
+      }
+    },
+    ref: textareaRef,
+  };
+}
```
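The new hook keeps the textarea uncontrolled: state only holds the prefilled value until the DOM node mounts, after which reads and writes go straight to the element, so keystrokes no longer re-render the whole chat screen. A minimal consumption sketch follows; the `PromptBox` component and `onSend` callback are illustrative, and it assumes the hook were exported from ChatScreen.tsx, which this diff does not do:

```tsx
// Hypothetical usage sketch, not part of this commit.
// Assumes `useOptimizedTextarea` were exported from ChatScreen.tsx.
import { useOptimizedTextarea } from './ChatScreen';

function PromptBox({ onSend }: { onSend: (text: string) => void }) {
  const textarea = useOptimizedTextarea('');

  const submit = () => {
    const text = textarea.value(); // read straight from the DOM node
    if (text.trim().length === 0) return;
    textarea.setValue('');         // clear without triggering a re-render
    onSend(text);
  };

  // No value/onChange pair: the element is uncontrolled, so typing
  // mutates the DOM node directly instead of going through React state.
  return (
    <div>
      <textarea ref={textarea.ref} />
      <button onClick={submit}>Send</button>
    </div>
  );
}
```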

examples/server/webui/src/utils/llama-vscode.ts (5 additions & 7 deletions)

```diff
@@ -1,5 +1,6 @@
 import { useEffect, useState } from 'react';
 import { MessageExtraContext } from './types';
+import { OptimizedTextareaValue } from '../components/ChatScreen';
 
 // Extra context when using llama.cpp WebUI from llama-vscode, inside an iframe
 // Ref: https://github.com/ggml-org/llama.cpp/pull/11940
@@ -14,10 +15,7 @@ interface SetTextEvData {
  * window.postMessage({ command: 'setText', text: 'Spot the syntax error', context: 'def test()\n return 123' }, '*');
  */
 
-export const useVSCodeContext = (
-  inputRef: React.RefObject<HTMLTextAreaElement>,
-  setInputMsg: (text: string) => void
-) => {
+export const useVSCodeContext = (textarea: OptimizedTextareaValue) => {
   const [extraContext, setExtraContext] = useState<MessageExtraContext | null>(
     null
   );
@@ -27,20 +25,20 @@
     const handleMessage = (event: MessageEvent) => {
       if (event.data?.command === 'setText') {
         const data: SetTextEvData = event.data;
-        setInputMsg(data?.text);
+        textarea.setValue(data?.text);
         if (data?.context && data.context.length > 0) {
           setExtraContext({
             type: 'context',
             content: data.context,
           });
         }
-        inputRef.current?.focus();
+        textarea.focus();
       }
     };
 
     window.addEventListener('message', handleMessage);
     return () => window.removeEventListener('message', handleMessage);
-  }, [inputRef, setInputMsg]);
+  }, [textarea]);
 
   // Add a keydown listener that sends the "escapePressed" message to the parent window
   useEffect(() => {
```
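The sending side of this protocol lives in the host page (llama-vscode embeds the WebUI in an iframe). A minimal sketch of how a parent window could drive the hook, mirroring the `postMessage` call documented in the comment above; the iframe id `webui` is illustrative:

```ts
// Hypothetical host-page sketch, not part of this commit.
// Per the documented contract: `text` fills the input via textarea.setValue(),
// `context` becomes extra context attached to the next message.
const frame = document.getElementById('webui') as HTMLIFrameElement | null;
frame?.contentWindow?.postMessage(
  {
    command: 'setText',
    text: 'Spot the syntax error',
    context: 'def test()\n    return 123',
  },
  '*'
);
```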

examples/tts/tts.cpp (8 additions & 0 deletions)

```diff
@@ -571,6 +571,10 @@ int main(int argc, char ** argv) {
     model_ttc = llama_init_ttc.model.get();
     ctx_ttc = llama_init_ttc.context.get();
 
+    if (model_ttc == nullptr || ctx_ttc == nullptr) {
+        return ENOENT;
+    }
+
     const llama_vocab * vocab = llama_model_get_vocab(model_ttc);
 
     // TODO: refactor in a common struct
@@ -586,6 +590,10 @@ int main(int argc, char ** argv) {
     model_cts = llama_init_cts.model.get();
     ctx_cts = llama_init_cts.context.get();
 
+    if (model_cts == nullptr || ctx_cts == nullptr) {
+        return ENOENT;
+    }
+
     std::vector<common_sampler *> smpl(n_parallel);
     for (int i = 0; i < n_parallel; ++i) {
         params.sampling.no_perf = (i != 0);
```
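These guards make the example fail fast when either the TTC or the CTS model (or its context) fails to load: the program exits with ENOENT, the errno value for a missing file, instead of dereferencing a null pointer further down.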
