
Commit 7030ebf

Merge branch 'upstream' into concedo_experimental
# Conflicts:
#	docs/backend/SYCL.md
#	ggml/src/CMakeLists.txt
#	ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp
#	ggml/src/ggml-sycl/CMakeLists.txt
#	tests/test-backend-ops.cpp

2 parents: c1e5841 + af04481

24 files changed: +1881 additions, -396 deletions

convert_hf_to_gguf.py (4 additions & 2 deletions)

```diff
@@ -529,6 +529,8 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
         reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()}
         added_vocab = tokenizer.get_added_vocab()
 
+        added_tokens_decoder = tokenizer.added_tokens_decoder
+
         for i in range(vocab_size):
             if i not in reverse_vocab:
                 tokens.append(f"[PAD{i}]")
@@ -538,13 +540,13 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
                 if token in added_vocab:
                     # The tokenizer in llama.cpp assumes the CONTROL and USER_DEFINED tokens are pre-normalized.
                     # To avoid unexpected issues - we make sure to normalize non-normalized tokens
-                    if not tokenizer.added_tokens_decoder[i].normalized:
+                    if not added_tokens_decoder[i].normalized:
                         previous_token = token
                         token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False))
                         if previous_token != token:
                             logger.info(f"{repr(previous_token)} is encoded and decoded back to {repr(token)} using AutoTokenizer")
 
-                    if tokenizer.added_tokens_decoder[i].special or self.does_token_look_special(token):
+                    if added_tokens_decoder[i].special or self.does_token_look_special(token):
                         toktypes.append(gguf.TokenType.CONTROL)
                     else:
                         # NOTE: this was added for Gemma.
```
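Hoisting `added_tokens_decoder` out of the loop is likely a performance fix: in Hugging Face `transformers`, `added_tokens_decoder` is a computed property, so reading it once before the loop avoids rebuilding the mapping on every iteration, while the two `if` checks behave exactly as before.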
(binary file changed, 90 Bytes, contents not shown)

examples/server/webui/src/components/ChatScreen.tsx (53 additions & 25 deletions)

```diff
@@ -99,13 +99,9 @@ export default function ChatScreen() {
     canvasData,
     replaceMessageAndGenerate,
   } = useAppContext();
-  const [inputMsg, setInputMsg] = useState(prefilledMsg.content());
-  const inputRef = useRef<HTMLTextAreaElement>(null);
+  const textarea = useOptimizedTextarea(prefilledMsg.content());
 
-  const { extraContext, clearExtraContext } = useVSCodeContext(
-    inputRef,
-    setInputMsg
-  );
+  const { extraContext, clearExtraContext } = useVSCodeContext(textarea);
   // TODO: improve this when we have "upload file" feature
   const currExtra: Message['extra'] = extraContext ? [extraContext] : undefined;
 
@@ -135,9 +131,10 @@
   };
 
   const sendNewMessage = async () => {
-    if (inputMsg.trim().length === 0 || isGenerating(currConvId ?? '')) return;
-    const lastInpMsg = inputMsg;
-    setInputMsg('');
+    const lastInpMsg = textarea.value();
+    if (lastInpMsg.trim().length === 0 || isGenerating(currConvId ?? ''))
+      return;
+    textarea.setValue('');
     scrollToBottom(false);
     setCurrNodeId(-1);
     // get the last message node
@@ -146,13 +143,13 @@
       !(await sendMessage(
         currConvId,
         lastMsgNodeId,
-        inputMsg,
+        lastInpMsg,
         currExtra,
         onChunk
       ))
     ) {
       // restore the input message if failed
-      setInputMsg(lastInpMsg);
+      textarea.setValue(lastInpMsg);
     }
     // OK
     clearExtraContext();
@@ -195,16 +192,13 @@
       // send the prefilled message if needed
       sendNewMessage();
     } else {
-      // otherwise, focus on the input and move the cursor to the end
-      if (inputRef.current) {
-        inputRef.current.focus();
-        inputRef.current.selectionStart = inputRef.current.value.length;
-      }
+      // otherwise, focus on the input
+      textarea.focus();
     }
     prefilledMsg.clear();
     // no need to keep track of sendNewMessage
     // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [inputRef]);
+  }, [textarea.ref]);
 
   // due to some timing issues of StorageUtils.appendMsg(), we need to make sure the pendingMsg is not duplicated upon rendering (i.e. appears once in the saved conversation and once in the pendingMsg)
   const pendingMsgDisplay: MessageDisplay[] =
@@ -258,9 +252,7 @@
           <textarea
             className="textarea textarea-bordered w-full"
             placeholder="Type a message (Shift+Enter to add a new line)"
-            ref={inputRef}
-            value={inputMsg}
-            onChange={(e) => setInputMsg(e.target.value)}
+            ref={textarea.ref}
            onKeyDown={(e) => {
              if (e.nativeEvent.isComposing || e.keyCode === 229) return;
              if (e.key === 'Enter' && e.shiftKey) return;
@@ -280,11 +272,7 @@
               Stop
             </button>
           ) : (
-            <button
-              className="btn btn-primary ml-2"
-              onClick={sendNewMessage}
-              disabled={inputMsg.trim().length === 0}
-            >
+            <button className="btn btn-primary ml-2" onClick={sendNewMessage}>
               Send
             </button>
           )}
@@ -298,3 +286,43 @@
     </div>
   );
 }
+
+export interface OptimizedTextareaValue {
+  value: () => string;
+  setValue: (value: string) => void;
+  focus: () => void;
+  ref: React.RefObject<HTMLTextAreaElement>;
+}
+
+// This is a workaround to prevent the textarea from re-rendering when the inner content changes
+// See https://github.com/ggml-org/llama.cpp/pull/12299
+function useOptimizedTextarea(initValue: string): OptimizedTextareaValue {
+  const [savedInitValue, setSavedInitValue] = useState<string>(initValue);
+  const textareaRef = useRef<HTMLTextAreaElement>(null);
+
+  useEffect(() => {
+    if (textareaRef.current && savedInitValue) {
+      textareaRef.current.value = savedInitValue;
+      setSavedInitValue('');
+    }
+  }, [textareaRef, savedInitValue, setSavedInitValue]);
+
+  return {
+    value: () => {
+      return textareaRef.current?.value ?? savedInitValue;
+    },
+    setValue: (value: string) => {
+      if (textareaRef.current) {
+        textareaRef.current.value = value;
+      }
+    },
+    focus: () => {
+      if (textareaRef.current) {
+        // focus and move the cursor to the end
+        textareaRef.current.focus();
+        textareaRef.current.selectionStart = textareaRef.current.value.length;
+      }
+    },
+    ref: textareaRef,
+  };
+}
```
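The new hook keeps the textarea uncontrolled: state only holds the prefilled value until the DOM node mounts, after which reads and writes go straight to the element, so keystrokes no longer re-render the whole chat screen. A minimal consumption sketch follows; the `PromptBox` component and `onSend` callback are illustrative, and it assumes the hook were exported from ChatScreen.tsx, which this diff does not do:

```tsx
// Hypothetical usage sketch, not part of this commit.
// Assumes `useOptimizedTextarea` were exported from ChatScreen.tsx.
import { useOptimizedTextarea } from './ChatScreen';

function PromptBox({ onSend }: { onSend: (text: string) => void }) {
  const textarea = useOptimizedTextarea('');

  const submit = () => {
    const text = textarea.value(); // read straight from the DOM node
    if (text.trim().length === 0) return;
    textarea.setValue('');         // clear without triggering a re-render
    onSend(text);
  };

  // No value/onChange pair: the element is uncontrolled, so typing
  // mutates the DOM node directly instead of going through React state.
  return (
    <div>
      <textarea ref={textarea.ref} />
      <button onClick={submit}>Send</button>
    </div>
  );
}
```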

examples/server/webui/src/utils/llama-vscode.ts (5 additions & 7 deletions)

```diff
@@ -1,5 +1,6 @@
 import { useEffect, useState } from 'react';
 import { MessageExtraContext } from './types';
+import { OptimizedTextareaValue } from '../components/ChatScreen';
 
 // Extra context when using llama.cpp WebUI from llama-vscode, inside an iframe
 // Ref: https://github.com/ggml-org/llama.cpp/pull/11940
@@ -14,10 +15,7 @@ interface SetTextEvData {
  * window.postMessage({ command: 'setText', text: 'Spot the syntax error', context: 'def test()\n return 123' }, '*');
  */
 
-export const useVSCodeContext = (
-  inputRef: React.RefObject<HTMLTextAreaElement>,
-  setInputMsg: (text: string) => void
-) => {
+export const useVSCodeContext = (textarea: OptimizedTextareaValue) => {
   const [extraContext, setExtraContext] = useState<MessageExtraContext | null>(
     null
   );
@@ -27,20 +25,20 @@
     const handleMessage = (event: MessageEvent) => {
       if (event.data?.command === 'setText') {
         const data: SetTextEvData = event.data;
-        setInputMsg(data?.text);
+        textarea.setValue(data?.text);
         if (data?.context && data.context.length > 0) {
           setExtraContext({
             type: 'context',
             content: data.context,
           });
         }
-        inputRef.current?.focus();
+        textarea.focus();
       }
     };
 
     window.addEventListener('message', handleMessage);
     return () => window.removeEventListener('message', handleMessage);
-  }, [inputRef, setInputMsg]);
+  }, [textarea]);
 
   // Add a keydown listener that sends the "escapePressed" message to the parent window
   useEffect(() => {
```
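The sending side of this protocol lives in the host page (llama-vscode embeds the WebUI in an iframe). A minimal sketch of how a parent window could drive the hook, mirroring the `postMessage` call documented in the comment above; the iframe id `webui` is illustrative:

```ts
// Hypothetical host-page sketch, not part of this commit.
// Per the documented contract: `text` fills the input via textarea.setValue(),
// `context` becomes extra context attached to the next message.
const frame = document.getElementById('webui') as HTMLIFrameElement | null;
frame?.contentWindow?.postMessage(
  {
    command: 'setText',
    text: 'Spot the syntax error',
    context: 'def test()\n    return 123',
  },
  '*'
);
```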

examples/tts/tts.cpp (8 additions & 0 deletions)

```diff
@@ -571,6 +571,10 @@ int main(int argc, char ** argv) {
     model_ttc = llama_init_ttc.model.get();
     ctx_ttc = llama_init_ttc.context.get();
 
+    if (model_ttc == nullptr || ctx_ttc == nullptr) {
+        return ENOENT;
+    }
+
     const llama_vocab * vocab = llama_model_get_vocab(model_ttc);
 
     // TODO: refactor in a common struct
@@ -586,6 +590,10 @@ int main(int argc, char ** argv) {
     model_cts = llama_init_cts.model.get();
     ctx_cts = llama_init_cts.context.get();
 
+    if (model_cts == nullptr || ctx_cts == nullptr) {
+        return ENOENT;
+    }
+
     std::vector<common_sampler *> smpl(n_parallel);
     for (int i = 0; i < n_parallel; ++i) {
         params.sampling.no_perf = (i != 0);
```
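These guards make the example fail fast when either the TTC or the CTS model (or its context) fails to load: the program exits with ENOENT, the errno value for a missing file, instead of dereferencing a null pointer further down.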
