File tree Expand file tree Collapse file tree 2 files changed +21
-9
lines changed
Expand file tree Collapse file tree 2 files changed +21
-9
lines changed Original file line number Diff line number Diff line change @@ -445,17 +445,17 @@ def detokenize(self, tokens: List[int]) -> bytes:
445445 """
446446 assert self .ctx is not None
447447 output = b""
448- buffer_size = 32
448+ buffer_size = 8
449449 buffer = (ctypes .c_char * buffer_size )()
450450 for token in tokens :
451- if token == llama_cpp .llama_token_bos (self .ctx ):
452- continue
453451 n = llama_cpp .llama_token_to_str (
454452 self .ctx , llama_cpp .llama_token (token ), buffer , buffer_size
455453 )
456454 assert n <= buffer_size
457455 output += bytes (buffer [:n ])
458- return output
456+ # NOTE: Llama1 models automatically added a space at the start of the prompt
457+ # this line removes a leading space if the first token is a beginning of sentence token
458+ return output [1 :] if len (tokens ) > 0 and tokens [0 ] == self .token_bos () else output
459459
460460 def set_cache (self , cache : Optional [BaseLlamaCache ]):
461461 """Set the cache.
@@ -886,7 +886,7 @@ def _create_completion(
886886 created : int = int (time .time ())
887887 completion_tokens : List [int ] = []
888888 # Add blank space to start of prompt to match OG llama tokenizer
889- prompt_tokens : List [int ] = self .tokenize (b" " + prompt .encode ("utf-8" ))
889+ prompt_tokens : List [int ] = self .tokenize (prompt .encode ("utf-8" )) if prompt != "" else [ self . token_bos ()]
890890 text : bytes = b""
891891 returned_tokens : int = 0
892892 stop = (
Original file line number Diff line number Diff line change 1+ import pytest
12import llama_cpp
23
34MODEL = "./vendor/llama.cpp/models/ggml-vocab-llama.gguf"
45
56
6- def test_llama ():
7- llama = llama_cpp .Llama (model_path = MODEL , vocab_only = True )
7+ def test_llama_cpp_tokenization ():
8+ llama = llama_cpp .Llama (model_path = MODEL , vocab_only = True , verbose = False )
89
910 assert llama
1011 assert llama .ctx is not None
1112
1213 text = b"Hello World"
1314
14- assert llama .detokenize (llama .tokenize (text )) == text
15+ tokens = llama .tokenize (text )
16+ assert tokens [0 ] == llama .token_bos ()
17+ assert tokens == [1 , 15043 , 2787 ]
18+ detokenized = llama .detokenize (tokens )
19+ assert detokenized == text
20+
21+ tokens = llama .tokenize (text , add_bos = False )
22+ assert tokens [0 ] != llama .token_bos ()
23+ assert tokens == [15043 , 2787 ]
24+
25+ detokenized = llama .detokenize (tokens )
26+ assert detokenized != text
1527
1628
17- # @pytest.mark.skip(reason="need to update sample mocking ")
29+ @pytest .mark .skip (reason = "bug in tokenization where leading space is always inserted even if not after eos " )
1830def test_llama_patch (monkeypatch ):
1931 llama = llama_cpp .Llama (model_path = MODEL , vocab_only = True )
2032 n_vocab = llama_cpp .llama_n_vocab (llama .ctx )
You can’t perform that action at this time.
0 commit comments