@@ -368,10 +368,10 @@ def generate(self):
368368 id = llama_cpp .llama_sample_token_mirostat_v2 (self .ctx , candidates_p , llama_cpp .c_float (self .params .mirostat_tau ), llama_cpp .c_float (self .params .mirostat_eta ), llama_cpp .c_float (mirostat_mu ))
369369 else :
370370 # Temperature sampling
371- llama_cpp .llama_sample_top_k (self .ctx , candidates_p , top_k )
372- llama_cpp .llama_sample_tail_free (self .ctx , candidates_p , llama_cpp .c_float (self .params .tfs_z ))
373- llama_cpp .llama_sample_typical (self .ctx , candidates_p , llama_cpp .c_float (self .params .typical_p ))
374- llama_cpp .llama_sample_top_p (self .ctx , candidates_p , llama_cpp .c_float (self .params .top_p ))
371+ llama_cpp .llama_sample_top_k (self .ctx , candidates_p , top_k , min_keep = llama_cpp . c_size_t ( 1 ) )
372+ llama_cpp .llama_sample_tail_free (self .ctx , candidates_p , llama_cpp .c_float (self .params .tfs_z ), min_keep = llama_cpp . c_size_t ( 1 ) )
373+ llama_cpp .llama_sample_typical (self .ctx , candidates_p , llama_cpp .c_float (self .params .typical_p ), min_keep = llama_cpp . c_size_t ( 1 ) )
374+ llama_cpp .llama_sample_top_p (self .ctx , candidates_p , llama_cpp .c_float (self .params .top_p ), min_keep = llama_cpp . c_size_t ( 1 ) )
375375 llama_cpp .llama_sample_temperature (self .ctx , candidates_p , llama_cpp .c_float (self .params .temp ))
376376 id = llama_cpp .llama_sample_token (self .ctx , candidates_p )
377377 # print("`{}`".format(candidates_p.size))
@@ -382,12 +382,15 @@ def generate(self):
382382 # replace end of text token with newline token when in interactive mode
383383 if (id == llama_cpp .llama_token_eos () and self .params .interactive and not self .params .instruct ):
384384 id = self .llama_token_newline [0 ]
385+ self .embd .append (id )
385386 if (self .use_antiprompt ()):
386387 # tokenize and inject first reverse prompt
387388 self .embd_inp += self .first_antiprompt [0 ]
388-
389- # add it to the context
390- self .embd .append (id )
389+ for id in self .first_antiprompt [0 ]:
390+ self .embd .append (id )
391+ else :
392+ # add it to the context
393+ self .embd .append (id )
391394
392395 # echo this to console
393396 self .output_echo = True
@@ -493,7 +496,7 @@ def output(self):
493496 # Contains multi-byte UTF8
494497 for num , pattern in [(2 , 192 ), (3 , 224 ), (4 , 240 )]:
495498 # Bitwise AND check
496- if pattern & int .from_bytes (cur_char ) == pattern :
499+ if pattern & int .from_bytes (cur_char , 'little' ) == pattern :
497500 self .multibyte_fix = [cur_char ] + ([None ] * (num - 1 ))
498501
499502 # Stop incomplete bytes from passing
0 commit comments