@@ -405,13 +405,6 @@ def sample_repetition_penalties(
         # )
         raise NotImplementedError("sample_repetition_penalties is not implemented in llama.cpp")
 
-    def sample_softmax(self, candidates: "_LlamaTokenDataArray"):
-        # llama_cpp.llama_sample_softmax(
-        #     self.ctx,
-        #     llama_cpp.byref(candidates.candidates),
-        # )
-        raise NotImplementedError("sample_softmax is not implemented in llama.cpp")
-
     def sample_top_k(self, candidates: "_LlamaTokenDataArray", k: int, min_keep: int):
         # llama_cpp.llama_sample_top_k(
         #     self.ctx, llama_cpp.byref(candidates.candidates), k, min_keep
@@ -592,6 +585,7 @@ def __init__(self, *, n_vocab: int):
         self.candidates = llama_cpp.llama_token_data_array(
             data=self.candidates_data.ctypes.data_as(llama_cpp.llama_token_data_p),
             size=self.n_vocab,
+            selected=-1,
             sorted=False,
         )
         self.default_candidates_data_id = np.arange(self.n_vocab, dtype=np.intc)  # type: ignore
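For reference, below is a rough ctypes sketch of the llama_token_data_array layout that the new selected field tracks. The field names come from this hunk; the exact field types are an assumption based on llama.cpp's llama.h, so check llama_cpp.py for the authoritative binding:

    import ctypes

    class llama_token_data(ctypes.Structure):
        _fields_ = [
            ("id", ctypes.c_int32),     # token id
            ("logit", ctypes.c_float),  # raw logit
            ("p", ctypes.c_float),      # probability after normalization
        ]

    class llama_token_data_array(ctypes.Structure):
        _fields_ = [
            ("data", ctypes.POINTER(llama_token_data)),
            ("size", ctypes.c_size_t),
            ("selected", ctypes.c_int64),  # index of the sampled token, -1 when unset
            ("sorted", ctypes.c_bool),
        ]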
@@ -729,7 +723,6 @@ def sample(
             ctx_main.sample_grammar(token_data_array, self.grammar)
 
         if self.params.temp < 0:
-            ctx_main.sample_softmax(token_data_array)
            id = token_data_array.candidates_data.id[0]
        elif self.params.temp == 0:
            id = ctx_main.sample_token_greedy(token_data_array)
@@ -827,10 +820,6 @@ def add_dist(self, seed: int):
         sampler = llama_cpp.llama_sampler_init_dist(seed)
         self._add_sampler(sampler)
 
-    def add_softmax(self):
-        sampler = llama_cpp.llama_sampler_init_softmax()
-        self._add_sampler(sampler)
-
     def add_top_k(self, k: int):
         sampler = llama_cpp.llama_sampler_init_top_k(k)
         self._add_sampler(sampler)
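A minimal usage sketch of the chain builder after this change, assuming the add_* methods shown above live on the internal LlamaSampler wrapper (the class name is not visible in this hunk) and that the dist sampler normalizes the candidates itself, so no explicit softmax step is needed:

    sampler = LlamaSampler()     # assumed wrapper class exposing the add_* methods above
    sampler.add_top_k(40)        # keep the 40 highest-logit candidates
    sampler.add_dist(seed=1234)  # sample from the (implicitly normalized) distribution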