@@ -1664,7 +1664,7 @@ struct llama_sampler_dry {
 // Ported from Koboldcpp, original PR: https://github.com/LostRuins/koboldcpp/pull/982 (Original author: pi6am)
 static void get_overlapping_token_sequences(const llama_vocab & vocab, const std::string & str, std::unordered_multimap<llama_token, std::vector<llama_token>> & token_sequences, int max_tail_len = -1) {
     for (llama_token token_id = 0; token_id < (llama_token) vocab.n_vocab; token_id++) {
-        std::string word = llama_detokenize(vocab, {token_id}, true);
+        std::string word = vocab.detokenize({token_id}, true);
         if (word.find(str) != std::string::npos) {
             token_sequences.emplace(token_id, std::vector<llama_token>());
         } else {
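Note: the pattern throughout this diff is the same refactor applied at each call site: free functions that took `const llama_vocab &` as their first argument become member functions on `llama_vocab`, so the vocab argument turns into the implicit `this`. A minimal compilable sketch of that shape (the stand-in type and placeholder body are assumptions for illustration, not the upstream signatures):

    #include <cstdint>
    #include <string>
    #include <vector>

    using llama_token = int32_t;

    // Hypothetical stand-in for llama_vocab, showing only the refactor shape.
    struct vocab_sketch {
        // was: std::string llama_detokenize(const llama_vocab & vocab, ...);
        std::string detokenize(const std::vector<llama_token> & tokens, bool special) const {
            (void) tokens; (void) special;
            return "<piece>"; // placeholder body for the sketch
        }
    };

    // old call shape: llama_detokenize(vocab, {token_id}, true)
    // new call shape: vocab.detokenize({token_id}, true)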
@@ -1681,7 +1681,7 @@ static void get_overlapping_token_sequences(const llama_vocab & vocab, const std
                     }
                 }
                 if (match) {
-                    std::vector<llama_token> tokenization = llama_tokenize_internal(vocab, str.substr(i), false, false);
+                    std::vector<llama_token> tokenization = vocab.tokenize(str.substr(i), false, false);
                     if (max_tail_len >= 0 && tokenization.size() > (size_t) max_tail_len) {
                         tokenization.resize(max_tail_len);
                     }
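Note: this hunk sits in the DRY sequence-breaker setup. When a vocab token's text ends with a proper prefix of the breaker string `str`, the remainder `str.substr(i)` still has to be matched by later tokens, so it is tokenized (with both flags false, since it is a raw text fragment rather than a full prompt) and clamped to `max_tail_len`. A simplified, self-contained sketch of that overlap test (the helper name and exact scan order are illustrative assumptions; the upstream loop instead scans occurrences of `str[0]` inside `word`):

    #include <string>

    // Does a suffix of `word` equal a proper prefix of `str`? If so, report
    // the length `i` of that prefix; str.substr(i) is the tail left to match.
    static bool suffix_matches_prefix(const std::string & word, const std::string & str, size_t & i_out) {
        size_t max_i = str.empty() ? 0 : str.size() - 1; // proper prefixes only
        if (max_i > word.size()) {
            max_i = word.size();
        }
        for (size_t i = max_i; i >= 1; --i) {            // prefer the longest overlap
            if (word.compare(word.size() - i, i, str, 0, i) == 0) {
                i_out = i;
                return true;
            }
        }
        return false;
    }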
@@ -2153,7 +2153,7 @@ static void llama_sampler_infill_apply(struct llama_sampler * smpl, llama_token_
     float p_eog_sum = 0.0f;
 
     for (size_t i = 0; i < cur_p->size; ++i) {
-        if (llama_token_is_eog_impl(*ctx->vocab, cur_p->data[i].id)) {
+        if (ctx->vocab->token_is_eog(cur_p->data[i].id)) {
             p_eog_sum += cur_p->data[i].p;
         } else {
             p_txt_sum += cur_p->data[i].p;
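Note: `token_is_eog()` (previously the free function `llama_token_is_eog_impl`) reports whether a token ends generation; the same substitution recurs in the hunks below. Here the infill sampler partitions the candidate probability mass into text and EOG shares so it can later decide whether to keep generating. A self-contained sketch of that partition (the `candidate` struct and function pointer are stand-ins for `llama_token_data` and the vocab call):

    #include <cstddef>

    struct candidate { int id; float p; };

    // Mirror of the loop above: split the total probability mass into
    // EOG (end-of-generation) and plain-text sums.
    static void split_mass(const candidate * data, size_t n, bool (*is_eog)(int),
                           float & p_txt_sum, float & p_eog_sum) {
        p_txt_sum = 0.0f;
        p_eog_sum = 0.0f;
        for (size_t i = 0; i < n; ++i) {
            (is_eog(data[i].id) ? p_eog_sum : p_txt_sum) += data[i].p;
        }
    }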
@@ -2175,7 +2175,7 @@ static void llama_sampler_infill_apply(struct llama_sampler * smpl, llama_token_
     float p_sum = 0.0f;
 
     for (size_t i = 0; i < size_org; ++i) {
-        if (llama_token_is_eog_impl(*ctx->vocab, cur_p->data[i].id)) {
+        if (ctx->vocab->token_is_eog(cur_p->data[i].id)) {
             p_sum += cur_p->data[i].p;
 
             cur_p->data[cur_p->size++] = cur_p->data[i];
@@ -2203,17 +2203,17 @@ static void llama_sampler_infill_apply(struct llama_sampler * smpl, llama_token_
             continue;
         }
 
-        int len0 = llama_token_to_piece_impl(*ctx->vocab, cur_p->data[i0].id, ctx->buf0.data(), ctx->buf0.size(), 0, false);
+        int len0 = ctx->vocab->token_to_piece(cur_p->data[i0].id, ctx->buf0.data(), ctx->buf0.size(), 0, false);
         if (len0 < 0) {
             ctx->buf0.resize(len0);
-            len0 = llama_token_to_piece_impl(*ctx->vocab, cur_p->data[i0].id, ctx->buf0.data(), ctx->buf0.size(), 0, false);
+            len0 = ctx->vocab->token_to_piece(cur_p->data[i0].id, ctx->buf0.data(), ctx->buf0.size(), 0, false);
             assert(len0 > 0);
         }
 
-        int len1 = llama_token_to_piece_impl(*ctx->vocab, cur_p->data[i1].id, ctx->buf1.data(), ctx->buf1.size(), 0, false);
+        int len1 = ctx->vocab->token_to_piece(cur_p->data[i1].id, ctx->buf1.data(), ctx->buf1.size(), 0, false);
         if (len1 < 0) {
             ctx->buf1.resize(len1);
-            len1 = llama_token_to_piece_impl(*ctx->vocab, cur_p->data[i1].id, ctx->buf1.data(), ctx->buf1.size(), 0, false);
+            len1 = ctx->vocab->token_to_piece(cur_p->data[i1].id, ctx->buf1.data(), ctx->buf1.size(), 0, false);
             assert(len1 > 0);
         }
 
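Note: `token_to_piece()` (formerly `llama_token_to_piece_impl`) fills a caller-provided buffer, and by llama.cpp convention a negative return signals that the buffer was too small. A hedged sketch of the caller-side grow-and-retry pattern, assuming (as the public `llama_token_to_piece` C API does) that the negative value encodes the required size:

    #include <cassert>
    #include <cstddef>
    #include <string>
    #include <vector>

    // `to_piece` is a hypothetical stand-in for ctx->vocab->token_to_piece(...).
    static std::string piece_of(int id, int (*to_piece)(int, char *, int)) {
        std::vector<char> buf(8);
        int len = to_piece(id, buf.data(), (int) buf.size());
        if (len < 0) {
            buf.resize((size_t) -len);          // grow to the reported size
            len = to_piece(id, buf.data(), (int) buf.size());
            assert(len > 0);                    // must succeed on the retry
        }
        return std::string(buf.data(), (size_t) len);
    }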
@@ -2248,7 +2248,7 @@ static void llama_sampler_infill_apply(struct llama_sampler * smpl, llama_token_
     LOG_DBG_CUR("%s: n_combined = %zu, applying thold = %.3f\n", __func__, n_combined, thold);
 
     for (size_t i = 0; i < size_org; ++i) {
-        const bool is_eog = llama_token_is_eog_impl(*ctx->vocab, cur_p->data[i].id);
+        const bool is_eog = ctx->vocab->token_is_eog(cur_p->data[i].id);
 
         if (cur_p->data[i].p < thold && !is_eog) {
             continue;
@@ -2269,7 +2269,7 @@ static void llama_sampler_infill_apply(struct llama_sampler * smpl, llama_token_
     // if no non-EOG tokens are left -> reduce cur_p to single EOT token
     if (n_non_eog == 0) {
         cur_p->size = 1;
-        cur_p->data[0].id = llama_token_eot_impl(*ctx->vocab);
+        cur_p->data[0].id = ctx->vocab->token_eot();
         cur_p->data[0].logit = 1.0f;
 
         return;
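Note: `token_eot()` replaces `llama_token_eot_impl(*ctx->vocab)`. If threshold filtering left no non-EOG candidates, the sampler collapses the list to a single end-of-text token so the infill terminates deterministically. A minimal sketch of that collapse (the stand-in structs mirror `llama_token_data` / `llama_token_data_array`):

    #include <cstddef>

    struct token_data       { int id; float logit; float p; };
    struct token_data_array { token_data * data; size_t size; };

    // Force the candidate list down to a single EOT token with a fixed logit.
    static void collapse_to_eot(token_data_array * cur_p, int token_eot) {
        cur_p->size = 1;
        cur_p->data[0].id    = token_eot;
        cur_p->data[0].logit = 1.0f;
    }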
@@ -2291,7 +2291,7 @@ static void llama_sampler_infill_apply(struct llama_sampler * smpl, llama_token_
     LOG_DBG_CUR("%s: applying thold = %.3f\n", __func__, thold);
 
     for (size_t i = 0; i < size_org; ++i) {
-        const bool is_eog = llama_token_is_eog_impl(*ctx->vocab, cur_p->data[i].id);
+        const bool is_eog = ctx->vocab->token_is_eog(cur_p->data[i].id);
 
         if (cur_p->data[i].p < thold && !is_eog) {
             continue;