@@ -154,28 +154,28 @@ int main(int argc, char ** argv) {
154154 LOG_INF (" \n " );
155155 LOG_INF (" %s\n " , common_params_get_system_info (params).c_str ());
156156 }
157- const bool add_bos = llama_add_bos_token (vocab);
158- GGML_ASSERT (!llama_add_eos_token (vocab));
157+ const bool add_bos = llama_vocab_add_bos (vocab);
158+ GGML_ASSERT (!llama_vocab_add_eos (vocab));
159159
160160 std::vector<llama_token> embd_inp;
161161 std::vector<llama_token> embd_end;
162162 std::vector<llama_token> inp_pfx = common_tokenize (ctx, params.input_prefix , false );
163163 std::vector<llama_token> inp_sfx = common_tokenize (ctx, params.input_suffix , false );
164164
165- GGML_ASSERT (llama_token_fim_pre (vocab) >= 0 );
166- GGML_ASSERT (llama_token_fim_suf (vocab) >= 0 );
165+ GGML_ASSERT (llama_vocab_fim_pre (vocab) >= 0 );
166+ GGML_ASSERT (llama_vocab_fim_suf (vocab) >= 0 );
167167
168- inp_pfx.insert (inp_pfx.begin (), llama_token_fim_pre (vocab));
169- inp_sfx.insert (inp_sfx.begin (), llama_token_fim_suf (vocab));
168+ inp_pfx.insert (inp_pfx.begin (), llama_vocab_fim_pre (vocab));
169+ inp_sfx.insert (inp_sfx.begin (), llama_vocab_fim_suf (vocab));
170170
171171 embd_inp = params.spm_infill ? inp_sfx : inp_pfx;
172172 embd_end = params.spm_infill ? inp_pfx : inp_sfx;
173173 if (add_bos) {
174- embd_inp.insert (embd_inp.begin (), llama_token_bos (vocab));
174+ embd_inp.insert (embd_inp.begin (), llama_vocab_bos (vocab));
175175 }
176176 embd_inp.insert (embd_inp.end (), embd_end.begin (), embd_end.end ());
177177
178- const llama_token middle_token = llama_token_fim_mid (vocab);
178+ const llama_token middle_token = llama_vocab_fim_mid (vocab);
179179 if (middle_token >= 0 ) {
180180 embd_inp.push_back (middle_token);
181181 }
@@ -187,7 +187,7 @@ int main(int argc, char ** argv) {
187187
188188 // Should not run without any tokens
189189 if (embd_inp.empty ()) {
190- embd_inp.push_back (llama_token_bos (vocab));
190+ embd_inp.push_back (llama_vocab_bos (vocab));
191191 LOG_WRN (" embd_inp was considered empty and bos was added: %s\n " , string_from (ctx, embd_inp).c_str ());
192192 }
193193
@@ -422,10 +422,10 @@ int main(int argc, char ** argv) {
422422 // if not currently processing queued inputs;
423423 if ((int ) embd_inp.size () <= n_consumed) {
424424 // deal with eot token in infill mode
425- if ((common_sampler_last (smpl) == llama_token_eot (vocab) || is_interacting) && params.interactive ){
425+ if ((common_sampler_last (smpl) == llama_vocab_eot (vocab) || is_interacting) && params.interactive ){
426426 if (is_interacting && !params.interactive_first ) {
427427 // print an eot token
428- LOG (" %s" , common_token_to_piece (ctx, llama_token_eot (vocab)).c_str ());
428+ LOG (" %s" , common_token_to_piece (ctx, llama_vocab_eot (vocab)).c_str ());
429429 }
430430 LOG (" \n " );
431431 console::set_display (console::user_input);
@@ -465,13 +465,13 @@ int main(int argc, char ** argv) {
465465 std::vector<llama_token> inp_pfx = common_tokenize (ctx, params.input_prefix , false );
466466 std::vector<llama_token> inp_sfx = common_tokenize (ctx, params.input_suffix , false );
467467
468- inp_pfx.insert (inp_pfx.begin (), llama_token_fim_pre (vocab));
469- inp_sfx.insert (inp_sfx.begin (), llama_token_fim_suf (vocab));
468+ inp_pfx.insert (inp_pfx.begin (), llama_vocab_fim_pre (vocab));
469+ inp_sfx.insert (inp_sfx.begin (), llama_vocab_fim_suf (vocab));
470470
471471 embd_inp = params.spm_infill ? inp_sfx : inp_pfx;
472472 embd_end = params.spm_infill ? inp_pfx : inp_sfx;
473473 if (add_bos) {
474- embd_inp.insert (embd_inp.begin (), llama_token_bos (vocab));
474+ embd_inp.insert (embd_inp.begin (), llama_vocab_bos (vocab));
475475 }
476476 embd_inp.insert (embd_inp.end (), embd_end.begin (), embd_end.end ());
477477
@@ -486,7 +486,7 @@ int main(int argc, char ** argv) {
486486 is_interacting = false ;
487487 }
488488 // deal with end of generation tokens in interactive mode
489- else if (llama_token_is_eog (vocab, common_sampler_last (smpl))) {
489+ else if (llama_vocab_is_eog (vocab, common_sampler_last (smpl))) {
490490 LOG_DBG (" found EOS token\n " );
491491
492492 if (params.interactive ) {
@@ -502,7 +502,7 @@ int main(int argc, char ** argv) {
502502
503503 if (params.input_prefix_bos ) {
504504 LOG_DBG (" adding input prefix BOS token\n " );
505- embd_inp.push_back (llama_token_bos (vocab));
505+ embd_inp.push_back (llama_vocab_bos (vocab));
506506 }
507507
508508 std::string buffer;
@@ -565,7 +565,7 @@ int main(int argc, char ** argv) {
565565 }
566566
567567 // end of generation
568- if (!embd.empty () && llama_token_is_eog (vocab, embd.back ()) && !params.interactive ) {
568+ if (!embd.empty () && llama_vocab_is_eog (vocab, embd.back ()) && !params.interactive ) {
569569 break ;
570570 }
571571
@@ -577,7 +577,7 @@ int main(int argc, char ** argv) {
577577 }
578578 }
579579 if (!params.interactive && n_remain <= 0 ) {
580- LOG (" %s" , common_token_to_piece (ctx, llama_token_eot (vocab)).c_str ());
580+ LOG (" %s" , common_token_to_piece (ctx, llama_vocab_eot (vocab)).c_str ());
581581 }
582582
583583 LOG (" \n " );
0 commit comments