Commit 940139c

cont
ggml-ci
1 parent 1d9f1f2 commit 940139c

File tree

34 files changed: +268 additions, -164 deletions
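
This diff is a mechanical rename of the vocab accessors: llama_token_*(vocab) and llama_add_*_token(vocab) become llama_vocab_*(vocab) (for example llama_token_bos -> llama_vocab_bos, llama_add_bos_token -> llama_vocab_add_bos, llama_token_is_eog -> llama_vocab_is_eog), and the in-tree callers below are updated accordingly. A minimal sketch of the new call pattern, assuming a loaded llama_model pointer (the helper name is hypothetical, not part of the change):

#include "llama.h"

// Hypothetical helper, for illustration only: query special tokens through the
// renamed llama_vocab_* accessors instead of the old llama_token_* ones.
static bool vocab_has_bos_eos(const llama_model * model) {
    const llama_vocab * vocab = llama_model_get_vocab(model);

    const llama_token bos = llama_vocab_bos(vocab); // previously llama_token_bos(vocab)
    const llama_token eos = llama_vocab_eos(vocab); // previously llama_token_eos(vocab)

    return bos != LLAMA_TOKEN_NULL && eos != LLAMA_TOKEN_NULL;
}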

common/common.cpp

Lines changed: 7 additions & 7 deletions
@@ -862,17 +862,17 @@ struct common_init_result common_init_from_params(common_params & params) {
     if (params.reranking) {
         bool ok = true;

-        if (llama_token_bos(vocab) == LLAMA_TOKEN_NULL) {
+        if (llama_vocab_bos(vocab) == LLAMA_TOKEN_NULL) {
             LOG_WRN("%s: warning: vocab does not have a BOS token, reranking will not work\n", __func__);
             ok = false;
         }

-        if (llama_token_eos(vocab) == LLAMA_TOKEN_NULL) {
+        if (llama_vocab_eos(vocab) == LLAMA_TOKEN_NULL) {
             LOG_WRN("%s: warning: vocab does not have an EOS token, reranking will not work\n", __func__);
             ok = false;
         }

-        if (llama_token_sep(vocab) == LLAMA_TOKEN_NULL) {
+        if (llama_vocab_sep(vocab) == LLAMA_TOKEN_NULL) {
             LOG_WRN("%s: warning: vocab does not have a SEP token, reranking will not work\n", __func__);
             ok = false;
         }
@@ -943,14 +943,14 @@ struct common_init_result common_init_from_params(common_params & params) {
         common_lora_adapters_apply(lctx, params.lora_adapters);
     }

-    if (params.sampling.ignore_eos && llama_token_eos(vocab) == LLAMA_TOKEN_NULL) {
+    if (params.sampling.ignore_eos && llama_vocab_eos(vocab) == LLAMA_TOKEN_NULL) {
         LOG_WRN("%s: warning: vocab does not have an EOS token, ignoring --ignore-eos\n", __func__);
         params.sampling.ignore_eos = false;
     }

     if (params.sampling.ignore_eos) {
         for (llama_token i = 0; i < llama_vocab_n_vocab(vocab); i++) {
-            if (llama_token_is_eog(vocab, i)) {
+            if (llama_vocab_is_eog(vocab, i)) {
                 LOG_INF("%s: added %s logit bias = %f\n", __func__, common_token_to_piece(lctx, i).c_str(), -INFINITY);
                 params.sampling.logit_bias.push_back({i, -INFINITY});
             }
@@ -971,8 +971,8 @@ struct common_init_result common_init_from_params(common_params & params) {
         LOG_WRN("%s: warming up the model with an empty run - please wait ... (--no-warmup to disable)\n", __func__);

         std::vector<llama_token> tmp;
-        llama_token bos = llama_token_bos(vocab);
-        llama_token eos = llama_token_eos(vocab);
+        llama_token bos = llama_vocab_bos(vocab);
+        llama_token eos = llama_vocab_eos(vocab);

         // some models (e.g. T5) don't have a BOS token
         if (bos != LLAMA_TOKEN_NULL) {

common/speculative.cpp

Lines changed: 8 additions & 8 deletions
@@ -94,13 +94,13 @@ bool common_speculative_are_compatible(
         return false;
     }

-    if (llama_add_bos_token(vocab_tgt) != llama_add_bos_token(vocab_dft) ||
-        llama_add_eos_token(vocab_tgt) != llama_add_eos_token(vocab_dft) ||
-        llama_token_bos(vocab_tgt) != llama_token_bos(vocab_dft) ||
-        llama_token_eos(vocab_tgt) != llama_token_eos(vocab_dft)) {
+    if (llama_vocab_add_bos(vocab_tgt) != llama_vocab_add_bos(vocab_dft) ||
+        llama_vocab_add_eos(vocab_tgt) != llama_vocab_add_eos(vocab_dft) ||
+        llama_vocab_bos(vocab_tgt) != llama_vocab_bos(vocab_dft) ||
+        llama_vocab_eos(vocab_tgt) != llama_vocab_eos(vocab_dft)) {
         LOG_ERR("%s: draft vocab special tokens must match target vocab to use speculation\n", __func__);
-        LOG_ERR("%s: tgt: bos = %d (%d), eos = %d (%d)\n", __func__, llama_token_bos(vocab_tgt), llama_add_bos_token(vocab_tgt), llama_token_eos(vocab_tgt), llama_add_eos_token(vocab_tgt));
-        LOG_ERR("%s: dft: bos = %d (%d), eos = %d (%d)\n", __func__, llama_token_bos(vocab_dft), llama_add_bos_token(vocab_dft), llama_token_eos(vocab_dft), llama_add_eos_token(vocab_dft));
+        LOG_ERR("%s: tgt: bos = %d (%d), eos = %d (%d)\n", __func__, llama_vocab_bos(vocab_tgt), llama_vocab_add_bos(vocab_tgt), llama_vocab_eos(vocab_tgt), llama_vocab_add_eos(vocab_tgt));
+        LOG_ERR("%s: dft: bos = %d (%d), eos = %d (%d)\n", __func__, llama_vocab_bos(vocab_dft), llama_vocab_add_bos(vocab_dft), llama_vocab_eos(vocab_dft), llama_vocab_add_eos(vocab_dft));
         return false;
     }

@@ -118,8 +118,8 @@ bool common_speculative_are_compatible(
     }

     for (int i = SPEC_VOCAB_CHECK_START_TOKEN_ID; i < std::min(n_vocab_tgt, n_vocab_dft); ++i) {
-        const char * token_text_tgt = llama_token_get_text(vocab_tgt, i);
-        const char * token_text_dft = llama_token_get_text(vocab_dft, i);
+        const char * token_text_tgt = llama_vocab_get_text(vocab_tgt, i);
+        const char * token_text_dft = llama_vocab_get_text(vocab_dft, i);
         if (std::strcmp(token_text_tgt, token_text_dft) != 0) {
             LOG_ERR("%s: draft vocab vocab must match target vocab to use speculation but "
                 "token %d content differs - target '%s', draft '%s'\n", __func__, i,

examples/batched.swift/Sources/main.swift

Lines changed: 1 addition & 1 deletion
@@ -141,7 +141,7 @@ while n_cur <= n_len {
         let new_token_id = llama_sampler_sample(smpl, context, i_batch[i])

         // is it an end of stream? -> mark the stream as finished
-        if llama_token_is_eog(model, new_token_id) || n_cur == n_len {
+        if llama_vocab_is_eog(model, new_token_id) || n_cur == n_len {
             i_batch[i] = -1
             // print("")
             if n_parallel > 1 {

examples/batched/batched.cpp

Lines changed: 2 additions & 2 deletions
@@ -123,7 +123,7 @@ int main(int argc, char ** argv) {

         llama_token decoder_start_token_id = llama_model_decoder_start_token(model);
         if (decoder_start_token_id == LLAMA_TOKEN_NULL) {
-            decoder_start_token_id = llama_token_bos(vocab);
+            decoder_start_token_id = llama_vocab_bos(vocab);
         }

         common_batch_clear(batch);
@@ -176,7 +176,7 @@ int main(int argc, char ** argv) {
             const llama_token new_token_id = llama_sampler_sample(smpl, ctx, i_batch[i]);

             // is it an end of generation? -> mark the stream as finished
-            if (llama_token_is_eog(vocab, new_token_id) || n_cur == n_predict) {
+            if (llama_vocab_is_eog(vocab, new_token_id) || n_cur == n_predict) {
                 i_batch[i] = -1;
                 LOG("\n");
                 if (n_parallel > 1) {
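
The same pattern recurs throughout the example programs: the end-of-generation check on a freshly sampled token now goes through llama_vocab_is_eog. A consolidated sketch under the same assumptions as the examples above (ctx, smpl, i_batch, n_cur and n_predict come from the surrounding loop):

// obtain the vocab through the model, sample, then test for end of generation
const llama_model * model = llama_get_model(ctx);
const llama_vocab * vocab = llama_model_get_vocab(model);

const llama_token new_token_id = llama_sampler_sample(smpl, ctx, i_batch[i]);

if (llama_vocab_is_eog(vocab, new_token_id) || n_cur == n_predict) { // previously llama_token_is_eog
    i_batch[i] = -1; // mark this stream as finished
}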

examples/cvector-generator/cvector-generator.cpp

Lines changed: 1 addition & 1 deletion
@@ -275,7 +275,7 @@ struct tokenized_prompt {
     tokenized_prompt(llama_context * ctx, std::string pos, std::string neg) {
         const llama_model * model = llama_get_model(ctx);
         const llama_vocab * vocab = llama_model_get_vocab(model);
-        const bool add_bos = llama_add_bos_token(vocab);
+        const bool add_bos = llama_vocab_add_bos(vocab);
         tokens_pos = common_tokenize(ctx, pos, add_bos, true);
         tokens_neg = common_tokenize(ctx, neg, add_bos, true);
         max_seq_len = std::max(tokens_pos.size(), tokens_neg.size());

examples/embedding/embedding.cpp

Lines changed: 1 addition & 1 deletion
@@ -150,7 +150,7 @@ int main(int argc, char ** argv) {
     // check if the last token is SEP
     // it should be automatically added by the tokenizer when 'tokenizer.ggml.add_eos_token' is set to 'true'
     for (auto & inp : inputs) {
-        if (inp.empty() || inp.back() != llama_token_sep(vocab)) {
+        if (inp.empty() || inp.back() != llama_vocab_sep(vocab)) {
             LOG_WRN("%s: last token in the prompt is not SEP\n", __func__);
             LOG_WRN("%s: 'tokenizer.ggml.add_eos_token' should be set to 'true' in the GGUF header\n", __func__);
         }

examples/eval-callback/eval-callback.cpp

Lines changed: 1 addition & 1 deletion
@@ -130,7 +130,7 @@ static bool run(llama_context * ctx, const common_params & params) {
     const llama_model * model = llama_get_model(ctx);
     const llama_vocab * vocab = llama_model_get_vocab(model);

-    const bool add_bos = llama_add_bos_token(vocab);
+    const bool add_bos = llama_vocab_add_bos(vocab);

     std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, add_bos);

examples/gritlm/gritlm.cpp

Lines changed: 2 additions & 2 deletions
@@ -26,7 +26,7 @@ static std::vector<std::vector<float>> encode(llama_context * ctx, const std::ve

         // GritLM seems to have EOS = ""
         // https://github.com/ContextualAI/gritlm/blob/92025b16534712b31b3c4aaaf069350e222bd5f8/gritlm/gritlm.py#L18
-        // inputs.push_back(llama_token_eos(vocab));
+        // inputs.push_back(llama_vocab_eos(vocab));

         // we want to ignore instruction tokens for mean pooling
         const int32_t n_inst = common_tokenize(vocab, instruction, true, false).size();
@@ -100,7 +100,7 @@ static std::string generate(llama_context * ctx, llama_sampler * smpl, const std
     const llama_model * model = llama_get_model(ctx);
     const llama_vocab * vocab = llama_model_get_vocab(model);

-    llama_token eos_token = llama_token_eos(vocab);
+    llama_token eos_token = llama_vocab_eos(vocab);

     llama_kv_cache_clear(ctx);
     llama_set_embeddings(ctx, false);

examples/imatrix/imatrix.cpp

Lines changed: 3 additions & 3 deletions
@@ -431,10 +431,10 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params) {
     const llama_model * model = llama_get_model(ctx);
     const llama_vocab * vocab = llama_model_get_vocab(model);

-    const bool add_bos = llama_add_bos_token(vocab);
+    const bool add_bos = llama_vocab_add_bos(vocab);
     const int n_ctx = llama_n_ctx(ctx);

-    GGML_ASSERT(!llama_add_eos_token(vocab));
+    GGML_ASSERT(!llama_vocab_add_eos(vocab));

     auto tim1 = std::chrono::high_resolution_clock::now();
     LOG_INF("%s: tokenizing the input ..\n", __func__);
@@ -510,7 +510,7 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params) {

         // add BOS token for the first batch of each chunk
         if (add_bos && j == 0) {
-            tokens[batch_start] = llama_token_bos(vocab);
+            tokens[batch_start] = llama_vocab_bos(vocab);
         }

         common_batch_clear(batch);

examples/infill/infill.cpp

Lines changed: 18 additions & 18 deletions
@@ -154,28 +154,28 @@ int main(int argc, char ** argv) {
         LOG_INF("\n");
         LOG_INF("%s\n", common_params_get_system_info(params).c_str());
     }
-    const bool add_bos = llama_add_bos_token(vocab);
-    GGML_ASSERT(!llama_add_eos_token(vocab));
+    const bool add_bos = llama_vocab_add_bos(vocab);
+    GGML_ASSERT(!llama_vocab_add_eos(vocab));

     std::vector<llama_token> embd_inp;
     std::vector<llama_token> embd_end;
     std::vector<llama_token> inp_pfx = common_tokenize(ctx, params.input_prefix, false);
     std::vector<llama_token> inp_sfx = common_tokenize(ctx, params.input_suffix, false);

-    GGML_ASSERT(llama_token_fim_pre(vocab) >= 0);
-    GGML_ASSERT(llama_token_fim_suf(vocab) >= 0);
+    GGML_ASSERT(llama_vocab_fim_pre(vocab) >= 0);
+    GGML_ASSERT(llama_vocab_fim_suf(vocab) >= 0);

-    inp_pfx.insert(inp_pfx.begin(), llama_token_fim_pre(vocab));
-    inp_sfx.insert(inp_sfx.begin(), llama_token_fim_suf(vocab));
+    inp_pfx.insert(inp_pfx.begin(), llama_vocab_fim_pre(vocab));
+    inp_sfx.insert(inp_sfx.begin(), llama_vocab_fim_suf(vocab));

     embd_inp = params.spm_infill ? inp_sfx : inp_pfx;
     embd_end = params.spm_infill ? inp_pfx : inp_sfx;
     if (add_bos) {
-        embd_inp.insert(embd_inp.begin(), llama_token_bos(vocab));
+        embd_inp.insert(embd_inp.begin(), llama_vocab_bos(vocab));
     }
     embd_inp.insert(embd_inp.end(), embd_end.begin(), embd_end.end());

-    const llama_token middle_token = llama_token_fim_mid(vocab);
+    const llama_token middle_token = llama_vocab_fim_mid(vocab);
     if (middle_token >= 0) {
         embd_inp.push_back(middle_token);
     }
@@ -187,7 +187,7 @@ int main(int argc, char ** argv) {

     // Should not run without any tokens
     if (embd_inp.empty()) {
-        embd_inp.push_back(llama_token_bos(vocab));
+        embd_inp.push_back(llama_vocab_bos(vocab));
         LOG_WRN("embd_inp was considered empty and bos was added: %s\n", string_from(ctx, embd_inp).c_str());
     }

@@ -422,10 +422,10 @@ int main(int argc, char ** argv) {
         // if not currently processing queued inputs;
         if ((int) embd_inp.size() <= n_consumed) {
             // deal with eot token in infill mode
-            if ((common_sampler_last(smpl) == llama_token_eot(vocab) || is_interacting) && params.interactive){
+            if ((common_sampler_last(smpl) == llama_vocab_eot(vocab) || is_interacting) && params.interactive){
                 if (is_interacting && !params.interactive_first) {
                     // print an eot token
-                    LOG("%s", common_token_to_piece(ctx, llama_token_eot(vocab)).c_str());
+                    LOG("%s", common_token_to_piece(ctx, llama_vocab_eot(vocab)).c_str());
                 }
                 LOG("\n");
                 console::set_display(console::user_input);
@@ -465,13 +465,13 @@ int main(int argc, char ** argv) {
                 std::vector<llama_token> inp_pfx = common_tokenize(ctx, params.input_prefix, false);
                 std::vector<llama_token> inp_sfx = common_tokenize(ctx, params.input_suffix, false);

-                inp_pfx.insert(inp_pfx.begin(), llama_token_fim_pre(vocab));
-                inp_sfx.insert(inp_sfx.begin(), llama_token_fim_suf(vocab));
+                inp_pfx.insert(inp_pfx.begin(), llama_vocab_fim_pre(vocab));
+                inp_sfx.insert(inp_sfx.begin(), llama_vocab_fim_suf(vocab));

                 embd_inp = params.spm_infill ? inp_sfx : inp_pfx;
                 embd_end = params.spm_infill ? inp_pfx : inp_sfx;
                 if (add_bos) {
-                    embd_inp.insert(embd_inp.begin(), llama_token_bos(vocab));
+                    embd_inp.insert(embd_inp.begin(), llama_vocab_bos(vocab));
                 }
                 embd_inp.insert(embd_inp.end(), embd_end.begin(), embd_end.end());

@@ -486,7 +486,7 @@ int main(int argc, char ** argv) {
                 is_interacting = false;
             }
             // deal with end of generation tokens in interactive mode
-            else if (llama_token_is_eog(vocab, common_sampler_last(smpl))) {
+            else if (llama_vocab_is_eog(vocab, common_sampler_last(smpl))) {
                 LOG_DBG("found EOS token\n");

                 if (params.interactive) {
@@ -502,7 +502,7 @@ int main(int argc, char ** argv) {

             if (params.input_prefix_bos) {
                 LOG_DBG("adding input prefix BOS token\n");
-                embd_inp.push_back(llama_token_bos(vocab));
+                embd_inp.push_back(llama_vocab_bos(vocab));
             }

             std::string buffer;
@@ -565,7 +565,7 @@ int main(int argc, char ** argv) {
         }

         // end of generation
-        if (!embd.empty() && llama_token_is_eog(vocab, embd.back()) && !params.interactive) {
+        if (!embd.empty() && llama_vocab_is_eog(vocab, embd.back()) && !params.interactive) {
             break;
         }

@@ -577,7 +577,7 @@ int main(int argc, char ** argv) {
         }
     }
     if (!params.interactive && n_remain <= 0) {
-        LOG("%s", common_token_to_piece(ctx, llama_token_eot(vocab)).c_str());
+        LOG("%s", common_token_to_piece(ctx, llama_vocab_eot(vocab)).c_str());
     }

     LOG("\n");
