
Commit 84337c2

common : remove duplicate function llama_should_add_bos_token (#8778)
Author: Zhenwei Jin
1 parent 2a2b605 commit 84337c2

14 files changed: 26 additions & 40 deletions
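
The change is mechanical at every call site: the common-library helper llama_should_add_bos_token is deleted and callers query llama_add_bos_token directly. A minimal before/after sketch of the pattern repeated in the diffs below; it is behavior-preserving only if llama_add_bos_token now resolves the metadata default itself, which presumably happens in the library-side files not shown in this excerpt:

    // before this commit: a common/ helper resolved the tri-state metadata value
    const bool add_bos = llama_should_add_bos_token(model);

    // after this commit: the library call is used directly and returns the
    // resolved value as a plain bool
    const bool add_bos = llama_add_bos_token(model);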

common/common.cpp

Lines changed: 0 additions & 6 deletions

@@ -2981,12 +2981,6 @@ std::string llama_detokenize(llama_context * ctx, const std::vector<llama_token>
     return text;
 }
 
-bool llama_should_add_bos_token(const llama_model * model) {
-    const int add_bos = llama_add_bos_token(model);
-
-    return add_bos != -1 ? bool(add_bos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
-}
-
 //
 // Chat template utils
 //
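
For reference, the deleted helper restated with comments (this is the removed body, not new behavior): llama_add_bos_token historically returned an int, with -1 meaning the model metadata did not specify a value, and the helper supplied the SPM fallback on top:

    // tri-state resolution performed by the helper before this commit:
    //    1 / 0 -> explicit value from the model metadata
    //   -1     -> unspecified; default to true only for SPM vocabularies
    const int  add_bos    = llama_add_bos_token(model);
    const bool should_add = add_bos != -1 ? bool(add_bos)
                                          : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);

With the helper gone, that fallback presumably moves into the library itself, so the -1 case can no longer reach callers.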

common/common.h

Lines changed: 0 additions & 4 deletions

@@ -408,10 +408,6 @@ std::string llama_detokenize(
         const std::vector<llama_token> & tokens,
         bool special = true);
 
-// Uses the value from the model metadata if possible, otherwise
-// defaults to true when model type is SPM, otherwise false.
-bool llama_should_add_bos_token(const llama_model * model);
-
 //
 // Chat template utils
 //

examples/cvector-generator/cvector-generator.cpp

Lines changed: 1 addition & 1 deletion

@@ -271,7 +271,7 @@ struct tokenized_prompt {
     size_t max_seq_len;
 
     tokenized_prompt(llama_context * ctx, std::string pos, std::string neg) {
-        const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
+        const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
         tokens_pos = ::llama_tokenize(ctx, pos, add_bos, true);
         tokens_neg = ::llama_tokenize(ctx, neg, add_bos, true);
         max_seq_len = std::max(tokens_pos.size(), tokens_neg.size());

examples/eval-callback/eval-callback.cpp

Lines changed: 1 addition & 1 deletion

@@ -127,7 +127,7 @@ static bool ggml_debug(struct ggml_tensor * t, bool ask, void * user_data) {
 }
 
 static bool run(llama_context * ctx, const gpt_params & params) {
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
 
     std::vector<llama_token> tokens = ::llama_tokenize(ctx, params.prompt, add_bos);

examples/imatrix/imatrix.cpp

Lines changed: 2 additions & 2 deletions

@@ -637,8 +637,8 @@ static void process_logits(
 }
 
 static bool compute_imatrix(llama_context * ctx, const gpt_params & params) {
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
-    GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
+    GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
     const int n_ctx = llama_n_ctx(ctx);
 
     auto tim1 = std::chrono::high_resolution_clock::now();
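
Note that swapping GGML_ASSERT(llama_add_eos_token(...) != 1) for GGML_ASSERT(!llama_add_eos_token(...)) is only equivalent if the function can no longer return the old "unspecified" value of -1; the sketch below assumes the post-commit bool return. The comments are an interpretation of why the assertion exists, not part of the diff:

    // imatrix (and perplexity/infill below) tokenizes chunk-by-chunk and adds
    // BOS itself; a vocabulary that auto-appends EOS would skew the collected
    // statistics, so abort early if the model metadata requests it
    GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));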

examples/infill/infill.cpp

Lines changed: 2 additions & 2 deletions

@@ -203,8 +203,8 @@ int main(int argc, char ** argv) {
         LOG_TEE("\n");
         LOG_TEE("%s\n", gpt_params_get_system_info(params).c_str());
     }
-    const bool add_bos = llama_should_add_bos_token(model);
-    GGML_ASSERT(llama_add_eos_token(model) != 1);
+    const bool add_bos = llama_add_bos_token(model);
+    GGML_ASSERT(!llama_add_eos_token(model));
     LOG("add_bos: %d\n", add_bos);
 
     std::vector<llama_token> embd_inp;

examples/main/main.cpp

Lines changed: 2 additions & 2 deletions

@@ -267,9 +267,9 @@ int main(int argc, char ** argv) {
         }
     }
 
-    const bool add_bos = llama_should_add_bos_token(model);
+    const bool add_bos = llama_add_bos_token(model);
     if (!llama_model_has_encoder(model)) {
-        GGML_ASSERT(llama_add_eos_token(model) != 1);
+        GGML_ASSERT(!llama_add_eos_token(model));
     }
     LOG("add_bos: %d\n", add_bos);
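
main.cpp guards the same assertion with llama_model_has_encoder: encoder-decoder models (e.g. T5-style) legitimately auto-append EOS to the encoder input, so the check only applies to decoder-only models. A condensed sketch of the guarded pattern, with interpretive comments:

    const bool add_bos = llama_add_bos_token(model);
    if (!llama_model_has_encoder(model)) {
        // decoder-only: an auto-appended EOS would terminate generation
        // prompts prematurely, so it is treated as a hard error here
        GGML_ASSERT(!llama_add_eos_token(model));
    }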

examples/perplexity/perplexity.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -347,8 +347,8 @@ static results_perplexity perplexity_v2(llama_context * ctx, const gpt_params &
347347
// Output: `perplexity: 13.5106 [114/114]`
348348
// BOS tokens will be added for each chunk before eval
349349

350-
const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
351-
GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
350+
const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
351+
GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
352352

353353
fprintf(stderr, "%s: tokenizing the input ..\n", __func__);
354354

@@ -487,8 +487,8 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
487487
// Output: `perplexity: 13.5106 [114/114]`
488488
// BOS tokens will be added for each chunk before eval
489489

490-
const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
491-
GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
490+
const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
491+
GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
492492

493493
std::ofstream logits_stream;
494494
if (!params.logits_file.empty()) {
@@ -1740,8 +1740,8 @@ static void kl_divergence(llama_context * ctx, const gpt_params & params) {
17401740
const int n_batch = params.n_batch;
17411741
const int num_batches = (n_ctx + n_batch - 1)/n_batch;
17421742
const int nv = 2*((n_vocab + 1)/2) + 4;
1743-
const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
1744-
GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
1743+
const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
1744+
GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
17451745

17461746
std::vector<uint16_t> log_probs_uint16(size_t(n_ctx - 1 - n_ctx/2) * nv);
17471747
std::vector<float> kld_values(size_t(n_ctx - 1 - n_ctx/2)*n_chunk);

examples/server/server.cpp

Lines changed: 3 additions & 4 deletions

@@ -858,9 +858,8 @@ struct server_context {
 
         n_ctx = llama_n_ctx(ctx);
 
-        add_bos_token = llama_should_add_bos_token(model);
-        has_eos_token = llama_add_eos_token(model) != 1;
-
+        add_bos_token = llama_add_bos_token(model);
+        has_eos_token = !llama_add_eos_token(model);
         return true;
     }

@@ -2287,7 +2286,7 @@ struct server_context {
         slot.t_start_generation = 0;
 
         if (slot.infill) {
-            const bool add_bos = llama_should_add_bos_token(model);
+            const bool add_bos = llama_add_bos_token(model);
             bool suff_rm_leading_spc = true;
             if (params.input_suffix.find_first_of(' ') == 0 && params.input_suffix.size() > 1) {
                 params.input_suffix.erase(0, 1);

examples/tokenize/tokenize.cpp

Lines changed: 1 addition & 1 deletion

@@ -362,7 +362,7 @@ int main(int raw_argc, char ** raw_argv) {
         prompt = stdin_buffer.str();
     }
 
-    const bool model_wants_add_bos = llama_should_add_bos_token(model);
+    const bool model_wants_add_bos = llama_add_bos_token(model);
     const bool add_bos = model_wants_add_bos && !no_bos;
     const bool parse_special = !no_parse_special;
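
tokenize.cpp shows the full post-commit idiom: the model's preference (model_wants_add_bos) is kept separate from the user override (no_bos) and only their combination is passed to the tokenizer. A self-contained usage sketch of that idiom against the raw llama.h API of this period (error handling trimmed; the llama_tokenize signature and the token-count upper bound are assumptions, not taken from this diff):

    #include "llama.h"

    #include <cstdio>
    #include <string>
    #include <vector>

    int main(int argc, char ** argv) {
        if (argc < 3) {
            fprintf(stderr, "usage: %s <model.gguf> <text>\n", argv[0]);
            return 1;
        }

        llama_backend_init();

        llama_model * model = llama_load_model_from_file(argv[1], llama_model_default_params());
        if (model == NULL) {
            return 1;
        }

        const bool no_bos              = false;                      // stand-in for a user-facing --no-bos option
        const bool model_wants_add_bos = llama_add_bos_token(model); // metadata-resolved; bool after this commit
        const bool add_bos             = model_wants_add_bos && !no_bos;

        const std::string text = argv[2];

        // one token per input byte plus BOS/EOS headroom is a safe upper bound
        std::vector<llama_token> tokens(text.size() + 2);
        const int n_tokens = llama_tokenize(model, text.c_str(), (int) text.size(),
                                            tokens.data(), (int) tokens.size(),
                                            /*add_special =*/ add_bos, /*parse_special =*/ true);

        for (int i = 0; i < n_tokens; i++) {
            printf("%d\n", tokens[i]);
        }

        llama_free_model(model);
        llama_backend_free();

        return 0;
    }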
