Commit cbea4ba

vocab : llama_vocab_n_vocab -> llama_vocab_n_tokens (#11174)
ggml-ci
1 parent 6540935 commit cbea4ba
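
As the hunks below show, this is a mechanical rename in the llama.cpp vocab API: the getter llama_vocab_n_vocab is now called llama_vocab_n_tokens, and call sites are updated accordingly; behavior is unchanged. A minimal usage sketch under the new name, assuming ctx is an already-initialized llama_context * (the surrounding setup is not shown):

// minimal sketch: query the vocabulary size through the renamed getter
// assumes `ctx` is a valid llama_context * created elsewhere
const llama_model * model = llama_get_model(ctx);
const llama_vocab * vocab = llama_model_get_vocab(model);

// previously: llama_vocab_n_vocab(vocab)
const int32_t n_vocab = llama_vocab_n_tokens(vocab);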

19 files changed: +58 -59 lines changed


common/common.cpp

Lines changed: 1 addition & 1 deletion
@@ -950,7 +950,7 @@ struct common_init_result common_init_from_params(common_params & params) {
     }

     if (params.sampling.ignore_eos) {
-        for (llama_token i = 0; i < llama_vocab_n_vocab(vocab); i++) {
+        for (llama_token i = 0; i < llama_vocab_n_tokens(vocab); i++) {
             if (llama_vocab_is_eog(vocab, i)) {
                 LOG_INF("%s: added %s logit bias = %f\n", __func__, common_token_to_piece(lctx, i).c_str(), -INFINITY);
                 params.sampling.logit_bias.push_back({i, -INFINITY});

common/sampling.cpp

Lines changed: 3 additions & 3 deletions
@@ -116,7 +116,7 @@ struct common_sampler {
         const llama_model * model = llama_get_model(ctx);
         const llama_vocab * vocab = llama_model_get_vocab(model);

-        const int n_vocab = llama_vocab_n_vocab(vocab);
+        const int n_vocab = llama_vocab_n_tokens(vocab);

         cur.resize(n_vocab);

@@ -162,7 +162,7 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co

     llama_sampler_chain_add(result->chain,
             llama_sampler_init_logit_bias(
-                llama_vocab_n_vocab(vocab),
+                llama_vocab_n_tokens(vocab),
                 params.logit_bias.size(),
                 params.logit_bias.data()));

@@ -211,7 +211,7 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
         llama_sampler_chain_add(result->chain, llama_sampler_init_dist(params.seed));
     } else if (params.mirostat == 1) {
         llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
-        llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat(llama_vocab_n_vocab(vocab), params.seed, params.mirostat_tau, params.mirostat_eta, 100));
+        llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat(llama_vocab_n_tokens(vocab), params.seed, params.mirostat_tau, params.mirostat_eta, 100));
     } else if (params.mirostat == 2) {
         llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
         llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat_v2(params.seed, params.mirostat_tau, params.mirostat_eta));

common/speculative.cpp

Lines changed: 3 additions & 3 deletions
@@ -105,15 +105,15 @@ bool common_speculative_are_compatible(
     }

     {
-        const int n_vocab_tgt = llama_vocab_n_vocab(vocab_tgt);
-        const int n_vocab_dft = llama_vocab_n_vocab(vocab_dft);
+        const int n_vocab_tgt = llama_vocab_n_tokens(vocab_tgt);
+        const int n_vocab_dft = llama_vocab_n_tokens(vocab_dft);

         const int vocab_diff = std::abs(n_vocab_tgt - n_vocab_dft);

         if (vocab_diff > SPEC_VOCAB_MAX_SIZE_DIFFERENCE) {
             LOG_ERR("%s: draft model vocab must closely match target model to use speculation but "
                     "target vocab size %d does not match draft vocab size %d - difference %d, max allowed %d\n",
-                    __func__, n_vocab_tgt, llama_vocab_n_vocab(vocab_dft), vocab_diff, SPEC_VOCAB_MAX_SIZE_DIFFERENCE);
+                    __func__, n_vocab_tgt, llama_vocab_n_tokens(vocab_dft), vocab_diff, SPEC_VOCAB_MAX_SIZE_DIFFERENCE);
             return false;
         }

examples/imatrix/imatrix.cpp

Lines changed: 1 addition & 1 deletion
@@ -470,7 +470,7 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params) {
     const int n_chunk_max = tokens.size() / n_ctx;

     const int n_chunk = params.n_chunks < 0 ? n_chunk_max : std::min(params.n_chunks, n_chunk_max);
-    const int n_vocab = llama_vocab_n_vocab(vocab);
+    const int n_vocab = llama_vocab_n_tokens(vocab);
     const int n_batch = params.n_batch;

     int count = 0;

examples/llama-bench/llama-bench.cpp

Lines changed: 2 additions & 2 deletions
@@ -1402,7 +1402,7 @@ static void test_prompt(llama_context * ctx, int n_prompt, int n_batch, int n_th

     const llama_model * model = llama_get_model(ctx);
     const llama_vocab * vocab = llama_model_get_vocab(model);
-    const int32_t n_vocab = llama_vocab_n_vocab(vocab);
+    const int32_t n_vocab = llama_vocab_n_tokens(vocab);

     std::vector<llama_token> tokens(n_batch);

@@ -1426,7 +1426,7 @@ static void test_gen(llama_context * ctx, int n_gen, int n_threads) {

     const llama_model * model = llama_get_model(ctx);
     const llama_vocab * vocab = llama_model_get_vocab(model);
-    const int32_t n_vocab = llama_vocab_n_vocab(vocab);
+    const int32_t n_vocab = llama_vocab_n_tokens(vocab);

     llama_token token = llama_vocab_get_add_bos(vocab) ? llama_vocab_bos(vocab) : std::rand() % n_vocab;

examples/lookahead/lookahead.cpp

Lines changed: 1 addition & 1 deletion
@@ -149,7 +149,7 @@ int main(int argc, char ** argv) {
     }

     // here we keep adding new n-grams as we go
-    ngram_container ngrams_observed(llama_vocab_n_vocab(vocab), N, G);
+    ngram_container ngrams_observed(llama_vocab_n_tokens(vocab), N, G);

     // debug
     struct llama_kv_cache_view kvc_view = llama_kv_cache_view_init(ctx, W + G + 1);

examples/perplexity/perplexity.cpp

Lines changed: 7 additions & 7 deletions
@@ -341,7 +341,7 @@ static results_perplexity perplexity_v2(llama_context * ctx, const common_params
     const int n_chunk = params.n_chunks < 0 ? n_chunk_max : std::min(params.n_chunks, n_chunk_max);
     const int n_batch = params.n_batch;

-    const int n_vocab = llama_vocab_n_vocab(vocab);
+    const int n_vocab = llama_vocab_n_tokens(vocab);

     int count = 0;
     double nll = 0.0;
@@ -491,7 +491,7 @@ static results_perplexity perplexity(llama_context * ctx, const common_params &
     const int n_chunk = params.n_chunks < 0 ? n_chunk_max : std::min(params.n_chunks, n_chunk_max);
     const int n_batch = params.n_batch;

-    const int n_vocab = llama_vocab_n_vocab(vocab);
+    const int n_vocab = llama_vocab_n_tokens(vocab);

     int count = 0;
     double nll = 0.0;
@@ -857,7 +857,7 @@ static void hellaswag_score(llama_context * ctx, const common_params & params) {
     const int n_ctx = llama_n_ctx(ctx);
     const int n_batch = params.n_batch;

-    const int n_vocab = llama_vocab_n_vocab(vocab);
+    const int n_vocab = llama_vocab_n_tokens(vocab);

     const int max_tasks_per_batch = 32;
     const int max_seq = std::min(4*max_tasks_per_batch, (int) llama_n_seq_max(ctx));
@@ -1141,7 +1141,7 @@ static void winogrande_score(llama_context * ctx, const common_params & params)
     const int n_ctx = llama_n_ctx(ctx);
     const int n_batch = params.n_batch;

-    const int n_vocab = llama_vocab_n_vocab(vocab);
+    const int n_vocab = llama_vocab_n_tokens(vocab);

     const int max_tasks_per_batch = 128;
     const int max_seq = std::min(2*max_tasks_per_batch, (int) llama_n_seq_max(ctx));
@@ -1495,7 +1495,7 @@ static void multiple_choice_score(llama_context * ctx, const common_params & par
     const int n_ctx = llama_n_ctx(ctx);
     const int n_batch = params.n_batch;

-    const int n_vocab = llama_vocab_n_vocab(vocab);
+    const int n_vocab = llama_vocab_n_tokens(vocab);

     const int max_tasks_per_batch = 32;
     const int max_seq = std::min(4*max_tasks_per_batch, (int) llama_n_seq_max(ctx));
@@ -1704,8 +1704,8 @@ static void kl_divergence(llama_context * ctx, const common_params & params) {
         LOG_ERR("%s: failed reading n_vocab, n_chunk from %s\n", __func__, params.logits_file.c_str());
         return;
     }
-    if (n_vocab != llama_vocab_n_vocab(vocab)) {
-        LOG_ERR("%s: inconsistent vocabulary (%d vs %d)\n", __func__, n_vocab, llama_vocab_n_vocab(vocab));
+    if (n_vocab != llama_vocab_n_tokens(vocab)) {
+        LOG_ERR("%s: inconsistent vocabulary (%d vs %d)\n", __func__, n_vocab, llama_vocab_n_tokens(vocab));
     }

     std::vector<llama_token> tokens(size_t(n_ctx) * n_chunk);

examples/server/server.cpp

Lines changed: 3 additions & 3 deletions
@@ -331,7 +331,7 @@ struct server_task {

         const auto & logit_bias = data.find("logit_bias");
         if (logit_bias != data.end() && logit_bias->is_array()) {
-            const int n_vocab = llama_vocab_n_vocab(vocab);
+            const int n_vocab = llama_vocab_n_tokens(vocab);
             for (const auto & el : *logit_bias) {
                 // TODO: we may want to throw errors here, in case "el" is incorrect
                 if (el.is_array() && el.size() == 2) {
@@ -2081,7 +2081,7 @@ struct server_context {

     void populate_token_probs(const server_slot & slot, completion_token_output & result, bool post_sampling, bool special, int idx) {
         size_t n_probs = slot.params.sampling.n_probs;
-        size_t n_vocab = llama_vocab_n_vocab(vocab);
+        size_t n_vocab = llama_vocab_n_tokens(vocab);
         if (post_sampling) {
             const auto * cur_p = common_sampler_get_candidates(slot.smpl);
             const size_t max_probs = cur_p->size;
@@ -3137,7 +3137,7 @@ struct server_context {
     json model_meta() const {
         return json {
             {"vocab_type", llama_vocab_type (vocab)},
-            {"n_vocab", llama_vocab_n_vocab (vocab)},
+            {"n_vocab", llama_vocab_n_tokens (vocab)},
             {"n_ctx_train", llama_model_n_ctx_train(model)},
             {"n_embd", llama_model_n_embd (model)},
             {"n_params", llama_model_n_params (model)},

examples/server/utils.hpp

Lines changed: 1 addition & 1 deletion
@@ -776,7 +776,7 @@ static std::vector<llama_token_data> get_token_probabilities(llama_context * ctx
     const llama_model * model = llama_get_model(ctx);
     const llama_vocab * vocab = llama_model_get_vocab(model);

-    const int n_vocab = llama_vocab_n_vocab(vocab);
+    const int n_vocab = llama_vocab_n_tokens(vocab);

     cur.resize(n_vocab);
     for (llama_token token_id = 0; token_id < n_vocab; token_id++) {

examples/speculative/speculative.cpp

Lines changed: 4 additions & 4 deletions
@@ -116,16 +116,16 @@ int main(int argc, char ** argv) {
     }

     {
-        const int n_vocab_tgt = llama_vocab_n_vocab(vocab_tgt);
-        const int n_vocab_dft = llama_vocab_n_vocab(vocab_dft);
+        const int n_vocab_tgt = llama_vocab_n_tokens(vocab_tgt);
+        const int n_vocab_dft = llama_vocab_n_tokens(vocab_dft);
         const int vocab_diff = n_vocab_tgt > n_vocab_dft
             ? n_vocab_tgt - n_vocab_dft
             : n_vocab_dft - n_vocab_tgt;

         if (vocab_diff > SPEC_VOCAB_MAX_SIZE_DIFFERENCE) {
             LOG_ERR("%s: draft model vocab must closely match target model to use speculation but ", __func__);
             LOG_ERR("target vocab size %d does not match draft vocab size %d - difference %d, max allowed %d\n",
-                n_vocab_tgt, llama_vocab_n_vocab(vocab_dft), vocab_diff, SPEC_VOCAB_MAX_SIZE_DIFFERENCE);
+                n_vocab_tgt, llama_vocab_n_tokens(vocab_dft), vocab_diff, SPEC_VOCAB_MAX_SIZE_DIFFERENCE);
             return 1;
         }

@@ -173,7 +173,7 @@ int main(int argc, char ** argv) {
     const auto t_enc_end = ggml_time_us();

     // the 2 models should have the same vocab
-    //GGML_ASSERT(n_vocab == llama_vocab_n_vocab(model_dft));
+    //GGML_ASSERT(n_vocab == llama_vocab_n_tokens(model_dft));

     // how many tokens to draft each time
     int n_draft = params.speculative.n_max;

0 commit comments

Comments
 (0)