Commit 9d6f9df (1 parent: be9a25f)

llama : vocab

ggml-ci

6 files changed, +355 -359 lines
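Judging from the hunks below, the commit is a mechanical API migration: free vocab helpers that took the vocabulary as an explicit argument become llama_vocab member functions. The call-site shape, taken from the first hunk:

    // before: free helper, vocab passed explicitly
    if (llama_token_is_eog_impl(*grammar.vocab, id)) { /* ... */ }

    // after: member function on llama_vocab
    if (grammar.vocab->token_is_eog(id)) { /* ... */ }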

src/llama-grammar.cpp

Lines changed: 2 additions & 2 deletions
@@ -1094,7 +1094,7 @@ void llama_grammar_apply_impl(const struct llama_grammar & grammar, llama_token_
         const llama_token id = cur_p->data[i].id;
         const std::string & piece = grammar.vocab->cache_token_to_piece.at(id);
 
-        if (llama_token_is_eog_impl(*grammar.vocab, id)) {
+        if (grammar.vocab->token_is_eog(id)) {
             if (!allow_eog) {
                 cur_p->data[i].logit = -INFINITY;
             }
@@ -1115,7 +1115,7 @@ void llama_grammar_apply_impl(const struct llama_grammar & grammar, llama_token_
 void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token) {
     GGML_ASSERT(grammar.vocab != nullptr);
 
-    if (llama_token_is_eog_impl(*grammar.vocab, token)) {
+    if (grammar.vocab->token_is_eog(token)) {
         for (const auto & stack : grammar.stacks) {
             if (stack.empty()) {
                 return;
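The new call sites imply a llama_vocab member replacing llama_token_is_eog_impl. A minimal sketch of what such a member could look like, assuming the vocab's usual EOG special-token ids; the commit's actual implementation may differ:

    // hedged sketch, not necessarily the commit's implementation:
    // a token ends generation if it matches any EOG special id
    bool llama_vocab::token_is_eog(llama_token id) const {
        return id != LLAMA_TOKEN_NULL && (
            id == special_eos_id ||
            id == special_eot_id ||
            id == special_eom_id
        );
    }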

src/llama-model.cpp

Lines changed: 4 additions & 4 deletions
@@ -1586,19 +1586,19 @@ void llama_model::load_vocab(llama_model_loader & ml) {
     // determine the newline token: LLaMA "<0x0A>" == 10 == '\n', Falcon 193 == '\n'
     if (vocab.type == LLAMA_VOCAB_TYPE_SPM) {
         try {
-            vocab.linefeed_id = llama_byte_to_token_impl(vocab, '\n');
+            vocab.linefeed_id = vocab.byte_to_token('\n');
         } catch (const std::exception & e) {
             LLAMA_LOG_WARN("%s: SPM vocabulary, but newline token not found: %s! Using special_pad_id instead.", __func__, e.what());
             vocab.linefeed_id = vocab.special_pad_id;
         }
     } else if (vocab.type == LLAMA_VOCAB_TYPE_WPM) {
         vocab.linefeed_id = vocab.special_pad_id;
     } else if (vocab.type == LLAMA_VOCAB_TYPE_RWKV) {
-        const std::vector<int> ids = llama_tokenize_internal(vocab, "\n", false);
+        const std::vector<int> ids = vocab.tokenize("\n", false);
         GGML_ASSERT(!ids.empty() && "model vocab missing newline token");
         vocab.linefeed_id = ids[0];
     } else {
-        const std::vector<int> ids = llama_tokenize_internal(vocab, "\xC4\x8A", false); // U+010A
+        const std::vector<int> ids = vocab.tokenize("\xC4\x8A", false); // U+010A
 
         //GGML_ASSERT(!ids.empty() && "model vocab missing newline token");
         if (ids.empty()) {
@@ -4190,7 +4190,7 @@ void llama_model::print_info() const {
 
     // hparams
     LLAMA_LOG_INFO("%s: arch = %s\n", __func__, arch_name().c_str());
-    LLAMA_LOG_INFO("%s: vocab type = %s\n", __func__, llama_model_vocab_type_name(vocab.type));
+    LLAMA_LOG_INFO("%s: vocab type = %s\n", __func__, vocab.type_name().c_str());
     LLAMA_LOG_INFO("%s: n_vocab = %u\n", __func__, hparams.n_vocab);
     LLAMA_LOG_INFO("%s: n_merges = %u\n", __func__, (int) vocab.bpe_ranks.size());
     LLAMA_LOG_INFO("%s: vocab_only = %d\n", __func__, hparams.vocab_only);

src/llama-sampling.cpp

Lines changed: 11 additions & 11 deletions
@@ -1664,7 +1664,7 @@ struct llama_sampler_dry {
 // Ported from Koboldcpp, original PR: https://github.com/LostRuins/koboldcpp/pull/982 (Original author: pi6am)
 static void get_overlapping_token_sequences(const llama_vocab & vocab, const std::string& str, std::unordered_multimap<llama_token, std::vector<llama_token>>& token_sequences, int max_tail_len = -1) {
     for (llama_token token_id = 0; token_id < (llama_token)vocab.n_vocab; token_id++) {
-        std::string word = llama_detokenize(vocab, {token_id}, true);
+        std::string word = vocab.detokenize({token_id}, true);
         if (word.find(str) != std::string::npos) {
             token_sequences.emplace(token_id, std::vector<llama_token>());
         } else {
@@ -1681,7 +1681,7 @@ static void get_overlapping_token_sequences(const llama_vocab & vocab, const std
             }
         }
         if (match) {
-            std::vector<llama_token> tokenization = llama_tokenize_internal(vocab, str.substr(i), false, false);
+            std::vector<llama_token> tokenization = vocab.tokenize(str.substr(i), false, false);
             if (max_tail_len >= 0 && tokenization.size() > (size_t)max_tail_len) {
                 tokenization.resize(max_tail_len);
             }
@@ -2153,7 +2153,7 @@ static void llama_sampler_infill_apply(struct llama_sampler * smpl, llama_token_
     float p_eog_sum = 0.0f;
 
     for (size_t i = 0; i < cur_p->size; ++i) {
-        if (llama_token_is_eog_impl(*ctx->vocab, cur_p->data[i].id)) {
+        if (ctx->vocab->token_is_eog(cur_p->data[i].id)) {
             p_eog_sum += cur_p->data[i].p;
         } else {
             p_txt_sum += cur_p->data[i].p;
@@ -2175,7 +2175,7 @@ static void llama_sampler_infill_apply(struct llama_sampler * smpl, llama_token_
     float p_sum = 0.0f;
 
     for (size_t i = 0; i < size_org; ++i) {
-        if (llama_token_is_eog_impl(*ctx->vocab, cur_p->data[i].id)) {
+        if (ctx->vocab->token_is_eog(cur_p->data[i].id)) {
             p_sum += cur_p->data[i].p;
 
             cur_p->data[cur_p->size++] = cur_p->data[i];
@@ -2203,17 +2203,17 @@ static void llama_sampler_infill_apply(struct llama_sampler * smpl, llama_token_
                 continue;
             }
 
-            int len0 = llama_token_to_piece_impl(*ctx->vocab, cur_p->data[i0].id, ctx->buf0.data(), ctx->buf0.size(), 0, false);
+            int len0 = ctx->vocab->token_to_piece(cur_p->data[i0].id, ctx->buf0.data(), ctx->buf0.size(), 0, false);
             if (len0 < 0) {
                 ctx->buf0.resize(len0);
-                len0 = llama_token_to_piece_impl(*ctx->vocab, cur_p->data[i0].id, ctx->buf0.data(), ctx->buf0.size(), 0, false);
+                len0 = ctx->vocab->token_to_piece(cur_p->data[i0].id, ctx->buf0.data(), ctx->buf0.size(), 0, false);
                 assert(len0 > 0);
             }
 
-            int len1 = llama_token_to_piece_impl(*ctx->vocab, cur_p->data[i1].id, ctx->buf1.data(), ctx->buf1.size(), 0, false);
+            int len1 = ctx->vocab->token_to_piece(cur_p->data[i1].id, ctx->buf1.data(), ctx->buf1.size(), 0, false);
             if (len1 < 0) {
                 ctx->buf1.resize(len1);
-                len1 = llama_token_to_piece_impl(*ctx->vocab, cur_p->data[i1].id, ctx->buf1.data(), ctx->buf1.size(), 0, false);
+                len1 = ctx->vocab->token_to_piece(cur_p->data[i1].id, ctx->buf1.data(), ctx->buf1.size(), 0, false);
                 assert(len1 > 0);
             }
 
@@ -2248,7 +2248,7 @@ static void llama_sampler_infill_apply(struct llama_sampler * smpl, llama_token_
     LOG_DBG_CUR("%s: n_combined = %zu, applying thold = %.3f\n", __func__, n_combined, thold);
 
     for (size_t i = 0; i < size_org; ++i) {
-        const bool is_eog = llama_token_is_eog_impl(*ctx->vocab, cur_p->data[i].id);
+        const bool is_eog = ctx->vocab->token_is_eog(cur_p->data[i].id);
 
         if (cur_p->data[i].p < thold && !is_eog) {
             continue;
@@ -2269,7 +2269,7 @@ static void llama_sampler_infill_apply(struct llama_sampler * smpl, llama_token_
     // if no non-EOG tokens are left -> reduce cur_p to single EOT token
     if (n_non_eog == 0) {
         cur_p->size = 1;
-        cur_p->data[0].id = llama_token_eot_impl(*ctx->vocab);
+        cur_p->data[0].id = ctx->vocab->token_eot();
         cur_p->data[0].logit = 1.0f;
 
         return;
@@ -2291,7 +2291,7 @@ static void llama_sampler_infill_apply(struct llama_sampler * smpl, llama_token_
     LOG_DBG_CUR("%s: applying thold = %.3f\n", __func__, thold);
 
     for (size_t i = 0; i < size_org; ++i) {
-        const bool is_eog = llama_token_is_eog_impl(*ctx->vocab, cur_p->data[i].id);
+        const bool is_eog = ctx->vocab->token_is_eog(cur_p->data[i].id);
 
         if (cur_p->data[i].p < thold && !is_eog) {
             continue;
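The len0/len1 blocks above follow a measure-then-fill convention: token_to_piece returns the piece length on success and a negative count when the destination buffer is too small. A hedged sketch of the same pattern against the public C API, where the magnitude of a negative return is the required size (the helper name is illustrative, not part of this commit):

    #include <string>
    #include <vector>
    #include "llama.h"

    // illustrative helper: probe once, grow the buffer if needed, retry
    static std::string piece_of(const llama_model * model, llama_token id) {
        std::vector<char> buf(8);
        int len = llama_token_to_piece(model, id, buf.data(), (int32_t) buf.size(), 0, false);
        if (len < 0) {
            buf.resize(-len); // negative return encodes the required size
            len = llama_token_to_piece(model, id, buf.data(), (int32_t) buf.size(), 0, false);
        }
        return std::string(buf.data(), len);
    }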

0 commit comments