llguidance sampler: use public llama_model APIs instead of the internal llama_vocab

mmoskal · mmoskal · commit 7d2b8187fbab · 2024-11-07T10:17:19.000-08:00
diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp
@@ -2347,7 +2347,7 @@ void llama_perf_sampler_reset(struct llama_sampler * chain) {
 #include "llguidance.h"
 
 struct llama_sampler_llg {
-    const struct llama_vocab * vocab;
+    const struct llama_model * model;
     std::string grammar_kind;
     std::string grammar_data;
     LlgConstraint *grammar;
@@ -2364,6 +2364,7 @@ static LlgConstraint *llama_sampler_llg_new(const char * grammar_kind, const cha
         llg_free_constraint(c);
         return nullptr;
     }
+    return c;
 }
 
 static const char * llama_sampler_llg_name(const struct llama_sampler * /*smpl*/) {
@@ -2394,7 +2395,7 @@ static void llama_sampler_llg_apply(struct llama_sampler * smpl, llama_token_dat
         if (ctx->has_llg_res) {
             if (ctx->llg_res.is_stop) {
                 for (size_t i = 0; i < cur_p->size; ++i) {
-                    if (!llama_token_is_eog_impl(*ctx->vocab, cur_p->data[i].id)) {
+                    if (!llama_token_is_eog(ctx->model, cur_p->data[i].id)) {
                         cur_p->data[i].logit = -INFINITY;
                     }
                 }
@@ -2426,7 +2427,7 @@ static void llama_sampler_llg_reset(struct llama_sampler * smpl) {
 static struct llama_sampler * llama_sampler_llg_clone(const struct llama_sampler * smpl) {
     const auto * ctx = (const llama_sampler_llg *) smpl->ctx;
 
-    auto * result = llama_sampler_init_llg_impl(*ctx->vocab, nullptr, nullptr);
+    auto * result = llama_sampler_init_llg(ctx->model, nullptr, nullptr);
 
     // copy the state
     {
@@ -2461,15 +2462,13 @@ static struct llama_sampler_i llama_sampler_llg_i = {
     /* .free   = */ llama_sampler_llg_free,
 };
 
-struct llama_sampler * llama_sampler_init_llg_impl(const struct llama_vocab & vocab, const char * grammar_kind, const char * grammar_data) {
+struct llama_sampler * llama_sampler_init_llg(const struct llama_model * model, 
+        const char * grammar_kind, const char * grammar_data) {
     auto * ctx = new llama_sampler_llg;
 
     if (grammar_kind != nullptr && grammar_kind[0] != '\0') {
-        auto d = vocab.id_to_token[94776].text;
-        LLAMA_LOG_INFO("llg: %s %d\n", d.c_str(), d.size());
-
         *ctx = {
-            /* .vocab        = */ &vocab,
+            /* .model        = */ model,
             /* .grammar_kind = */ grammar_kind,
             /* .grammar_data = */ grammar_data,
             /* .grammar      = */ llama_sampler_llg_new(grammar_kind, grammar_data),
@@ -2478,7 +2477,7 @@ struct llama_sampler * llama_sampler_init_llg_impl(const struct llama_vocab & vo
         };
     } else {
         *ctx = {
-            /* .vocab        = */ &vocab,
+            /* .model        = */ model,
             /* .grammar_kind = */ {},
             /* .grammar_data = */ {},
             /* .grammar      = */ nullptr,
diff --git a/src/llama-sampling.h b/src/llama-sampling.h
@@ -25,10 +25,6 @@ struct llama_sampler * llama_sampler_init_grammar_impl(
         const struct llama_vocab & vocab,
                       const char * grammar_str,
                       const char * grammar_root);
-struct llama_sampler * llama_sampler_init_llg_impl(
-        const struct llama_vocab & vocab, 
-                      const char * grammar_type, 
-                      const char * grammar_data);
 
 
 struct llama_sampler * llama_sampler_init_infill_impl(
diff --git a/src/llama.cpp b/src/llama.cpp
@@ -21866,12 +21866,6 @@ struct llama_sampler * llama_sampler_init_grammar(const struct llama_model * mod
     return llama_sampler_init_grammar_impl(model->vocab, grammar_str, grammar_root);
 }
 
-#ifdef GGML_LLGUIDANCE
-struct llama_sampler * llama_sampler_init_llg(const struct llama_model * model, const char * grammar_type, const char * grammar_data) {
-    return llama_sampler_init_llg_impl(model->vocab, grammar_type, grammar_data);
-}
-#endif
-
 struct llama_sampler * llama_sampler_init_infill(const struct llama_model * model) {
     return llama_sampler_init_infill_impl(model->vocab);
 }

| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | | `@@ -21866,12 +21866,6 @@ struct llama_sampler * llama_sampler_init_grammar(const struct llama_model * mod` |
| `21866` | `21866` | `    return llama_sampler_init_grammar_impl(model->vocab, grammar_str, grammar_root);` |
| `21867` | `21867` | `}` |
| `21868` | `21868` | |
| `21869` | | `-#ifdef GGML_LLGUIDANCE` |
| `21870` | | `-struct llama_sampler * llama_sampler_init_llg(const struct llama_model * model, const char * grammar_type, const char * grammar_data) {` |
| `21871` | | `-    return llama_sampler_init_llg_impl(model->vocab, grammar_type, grammar_data);` |
| `21872` | | `-}` |
| `21873` | | `-#endif` |
| `21874` | | `-` |
| `21875` | `21869` | `struct llama_sampler * llama_sampler_init_infill(const struct llama_model * model) {` |
| `21876` | `21870` | `    return llama_sampler_init_infill_impl(model->vocab);` |
| `21877` | `21871` | `}` |