Skip to content

Commit 9d474f2

Browse files
committed
use 'llg:' grammar prefix as marker
1 parent 1f65f7a commit 9d474f2

File tree

3 files changed

+30
-2
lines changed

3 files changed

+30
-2
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,3 +133,4 @@ poetry.toml
133133

134134
# Test models for lora adapters
135135
/lora-tests
136+
include/llguidance.h

common/sampling.cpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,9 +146,26 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
146146

147147
lparams.no_perf = params.no_perf;
148148

149+
struct llama_sampler * grmr;
150+
if (params.grammar.compare(0, 4, "llg:") == 0) {
151+
#ifdef GGML_LLGUIDANCE
152+
auto gp = params.grammar.find(':', 4);
153+
if (gp == std::string::npos) {
154+
GGML_ABORT("invalid serialized grammar");
155+
}
156+
auto grm_type = params.grammar.substr(4, gp - 4);
157+
auto grm_data = params.grammar.c_str() + gp + 1;
158+
grmr = llama_sampler_init_llg(model, grm_type.c_str(), grm_data);
159+
#else
160+
GGML_ABORT("llguidance (GGML_LLGUIDANCE) is not enabled");
161+
#endif
162+
} else {
163+
grmr = llama_sampler_init_grammar(model, params.grammar.c_str(), "root");
164+
}
165+
149166
auto * result = new common_sampler {
150167
/* .params = */ params,
151-
/* .grmr = */ llama_sampler_init_grammar(model, params.grammar.c_str(), "root"),
168+
/* .grmr = */ grmr,
152169
/* .chain = */ llama_sampler_chain_init(lparams),
153170
/* .prev = */ ring_buffer<llama_token>(std::max(32, params.n_prev)),
154171
/* .cur = */ {},

src/llama-sampling.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2358,7 +2358,12 @@ struct llama_sampler_llg {
23582358
// Build a new llguidance constraint from a serialized grammar.
//
// @param grammar_kind  grammar format identifier (e.g. "regex", "json", "lark")
// @param grammar_data  serialized grammar payload in that format
// @return a valid LlgConstraint on success, or nullptr if the grammar failed
//         to parse (the error is logged and the partial constraint is freed,
//         so callers only ever see either a usable constraint or nullptr).
static LlgConstraint * llama_sampler_llg_new(const char * grammar_kind, const char * grammar_data) {
    LlgConstraintInit cinit;
    llg_constraint_init_set_defaults(&cinit, nullptr);
    auto c = llg_new_constraint_any(&cinit, grammar_kind, grammar_data);
    // llg_new_constraint_any reports parse failures via llg_get_error on the
    // returned object rather than by returning nullptr, so check explicitly.
    if (llg_get_error(c)) {
        LLAMA_LOG_ERROR("llg error: %s\n", llg_get_error(c));
        llg_free_constraint(c);
        return nullptr;
    }
    // FIX: the original body had no return on the success path — control
    // flowed off the end of a non-void function (undefined behavior).
    return c;
}
23632368

23642369
static const char * llama_sampler_llg_name(const struct llama_sampler * /*smpl*/) {
@@ -2382,6 +2387,8 @@ static void llama_sampler_llg_apply(struct llama_sampler * smpl, llama_token_dat
23822387
ctx->has_llg_res = true;
23832388
} else {
23842389
LLAMA_LOG_ERROR("llg error: %s\n", llg_get_error(ctx->grammar));
2390+
llg_free_constraint(ctx->grammar);
2391+
ctx->grammar = nullptr;
23852392
}
23862393
}
23872394
if (ctx->has_llg_res) {
@@ -2458,6 +2465,9 @@ struct llama_sampler * llama_sampler_init_llg_impl(const struct llama_vocab & vo
24582465
auto * ctx = new llama_sampler_llg;
24592466

24602467
if (grammar_kind != nullptr && grammar_kind[0] != '\0') {
2468+
auto d = vocab.id_to_token[94776].text;
2469+
LLAMA_LOG_INFO("llg: %s %d\n", d.c_str(), d.size());
2470+
24612471
*ctx = {
24622472
/* .vocab = */ &vocab,
24632473
/* .grammar_kind = */ grammar_kind,

0 commit comments

Comments
 (0)