candidates_p 128

Nexesenex · Nexesenex · commit 30e6b031d537 · 2025-07-25T12:41:39.000+02:00
diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
@@ -1753,15 +1753,15 @@ const std::vector<samplers> & sampler_order, llama_grammar * grammar, float dyna
     bool use_grammar = grammar != nullptr;
     std::vector<llama_token_data> precache = (use_grammar ? std::vector<llama_token_data>(candidates) : std::vector<llama_token_data>(0));
 
-    sample_top_k(&candidates_p, 512);
+    sample_top_k(&candidates_p, 128);
 
     if (use_grammar) {
         sample_grammar(file_format, n_vocab, &candidates_p, grammar);
-        // if top_k 3000 doesn't contain a valid candidate for this grammar, try again pre-cull
+        // if the top_k 128 cull left no valid candidate for this grammar, retry from the pre-cull candidates
         if (candidates_p.size <= 0) {
             candidates_p = { precache.data(), precache.size(), false };
             sample_grammar(file_format, n_vocab, &candidates_p, grammar);
-            sample_top_k(&candidates_p, 3000);
+            sample_top_k(&candidates_p, 128);
         }
     }