@@ -1593,7 +1593,7 @@ void sample_grammar(FileFormat file_format, int32_t n_vocab, llama_token_data_ar
15931593 for (const auto & reject : rejects) {
15941594 candidates->data [reject.index ].logit = -INFINITY;
15951595 }
1596-
1596+
15971597 auto first = candidates->data ;
15981598 auto last = first + candidates->size ;
15991599 last = std::remove_if (first, last,
@@ -1650,25 +1650,19 @@ const std::vector<samplers> & sampler_order, llama_grammar * grammar, float dyna
16501650
16511651 // dry always first as logits cannot be resorted
16521652 sample_dry (n_ctx, dry_penalty_last_n, dry_multiplier, dry_base, dry_allowed_length, dry_sequence_breakers, &candidates_p);
1653-
1653+
16541654 // prefilter to top 3k tokens for improved speed
16551655 bool use_grammar = grammar != nullptr ;
16561656 size_t n_pre_cull = candidates_p.size ;
1657-
1657+
16581658 sample_top_k (&candidates_p, 3000 );
1659-
1659+
16601660 if (use_grammar) {
1661-
1662- (debugmode == 1 && printf (" \n Grammar sampling %zu candidates.\n " , candidates_p.size ));
16631661 sample_grammar (file_format, n_vocab, &candidates_p, grammar);
1664- (debugmode == 1 && printf (" \n Grammar returned %zu candidates.\n " , candidates_p.size ));
1665-
16661662 // if top_k 3000 doesn't contain a valid candidate for this grammar, try again pre-cull
16671663 if (candidates_p.size <= 0 ) {
16681664 candidates_p.size = n_pre_cull;
1669- (debugmode == 1 && printf (" \n Re-sampling grammar with %zu pre-cull tokens.\n " , candidates_p.size ));
16701665 sample_grammar (file_format, n_vocab, &candidates_p, grammar);
1671- (debugmode == 1 && printf (" \n Grammar returned %zu candidates.\n " , candidates_p.size ));
16721666 sample_top_k (&candidates_p, 3000 );
16731667 }
16741668 }
@@ -3960,7 +3954,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
39603954 }
39613955
39623956 if (grammar != nullptr ) {
3963- (debugmode == 1 && printf (" \n Grammar attempting to accept token...\n " ));
39643957 grammar_accept_token (file_format, n_vocab, grammar, id);
39653958 }
39663959
0 commit comments