@@ -473,12 +473,9 @@ struct server_task {
473473
474474 params.sampling .ignore_eos = json_value (data, " ignore_eos" , params_base.sampling .ignore_eos );
475475 if (params.sampling .ignore_eos ) {
476- for (llama_token i = 0 ; i < llama_vocab_n_tokens (vocab); i++) {
477- if (llama_vocab_is_eog (vocab, i)) {
478- // SRV_DBG("%s: added %s logit bias = %f\n", __func__, common_token_to_piece(ctx, i).c_str(), -INFINITY);
479- params.sampling .logit_bias .push_back ({i, -INFINITY});
480- }
481- }
476+ params.sampling .logit_bias .insert (
477+ params.sampling .logit_bias .end (),
478+ defaults.sampling .logit_bias_eog .begin (), defaults.sampling .logit_bias_eog .end ());
482479 }
483480 }
484481
@@ -1906,7 +1903,6 @@ struct server_context {
19061903
19071904 bool clean_kv_cache = true ;
19081905 bool add_bos_token = true ;
1909- bool has_eos_token = false ;
19101906
19111907 int32_t n_ctx; // total context for all clients / slots
19121908
@@ -1965,7 +1961,6 @@ struct server_context {
19651961 n_ctx = llama_n_ctx (ctx);
19661962
19671963 add_bos_token = llama_vocab_get_add_bos (vocab);
1968- has_eos_token = llama_vocab_eos (vocab) != LLAMA_TOKEN_NULL;
19691964
19701965 if (!params_base.speculative .model .path .empty () || !params_base.speculative .model .hf_repo .empty ()) {
19711966 SRV_INF (" loading draft model '%s'\n " , params_base.speculative .model .path .c_str ());
0 commit comments