@@ -127,7 +127,6 @@ struct slot_params {
127127 std::vector<std::string> response_fields;
128128 bool timings_per_token = false ;
129129 bool post_sampling_probs = false ;
130- bool ignore_eos = false ;
131130
132131 struct common_params_sampling sampling;
133132 struct common_params_speculative speculative;
@@ -441,7 +440,6 @@ struct server_task {
441440
442441 {
443442 params.sampling .logit_bias .clear ();
444- params.ignore_eos = json_value (data, " ignore_eos" , false );
445443
446444 const auto & logit_bias = data.find (" logit_bias" );
447445 if (logit_bias != data.end () && logit_bias->is_array ()) {
@@ -472,6 +470,16 @@ struct server_task {
472470 }
473471 }
474472 }
473+
474+ params.sampling .ignore_eos = json_value (data, " ignore_eos" , params_base.sampling .ignore_eos );
475+ if (params.sampling .ignore_eos ) {
476+ for (llama_token i = 0 ; i < llama_vocab_n_tokens (vocab); i++) {
477+ if (llama_vocab_is_eog (vocab, i)) {
478+ // SRV_DBG("%s: added %s logit bias = %f\n", __func__, common_token_to_piece(ctx, i).c_str(), -INFINITY);
479+ params.sampling .logit_bias .push_back ({i, -INFINITY});
480+ }
481+ }
482+ }
475483 }
476484
477485 {
@@ -2217,10 +2225,6 @@ struct server_context {
22172225 slot.params .n_predict = slot.n_predict ;
22182226 }
22192227
2220- if (slot.params .ignore_eos && has_eos_token) {
2221- slot.params .sampling .logit_bias .push_back ({llama_vocab_eos (vocab), -INFINITY});
2222- }
2223-
22242228 {
22252229 if (slot.smpl != nullptr ) {
22262230 common_sampler_free (slot.smpl );
0 commit comments