Skip to content

Commit 1441676

Browse files
Update server-mmojo.cpp
Signed-off-by: Brad Hutchings <[email protected]>
1 parent 038cb42 commit 1441676

File tree

1 file changed

+10
-12
lines changed

1 file changed

+10
-12
lines changed

tools/server/server-mmojo.cpp

Lines changed: 10 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -133,7 +133,6 @@ struct slot_params {
133133
std::vector<std::string> response_fields;
134134
bool timings_per_token = false;
135135
bool post_sampling_probs = false;
136-
bool ignore_eos = false;
137136

138137
struct common_params_sampling sampling;
139138
struct common_params_speculative speculative;
@@ -447,7 +446,6 @@ struct server_task {
447446

448447
{
449448
params.sampling.logit_bias.clear();
450-
params.ignore_eos = json_value(data, "ignore_eos", false);
451449

452450
const auto & logit_bias = data.find("logit_bias");
453451
if (logit_bias != data.end() && logit_bias->is_array()) {
@@ -478,6 +476,13 @@ struct server_task {
478476
}
479477
}
480478
}
479+
480+
params.sampling.ignore_eos = json_value(data, "ignore_eos", params_base.sampling.ignore_eos);
481+
if (params.sampling.ignore_eos) {
482+
params.sampling.logit_bias.insert(
483+
params.sampling.logit_bias.end(),
484+
defaults.sampling.logit_bias_eog.begin(), defaults.sampling.logit_bias_eog.end());
485+
}
481486
}
482487

483488
{
@@ -1906,7 +1911,6 @@ struct server_context {
19061911

19071912
bool clean_kv_cache = true;
19081913
bool add_bos_token = true;
1909-
bool has_eos_token = false;
19101914

19111915
int32_t n_ctx; // total context for all clients / slots
19121916

@@ -1965,7 +1969,6 @@ struct server_context {
19651969
n_ctx = llama_n_ctx(ctx);
19661970

19671971
add_bos_token = llama_vocab_get_add_bos(vocab);
1968-
has_eos_token = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL;
19691972

19701973
if (!params_base.speculative.model.path.empty() || !params_base.speculative.model.hf_repo.empty()) {
19711974
SRV_INF("loading draft model '%s'\n", params_base.speculative.model.path.c_str());
@@ -2225,10 +2228,6 @@ struct server_context {
22252228
slot.params.n_predict = slot.n_predict;
22262229
}
22272230

2228-
if (slot.params.ignore_eos && has_eos_token) {
2229-
slot.params.sampling.logit_bias.push_back({llama_vocab_eos(vocab), -INFINITY});
2230-
}
2231-
22322231
{
22332232
if (slot.smpl != nullptr) {
22342233
common_sampler_free(slot.smpl);
@@ -3894,12 +3893,11 @@ int main(int argc, char ** argv) {
38943893

38953894
return false;
38963895
};
3897-
3896+
38983897
auto middleware_server_state = [&res_error, &state](const httplib::Request & req, httplib::Response & res) {
38993898
server_state current_state = state.load();
39003899
if (current_state == SERVER_STATE_LOADING_MODEL) {
39013900
auto tmp = string_split<std::string>(req.path, '.');
3902-
39033901
if (req.path == "/" || tmp.back() == "html") {
39043902
// mmojo-server START
39053903
// res.set_content(reinterpret_cast<const char*>(loading_html), loading_html_len, "text/html; charset=utf-8");
@@ -3912,7 +3910,6 @@ int main(int argc, char ** argv) {
39123910
} else {
39133911
res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));
39143912
}
3915-
39163913
return false;
39173914
}
39183915
return true;
@@ -4612,9 +4609,10 @@ int main(int argc, char ** argv) {
46124609
json tokens_response = json::array();
46134610
if (body.count("content") != 0) {
46144611
const bool add_special = json_value(body, "add_special", false);
4612+
const bool parse_special = json_value(body, "parse_special", true);
46154613
const bool with_pieces = json_value(body, "with_pieces", false);
46164614

4617-
llama_tokens tokens = tokenize_mixed(ctx_server.vocab, body.at("content"), add_special, true);
4615+
llama_tokens tokens = tokenize_mixed(ctx_server.vocab, body.at("content"), add_special, parse_special);
46184616

46194617
if (with_pieces) {
46204618
for (const auto& token : tokens) {

0 commit comments

Comments (0)