Skip to content

Commit 141c5ce

Browse files
committed
server : generate at least 1 line regardless of time limit
1 parent c1444e0 commit 141c5ce

File tree

1 file changed

+12
-1
lines changed

1 file changed

+12
-1
lines changed

examples/server/server.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ struct server_slot {
178178
server_task_cmpl_type cmpl_type = SERVER_TASK_CMPL_TYPE_NORMAL;
179179

180180
bool has_next_token = true;
181+
bool has_new_line = false;
181182
bool truncated = false;
182183
bool stopped_eos = false;
183184
bool stopped_word = false;
@@ -219,6 +220,7 @@ struct server_slot {
219220

220221
n_prompt_tokens = 0;
221222
generated_text = "";
223+
has_new_line = false;
222224
truncated = false;
223225
stopped_eos = false;
224226
stopped_word = false;
@@ -1190,13 +1192,20 @@ struct server_context {
11901192
SLT_DBG(slot, "stopped by limit, n_decoded = %d, n_predict = %d\n", slot.n_decoded, slot.params.n_predict);
11911193
}
11921194

1193-
if (slot.params.t_max_predict_ms > 0 && (ggml_time_us() - slot.t_start_generation > 1000.0f*slot.params.t_max_predict_ms)) {
1195+
// the time limit is only enforced once a new line has been seen, so at least 1 line is generated regardless of the limit
1196+
if (slot.has_new_line && slot.params.t_max_predict_ms > 0 &&
1197+
(ggml_time_us() - slot.t_start_generation > 1000.0f*slot.params.t_max_predict_ms)) {
11941198
slot.stopped_limit = true;
11951199
slot.has_next_token = false;
11961200

11971201
SLT_DBG(slot, "stopped by time limit, n_decoded = %d, t_max_predict_ms = %d ms\n", slot.n_decoded, (int) slot.params.t_max_predict_ms);
11981202
}
11991203

1204+
// check if the text about to be sent (result.text_to_send) contains a new line
1205+
if (result.text_to_send.find('\n') != std::string::npos) {
1206+
slot.has_new_line = true;
1207+
}
1208+
12001209
// if context shift is disabled, we stop when it reaches the context limit
12011210
if (slot.n_decoded >= slot.n_ctx) {
12021211
slot.truncated = true;
@@ -1347,6 +1356,7 @@ struct server_context {
13471356
{"tokens_evaluated", slot.n_prompt_tokens},
13481357
{"generation_settings", get_formated_generation(slot)},
13491358
{"prompt", slot.prompt},
1359+
{"has_new_line", slot.has_new_line},
13501360
{"truncated", slot.truncated},
13511361
{"stopped_eos", slot.stopped_eos},
13521362
{"stopped_word", slot.stopped_word},
@@ -1673,6 +1683,7 @@ struct server_context {
16731683
slot_data["prompt"] = slot.prompt;
16741684
slot_data["next_token"] = {
16751685
{"has_next_token", slot.has_next_token},
1686+
{"has_new_line", slot.has_new_line},
16761687
{"n_remain", slot.n_remaining},
16771688
{"n_decoded", slot.n_decoded},
16781689
{"stopped_eos", slot.stopped_eos},

0 commit comments

Comments
 (0)