Skip to content

Commit 7a54db2

Browse files
ggerganov authored and mglambda committed
server : infill gen ends on new line (#12254)
1 parent dc8d209 commit 7a54db2

File tree

1 file changed

+9
-9
lines changed

1 file changed

+9
-9
lines changed

examples/server/server.cpp

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1317,7 +1317,7 @@ struct server_slot {
13171317
return task_type == SERVER_TASK_TYPE_EMBEDDING || task_type == SERVER_TASK_TYPE_RERANK;
13181318
}
13191319

1320-
bool can_batch_with(server_slot & other_slot) {
1320+
bool can_batch_with(server_slot & other_slot) const {
13211321
return is_non_causal() == other_slot.is_non_causal()
13221322
&& are_lora_equal(lora, other_slot.lora);
13231323
}
@@ -2162,14 +2162,6 @@ struct server_context {
21622162
}
21632163

21642164
if (slot.has_new_line) {
2165-
// if we have already seen a new line, we stop after a certain time limit
2166-
if (slot.params.t_max_predict_ms > 0 && (ggml_time_us() - slot.t_start_generation > 1000.0f*slot.params.t_max_predict_ms)) {
2167-
slot.stop = STOP_TYPE_LIMIT;
2168-
slot.has_next_token = false;
2169-
2170-
SLT_DBG(slot, "stopped by time limit, n_decoded = %d, t_max_predict_ms = %d ms\n", slot.n_decoded, (int) slot.params.t_max_predict_ms);
2171-
}
2172-
21732165
// require that each new line has a whitespace prefix (i.e. indentation) of at least slot.params.n_indent
21742166
if (slot.params.n_indent > 0) {
21752167
// check the current indentation
@@ -2208,6 +2200,14 @@ struct server_context {
22082200
// check if there is a new line in the generated text
22092201
if (result.text_to_send.find('\n') != std::string::npos) {
22102202
slot.has_new_line = true;
2203+
2204+
// if we have seen a new line, we stop after a certain time limit, but only upon another new line
2205+
if (slot.params.t_max_predict_ms > 0 && (ggml_time_us() - slot.t_start_generation > 1000.0f*slot.params.t_max_predict_ms)) {
2206+
slot.stop = STOP_TYPE_LIMIT;
2207+
slot.has_next_token = false;
2208+
2209+
SLT_DBG(slot, "stopped by time limit, n_decoded = %d, t_max_predict_ms = %d ms\n", slot.n_decoded, (int) slot.params.t_max_predict_ms);
2210+
}
22112211
}
22122212

22132213
// if context shift is disabled, we stop when it reaches the context limit

0 commit comments

Comments
 (0)