Skip to content

Commit 737f1a4

Browse files
committed
server: Added support for n_predict values of -2
1 parent f08f4b3 commit 737f1a4

File tree

1 file changed

+19
-7
lines changed

1 file changed

+19
-7
lines changed

examples/server/server.cpp

Lines changed: 19 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1321,17 +1321,29 @@ struct server_slot {
13211321
&& are_lora_equal(lora, other_slot.lora);
13221322
}
13231323

1324-
bool has_budget(const common_params & global_params) {
1324+
bool has_budget(const common_params & global_params, int32_t slot_n_ctx) {
13251325
if (params.n_predict == -1 && global_params.n_predict == -1) {
13261326
return true; // limitless
13271327
}
13281328

13291329
n_remaining = -1;
1330-
1331-
if (params.n_predict != -1) {
1332-
n_remaining = params.n_predict - n_decoded;
1333-
} else if (global_params.n_predict != -1) {
1334-
n_remaining = global_params.n_predict - n_decoded;
1330+
if (global_params.n_predict == -1) {
1331+
if (params.n_predict == -2)
1332+
n_remaining = slot_n_ctx - n_decoded;
1333+
else
1334+
n_remaining = params.n_predict - n_decoded;
1335+
} else if (global_params.n_predict == -2) {
1336+
if (params.n_predict == -1 || params.n_predict == -2)
1337+
n_remaining = slot_n_ctx - n_decoded;
1338+
else
1339+
n_remaining = std::min(params.n_predict - n_decoded, slot_n_ctx - n_decoded);
1340+
} else {
1341+
if (params.n_predict == -1)
1342+
n_remaining = global_params.n_predict - n_decoded;
1343+
else if (params.n_predict == -2)
1344+
n_remaining = std::min(global_params.n_predict - n_decoded, slot_n_ctx - n_decoded);
1345+
else
1346+
n_remaining = std::min(params.n_predict - n_decoded, global_params.n_predict - n_decoded);
13351347
}
13361348

13371349
return n_remaining > 0; // no budget
@@ -2153,7 +2165,7 @@ struct server_context {
21532165
}
21542166

21552167
// check the limits
2156-
if (slot.n_decoded > 0 && slot.has_next_token && !slot.has_budget(params_base)) {
2168+
if (slot.n_decoded > 0 && slot.has_next_token && !slot.has_budget(params_base, slot.n_ctx)) {
21572169
slot.stop = STOP_TYPE_LIMIT;
21582170
slot.has_next_token = false;
21592171

0 commit comments

Comments
 (0)