Skip to content

Commit b4bf5df

Browse files
committed
generate tokens until context is filled
1 parent e0614ca commit b4bf5df

File tree

1 file changed

+10
-3
lines changed

1 file changed

+10
-3
lines changed

examples/server/server.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -219,14 +219,15 @@ struct server_slot {
219219
if (params.n_predict == -1 && global_params.n_predict == -1) {
220220
return true; // limitless
221221
}
222+
else if (global_params.n_predict == -2) {
223+
return true; // generate until context is filled
224+
}
222225

223226
n_remaining = -1;
224227

225228
if (params.n_predict != -1) {
226229
n_remaining = params.n_predict - n_decoded;
227-
} else if (global_params.n_predict == -2) {
228-
n_remaining = n_ctx - n_past;
229-
} else if (global_params.n_predict != -1) {
230+
}else if (global_params.n_predict != -1) {
230231
n_remaining = global_params.n_predict - n_decoded;
231232
}
232233

@@ -1814,6 +1815,12 @@ struct server_context {
18141815
continue;
18151816
}
18161817

1818+
if (params.n_predict == -2) {
1819+
slot.release();
1820+
send_final_response(slot);
1821+
continue;
1822+
}
1823+
18171824
// Shift context
18181825
const int n_keep = slot.params.n_keep + add_bos_token;
18191826
const int n_left = slot.n_past - n_keep;

0 commit comments

Comments
 (0)