
Commit a015911

fill context size
1 parent 16e75f1 commit a015911

File tree: 4 files changed (+8, -12 lines)

.pre-commit-config.yaml

Lines changed: 0 additions & 1 deletion

@@ -13,4 +13,3 @@ repos:
     rev: 7.0.0
     hooks:
     - id: flake8
-      additional_dependencies: [flake8-no-print]

examples/server/server.cpp

Lines changed: 2 additions & 9 deletions

@@ -219,14 +219,12 @@ struct server_slot {
         if (params.n_predict == -1 && global_params.n_predict == -1) {
             return true; // limitless
         }
-        else if (global_params.n_predict == -2) {
-            return true; // generate until context is filled
-        }
-
         n_remaining = -1;

         if (params.n_predict != -1) {
             n_remaining = params.n_predict - n_decoded;
+        } else if (global_params.n_predict == -2) {
+            n_remaining = n_ctx - n_past - 1;
         } else if (global_params.n_predict != -1) {
             n_remaining = global_params.n_predict - n_decoded;
         }

@@ -1815,11 +1813,6 @@ struct server_context {
                     continue;
                 }

-                if (params.n_predict == -2) {
-                    slot.release();
-                    send_final_response(slot);
-                    continue;
-                }

                // Shift context
                const int n_keep = slot.params.n_keep + add_bos_token;
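Taken together, the two hunks move the "fill the context" behaviour out of the update loop and into the budget computation: instead of reporting an unlimited budget and later releasing the slot once the cache is full, a server-wide n_predict of -2 now yields a concrete remaining-token count. Below is a minimal, self-contained C++ sketch of the post-change branch order; the remaining_tokens wrapper, its parameter names, and the numbers in main are illustrative and not llama.cpp code, only the branch logic mirrors the first hunk above.

#include <cstdio>

// Illustrative wrapper around the post-change budget logic shown in the diff.
// slot_n_predict  : per-request n_predict (-1 means "not set")
// global_n_predict: server-wide n_predict (-1 unlimited, -2 fill the context)
static int remaining_tokens(int slot_n_predict, int global_n_predict,
                            int n_decoded, int n_ctx, int n_past) {
    if (slot_n_predict == -1 && global_n_predict == -1) {
        return -1; // limitless: no budget is tracked
    }

    int n_remaining = -1;

    if (slot_n_predict != -1) {
        // a per-request limit takes precedence
        n_remaining = slot_n_predict - n_decoded;
    } else if (global_n_predict == -2) {
        // new behaviour: generate until the KV cache is filled
        n_remaining = n_ctx - n_past - 1;
    } else if (global_n_predict != -1) {
        // otherwise fall back to the server-wide limit
        n_remaining = global_n_predict - n_decoded;
    }

    return n_remaining;
}

int main() {
    // Example values only: 64-cell context, 52 cells already used, server-wide -2.
    printf("%d tokens left\n", remaining_tokens(-1, -2, /*n_decoded=*/0,
                                                /*n_ctx=*/64, /*n_past=*/52)); // prints 11
    return 0;
}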

examples/server/tests/features/n_predict.feature

Lines changed: 2 additions & 1 deletion

@@ -8,6 +8,7 @@ Feature: llama.cpp server
     And a model alias tinyllama-2
     And 42 as server seed
     And 64 KV cache size
+
   Scenario: Generate N tokens
     And 12 max tokens to predict
     Then the server is starting

@@ -18,6 +19,7 @@ Feature: llama.cpp server
     """
     And a completion request with no api error
     Then 12 tokens are predicted
+
   Scenario: Generate tokens until context is full
     And -2 server max tokens to predict
     Then the server is starting

@@ -28,4 +30,3 @@ Feature: llama.cpp server
     """
     And a completion request with no api error
     Then 11 tokens are predicted
-
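A note on the expected count in the "until context is full" scenario (illustrative arithmetic; the prompt length is not part of this diff): with the 64-token KV cache configured above and the server started with n_predict = -2, the new formula gives n_remaining = n_ctx - n_past - 1, so the asserted 11 predicted tokens would correspond to a prompt occupying roughly 52 cache cells when generation starts, since 64 - 52 - 1 = 11.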

examples/server/tests/features/steps/steps.py

Lines changed: 4 additions & 1 deletion

@@ -159,7 +159,10 @@ def step_n_slots(context, n_slots: int):

 @step('{n_predict:d} server max tokens to predict')
 def step_server_n_predict(context, n_predict: int):
-    context.n_server_predict = n_predict if n_predict > 0 or n_predict in (-1, -2) else None
+    if n_predict > 0 or n_predict in (-1, -2):
+        context.n_server_predict = n_predict
+    else:
+        context.n_server_predict = None


 @step('{slot_save_path} as slot save path')
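Either form of the step has the same effect: a positive cap, -1 (unlimited), or -2 (fill the context) is stored on the behave context and presumably forwarded to the server under test when it is started, while any other value (e.g. 0) leaves context.n_server_predict as None.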
