Skip to content

Commit 3c8b105

Browse files
committed
handle generation until context is filled
1 parent 63978cb commit 3c8b105

File tree

3 files changed, +37 −3 lines changed

examples/server/server.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ struct server_slot {
     if (params.n_predict != -1) {
         n_remaining = params.n_predict - n_decoded;
     } else if (global_params.n_predict == -2) {
-        n_remaining = n_ctx - n_past;
+        n_remaining = n_ctx - n_past - 1;
     } else if (global_params.n_predict != -1) {
         n_remaining = global_params.n_predict - n_decoded;
     }
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
+@llama.cpp
+@n_predict
+Feature: llama.cpp server
+
+  Background: Server startup
+    Given a server listening on localhost:8080
+    And a model file test-model.gguf
+    And a model alias tinyllama-2
+    And 42 as server seed
+    And 64 KV cache size
+
+  Scenario: Generate N tokens
+    And 12 max tokens to predict
+    Then the server is starting
+    Then the server is healthy
+    Given a prompt:
+    """
+    Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
+    """
+    And a completion request with no api error
+    Then 12 tokens are predicted
+
+  Scenario: Generate tokens until context is full
+    And -2 server max tokens to predict
+    Then the server is starting
+    Then the server is healthy
+    Given a prompt:
+    """
+    Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
+    """
+    And a completion request with no api error
+    Then 11 tokens are predicted

examples/server/tests/features/steps/steps.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,10 @@ def step_n_slots(context, n_slots: int):
 
 @step('{n_predict:d} server max tokens to predict')
 def step_server_n_predict(context, n_predict: int):
-    context.n_server_predict = n_predict if n_predict > 0 else None
-
+    if n_predict > 0 or n_predict in (-1, -2):
+        context.n_server_predict = n_predict
+    else:
+        context.n_server_predict = None
 
 
 @step('{slot_save_path} as slot save path')
 def step_slot_save_path(context, slot_save_path: str):

0 commit comments