Skip to content

Commit 28d8c91

Browse files
committed
add test
1 parent 1b301db commit 28d8c91

File tree

3 files changed

+22
-5
lines changed

3 files changed

+22
-5
lines changed

examples/server/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,7 @@ node index.js
416416

417417
`samplers`: The order the samplers should be applied in. An array of strings representing sampler type names. If a sampler is not set, it will not be used. If a sampler is specified more than once, it will be applied multiple times. Default: `["dry", "top_k", "typ_p", "top_p", "min_p", "xtc", "temperature"]` - these are all the available values.
418418

419-
`timing_per_token`: Include prompt processing and text generation speed information in each response. Default: `false`
419+
`timings_per_token`: Include prompt processing and text generation speed information in each response. Default: `false`
420420

421421
**Response format**
422422

examples/server/server.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ struct server_slot {
177177
bool stopped_word = false;
178178
bool stopped_limit = false;
179179

180-
bool timing_per_token = false;
180+
bool timings_per_token = false;
181181

182182
bool oaicompat = false;
183183

@@ -884,7 +884,7 @@ struct server_context {
884884
slot.oaicompat_model = "";
885885
}
886886

887-
slot.timing_per_token = json_value(data, "timing_per_token", false);
887+
slot.timings_per_token = json_value(data, "timings_per_token", false);
888888

889889
slot.params.stream = json_value(data, "stream", false);
890890
slot.params.cache_prompt = json_value(data, "cache_prompt", true);
@@ -1283,7 +1283,7 @@ struct server_context {
12831283
{"speculative.n_max", slot.params.speculative.n_max},
12841284
{"speculative.n_min", slot.params.speculative.n_min},
12851285
{"speculative.p_min", slot.params.speculative.p_min},
1286-
{"timing_per_token", slot.timing_per_token},
1286+
{"timings_per_token", slot.timings_per_token},
12871287
};
12881288
}
12891289

@@ -1341,7 +1341,7 @@ struct server_context {
13411341
res.data["model"] = slot.oaicompat_model;
13421342
}
13431343

1344-
if (slot.timing_per_token) {
1344+
if (slot.timings_per_token) {
13451345
res.data["timings"] = slot.get_formated_timings();
13461346
}
13471347

examples/server/tests/unit/test_chat_completion.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,3 +146,20 @@ def test_invalid_chat_completion_req(messages):
146146
})
147147
assert res.status_code == 400 or res.status_code == 500
148148
assert "error" in res.body
149+
150+
151+
def test_chat_completion_with_timings_per_token():
    """Streaming /chat/completions with `timings_per_token` enabled must
    attach a `timings` object to every streamed chunk, and the reported
    predicted-token count must respect `max_tokens`."""
    global server
    server.start()
    payload = {
        "max_tokens": 10,
        "messages": [{"role": "user", "content": "test"}],
        "stream": True,
        "timings_per_token": True,
    }
    stream = server.make_stream_request("POST", "/chat/completions", data=payload)
    for chunk in stream:
        assert "timings" in chunk
        timings = chunk["timings"]
        # Each chunk carries prompt/generation speed plus the running token count.
        for key in ("prompt_per_second", "predicted_per_second", "predicted_n"):
            assert key in timings
        assert timings["predicted_n"] <= 10

0 commit comments

Comments
 (0)