Skip to content

Commit 82959be

Browse files
committed
server : fill usage info in embeddings response
1 parent 08ea539 commit 82959be

File tree

2 files changed

+12
-5
lines changed

2 files changed

+12
-5
lines changed

examples/server/server.cpp

Lines changed: 6 additions & 2 deletions
@@ -719,14 +719,17 @@ struct server_task_result_embd : server_task_result {
     int index = 0;
     std::vector<float> embedding;
 
+    int32_t n_prompt_tokens;
+
     virtual int get_index() override {
         return index;
     }
 
     virtual json to_json() override {
         return json {
-            {"index",     index},
-            {"embedding", embedding},
+            {"index",            index},
+            {"embedding",        embedding},
+            {"tokens_evaluated", n_prompt_tokens},
         };
     }
 };
@@ -1995,6 +1998,7 @@ struct server_context {
             auto res = std::make_unique<server_task_result_embd>();
             res->id    = slot.id_task;
             res->index = slot.index;
+            res->n_prompt_tokens = slot.n_prompt_tokens;
 
             const int n_embd = llama_n_embd(model);
 
examples/server/utils.hpp

Lines changed: 6 additions & 3 deletions
@@ -560,21 +560,24 @@ static json oaicompat_completion_params_parse(
 
 static json format_embeddings_response_oaicompat(const json & request, const json & embeddings) {
     json data = json::array();
+    int32_t n_prompt_tokens = 0;
     int i = 0;
     for (const auto & elem : embeddings) {
         data.push_back(json{
             {"embedding", json_value(elem, "embedding", json::array())},
             {"index",     i++},
             {"object",    "embedding"}
         });
+
+        n_prompt_tokens += json_value(elem, "tokens_evaluated", 0);
     }
 
     json res = json {
         {"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
         {"object", "list"},
-        {"usage", json { // TODO: fill
-            {"prompt_tokens", 0},
-            {"total_tokens", 0}
+        {"usage", json {
+            {"prompt_tokens", n_prompt_tokens},
+            {"total_tokens", n_prompt_tokens}
         }},
         {"data", data}
     };

0 commit comments

Comments (0)