Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 0ab073a

Browse files
committed
account for tokens properly with non streaming calls
1 parent 3f93c9a commit 0ab073a

File tree

2 files changed

+27
-6
lines changed

2 files changed

+27
-6
lines changed

lib/completions/endpoints/vllm.rb

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,15 @@ def xml_tools_enabled?
6060
true
6161
end
6262

63+
# Copies the token usage captured while decoding the API response onto
# the audit-log record. Fields with no captured value are left untouched.
def final_log_update(log)
  if @prompt_tokens
    log.request_tokens = @prompt_tokens
  end
  if @completion_tokens
    log.response_tokens = @completion_tokens
  end
end
67+
6368
# Decodes a non-streaming completion response body.
#
# Stashes the usage counters reported by the API (when present) into
# instance variables for final_log_update, then returns the first
# choice's message content wrapped in a one-element array.
def decode(response_raw)
  parsed = JSON.parse(response_raw, symbolize_names: true)

  # dig returns nil safely when the payload carries no usage section.
  @prompt_tokens = parsed.dig(:usage, :prompt_tokens)
  @completion_tokens = parsed.dig(:usage, :completion_tokens)

  content = parsed.dig(:choices, 0, :message, :content)
  [content]
end
6774

spec/lib/completions/endpoints/vllm_spec.rb

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -146,14 +146,28 @@ def stub_streamed_response(prompt, deltas, tool_call: false)
146146
end
147147
end
148148

149+
it "correctly accounts for tokens in non streaming mode" do
  # Full (non-streamed) chat.completion payload; the usage section is what
  # the endpoint must copy onto the audit log.
  response_body = (<<~TEXT).strip
    {"id":"chat-c580e4a9ebaa44a0becc802ed5dc213a","object":"chat.completion","created":1731294404,"model":"meta-llama/Meta-Llama-3.1-70B-Instruct","choices":[{"index":0,"message":{"role":"assistant","content":"Random Number Generator Produces Smallest Possible Result","tool_calls":[]},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":146,"total_tokens":156,"completion_tokens":10},"prompt_logprobs":null}
  TEXT

  stub_request(:post, "https://test.dev/v1/chat/completions").to_return(
    status: 200,
    body: response_body,
  )

  result = llm.generate("generate a title", user: Discourse.system_user)

  expect(result).to eq("Random Number Generator Produces Smallest Possible Result")

  # Counts must match the usage block of the stubbed payload above.
  log = AiApiAuditLog.order(:id).last
  expect(log.request_tokens).to eq(146)
  expect(log.response_tokens).to eq(10)
end
168+
149169
describe "#perform_completion!" do
150170
context "when using regular mode" do
151-
context "with simple prompts" do
152-
it "completes a trivial prompt and logs the response" do
153-
compliance.regular_mode_simple_prompt(vllm_mock)
154-
end
155-
end
156-
157171
context "with tools" do
158172
it "returns a function invocation" do
159173
compliance.regular_mode_tools(vllm_mock)

0 commit comments

Comments (0)