Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 3f93c9a

Browse files
committed
sambanova and vllm
1 parent 4075260 commit 3f93c9a

File tree

6 files changed

+59
-33
lines changed

6 files changed

+59
-33
lines changed

lib/completions/endpoints/base.rb

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -312,10 +312,12 @@ def non_streaming_response(
312312
response_data.each { |partial| partials_raw << partial.to_s }
313313

314314
if xml_tool_processor
315-
processed = (xml_tool_processor << response_data)
316-
processed << xml_tool_processor.finish
317-
response_data = []
318-
processed.flatten.compact.each { |partial| response_data << partial }
315+
response_data.each do |partial|
316+
processed = (xml_tool_processor << partial)
317+
processed << xml_tool_processor.finish
318+
response_data = []
319+
processed.flatten.compact.each { |inner| response_data << inner }
320+
end
319321
end
320322

321323
if xml_stripper

lib/completions/endpoints/samba_nova.rb

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -55,29 +55,31 @@ def final_log_update(log)
5555
log.response_tokens = @completion_tokens if @completion_tokens
5656
end
5757

58-
def extract_completion_from(response_raw)
58+
def xml_tools_enabled?
59+
true
60+
end
61+
62+
def decode(response_raw)
5963
json = JSON.parse(response_raw, symbolize_names: true)
64+
[json.dig(:choices, 0, :message, :content)]
65+
end
66+
67+
def decode_chunk(chunk)
68+
@json_decoder ||= JsonStreamDecoder.new
69+
(@json_decoder << chunk).map { |json|
70+
text = json.dig(:choices, 0, :delta, :content)
6071

61-
if @streaming_mode
6272
@prompt_tokens ||= json.dig(:usage, :prompt_tokens)
6373
@completion_tokens ||= json.dig(:usage, :completion_tokens)
64-
end
6574

66-
parsed = json.dig(:choices, 0)
67-
return if !parsed
68-
69-
@streaming_mode ? parsed.dig(:delta, :content) : parsed.dig(:message, :content)
70-
end
71-
72-
def partials_from(decoded_chunk)
73-
decoded_chunk
74-
.split("\n")
75-
.map do |line|
76-
data = line.split("data: ", 2)[1]
77-
data == "[DONE]" ? nil : data
75+
if !text.to_s.empty?
76+
text
77+
else
78+
nil
7879
end
79-
.compact
80+
}.flatten.compact
8081
end
82+
8183
end
8284
end
8385
end

lib/completions/endpoints/vllm.rb

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,27 @@ def prepare_request(payload)
5656
Net::HTTP::Post.new(model_uri, headers).tap { |r| r.body = payload }
5757
end
5858

59+
def xml_tools_enabled?
60+
true
61+
end
62+
63+
def decode(response_raw)
64+
json = JSON.parse(response_raw, symbolize_names: true)
65+
[json.dig(:choices, 0, :message, :content)]
66+
end
67+
68+
def decode_chunk(chunk)
69+
@json_decoder ||= JsonStreamDecoder.new
70+
(@json_decoder << chunk).map do |parsed|
71+
text = parsed.dig(:choices, 0, :delta, :content)
72+
if text.to_s.empty?
73+
nil
74+
else
75+
text
76+
end
77+
end.compact
78+
end
79+
5980
def partials_from(decoded_chunk)
6081
decoded_chunk
6182
.split("\n")

spec/lib/completions/endpoints/endpoint_compliance.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ def streaming_mode_simple_prompt(mock)
201201
expect(log.raw_request_payload).to be_present
202202
expect(log.raw_response_payload).to be_present
203203
expect(log.request_tokens).to eq(endpoint.prompt_size(dialect.translate))
204+
204205
expect(log.response_tokens).to eq(
205206
endpoint.llm_model.tokenizer_class.size(mock.streamed_simple_deltas[0...-1].join),
206207
)

spec/lib/completions/endpoints/samba_nova_spec.rb

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,15 @@
2222
},
2323
).to_return(status: 200, body: body, headers: {})
2424

25-
response = +""
25+
response = []
2626
llm.generate("who are you?", user: Discourse.system_user) { |partial| response << partial }
2727

28-
expect(response).to eq("I am a bot")
28+
expect(response).to eq(["I am a bot"])
29+
30+
log = AiApiAuditLog.order(:id).last
31+
32+
expect(log.request_tokens).to eq(21)
33+
expect(log.response_tokens).to eq(41)
2934
end
3035

3136
it "can perform regular completions" do

spec/lib/completions/endpoints/vllm_spec.rb

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -136,18 +136,13 @@ def stub_streamed_response(prompt, deltas, tool_call: false)
136136

137137
result = llm.generate(prompt, user: Discourse.system_user)
138138

139-
expected = <<~TEXT
140-
<function_calls>
141-
<invoke>
142-
<tool_name>calculate</tool_name>
143-
<parameters>
144-
<expression>1+1</expression></parameters>
145-
<tool_id>tool_0</tool_id>
146-
</invoke>
147-
</function_calls>
148-
TEXT
139+
expected = DiscourseAi::Completions::ToolCall.new(
140+
name: "calculate",
141+
id: "tool_0",
142+
parameters: { expression: "1+1" },
143+
)
149144

150-
expect(result.strip).to eq(expected.strip)
145+
expect(result).to eq(expected)
151146
end
152147
end
153148

0 commit comments

Comments (0)