Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 9a6aec2

Browse files
authored
DEV: eval support for tool calls (#1128)
Also fixes anthropic with no params, streaming calls
1 parent 5e80f93 commit 9a6aec2

File tree

2 files changed: +48 / -2 lines changed

evals/lib/eval.rb

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ class DiscourseAi::Evals::Eval
99
:args,
1010
:vision,
1111
:expected_output,
12-
:expected_output_regex
12+
:expected_output_regex,
13+
:expected_tool_call
1314

1415
def initialize(path:)
1516
@yaml = YAML.load_file(path).symbolize_keys
@@ -24,6 +25,8 @@ def initialize(path:)
2425
@expected_output_regex = @yaml[:expected_output_regex]
2526
@expected_output_regex =
2627
Regexp.new(@expected_output_regex, Regexp::MULTILINE) if @expected_output_regex
28+
@expected_tool_call = @yaml[:expected_tool_call]
29+
@expected_tool_call.symbolize_keys! if @expected_tool_call
2730

2831
@args[:path] = File.expand_path(File.join(File.dirname(path), @args[:path])) if @args&.key?(
2932
:path,
@@ -39,6 +42,8 @@ def run(llm:)
3942
pdf_to_text(llm, **args)
4043
when "image_to_text"
4144
image_to_text(llm, **args)
45+
when "prompt"
46+
prompt_call(llm, **args)
4247
end
4348

4449
if expected_output
@@ -53,6 +58,19 @@ def run(llm:)
5358
else
5459
{ result: :fail, expected_output: expected_output_regex, actual_output: result }
5560
end
61+
elsif expected_tool_call
62+
tool_call = result
63+
64+
if result.is_a?(Array)
65+
tool_call = result.find { |r| r.is_a?(DiscourseAi::Completions::ToolCall) }
66+
end
67+
if !tool_call.is_a?(DiscourseAi::Completions::ToolCall) ||
68+
(tool_call.name != expected_tool_call[:name]) ||
69+
(tool_call.parameters != expected_tool_call[:params])
70+
{ result: :fail, expected_output: expected_tool_call, actual_output: result }
71+
else
72+
{ result: :pass }
73+
end
5674
else
5775
{ result: :unknown, actual_output: result }
5876
end
@@ -133,4 +151,31 @@ def pdf_to_text(llm, path:)
133151
ensure
134152
upload.destroy if upload
135153
end
154+
155+
# Sends a single user message to the LLM and returns whatever the model
# produced, optionally collecting the streamed partials.
#
# @param llm wrapper object; must respond to `llm_model.to_llm` (project type)
# @param system_prompt first positional argument handed to Prompt.new
# @param message content of the single `:user` message in the prompt
# @param tools optional list of tool definitions; each entry (and its
#   `:parameters` value, when present) has its keys symbolized IN PLACE
# @param stream when truthy, `generate` is called with a block and every
#   yielded partial is collected
# @return the value returned by `generate`, or, when streaming, the Array of
#   partials in the order they were yielded
def prompt_call(llm, system_prompt:, message:, tools: nil, stream: false)
  # Normalise tool definitions to symbol keys before building the prompt.
  (tools || []).each do |tool_definition|
    tool_definition.symbolize_keys!
    nested_parameters = tool_definition[:parameters]
    nested_parameters.symbolize_keys! if nested_parameters
  end

  user_message = { type: :user, content: message }
  prompt = DiscourseAi::Completions::Prompt.new(system_prompt, messages: [user_message], tools: tools)

  completion_llm = llm.llm_model.to_llm

  # Non-streaming: hand back the completion result directly.
  return completion_llm.generate(prompt, user: Discourse.system_user) unless stream

  # Streaming: the return value of generate is discarded; only the yielded
  # partials are reported to the caller.
  partials = []
  completion_llm.generate(prompt, user: Discourse.system_user) { |partial| partials << partial }
  partials
end
end
136181
end

lib/completions/anthropic_message_processor.rb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ def partial_tool_call
3434
end
3535

3636
def to_tool_call
37-
parameters = JSON.parse(raw_json, symbolize_names: true)
37+
parameters = {}
38+
parameters = JSON.parse(raw_json, symbolize_names: true) if raw_json.present?
3839
# we dupe to avoid poisoning the original tool call
3940
@tool_call = @tool_call.dup
4041
@tool_call.partial = false

0 commit comments

Comments
 (0)