Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 46 additions & 1 deletion evals/lib/eval.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ class DiscourseAi::Evals::Eval
:args,
:vision,
:expected_output,
:expected_output_regex
:expected_output_regex,
:expected_tool_call

def initialize(path:)
@yaml = YAML.load_file(path).symbolize_keys
Expand All @@ -24,6 +25,8 @@ def initialize(path:)
@expected_output_regex = @yaml[:expected_output_regex]
@expected_output_regex =
Regexp.new(@expected_output_regex, Regexp::MULTILINE) if @expected_output_regex
@expected_tool_call = @yaml[:expected_tool_call]
@expected_tool_call.symbolize_keys! if @expected_tool_call

@args[:path] = File.expand_path(File.join(File.dirname(path), @args[:path])) if @args&.key?(
:path,
Expand All @@ -39,6 +42,8 @@ def run(llm:)
pdf_to_text(llm, **args)
when "image_to_text"
image_to_text(llm, **args)
when "prompt"
prompt_call(llm, **args)
end

if expected_output
Expand All @@ -53,6 +58,19 @@ def run(llm:)
else
{ result: :fail, expected_output: expected_output_regex, actual_output: result }
end
elsif expected_tool_call
tool_call = result

if result.is_a?(Array)
tool_call = result.find { |r| r.is_a?(DiscourseAi::Completions::ToolCall) }
end
if !tool_call.is_a?(DiscourseAi::Completions::ToolCall) ||
(tool_call.name != expected_tool_call[:name]) ||
(tool_call.parameters != expected_tool_call[:params])
{ result: :fail, expected_output: expected_tool_call, actual_output: result }
else
{ result: :pass }
end
else
{ result: :unknown, actual_output: result }
end
Expand Down Expand Up @@ -133,4 +151,31 @@ def pdf_to_text(llm, path:)
ensure
upload.destroy if upload
end

# Sends a single user message (plus a system prompt and optional tool
# definitions) to the LLM behind +llm+ and returns what it generated.
#
# llm           - object responding to +llm_model+, whose result responds to +to_llm+
# system_prompt - first positional argument handed to the Prompt
# message       - content of the single :user message in the Prompt
# tools         - optional array of tool definition hashes; each hash (and its
#                 :parameters value, when present) has symbolize_keys! called
#                 on it, so the caller's objects are mutated in place
# stream        - when truthy, +generate+ is called with a block and every
#                 partial yielded to that block is collected
#
# Returns the value of +generate+ when not streaming, otherwise the array of
# collected partials.
def prompt_call(llm, system_prompt:, message:, tools: nil, stream: false)
  (tools || []).each do |tool_definition|
    tool_definition.symbolize_keys!
    tool_parameters = tool_definition[:parameters]
    tool_parameters.symbolize_keys! if tool_parameters
  end

  prompt =
    DiscourseAi::Completions::Prompt.new(
      system_prompt,
      messages: [{ type: :user, content: message }],
      tools: tools,
    )

  completion_llm = llm.llm_model.to_llm
  acting_user = Discourse.system_user

  # Non-streaming: hand back whatever generate returns.
  return completion_llm.generate(prompt, user: acting_user) unless stream

  # Streaming: the return value of generate is discarded; only the yielded
  # partials are reported.
  partials = []
  completion_llm.generate(prompt, user: acting_user) { |partial| partials << partial }
  partials
end
end
3 changes: 2 additions & 1 deletion lib/completions/anthropic_message_processor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ def partial_tool_call
end

def to_tool_call
parameters = JSON.parse(raw_json, symbolize_names: true)
parameters = {}
parameters = JSON.parse(raw_json, symbolize_names: true) if raw_json.present?
# we dupe to avoid poisoning the original tool call
@tool_call = @tool_call.dup
@tool_call.partial = false
Expand Down
Loading