@@ -9,7 +9,8 @@ class DiscourseAi::Evals::Eval
99 :args ,
1010 :vision ,
1111 :expected_output ,
12- :expected_output_regex
12+ :expected_output_regex ,
13+ :expected_tool_call
1314
1415 def initialize ( path :)
1516 @yaml = YAML . load_file ( path ) . symbolize_keys
@@ -24,6 +25,8 @@ def initialize(path:)
2425 @expected_output_regex = @yaml [ :expected_output_regex ]
2526 @expected_output_regex =
2627 Regexp . new ( @expected_output_regex , Regexp ::MULTILINE ) if @expected_output_regex
28+ @expected_tool_call = @yaml [ :expected_tool_call ]
29+ @expected_tool_call . symbolize_keys! if @expected_tool_call
2730
2831 @args [ :path ] = File . expand_path ( File . join ( File . dirname ( path ) , @args [ :path ] ) ) if @args &.key? (
2932 :path ,
@@ -39,6 +42,8 @@ def run(llm:)
3942 pdf_to_text ( llm , **args )
4043 when "image_to_text"
4144 image_to_text ( llm , **args )
45+ when "prompt"
46+ prompt_call ( llm , **args )
4247 end
4348
4449 if expected_output
@@ -53,6 +58,19 @@ def run(llm:)
5358 else
5459 { result : :fail , expected_output : expected_output_regex , actual_output : result }
5560 end
61+ elsif expected_tool_call
62+ tool_call = result
63+
64+ if result . is_a? ( Array )
65+ tool_call = result . find { |r | r . is_a? ( DiscourseAi ::Completions ::ToolCall ) }
66+ end
67+ if !tool_call . is_a? ( DiscourseAi ::Completions ::ToolCall ) ||
68+ ( tool_call . name != expected_tool_call [ :name ] ) ||
69+ ( tool_call . parameters != expected_tool_call [ :params ] )
70+ { result : :fail , expected_output : expected_tool_call , actual_output : result }
71+ else
72+ { result : :pass }
73+ end
5674 else
5775 { result : :unknown , actual_output : result }
5876 end
@@ -133,4 +151,31 @@ def pdf_to_text(llm, path:)
133151 ensure
134152 upload . destroy if upload
135153 end
154+
# Sends a single user message to the LLM and returns what it produced.
#
# @param llm [#llm_model] wrapper whose `llm_model.to_llm` yields the object
#   that responds to `generate`
# @param system_prompt [String] first positional argument of the Prompt
# @param message [String] content of the only (user) message in the Prompt
# @param tools [Array<Hash>, nil] optional tool definitions; keys may be
#   strings or symbols (presumably string-keyed when they come from an eval
#   definition file — confirm against the caller)
# @param stream [Boolean] when truthy, `generate` is called with a block and
#   every yielded partial is collected
# @return [Array, Object] the collected partials when streaming, otherwise
#   the return value of `generate`
def prompt_call(llm, system_prompt:, message:, tools: nil, stream: false)
  # Work on symbolized copies so the caller's tool hashes are left untouched.
  symbolized_tools = tools && tools.map { |tool| symbolize_tool_keys(tool) }

  prompt =
    DiscourseAi::Completions::Prompt.new(
      system_prompt,
      messages: [{ type: :user, content: message }],
      tools: symbolized_tools,
    )

  completion_llm = llm.llm_model.to_llm
  return completion_llm.generate(prompt, user: Discourse.system_user) unless stream

  partials = []
  completion_llm.generate(prompt, user: Discourse.system_user) { |partial| partials << partial }
  partials
end

# Returns a copy of +tool+ with symbol keys. The keys of its :parameters entry
# are symbolized as well, whether that entry is a single Hash or an Array of
# Hashes (calling symbolize_keys directly on an Array would raise
# NoMethodError). Any other :parameters value is passed through unchanged.
def symbolize_tool_keys(tool)
  symbolized = tool.symbolize_keys
  parameters = symbolized[:parameters]

  case parameters
  when Hash
    symbolized[:parameters] = parameters.symbolize_keys
  when Array
    symbolized[:parameters] =
      parameters.map { |parameter| parameter.is_a?(Hash) ? parameter.symbolize_keys : parameter }
  end

  symbolized
end
136181end
0 commit comments