@@ -9,7 +9,8 @@ class DiscourseAi::Evals::Eval
99              :args , 
1010              :vision , 
1111              :expected_output , 
12-               :expected_output_regex 
12+               :expected_output_regex , 
13+               :expected_tool_call 
1314
1415  def  initialize ( path :) 
1516    @yaml  =  YAML . load_file ( path ) . symbolize_keys 
@@ -24,6 +25,8 @@ def initialize(path:)
2425    @expected_output_regex  =  @yaml [ :expected_output_regex ] 
2526    @expected_output_regex  = 
2627      Regexp . new ( @expected_output_regex ,  Regexp ::MULTILINE )  if  @expected_output_regex 
28+     @expected_tool_call  =  @yaml [ :expected_tool_call ] 
29+     @expected_tool_call . symbolize_keys!  if  @expected_tool_call 
2730
2831    @args [ :path ]  =  File . expand_path ( File . join ( File . dirname ( path ) ,  @args [ :path ] ) )  if  @args &.key? ( 
2932      :path , 
@@ -39,6 +42,8 @@ def run(llm:)
3942        pdf_to_text ( llm ,  **args ) 
4043      when  "image_to_text" 
4144        image_to_text ( llm ,  **args ) 
45+       when  "prompt" 
46+         prompt_call ( llm ,  **args ) 
4247      end 
4348
4449    if  expected_output 
@@ -53,6 +58,19 @@ def run(llm:)
5358      else 
5459        {  result : :fail ,  expected_output : expected_output_regex ,  actual_output : result  } 
5560      end 
61+     elsif  expected_tool_call 
62+       tool_call  =  result 
63+ 
64+       if  result . is_a? ( Array ) 
65+         tool_call  =  result . find  {  |r | r . is_a? ( DiscourseAi ::Completions ::ToolCall )  } 
66+       end 
67+       if  !tool_call . is_a? ( DiscourseAi ::Completions ::ToolCall )  ||
68+            ( tool_call . name  != expected_tool_call [ :name ] )  ||
69+            ( tool_call . parameters  != expected_tool_call [ :params ] ) 
70+         {  result : :fail ,  expected_output : expected_tool_call ,  actual_output : result  } 
71+       else 
72+         {  result : :pass  } 
73+       end 
5674    else 
5775      {  result : :unknown ,  actual_output : result  } 
5876    end 
@@ -133,4 +151,31 @@ def pdf_to_text(llm, path:)
133151  ensure 
134152    upload . destroy  if  upload 
135153  end 
154+ 
# Sends a single-turn prompt (a system prompt plus one user message) to the
# completion client obtained from +llm.llm_model.to_llm+ and returns what
# that client generated. The request is made as +Discourse.system_user+.
#
# @param llm [#llm_model] object whose +llm_model.to_llm+ responds to +generate+
# @param system_prompt [String] first positional argument given to Prompt.new
# @param message [String] content of the only (user) message in the prompt
# @param tools [Array<Hash>, nil] optional tool definitions; keys may be
#   strings or symbols. The caller's hashes are left untouched.
# @param stream [Boolean] when truthy, +generate+ is called with a block and
#   every partial it yields is collected
# @return [Object, Array] the return value of +generate+ when not streaming;
#   otherwise an array of the yielded partials, in the order they arrived
def prompt_call(llm, system_prompt:, message:, tools: nil, stream: false)
  prompt =
    DiscourseAi::Completions::Prompt.new(
      system_prompt,
      messages: [{ type: :user, content: message }],
      tools: normalize_tool_definitions(tools),
    )

  client = llm.llm_model.to_llm
  user = Discourse.system_user

  return client.generate(prompt, user: user) unless stream

  # In streaming mode the return value of +generate+ is ignored; only the
  # partials handed to the block are reported back.
  partials = []
  client.generate(prompt, user: user) { |partial| partials << partial }
  partials
end

# Builds symbol-keyed copies of the tool definitions (and of each tool's
# :parameters) so the hashes supplied by the caller are never mutated.
# A nil/false +tools+ value is passed through unchanged.
def normalize_tool_definitions(tools)
  return tools unless tools

  tools.map do |tool|
    definition = symbolize_tool_keys(tool)
    params = definition[:parameters]
    definition[:parameters] = symbolize_tool_parameters(params) if params
    definition
  end
end

# Symbolizes the keys of a tool's :parameters value.
# NOTE(review): accepts either a single hash or a list of hashes; the exact
# shape expected by Prompt is not visible here - confirm against Prompt.
def symbolize_tool_parameters(params)
  case params
  when Hash
    symbolize_tool_keys(params)
  when Array
    params.map { |entry| entry.is_a?(Hash) ? symbolize_tool_keys(entry) : entry }
  else
    params
  end
end

# Returns a copy of +hash+ with every key that can be turned into a symbol
# converted; other keys are kept as they are.
def symbolize_tool_keys(hash)
  hash.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
end
136181end 
0 commit comments