@@ -59,7 +59,17 @@ class DiscourseAi::Evals::Llm
5959
6060 def self . choose ( config_name )
6161 if CONFIGS [ config_name ] . nil?
62- CONFIGS . keys . map { |config_name | new ( config_name ) }
62+ CONFIGS
63+ . keys
64+ . map do |config_name |
65+ begin
66+ new ( config_name )
67+ rescue => e
68+ puts "Error initializing #{ config_name } : #{ e } "
69+ nil
70+ end
71+ end
72+ . compact
6373 elsif !CONFIGS . include? ( config_name )
6474 raise "Invalid llm"
6575 else
@@ -82,17 +92,28 @@ def initialize(config_name)
8292 @llm = DiscourseAi ::Completions ::Llm . proxy ( @llm_model )
8393 end
8494
# Runs one eval case and grades its output.
#
# @param type [String] which kind of eval to run; only "helper" is
#   dispatched here, any other value leaves the result as nil
# @param args [Hash] keyword arguments splatted into the runner for +type+
# @param expected_output [Object, nil] when truthy, the result must be
#   equal (==) to this value
# @param expected_output_regex [String, Regexp, nil] consulted only when
#   +expected_output+ is absent; the result must match this pattern
# @return [Hash] +{ result: :pass }+ on success;
#   +{ result: :fail, expected_output:, actual_output: }+ on mismatch (for
#   the regex case +expected_output+ is the compiled Regexp);
#   +{ result: :unknown, actual_output: }+ when no expectation was supplied
# @raise [RegexpError] if +expected_output_regex+ is not a valid pattern
def eval(type:, args:, expected_output: nil, expected_output_regex: nil)
  # No else branch: an unrecognised type yields a nil result.
  result =
    case type
    when "helper"
      helper(**args)
    end

  if expected_output
    if result == expected_output
      { result: :pass }
    else
      { result: :fail, expected_output: expected_output, actual_output: result }
    end
  elsif expected_output_regex
    pattern = Regexp.new(expected_output_regex)
    # Regexp#match? returns false for nil, so a missing result is reported
    # as a failure instead of raising NoMethodError on nil.
    if pattern.match?(result)
      { result: :pass }
    else
      { result: :fail, expected_output: pattern, actual_output: result }
    end
  else
    { result: :unknown, actual_output: result }
  end
end
98119
0 commit comments