11#!/usr/bin/env ruby
22# frozen_string_literal: true
33
# Make sure the eval cases are checked out next to this script before anything
# else runs; they live in the separate discourse/discourse-ai-evals repository.
# rubocop:disable Discourse/Plugins/NamespaceConstants
EVAL_PATH = File.join(__dir__, "cases")
# rubocop:enable Discourse/Plugins/NamespaceConstants

if !Dir.exist?(EVAL_PATH)
  puts "Evals are missing, cloning from discourse/discourse-ai-evals"

  # The argument-list form of `system` bypasses the shell, so EVAL_PATH is
  # passed to git verbatim even if it contains quotes or spaces.
  # stderr is discarded for the SSH attempt because a failure here is expected
  # on machines without a GitHub SSH key.
  success =
    system(
      "git",
      "clone",
      "git@github.com:discourse/discourse-ai-evals.git",
      EVAL_PATH,
      err: File::NULL,
    )

  # Fall back to HTTPS if SSH fails
  if !success
    puts "SSH clone failed, falling back to HTTPS..."
    success =
      system("git", "clone", "https://github.com/discourse/discourse-ai-evals.git", EVAL_PATH)
  end

  # `system` returns false on a non-zero exit and nil when git cannot be run.
  abort "Failed to clone evals repository" if !success
  puts "Successfully cloned evals repository"
end
27+
# Resolve the directory three levels above this script and make it the working
# directory for the rest of the run.
# File.dirname(__FILE__) is kept (rather than __dir__) because __dir__
# canonicalizes symlinks, which would change the result for a symlinked checkout.
discourse_path = File.expand_path("../../..", File.dirname(__FILE__))
# rubocop:disable Discourse/NoChdir
Dir.chdir(discourse_path)
@@ -27,10 +51,8 @@ OptionParser
2751 end
2852 . parse!
2953
# Create the output directory (and any missing parents); a no-op if it already exists.
FileUtils.mkdir_p(options[:output_dir])
3255
33- # Load and run the specified evaluation
3456if options [ :eval_name ] . nil?
3557 puts "Error: Must specify an evaluation name with -e or --eval"
3658 exit 1
5476
# Load the definition of the selected eval case.
eval_case_path = cases[options[:eval_name]]
eval_info = YAML.load_file(eval_case_path).symbolize_keys

# Correct relative paths in args: a `path` argument is resolved against the
# directory that contains the case file. The key is compared via `to_sym` so
# both "path" and :path are matched.
eval_case_root = File.dirname(eval_case_path)
eval_info[:args]&.each do |key, value|
  next if key.to_sym != :path

  # Reassigning an existing key while iterating is safe; no keys are added.
  eval_info[:args][key] = File.join(eval_case_root, value)
end
88+
puts "Running evaluation '#{options[:eval_name]}'"

# Every run writes to its own timestamped file under the `log` directory that
# sits next to this script.
logs_dir = File.join(__dir__, "log")
FileUtils.mkdir_p(logs_dir)
log_filename = "#{options[:eval_name]}-#{Time.now.strftime("%Y%m%d-%H%M%S")}.log"
log_file = File.join(logs_dir, log_filename)

# The file is opened in append mode and handed to Logger as an IO object.
logger = Logger.new(File.open(log_file, "a"))
@@ -68,6 +100,11 @@ logger.info("Starting evaluation '#{options[:eval_name]}'")
# Publish the run logger as a thread-local under :llm_audit_log.
# NOTE(review): presumably read by the LLM layer to record requests — confirm against the consumer.
Thread.current[:llm_audit_log] = logger
69101
70102llms . each do |llm |
103+ if eval_info [ :vision ] && !llm . vision?
104+ logger . info ( "Skipping LLM: #{ llm . name } as it does not support vision" )
105+ next
106+ end
107+
71108 logger . info ( "Evaluating with LLM: #{ llm . name } " )
72109 eval =
73110 llm . eval (
0 commit comments