Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ node_modules
.env
evals/log
evals/cases
config/eval-llms.local.yml
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ For more information, please see: https://meta.discourse.org/t/discourse-ai/2592

### Evals

The directory `evals` contains AI evals for the Discourse AI plugin.
You may create a local config by copying `config/eval-llms.yml` to `config/eval-llms.local.yml` and modifying the values.

To run them use:

Expand Down
60 changes: 60 additions & 0 deletions config/eval-llms.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# LLM definitions used by the eval runner (evals/lib/llm.rb).
# Copy this file to config/eval-llms.local.yml to add private overrides;
# local entries are merged over these by top-level model key.
#
# Per-model keys:
#   display_name      - human-readable label shown in eval output
#   name              - provider-side model identifier
#   tokenizer         - Discourse tokenizer class used for token counting
#   api_key_env       - ENV var the runner reads the API key from
#   provider          - completion provider (open_ai / anthropic / google)
#   url               - completion endpoint URL
#   max_prompt_tokens - context-window budget for prompts
#   vision_enabled    - whether the model accepts image inputs
llms:
  gpt-4o:
    display_name: GPT-4o
    name: gpt-4o
    tokenizer: DiscourseAi::Tokenizer::OpenAiTokenizer
    api_key_env: OPENAI_API_KEY
    provider: open_ai
    url: https://api.openai.com/v1/chat/completions
    max_prompt_tokens: 131072
    vision_enabled: true

  gpt-4o-mini:
    display_name: GPT-4o-mini
    name: gpt-4o-mini
    tokenizer: DiscourseAi::Tokenizer::OpenAiTokenizer
    api_key_env: OPENAI_API_KEY
    provider: open_ai
    url: https://api.openai.com/v1/chat/completions
    max_prompt_tokens: 131072
    vision_enabled: true

  claude-3.5-haiku:
    display_name: Claude 3.5 Haiku
    name: claude-3-5-haiku-latest
    tokenizer: DiscourseAi::Tokenizer::AnthropicTokenizer
    api_key_env: ANTHROPIC_API_KEY
    provider: anthropic
    url: https://api.anthropic.com/v1/messages
    max_prompt_tokens: 200000
    vision_enabled: false

  claude-3.5-sonnet:
    display_name: Claude 3.5 Sonnet
    name: claude-3-5-sonnet-latest
    tokenizer: DiscourseAi::Tokenizer::AnthropicTokenizer
    api_key_env: ANTHROPIC_API_KEY
    provider: anthropic
    url: https://api.anthropic.com/v1/messages
    max_prompt_tokens: 200000
    vision_enabled: true

  gemini-2.0-flash:
    display_name: Gemini 2.0 Flash
    name: gemini-2-0-flash
    tokenizer: DiscourseAi::Tokenizer::GeminiTokenizer
    api_key_env: GEMINI_API_KEY
    provider: google
    url: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash
    max_prompt_tokens: 1000000
    vision_enabled: true

  gemini-2.0-pro-exp:
    display_name: Gemini 2.0 Pro
    name: gemini-2-0-pro-exp
    tokenizer: DiscourseAi::Tokenizer::GeminiTokenizer
    api_key_env: GEMINI_API_KEY
    provider: google
    url: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-pro-exp
    max_prompt_tokens: 1000000
    vision_enabled: true
115 changes: 34 additions & 81 deletions evals/lib/llm.rb
Original file line number Diff line number Diff line change
@@ -1,71 +1,23 @@
# frozen_string_literal: true

class DiscourseAi::Evals::Llm
# Hard-coded LLM configurations keyed by CLI-facing config name.
# Each entry mirrors an LlmModel row: tokenizer class, provider, endpoint
# URL, the ENV var holding the API key, context-window budget, and whether
# the model accepts image input.
# Frozen so shared config cannot be mutated at runtime (callers must .dup
# an entry before modifying it).
CONFIGS = {
  "gpt-4o" => {
    display_name: "GPT-4o",
    name: "gpt-4o",
    tokenizer: "DiscourseAi::Tokenizer::OpenAiTokenizer",
    api_key_env: "OPENAI_API_KEY",
    provider: "open_ai",
    url: "https://api.openai.com/v1/chat/completions",
    max_prompt_tokens: 131_072,
    vision_enabled: true,
  },
  "gpt-4o-mini" => {
    display_name: "GPT-4o-mini",
    name: "gpt-4o-mini",
    tokenizer: "DiscourseAi::Tokenizer::OpenAiTokenizer",
    api_key_env: "OPENAI_API_KEY",
    provider: "open_ai",
    url: "https://api.openai.com/v1/chat/completions",
    max_prompt_tokens: 131_072,
    vision_enabled: true,
  },
  "claude-3.5-haiku" => {
    display_name: "Claude 3.5 Haiku",
    name: "claude-3-5-haiku-latest",
    tokenizer: "DiscourseAi::Tokenizer::AnthropicTokenizer",
    api_key_env: "ANTHROPIC_API_KEY",
    provider: "anthropic",
    url: "https://api.anthropic.com/v1/messages",
    max_prompt_tokens: 200_000,
    vision_enabled: false,
  },
  "claude-3.5-sonnet" => {
    display_name: "Claude 3.5 Sonnet",
    name: "claude-3-5-sonnet-latest",
    tokenizer: "DiscourseAi::Tokenizer::AnthropicTokenizer",
    api_key_env: "ANTHROPIC_API_KEY",
    provider: "anthropic",
    url: "https://api.anthropic.com/v1/messages",
    max_prompt_tokens: 200_000,
    vision_enabled: true,
  },
  "gemini-2.0-flash" => {
    display_name: "Gemini 2.0 Flash",
    name: "gemini-2-0-flash",
    tokenizer: "DiscourseAi::Tokenizer::GeminiTokenizer",
    api_key_env: "GEMINI_API_KEY",
    provider: "google",
    url: "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash",
    max_prompt_tokens: 1_000_000,
    vision_enabled: true,
  },
  "gemini-2.0-pro-exp" => {
    display_name: "Gemini 2.0 pro",
    name: "gemini-2-0-pro-exp",
    tokenizer: "DiscourseAi::Tokenizer::GeminiTokenizer",
    api_key_env: "GEMINI_API_KEY",
    provider: "google",
    url: "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-pro-exp",
    max_prompt_tokens: 1_000_000,
    vision_enabled: true,
  },
}.freeze
# Loads LLM configurations from config/eval-llms.yml, overlaid with any
# entries from config/eval-llms.local.yml (local entries win per model key).
#
# @param yaml_path [String, nil] override path to the base config (for tests)
# @param local_yaml_path [String, nil] override path to the local config
# @return [Hash] model-name => config hash (string keys, as parsed from YAML)
def self.configs(yaml_path: nil, local_yaml_path: nil)
  use_defaults = yaml_path.nil? && local_yaml_path.nil?
  # Memoize only the default lookup so explicit paths always re-read.
  return @configs if use_defaults && @configs

  yaml_path ||= File.expand_path("../../config/eval-llms.yml", __dir__)
  local_yaml_path ||= File.expand_path("../../config/eval-llms.local.yml", __dir__)

  # YAML.load_file returns nil/false for an empty file; guard before indexing.
  configs = (YAML.load_file(yaml_path) || {})["llms"] || {}

  if File.exist?(local_yaml_path)
    local_configs = (YAML.load_file(local_yaml_path) || {})["llms"] || {}
    configs = configs.merge(local_configs)
  end

  @configs = configs if use_defaults
  configs
end

def self.print
CONFIGS
configs
.keys
.map do |config_name|
begin
Expand All @@ -79,38 +31,39 @@ def self.print
end

# Resolves a config name into instantiated Llm objects.
#
# @param config_name [String, nil] a key from .configs; nil or an unknown
#   name selects every configured model
# @return [Array<DiscourseAi::Evals::Llm>] instantiated LLMs; when building
#   all models, ones that fail to initialize (e.g. missing API keys) are
#   silently skipped
def self.choose(config_name)
  return [] unless configs

  if !config_name || !configs[config_name]
    # No (valid) name given: build every configured model, dropping failures.
    configs
      .keys
      .map do |name|
        begin
          new(name)
        rescue StandardError
          nil
        end
      end
      .compact
  else
    [new(config_name)]
  end
end

attr_reader :llm_model
attr_reader :llm_proxy
attr_reader :config_name
attr_reader :llm_model, :llm_proxy, :config_name

# Builds the LlmModel and completion proxy for a named config entry.
#
# @param config_name [String] key into self.class.configs
# @raise [RuntimeError] when the config name is unknown, or when neither an
#   API key nor an API-key ENV var is configured / set for it
def initialize(config_name)
  raw = self.class.configs[config_name]
  # Guard: an unknown name would otherwise fail with NoMethodError on .dup.
  raise "Unknown LLM config: #{config_name}" if raw.nil?

  config = raw.dup
  if config["api_key_env"]
    api_key_env = config.delete("api_key_env")
    unless ENV[api_key_env]
      raise "Missing API key for #{config_name}, should be set via #{api_key_env}"
    end
    config[:api_key] = ENV[api_key_env]
  elsif config["api_key"]
    # Allow an inline api_key (e.g. from eval-llms.local.yml).
    config[:api_key] = config.delete("api_key")
  else
    raise "No API key or API key env var configured for #{config_name}"
  end

  @llm_model = LlmModel.new(config.symbolize_keys)
  @llm_proxy = DiscourseAi::Completions::Llm.proxy(@llm_model)
  @config_name = config_name
end
Expand Down
Loading