This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Merged
config/eval-llms.yml (10 additions, 0 deletions)

@@ -39,6 +39,16 @@ llms:
     max_prompt_tokens: 200000
     vision_enabled: true
 
+  claude-3.7-sonnet:
+    display_name: Claude 3.7 Sonnet
+    name: claude-3-7-sonnet-latest
+    tokenizer: DiscourseAi::Tokenizer::AnthropicTokenizer
+    api_key_env: ANTHROPIC_API_KEY
+    provider: anthropic
+    url: https://api.anthropic.com/v1/messages
+    max_prompt_tokens: 200000
+    vision_enabled: true
+
   gemini-2.0-flash:
     display_name: Gemini 2.0 Flash
     name: gemini-2-0-flash
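The new entry follows the same shape as the existing ones: the model key sits under a top-level llms: map (visible in the hunk header), and api_key_env names an environment variable rather than embedding the key in the file. A minimal sketch of reading such an entry, assuming only that the file is plain YAML and nothing about the eval harness itself:

    require "yaml"

    config = YAML.load_file("config/eval-llms.yml")
    entry = config.dig("llms", "claude-3.7-sonnet")

    # The key itself stays out of the config; only its env var name is stored.
    api_key = ENV.fetch(entry["api_key_env"]) # KeyError if ANTHROPIC_API_KEY is unset

    puts "#{entry["display_name"]}: #{entry["name"]} via #{entry["provider"]}"
    puts "endpoint: #{entry["url"]}, context: #{entry["max_prompt_tokens"]} tokens"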
evals/lib/eval.rb (9 additions, 5 deletions)

@@ -121,7 +121,7 @@ def to_json
   def judge_result(result)
     prompt = judge[:prompt].dup
     prompt.sub!("{{output}}", result)
-    prompt.sub!("{{input}}", args[:input])
+    args.each { |key, value| prompt.sub!("{{#{key}}}", value.to_s) }
 
     prompt += <<~SUFFIX
 
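The judge prompt previously filled in only {{output}} and {{input}}; the loop now substitutes a {{key}} placeholder for every entry in args, so eval definitions can reference arbitrary fields. A standalone sketch of the behavior (the args keys here are made up):

    args = { input: "2 + 2", expected_output: "4" }
    prompt = "Question: {{input}}\nExpected: {{expected_output}}".dup

    # sub! fills each placeholder once; a prompt that repeats a placeholder
    # would need gsub! instead.
    args.each { |key, value| prompt.sub!("{{#{key}}}", value.to_s) }
    prompt # => "Question: 2 + 2\nExpected: 4"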
@@ -145,7 +145,8 @@ def judge_result(result)
         messages: [{ type: :user, content: prompt }],
       )
 
-    result = judge_llm.llm_model.to_llm.generate(prompt, user: Discourse.system_user)
+    result =
+      judge_llm.llm_model.to_llm.generate(prompt, user: Discourse.system_user, temperature: 0)
 
     if rating = result.match(%r{\[RATING\](\d+)\[/RATING\]})
       rating = rating[1].to_i
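Pinning the judge call to temperature: 0 makes repeated eval runs score as reproducibly as the provider allows. The rating is then pulled out of the judge's free-text reply via the [RATING]n[/RATING] convention; the regex from the diff works standalone:

    result = "Good coverage, one minor omission. [RATING]8[/RATING]"

    if rating = result.match(%r{\[RATING\](\d+)\[/RATING\]})
      rating = rating[1].to_i # => 8
    end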
@@ -219,7 +220,7 @@ def pdf_to_text(llm, path:)
     upload.destroy if upload
   end
 
-  def prompt_call(llm, system_prompt:, message:, tools: nil, stream: false)
+  def prompt_call(llm, system_prompt:, message:, temperature: nil, tools: nil, stream: false)
     if tools
       tools.each do |tool|
         tool.symbolize_keys!
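prompt_call grows an optional temperature: keyword with a nil default, so existing call sites keep their old behavior and only evals that opt in pass a value. Hypothetical call sites (the arguments here are illustrative):

    # Unchanged caller: provider-default sampling.
    prompt_call(llm, system_prompt: "You are terse.", message: "Summarize the PR.")

    # Opting in: pinned sampling for reproducible scoring.
    prompt_call(
      llm,
      system_prompt: "You are terse.",
      message: "Summarize the PR.",
      temperature: 0,
      stream: true,
    )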
@@ -230,16 +231,19 @@ def prompt_call(llm, system_prompt:, message:, tools: nil, stream: false)
       DiscourseAi::Completions::Prompt.new(
         system_prompt,
         messages: [{ type: :user, content: message }],
-        tools: tools,
       )
 
+    prompt.tools = tools if tools
+
     result = nil
     if stream
       result = []
       llm
         .llm_model
         .to_llm
-        .generate(prompt, user: Discourse.system_user) { |partial| result << partial }
+        .generate(prompt, user: Discourse.system_user, temperature: temperature) do |partial|
+          result << partial
+        end
     else
       result = llm.llm_model.to_llm.generate(prompt, user: Discourse.system_user)
     end
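Two things change in this last hunk: tools is assigned after construction (prompt.tools = tools if tools) instead of being passed as a possibly nil constructor argument, and the streaming call gains the temperature pass-through, with the one-line block rewritten as do ... end once it spans multiple lines. A self-contained sketch of the streaming shape, using a hypothetical FakeLlm stand-in for llm.llm_model.to_llm and assuming partials arrive as strings:

    # FakeLlm is a stand-in for illustration only; it is not the plugin's API.
    class FakeLlm
      def generate(prompt, user:, temperature: nil)
        ["Hello", ", ", "world"].each { |chunk| yield chunk }
      end
    end

    result = []
    FakeLlm.new.generate("prompt", user: :system, temperature: 0) { |p| result << p }
    result      # => ["Hello", ", ", "world"] -- the harness keeps the chunk array
    result.join # => "Hello, world" -- reassembly, if a single string is wanted

As the diff stands, the non-streaming branch still calls generate without temperature:, so the new knob only takes effect for streamed runs.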