Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 7e72a32

Browse files
committed
improve eval output
1 parent 72559c7 commit 7e72a32

File tree

2 files changed

+41
-5
lines changed

2 files changed

+41
-5
lines changed

evals/lib/eval.rb

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,10 @@ def run(llm:)
8585
{ result: :pass }
8686
end
8787
else
88-
{ result: :unknown, actual_output: result }
88+
{ result: :pass }
8989
end
90-
rescue EvalError
91-
{ result: :fail }
90+
rescue EvalError => e
91+
{ result: :fail, message: e.message, context: e.context }
9292
end
9393

9494
def print
@@ -218,10 +218,39 @@ def edit_artifact(llm, css_path:, js_path:, html_path:, instructions_path:)
218218
raise EvalError.new("Failed to apply all changes", diff.failed_searches)
219219
end
220220

221+
raise EvalError.new("Invalid JS", artifact.js) if !valid_javascript?(artifact.js)
222+
221223
version = artifact.versions.last
222224
output = { css: version.css, js: version.js, html: version.html }
223225

224226
artifact.destroy
225227
output
226228
end
229+
230+
# Checks whether +str+ is accepted by `node --check`.
#
# The source is written to a uniquely named temporary file created with
# Tempfile.create (block form), and that file's path is handed to node.
# Nothing is written to any fixed, shared location.
#
# @param str [String] JavaScript source to validate
# @return [Boolean] true when the command completes without raising; false
#   when it raises Discourse::Utils::CommandError or any other StandardError
def valid_javascript?(str)
  # NOTE(review): open3 is not referenced directly in this method; the require
  # is kept in case Discourse::Utils.execute_command relies on it — confirm.
  require "open3"
  require "tempfile"

  Tempfile.create(%w[test .js]) do |f|
    f.write(str)
    f.flush # push buffered bytes out before node opens the path

    begin
      Discourse::Utils.execute_command(
        "node",
        "--check",
        f.path,
        failure_message: "Invalid JavaScript syntax",
        timeout: 30, # reasonable timeout in seconds
      )
      true
    rescue Discourse::Utils::CommandError
      false
    end
  end
rescue StandardError
  # Best effort: any other failure is reported as "not valid" rather than
  # propagated to the caller.
  # NOTE(review): a missing `node` binary presumably also ends up here — confirm.
  false
end
227256
end

evals/lib/runner.rb

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,9 +155,16 @@ def run!
155155

156156
if result[:result] == :fail
157157
puts "Failed 🔴"
158-
puts "---- Expected ----\n#{result[:expected_output]}"
159-
puts "---- Actual ----\n#{result[:actual_output]}"
158+
puts "Error: #{result[:message]}" if result[:message]
159+
if result[:expected_output] && result[:actual_output]
160+
puts "---- Expected ----\n#{result[:expected_output]}"
161+
puts "---- Actual ----\n#{result[:actual_output]}"
162+
end
160163
logger.error("Evaluation failed with LLM: #{llm.name}")
164+
logger.error("Error: #{result[:message]}") if result[:message]
165+
logger.error("Expected: #{result[:expected_output]}") if result[:expected_output]
166+
logger.error("Actual: #{result[:actual_output]}") if result[:actual_output]
167+
logger.error("Context: #{result[:context]}") if result[:context]
161168
elsif result[:result] == :pass
162169
puts "Passed 🟢"
163170
logger.info("Evaluation passed with LLM: #{llm.name}")

0 commit comments

Comments
 (0)