Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit a1d4e26

Browse files
committed
improve eval output
1 parent 9796f03 commit a1d4e26

File tree

2 files changed

+41
-5
lines changed

2 files changed

+41
-5
lines changed

evals/lib/eval.rb

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,10 @@ def run(llm:)
8585
{ result: :pass }
8686
end
8787
else
88-
{ result: :unknown, actual_output: result }
88+
{ result: :pass }
8989
end
90-
rescue EvalError
91-
{ result: :fail }
90+
rescue EvalError => e
91+
{ result: :fail, message: e.message, context: e.context }
9292
end
9393

9494
def print
@@ -227,10 +227,39 @@ def edit_artifact(llm, css_path:, js_path:, html_path:, instructions_path:)
227227
raise EvalError.new("Failed to apply all changes", diff.failed_searches)
228228
end
229229

230+
raise EvalError.new("Invalid JS", artifact.js) if !valid_javascript?(artifact.js)
231+
230232
version = artifact.versions.last
231233
output = { css: version.css, js: version.js, html: version.html }
232234

233235
artifact.destroy
234236
output
235237
end
238+
239+
# Checks whether +str+ is syntactically valid JavaScript by writing it to a
# temporary ".js" file and running `node --check` against that file.
#
# @param str [String] JavaScript source to check
# @return [Boolean] true when the node command completes without raising;
#   false when it raises Discourse::Utils::CommandError or when any other
#   StandardError occurs while preparing or running the check
def valid_javascript?(str)
  require "open3"
  require "tempfile" # Tempfile is used below; make sure it is loaded

  # Tempfile.create with a block removes the file when the block exits, so
  # nothing is left behind. The source is written ONLY to this private temp
  # file (no copy at a fixed, shared path such as /tmp/test.js).
  Tempfile.create(%w[test .js]) do |f|
    f.write(str)
    f.flush # make sure the content is on disk before node reads it

    begin
      Discourse::Utils.execute_command(
        "node",
        "--check",
        f.path,
        failure_message: "Invalid JavaScript syntax",
        timeout: 30, # reasonable timeout in seconds
      )
      true
    rescue Discourse::Utils::CommandError
      false
    end
  end
rescue StandardError
  # NOTE(review): any other failure is reported as "invalid" as well;
  # presumably this includes a missing node binary - confirm this
  # best-effort behaviour is intended.
  false
end
236265
end

evals/lib/runner.rb

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,9 +155,16 @@ def run!
155155

156156
if result[:result] == :fail
157157
puts "Failed 🔴"
158-
puts "---- Expected ----\n#{result[:expected_output]}"
159-
puts "---- Actual ----\n#{result[:actual_output]}"
158+
puts "Error: #{result[:message]}" if result[:message]
159+
if result[:expected_output] && result[:actual_output]
160+
puts "---- Expected ----\n#{result[:expected_output]}"
161+
puts "---- Actual ----\n#{result[:actual_output]}"
162+
end
160163
logger.error("Evaluation failed with LLM: #{llm.name}")
164+
logger.error("Error: #{result[:message]}") if result[:message]
165+
logger.error("Expected: #{result[:expected_output]}") if result[:expected_output]
166+
logger.error("Actual: #{result[:actual_output]}") if result[:actual_output]
167+
logger.error("Context: #{result[:context]}") if result[:context]
161168
elsif result[:result] == :pass
162169
puts "Passed 🟢"
163170
logger.info("Evaluation passed with LLM: #{llm.name}")

0 commit comments

Comments
 (0)