Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 3b752d0

Browse files
committed
some attempts at improving reliability
1 parent 9fafaa4 commit 3b752d0

File tree

3 files changed

+56
-11
lines changed

3 files changed

+56
-11
lines changed

evals/lib/eval.rb

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,15 @@ class DiscourseAi::Evals::Eval
1212
:expected_output_regex,
1313
:expected_tool_call
1414

15+
# Error that carries an extra +context+ payload alongside the usual message.
# In this file it is raised when an artifact diff reports failed searches
# (with those failed searches as the context), and a `rescue EvalError`
# maps it to `{ result: :fail }`.
class EvalError < StandardError
16+
# Payload supplied by the raiser; read-only.
attr_reader :context
17+
18+
# message - passed through to StandardError.
# context - arbitrary object stored as-is and exposed via #context.
def initialize(message, context)
19+
super(message)
20+
@context = context
21+
end
22+
end
23+
1524
def initialize(path:)
1625
@yaml = YAML.load_file(path).symbolize_keys
1726
@path = path
@@ -78,6 +87,8 @@ def run(llm:)
7887
else
7988
{ result: :unknown, actual_output: result }
8089
end
90+
rescue EvalError
91+
{ result: :fail }
8192
end
8293

8394
def print
@@ -190,14 +201,22 @@ def edit_artifact(llm, css_path:, js_path:, html_path:, instructions_path:)
190201
)
191202

192203
post = Post.new(topic_id: 1, id: 1)
193-
DiscourseAi::AiBot::ArtifactUpdateStrategies::Diff.new(
194-
llm: llm.llm_model.to_llm,
195-
post: post,
196-
user: Discourse.system_user,
197-
artifact: artifact,
198-
artifact_version: nil,
199-
instructions: instructions,
200-
).apply
204+
diff =
205+
DiscourseAi::AiBot::ArtifactUpdateStrategies::Diff.new(
206+
llm: llm.llm_model.to_llm,
207+
post: post,
208+
user: Discourse.system_user,
209+
artifact: artifact,
210+
artifact_version: nil,
211+
instructions: instructions,
212+
)
213+
diff.apply
214+
215+
if diff.failed_searches.present?
216+
puts "Eval Errors encountered"
217+
p diff.failed_searches
218+
raise EvalError.new("Failed to apply all changes", diff.failed_searches)
219+
end
201220

202221
version = artifact.versions.last
203222
output = { css: version.css, js: version.js, html: version.html }

lib/ai_bot/artifact_update_strategies/diff.rb

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,15 @@ module DiscourseAi
33
module AiBot
44
module ArtifactUpdateStrategies
55
class Diff < Base
6+
attr_reader :failed_searches
7+
68
private
79

10+
# Accepts the same keyword arguments as the parent class (Base) and starts
# with an empty list of failed searches, exposed via attr_reader.
def initialize(**kwargs)
11+
# Bare `super` forwards the received keyword arguments unchanged.
super
12+
# Each NoMatchError rescued in apply_changes appends a
# { section:, search: } hash here.
@failed_searches = []
13+
end
14+
815
def build_prompt
916
DiscourseAi::Completions::Prompt.new(
1017
system_prompt,
@@ -58,8 +65,10 @@ def apply_changes(changes)
5865
block[:replace],
5966
)
6067
rescue DiscourseAi::Utils::DiffUtils::SimpleDiff::NoMatchError
68+
@failed_searches << { section: section, search: block[:search] }
6169
# TODO: we may need to inform the caller here; the LLM made a mistake which it
6270
# should correct
71+
puts "Failed to find search: #{block[:search]}"
6372
end
6473
end
6574
updated_content[section == :javascript ? :js : section] = content
@@ -112,6 +121,7 @@ def system_prompt
112121
7. When specifying a SEARCH block, ALWAYS keep it 8 lines or less, you will be interrupted and a retry will be required if you exceed this limit
113122
8. NEVER EVER ask followup questions, ALL changes must be performed in a single response, you are consumed via an API, there is no opportunity for humans in the loop
114123
9. When performing a non-contiguous search, ALWAYS use ... to denote the skipped lines
124+
10. Be mindful that ... non-contiguous search is not greedy, the following line will only match the first occurrence of the search block
115125
116126
JavaScript libraries must be sourced from the following CDNs, otherwise CSP will reject it:
117127
#{AiArtifact::ALLOWED_CDN_SOURCES.join("\n")}
@@ -157,9 +167,10 @@ def system_prompt
157167
>>>>>>> REPLACE
158168
[/CSS]
159169
160-
Example - Non contiguous search in CSS (replace all CSS with new CSS)
170+
Example - Non contiguous search in CSS (replace most CSS with new CSS)
161171
162172
Original CSS:
173+
163174
[CSS]
164175
body {
165176
color: red;
@@ -170,6 +181,9 @@ def system_prompt
170181
.alert {
171182
background-color: green;
172183
}
184+
.alert2 {
185+
background-color: green;
186+
}
173187
[/CSS]
174188
175189
[CSS]
@@ -184,7 +198,16 @@ def system_prompt
184198
}
185199
>>>>>>> REPLACE
186200
187-
This will replace the entire CSS block with the new CSS block, given that the search block is non-contiguous and unambiguous.
201+
RESULT:
202+
203+
[CSS]
204+
body {
205+
color: red;
206+
}
207+
.alert2 {
208+
background-color: green;
209+
}
210+
[/CSS]
188211
189212
PROMPT
190213
end

lib/utils/diff_utils/simple_diff.rb

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,10 @@ def non_contiguous_match_range(lines, search_lines)
8686
search_index = 0
8787

8888
lines.each_with_index do |line, idx|
89-
search_index += 1 if search_lines[search_index].strip == "..."
89+
if search_lines[search_index].strip == "..."
90+
search_index += 1
91+
break if search_lines[search_index].nil?
92+
end
9093
if line.strip == search_lines[search_index].strip
9194
first_idx ||= idx
9295
last_idx = idx

0 commit comments

Comments
 (0)