Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit fcdf028

Browse files
committed
simplify code
1 parent ce7c672 commit fcdf028

File tree

2 files changed

+108
-129
lines changed

2 files changed

+108
-129
lines changed

lib/ai_helper/assistant.rb

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -144,16 +144,14 @@ def generate_prompt(
144144
helper_chunk = partial.read_buffered_property(schema_key)
145145
bad_json ||= partial.broken?
146146
if !helper_chunk.nil? && !helper_chunk.empty?
147-
if bad_json
147+
if bad_json || schema_type == "string" || schema_type == "array"
148148
helper_response << helper_chunk
149149
else
150-
if schema_type == "string" || schema_type == "array"
151-
helper_response << helper_chunk
152-
else
153-
helper_response = helper_chunk
154-
end
155-
block.call(helper_chunk) if block
150+
# TODO this feels a bit odd
151+
# why is this allowed to throw away potential data?
152+
helper_response = helper_chunk
156153
end
154+
block.call(helper_chunk) if block && !bad_json
157155
end
158156
elsif type.blank?
159157
# Assume response is a regular completion.
@@ -173,7 +171,6 @@ def generate_prompt(
173171
schema_type,
174172
schema_key,
175173
)
176-
p helper_response
177174
block.call(helper_response) if block
178175
end
179176
helper_response
@@ -280,7 +277,7 @@ def generate_image_caption(upload, user)
280277
Proc.new do |partial, _, type|
281278
if type == :structured_output
282279
structured_output = partial
283-
_json_summary_schema_key = bot.persona.response_format&.first.to_h
280+
bot.persona.response_format&.first.to_h
284281
end
285282
end
286283

lib/utils/best_effort_json_parser.rb

Lines changed: 102 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -1,149 +1,131 @@
11
# frozen_string_literal: true
22

3+
require "json"
4+
35
module DiscourseAi
46
module Utils
57
# Extracts a single value from text that is supposed to be JSON but may be
# wrapped in markdown fences, use bare keys or single quotes, be surrounded by
# prose, or be truncated.
#
# Strategy, in order:
#   1. strict JSON.parse of the text with markdown fences removed
#   2. JSON.parse after repairing bare keys and single quotes
#   3. regex-based extraction of just the requested key
#
# The result is always coerced to the requested schema type, so callers get
# an Array for :array, a Hash for :object and a String otherwise.
class BestEffortJsonParser
  # Opening markdown fence (``` or ```json) at the start of a line.
  FENCE_OPEN = /^```(?:json)?\s*\n/i
  # Closing markdown fence at the end of a line.
  FENCE_CLOSE = /\n```\s*$/
  # An unquoted object key: a word directly after "{" or ",". Anchoring on the
  # preceding delimiter keeps already-quoted keys from being quoted twice.
  BARE_KEY = /([{,]\s*)(\w+)\s*:/

  class << self
    # @param helper_response [Object] raw model output; anything that is not a
    #   String is returned untouched
    # @param schema_type [String, Symbol, nil] "string", "array" or "object";
    #   any other value is treated like "string"
    # @param schema_key [String, Symbol, nil] key to read from the document;
    #   when nil or empty the parsed document itself is used as the value
    # @return [String, Array, Hash] the extracted value, or the empty value of
    #   the requested type when nothing usable was found
    def extract_key(helper_response, schema_type, schema_key)
      return helper_response unless helper_response.is_a?(String)

      type = schema_type&.to_sym
      key = schema_key.to_s
      key = nil if key.empty?
      cleaned = remove_markdown_fences(helper_response.strip)

      document = try_parse(cleaned) || try_parse(fix_common_issues(cleaned))

      value =
        if document.nil?
          # manual_extract already returns the value stored under the key, so
          # it must not be indexed by the key a second time.
          manual_extract(cleaned, key, type)
        elsif document.is_a?(Hash) && key
          document[key]
        else
          document
        end

      cast_value(value, type)
    end

    private

    # Strips ``` / ```json fence lines. Text without an opening fence is
    # returned unchanged.
    def remove_markdown_fences(text)
      return text unless text.match?(FENCE_OPEN)

      text.gsub(FENCE_OPEN, "").gsub(FENCE_CLOSE, "")
    end

    # Quotes bare object keys and turns every single quote into a double
    # quote. Apostrophes inside double-quoted strings are converted as well;
    # when that breaks the document the caller falls back to manual
    # extraction.
    def fix_common_issues(text)
      text.gsub(BARE_KEY, '\1"\2":').tr("'", '"')
    end

    # @return [Object, nil] the parsed document, or nil when text is not
    #   valid JSON
    def try_parse(text)
      JSON.parse(text)
    rescue JSON::ParserError
      nil
    end

    # Last resort: locate the key with regular expressions and pull out just
    # its value.
    def manual_extract(text, key, schema_type)
      return default_for(schema_type) unless key

      case schema_type
      when :object
        extract_object(text, key)
      when :array, :string
        extract_scalar(text, key, schema_type)
      else
        default_for(schema_type)
      end
    end

    # Finds `key: "value"` or `key: [items]`, accepting a double-quoted,
    # single-quoted or bare key (tried in that order).
    def extract_scalar(text, key, schema_type)
      name = Regexp.escape(key)
      # The lookbehind stops a bare key from matching the tail of a longer
      # one (e.g. "title" inside "subtitle").
      bare = "(?<!\\w)#{name}"

      patterns =
        if schema_type == :array
          [
            /"#{name}"\s*:\s*\[([^\]]+)\]/,
            /'#{name}'\s*:\s*\[([^\]]+)\]/,
            /#{bare}\s*:\s*\[([^\]]+)\]/,
          ]
        else
          [
            /"#{name}"\s*:\s*"([^"]+)"/,
            /'#{name}'\s*:\s*'([^']+)'/,
            /#{bare}\s*:\s*"([^"]+)"/,
            /#{bare}\s*:\s*'([^']+)'/,
          ]
        end

      patterns.each do |pattern|
        match = text.match(pattern)
        next unless match

        return schema_type == :array ? parse_array(match[1]) : match[1]
      end

      default_for(schema_type)
    end

    # Parses the inside of an array literal; when it is not valid JSON the
    # items are split on commas and stripped of surrounding quotes.
    def parse_array(value)
      JSON.parse("[#{value}]")
    rescue JSON::ParserError
      value.split(",").map { |item| item.strip.gsub(/\A['"]|['"]\z/, "") }
    end

    # Finds `key: { ... }` and parses the balanced brace span that follows.
    def extract_object(text, key)
      name = Regexp.escape(key)
      match = text.match(/(?:"#{name}"|'#{name}'|(?<!\w)#{name})\s*:\s*\{/)
      return {} unless match

      # The pattern ends with "{", so the brace is the last matched character.
      open_at = match.end(0) - 1
      close_at = find_matching_brace(text, open_at)
      return {} unless close_at

      literal = text[open_at..close_at]
      try_parse(literal) || try_parse(fix_common_issues(literal)) || {}
    end

    # Returns the index of the "}" that closes the "{" at start_pos, or nil
    # when the braces never balance. Braces inside string values are counted
    # too, which is accepted as part of the best-effort contract.
    def find_matching_brace(text, start_pos)
      depth = 0

      text[start_pos..-1].each_char.with_index do |char, offset|
        if char == "{"
          depth += 1
        elsif char == "}"
          depth -= 1
          return start_pos + offset if depth.zero?
        end
      end

      nil
    end

    # Coerces value to the schema type, substituting the empty value of that
    # type when the shapes do not match.
    def cast_value(value, schema_type)
      case schema_type
      when :array
        value.is_a?(Array) ? value : []
      when :object
        value.is_a?(Hash) ? value : {}
      else
        value.to_s
      end
    end

    # Empty value for a schema type.
    def default_for(schema_type)
      case schema_type
      when :array
        []
      when :object
        {}
      else
        ""
      end
    end
  end
end

0 commit comments

Comments
 (0)