Implement best-effort JSON parsing directly in the structured output object

SamSaffron · SamSaffron · commit 0faa98964706 · 2025-07-04T14:23:23.000+10:00
diff --git a/lib/ai_helper/assistant.rb b/lib/ai_helper/assistant.rb
@@ -142,13 +142,10 @@ def generate_prompt(
           Proc.new do |partial, _, type|
             if type == :structured_output && schema_type
               helper_chunk = partial.read_buffered_property(schema_key)
-              bad_json ||= partial.broken?
               if !helper_chunk.nil? && !helper_chunk.empty?
-                if bad_json || schema_type == "string" || schema_type == "array"
+                if schema_type == "string" || schema_type == "array"
                   helper_response << helper_chunk
                 else
-                  # TODO this feels a bit odd
-                  # why is this allowed to throw away potential data?
                   helper_response = helper_chunk
                 end
                 block.call(helper_chunk) if block && !bad_json
@@ -162,17 +159,6 @@ def generate_prompt(
 
         bot.reply(context, &buffer_blk)
 
-        # handle edge cases where structured output is all over the place
-        if bad_json
-          helper_response = helper_response.join if helper_response.is_a?(Array)
-          helper_response =
-            DiscourseAi::Utils::BestEffortJsonParser.extract_key(
-              helper_response,
-              schema_type,
-              schema_key,
-            )
-          block.call(helper_response) if block
-        end
         helper_response
       end
 
diff --git a/lib/completions/endpoints/base.rb b/lib/completions/endpoints/base.rb
@@ -187,10 +187,10 @@ def perform_completion!(
                 blk =
                   lambda do |partial|
                     if partial.is_a?(String)
-                      partial = xml_stripper << partial if xml_stripper
+                      partial = xml_stripper << partial if xml_stripper && !partial.empty?
 
                       if structured_output.present?
-                        structured_output << partial
+                        structured_output << partial if !partial.empty?
                         partial = structured_output
                       end
                     end
@@ -252,6 +252,15 @@ def perform_completion!(
               end
               xml_tool_processor.finish.each { |partial| blk.call(partial) } if xml_tool_processor
               decode_chunk_finish.each { |partial| blk.call(partial) }
+
+              if structured_output
+                structured_output.finish
+                if structured_output.broken?
+                  # signal last partial output which will get parsed
+                  # by best effort json parser
+                  blk.call("")
+                end
+              end
               return response_data
             ensure
               if log
@@ -448,6 +457,7 @@ def non_streaming_response(
 
           if structured_output.present?
             response_data.each { |data| structured_output << data if data.is_a?(String) }
+            structured_output.finish
 
             return structured_output
           end
diff --git a/lib/completions/json_streaming_parser.rb b/lib/completions/json_streaming_parser.rb
@@ -53,6 +53,7 @@ def initialize
         #
         # Returns a UTF-8 encoded String.
         def <<(data)
+          data = data.dup if data.frozen?
           # Avoid state machine for complete UTF-8.
           if @buffer.empty?
             data.force_encoding(Encoding::UTF_8)
diff --git a/lib/completions/structured_output.rb b/lib/completions/structured_output.rb
@@ -17,27 +17,48 @@ def initialize(json_schema_properties)
         @raw_cursor = 0
 
         @partial_json_tracker = JsonStreamingTracker.new(self)
+
+        @type_map = {}
+        json_schema_properties.each { |name, prop| @type_map[name.to_sym] = prop[:type].to_sym }
+
+        @done = false
+      end
+
+      def to_s
+        # we may want to also normalize the JSON here for the broken case
+        @raw_response
       end
 
       attr_reader :last_chunk_buffer
 
       def <<(raw)
+        raise "Cannot append to a completed StructuredOutput" if @done
         @raw_response << raw
         @partial_json_tracker << raw
       end
 
+      def finish
+        @done = true
+      end
+
       def broken?
         @partial_json_tracker.broken?
       end
 
       def read_buffered_property(prop_name)
-        # Safeguard: If the model is misbehaving and generating something that's not a JSON,
-        # treat response as a normal string.
-        # This is a best-effort to recover from an unexpected scenario.
         if @partial_json_tracker.broken?
-          unread_chunk = @raw_response[@raw_cursor..]
-          @raw_cursor = @raw_response.length
-          return unread_chunk
+          if @done
+            return nil if @type_map[prop_name.to_sym].nil?
+            return(
+              DiscourseAi::Utils::BestEffortJsonParser.extract_key(
+                @raw_response,
+                @type_map[prop_name.to_sym],
+                prop_name,
+              )
+            )
+          else
+            return nil
+          end
         end
 
         # Maybe we haven't read that part of the JSON yet.
diff --git a/lib/utils/best_effort_json_parser.rb b/lib/utils/best_effort_json_parser.rb
@@ -18,7 +18,8 @@ def extract_key(helper_response, schema_type, schema_key)
               manual_extract(cleaned, schema_key, schema_type)
 
           value = parsed.is_a?(Hash) ? parsed[schema_key.to_s] : parsed
-          parsed = cast_value(value, schema_type)
+
+          cast_value(value, schema_type)
         end
 
         private
@@ -119,6 +120,9 @@ def cast_value(value, schema_type)
             value.is_a?(Array) ? value : []
           when :object
             value.is_a?(Hash) ? value : {}
+          when :boolean
+            return value if [true, false, nil].include?(value)
+            value.to_s.downcase == "true"
           else
             value.to_s
           end
diff --git a/spec/lib/completions/endpoints/open_ai_spec.rb b/spec/lib/completions/endpoints/open_ai_spec.rb
@@ -59,7 +59,7 @@ def stub_raw(chunks, body_blk: nil)
     stub.to_return(status: 200, body: chunks)
   end
 
-  def stub_streamed_response(prompt, deltas, tool_call: false)
+  def stub_streamed_response(prompt, deltas, tool_call: false, skip_body_check: false)
     chunks =
       deltas.each_with_index.map do |_, index|
         if index == (deltas.length - 1)
@@ -71,10 +71,13 @@ def stub_streamed_response(prompt, deltas, tool_call: false)
 
     chunks = (chunks.join("\n\n") << "data: [DONE]").split("")
 
-    WebMock
-      .stub_request(:post, "https://api.openai.com/v1/chat/completions")
-      .with(body: request_body(prompt, stream: true, tool_call: tool_call))
-      .to_return(status: 200, body: chunks)
+    mock = WebMock.stub_request(:post, "https://api.openai.com/v1/chat/completions")
+
+    if !skip_body_check
+      mock = mock.with(body: request_body(prompt, stream: true, tool_call: tool_call))
+    end
+
+    mock.to_return(status: 200, body: chunks)
 
     yield if block_given?
   end
@@ -401,6 +404,41 @@ def request_body(prompt, stream: false, tool_call: false)
     end
   end
 
+  describe "structured outputs" do
+    it "falls back to best-effort parsing on broken JSON responses" do
+      prompt = compliance.generic_prompt
+      deltas = ["```json\n{ message: 'hel", "lo' }"]
+
+      model_params = {
+        response_format: {
+          json_schema: {
+            schema: {
+              properties: {
+                message: {
+                  type: "string",
+                },
+              },
+            },
+          },
+        },
+      }
+
+      read_properties = []
+      open_ai_mock.with_chunk_array_support do
+        # skip body check cause of response format
+        open_ai_mock.stub_streamed_response(prompt, deltas, skip_body_check: true)
+
+        dialect = compliance.dialect(prompt: prompt)
+
+        endpoint.perform_completion!(dialect, user, model_params) do |partial|
+          read_properties << partial.read_buffered_property(:message)
+        end
+      end
+
+      expect(read_properties.join).to eq("hello")
+    end
+  end
+
   describe "disabled tool use" do
     it "can properly disable tool use with :none" do
       llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
diff --git a/spec/lib/completions/structured_output_spec.rb b/spec/lib/completions/structured_output_spec.rb
@@ -127,13 +127,31 @@
       chunks = [+"I'm not", +"a", +"JSON :)"]
 
       structured_output << chunks[0]
-      expect(structured_output.read_buffered_property(nil)).to eq("I'm not")
+      expect(structured_output.read_buffered_property(:bob)).to eq(nil)
 
       structured_output << chunks[1]
-      expect(structured_output.read_buffered_property(nil)).to eq("a")
+      expect(structured_output.read_buffered_property(:bob)).to eq(nil)
 
       structured_output << chunks[2]
-      expect(structured_output.read_buffered_property(nil)).to eq("JSON :)")
+
+      structured_output.finish
+      expect(structured_output.read_buffered_property(:bob)).to eq(nil)
+    end
+
+    it "can handle broken JSON" do
+      broken_json = <<~JSON
+        ```json
+        {
+          "message": "This is a broken JSON",
+          bool: true
+        }
+      JSON
+
+      structured_output << broken_json
+      structured_output.finish
+
+      expect(structured_output.read_buffered_property(:message)).to eq("This is a broken JSON")
+      expect(structured_output.read_buffered_property(:bool)).to eq(true)
     end
   end
 end

Original file line number	Diff line number	Diff line change
`@@ -53,6 +53,7 @@ def initialize`
`53`	`53`	`#`
`54`	`54`	`# Returns a UTF-8 encoded String.`
`55`	`55`	`def <<(data)`
	`56`	`+ data = data.dup if data.frozen?`
`56`	`57`	`# Avoid state machine for complete UTF-8.`
`57`	`58`	`if @buffer.empty?`
`58`	`59`	`data.force_encoding(Encoding::UTF_8)`