Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit ab5edae

Browse files
authored
FIX: make AI helper more robust (#1484)
* FIX: make AI helper more robust
  - If JSON is broken for structured output, lean on a more forgiving parser
  - Gemini 2.5 Flash does not support temperature; support opting out
  - Evals for the assistant were broken; fix the interface
  - Add some missing LLMs
  - Translator was not mapped correctly to the feature; fix that
  - Don't mix XML into the prompt for the translator
* lint
* correct logic
* simplify code
* implement best-effort JSON parsing directly in the structured output object
1 parent 0f90497 commit ab5edae

File tree

15 files changed

+517
-43
lines changed

15 files changed

+517
-43
lines changed

app/models/llm_model.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ def self.provider_params
6565
google: {
6666
disable_native_tools: :checkbox,
6767
enable_thinking: :checkbox,
68+
disable_temperature: :checkbox,
69+
disable_top_p: :checkbox,
6870
thinking_tokens: :number,
6971
},
7072
azure: {

config/eval-llms.yml

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,27 @@
11
llms:
2+
o3:
3+
display_name: O3
4+
name: o3
5+
tokenizer: DiscourseAi::Tokenizer::OpenAiTokenizer
6+
api_key_env: OPENAI_API_KEY
7+
provider: open_ai
8+
url: https://api.openai.com/v1/chat/completions
9+
max_prompt_tokens: 131072
10+
vision_enabled: true
11+
provider_params:
12+
disable_top_p: true
13+
disable_temperature: true
14+
15+
gpt-41:
16+
display_name: GPT-4.1
17+
name: gpt-4.1
18+
tokenizer: DiscourseAi::Tokenizer::OpenAiTokenizer
19+
api_key_env: OPENAI_API_KEY
20+
provider: open_ai
21+
url: https://api.openai.com/v1/chat/completions
22+
max_prompt_tokens: 131072
23+
vision_enabled: true
24+
225
gpt-4o:
326
display_name: GPT-4o
427
name: gpt-4o
@@ -74,12 +97,25 @@ llms:
7497
max_prompt_tokens: 1000000
7598
vision_enabled: true
7699

77-
gemini-2.0-pro-exp:
100+
gemini-2.5-flash:
101+
display_name: Gemini 2.5 Flash
102+
name: gemini-2-5-flash
103+
tokenizer: DiscourseAi::Tokenizer::GeminiTokenizer
104+
api_key_env: GEMINI_API_KEY
105+
provider: google
106+
url: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash
107+
max_prompt_tokens: 1000000
108+
vision_enabled: true
109+
provider_params:
110+
disable_top_p: true
111+
disable_temperature: true
112+
113+
gemini-2.0-pro:
78114
display_name: Gemini 2.0 pro
79-
name: gemini-2-0-pro-exp
115+
name: gemini-2-0-pro
80116
tokenizer: DiscourseAi::Tokenizer::GeminiTokenizer
81117
api_key_env: GEMINI_API_KEY
82118
provider: google
83-
url: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-pro-exp
119+
url: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-pro
84120
max_prompt_tokens: 1000000
85121
vision_enabled: true

config/locales/client.en.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,7 @@ en:
249249
markdown_tables: "Generate Markdown table"
250250
custom_prompt: "Custom prompt"
251251
image_caption: "Caption images"
252+
translator: "Translator"
252253

253254
translation:
254255
name: "Translation"
@@ -257,7 +258,7 @@ en:
257258
post_raw_translator: "Post raw translator"
258259
topic_title_translator: "Topic title translator"
259260
short_text_translator: "Short text translator"
260-
261+
261262
spam:
262263
name: "Spam"
263264
description: "Identifies potential spam using the selected LLM and flags it for site moderators to inspect in the review queue"

evals/lib/eval.rb

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -200,12 +200,7 @@ class << user
200200
user.admin = true
201201
end
202202
result =
203-
helper.generate_and_send_prompt(
204-
name,
205-
input,
206-
current_user = user,
207-
_force_default_locale = false,
208-
)
203+
helper.generate_and_send_prompt(name, input, current_user = user, force_default_locale: false)
209204

210205
result[:suggestions].first
211206
end

lib/ai_helper/assistant.rb

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def attach_user_context(context, user = nil, force_default_locale: false)
8282
context.user_language = "#{locale_hash["name"]}"
8383

8484
if user
85-
timezone = user.user_option.timezone || "UTC"
85+
timezone = user&.user_option&.timezone || "UTC"
8686
current_time = Time.now.in_time_zone(timezone)
8787

8888
temporal_context = {
@@ -126,21 +126,29 @@ def generate_prompt(
126126
)
127127
context = attach_user_context(context, user, force_default_locale: force_default_locale)
128128

129-
helper_response = +""
129+
bad_json = false
130+
json_summary_schema_key = bot.persona.response_format&.first.to_h
131+
132+
schema_key = json_summary_schema_key["key"]&.to_sym
133+
schema_type = json_summary_schema_key["type"]
134+
135+
if schema_type == "array"
136+
helper_response = []
137+
else
138+
helper_response = +""
139+
end
130140

131141
buffer_blk =
132142
Proc.new do |partial, _, type|
133-
json_summary_schema_key = bot.persona.response_format&.first.to_h
134-
helper_response = [] if json_summary_schema_key["type"] == "array"
135-
if type == :structured_output
136-
helper_chunk = partial.read_buffered_property(json_summary_schema_key["key"]&.to_sym)
143+
if type == :structured_output && schema_type
144+
helper_chunk = partial.read_buffered_property(schema_key)
137145
if !helper_chunk.nil? && !helper_chunk.empty?
138-
if json_summary_schema_key["type"] != "array"
139-
helper_response = helper_chunk
140-
else
146+
if schema_type == "string" || schema_type == "array"
141147
helper_response << helper_chunk
148+
else
149+
helper_response = helper_chunk
142150
end
143-
block.call(helper_chunk) if block
151+
block.call(helper_chunk) if block && !bad_json
144152
end
145153
elsif type.blank?
146154
# Assume response is a regular completion.
@@ -255,7 +263,7 @@ def generate_image_caption(upload, user)
255263
Proc.new do |partial, _, type|
256264
if type == :structured_output
257265
structured_output = partial
258-
json_summary_schema_key = bot.persona.response_format&.first.to_h
266+
bot.persona.response_format&.first.to_h
259267
end
260268
end
261269

@@ -287,6 +295,11 @@ def build_bot(helper_mode, user)
287295
end
288296

289297
def find_ai_helper_model(helper_mode, persona_klass)
298+
if helper_mode == IMAGE_CAPTION && @image_caption_llm.is_a?(LlmModel)
299+
return @image_caption_llm
300+
end
301+
302+
return @helper_llm if helper_mode != IMAGE_CAPTION && @helper_llm.is_a?(LlmModel)
290303
self.class.find_ai_helper_model(helper_mode, persona_klass)
291304
end
292305

@@ -299,9 +312,9 @@ def self.find_ai_helper_model(helper_mode, persona_klass)
299312

300313
if !model_id
301314
if helper_mode == IMAGE_CAPTION
302-
model_id = @helper_llm || SiteSetting.ai_helper_image_caption_model&.split(":")&.last
315+
model_id = SiteSetting.ai_helper_image_caption_model&.split(":")&.last
303316
else
304-
model_id = @image_caption_llm || SiteSetting.ai_helper_model&.split(":")&.last
317+
model_id = SiteSetting.ai_helper_model&.split(":")&.last
305318
end
306319
end
307320

lib/completions/endpoints/base.rb

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,10 +187,10 @@ def perform_completion!(
187187
blk =
188188
lambda do |partial|
189189
if partial.is_a?(String)
190-
partial = xml_stripper << partial if xml_stripper
190+
partial = xml_stripper << partial if xml_stripper && !partial.empty?
191191

192192
if structured_output.present?
193-
structured_output << partial
193+
structured_output << partial if !partial.empty?
194194
partial = structured_output
195195
end
196196
end
@@ -252,6 +252,15 @@ def perform_completion!(
252252
end
253253
xml_tool_processor.finish.each { |partial| blk.call(partial) } if xml_tool_processor
254254
decode_chunk_finish.each { |partial| blk.call(partial) }
255+
256+
if structured_output
257+
structured_output.finish
258+
if structured_output.broken?
259+
# signal last partial output which will get parsed
260+
# by best effort json parser
261+
blk.call("")
262+
end
263+
end
255264
return response_data
256265
ensure
257266
if log
@@ -448,6 +457,7 @@ def non_streaming_response(
448457

449458
if structured_output.present?
450459
response_data.each { |data| structured_output << data if data.is_a?(String) }
460+
structured_output.finish
451461

452462
return structured_output
453463
end

lib/completions/endpoints/gemini.rb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ def normalize_model_params(model_params)
3333

3434
model_params[:topP] = model_params.delete(:top_p) if model_params[:top_p]
3535

36-
# temperature already supported
36+
model_params.delete(:temperature) if llm_model.lookup_custom_param("disable_temperature")
37+
model_params.delete(:topP) if llm_model.lookup_custom_param("disable_top_p")
3738

3839
model_params
3940
end

lib/completions/json_streaming_parser.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ def initialize
5353
#
5454
# Returns a UTF-8 encoded String.
5555
def <<(data)
56+
data = data.dup if data.frozen?
5657
# Avoid state machine for complete UTF-8.
5758
if @buffer.empty?
5859
data.force_encoding(Encoding::UTF_8)

lib/completions/structured_output.rb

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,23 +17,48 @@ def initialize(json_schema_properties)
1717
@raw_cursor = 0
1818

1919
@partial_json_tracker = JsonStreamingTracker.new(self)
20+
21+
@type_map = {}
22+
json_schema_properties.each { |name, prop| @type_map[name.to_sym] = prop[:type].to_sym }
23+
24+
@done = false
25+
end
26+
27+
def to_s
28+
# we may want to also normalize the JSON here for the broken case
29+
@raw_response
2030
end
2131

2232
attr_reader :last_chunk_buffer
2333

2434
def <<(raw)
35+
raise "Cannot append to a completed StructuredOutput" if @done
2536
@raw_response << raw
2637
@partial_json_tracker << raw
2738
end
2839

40+
def finish
41+
@done = true
42+
end
43+
44+
def broken?
45+
@partial_json_tracker.broken?
46+
end
47+
2948
def read_buffered_property(prop_name)
30-
# Safeguard: If the model is misbehaving and generating something that's not a JSON,
31-
# treat response as a normal string.
32-
# This is a best-effort to recover from an unexpected scenario.
3349
if @partial_json_tracker.broken?
34-
unread_chunk = @raw_response[@raw_cursor..]
35-
@raw_cursor = @raw_response.length
36-
return unread_chunk
50+
if @done
51+
return nil if @type_map[prop_name.to_sym].nil?
52+
return(
53+
DiscourseAi::Utils::BestEffortJsonParser.extract_key(
54+
@raw_response,
55+
@type_map[prop_name.to_sym],
56+
prop_name,
57+
)
58+
)
59+
else
60+
return nil
61+
end
3762
end
3863

3964
# Maybe we haven't read that part of the JSON yet.

lib/configuration/feature.rb

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,12 @@ def ai_helper_features
103103
DiscourseAi::Configuration::Module::AI_HELPER_ID,
104104
DiscourseAi::Configuration::Module::AI_HELPER,
105105
),
106+
new(
107+
"translator",
108+
"ai_helper_translator_persona",
109+
DiscourseAi::Configuration::Module::AI_HELPER_ID,
110+
DiscourseAi::Configuration::Module::AI_HELPER,
111+
),
106112
new(
107113
"custom_prompt",
108114
"ai_helper_custom_prompt_persona",

0 commit comments

Comments (0)