Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit a316050

Browse files
committed
preserve thinking context across turns
1 parent ae3117d commit a316050

File tree

8 files changed

+151
-14
lines changed

8 files changed

+151
-14
lines changed

assets/javascripts/discourse/connectors/composer-fields/persona-llm-selector.gjs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -168,12 +168,14 @@ export default class BotSelector extends Component {
168168
.filter((bot) => !bot.is_persona)
169169
.filter(Boolean);
170170

171-
return availableBots.map((bot) => {
172-
return {
173-
id: bot.id,
174-
name: bot.display_name,
175-
};
176-
});
171+
return availableBots
172+
.map((bot) => {
173+
return {
174+
id: bot.id,
175+
name: bot.display_name,
176+
};
177+
})
178+
.sort((a, b) => a.name.localeCompare(b.name));
177179
}
178180

179181
<template>

config/locales/server.en.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,7 @@ en:
261261
ai_bot:
262262
reply_error: "Sorry, it looks like our system encountered an unexpected issue while trying to reply.\n\n[details='Error details']\n%{details}\n[/details]"
263263
default_pm_prefix: "[Untitled AI bot PM]"
264+
thinking: "Thinking..."
264265
personas:
265266
default_llm_required: "Default LLM model is required prior to enabling Chat"
266267
cannot_delete_system_persona: "System personas cannot be deleted, please disable it instead"

lib/ai_bot/bot.rb

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ class Bot
77

88
BOT_NOT_FOUND = Class.new(StandardError)
99
MAX_COMPLETIONS = 5
10-
MAX_TOOLS = 5
10+
# limit is arbitrary, but 5 which was used in the past was too low
11+
MAX_TOOLS = 20
1112

1213
def self.as(bot_user, persona: DiscourseAi::AiBot::Personas::General.new, model: nil)
1314
new(bot_user, persona, model)
@@ -117,6 +118,7 @@ def reply(context, &update_blk)
117118
prompt,
118119
feature_name: "bot",
119120
partial_tool_calls: allow_partial_tool_calls,
121+
output_thinking: true,
120122
**llm_kwargs,
121123
) do |partial, cancel|
122124
tool =
@@ -158,26 +160,68 @@ def reply(context, &update_blk)
158160
if partial.is_a?(DiscourseAi::Completions::ToolCall)
159161
Rails.logger.warn("DiscourseAi: Tool not found: #{partial.name}")
160162
else
161-
update_blk.call(partial, cancel)
163+
if partial.is_a?(DiscourseAi::Completions::Thinking)
164+
if partial.partial? && partial.message.present?
165+
update_blk.call(partial.message, cancel, nil, :thinking)
166+
end
167+
if !partial.partial?
168+
# this will be dealt with later
169+
raw_context << partial
170+
end
171+
else
172+
update_blk.call(partial, cancel)
173+
end
162174
end
163175
end
164176
end
165177

166178
if !tool_found
167179
ongoing_chain = false
168-
raw_context << [result, bot_user.username]
180+
text = result
181+
182+
# we must strip out thinking
183+
if result.is_a?(Array)
184+
text = +""
185+
result.each { |item| text << item if item.is_a?(String) }
186+
end
187+
raw_context << [text, bot_user.username]
169188
end
189+
170190
total_completions += 1
171191

172192
# do not allow tools when we are at the end of a chain (total_completions == MAX_COMPLETIONS)
173193
prompt.tools = [] if total_completions == MAX_COMPLETIONS
174194
end
175195

176-
raw_context
196+
embed_thinking(raw_context)
177197
end
178198

179199
private
180200

201+
# Folds streamed Thinking chunks into the message tuples that follow them.
#
# raw_context is a mixed array of DiscourseAi::Completions::Thinking objects
# and message tuples ([text, username, ...]). Returns a new array containing
# only message tuples; thinking metadata collected from preceding Thinking
# entries is attached at index 4 of the (duplicated) tuple:
#   - redacted chunks contribute :redacted_thinking_signature
#   - normal chunks contribute :thinking and :thinking_signature
#
# NOTE(review): thinking_info is not reset to nil after being attached, so a
# later message tuple in the same context would receive the same thinking
# hash — presumably thinking always immediately precedes a single reply;
# confirm against callers.
def embed_thinking(raw_context)
  embedded_thinking = []
  thinking_info = nil
  raw_context.each do |context|
    if context.is_a?(DiscourseAi::Completions::Thinking)
      thinking_info ||= {}
      if context.redacted
        thinking_info[:redacted_thinking_signature] = context.signature
      else
        thinking_info[:thinking] = context.message
        thinking_info[:thinking_signature] = context.signature
      end
    else
      if thinking_info
        # dup so the original tuple in raw_context is left untouched
        context = context.dup
        context[4] = thinking_info
      end
      embedded_thinking << context
    end
  end

  embedded_thinking
end
224+
181225
def process_tool(tool, raw_context, llm, cancel, update_blk, prompt, context)
182226
tool_call_id = tool.tool_call_id
183227
invocation_result_json = invoke_tool(tool, llm, cancel, context, &update_blk).to_json

lib/ai_bot/playground.rb

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,9 @@ def conversation_context(post)
220220
custom_context[:id] = message[1] if custom_context[:type] != :model
221221
custom_context[:name] = message[3] if message[3]
222222

223+
thinking = message[4]
224+
custom_context[:thinking] = thinking if thinking
225+
223226
builder.push(**custom_context)
224227
end
225228
end
@@ -473,8 +476,20 @@ def reply_to(post, custom_instructions: nil, &blk)
473476

474477
post_streamer = PostStreamer.new(delay: Rails.env.test? ? 0 : 0.5) if stream_reply
475478

479+
started_thinking = false
480+
476481
new_custom_prompts =
477482
bot.reply(context) do |partial, cancel, placeholder, type|
483+
if type == :thinking && !started_thinking
484+
reply << "<details><summary>#{I18n.t("discourse_ai.ai_bot.thinking")}</summary>"
485+
started_thinking = true
486+
end
487+
488+
if type != :thinking && started_thinking
489+
reply << "</details>\n\n"
490+
started_thinking = false
491+
end
492+
478493
reply << partial
479494
raw = reply.dup
480495
raw << "\n\n" << placeholder if placeholder.present?
@@ -527,8 +542,10 @@ def reply_to(post, custom_instructions: nil, &blk)
527542
)
528543
end
529544

530-
# we do not need to add a custom prompt for a single reply
531-
if new_custom_prompts.length > 1
545+
# a bit messy internally, but this is how we tell
546+
is_thinking = new_custom_prompts.any? { |prompt| prompt[4].present? }
547+
548+
if is_thinking || new_custom_prompts.length > 1
532549
reply_post.post_custom_prompt ||= reply_post.build_post_custom_prompt(custom_prompt: [])
533550
prompt = reply_post.post_custom_prompt.custom_prompt || []
534551
prompt.concat(new_custom_prompts)

lib/completions/endpoints/canned_response.rb

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ def perform_completion!(
2929
model_params,
3030
feature_name: nil,
3131
feature_context: nil,
32-
partial_tool_calls: false
32+
partial_tool_calls: false,
33+
output_thinking: false
3334
)
3435
@dialect = dialect
3536
@model_params = model_params
@@ -51,6 +52,8 @@ def perform_completion!(
5152
as_array.each do |response|
5253
if is_tool?(response)
5354
yield(response, cancel_fn)
55+
elsif is_thinking?(response)
56+
yield(response, cancel_fn)
5457
else
5558
response.each_char do |char|
5659
break if cancelled
@@ -70,6 +73,10 @@ def tokenizer
7073

7174
private
7275

76+
# Predicate: is this canned-response chunk a Thinking object (streamed
# reasoning) rather than plain text or a tool call?
def is_thinking?(response)
  response.is_a?(DiscourseAi::Completions::Thinking)
end
79+
7380
# Predicate: is this canned-response chunk a ToolCall object rather than
# plain text (or a Thinking chunk)?
def is_tool?(response)
  response.is_a?(DiscourseAi::Completions::ToolCall)
end

lib/completions/prompt_messages_builder.rb

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ def to_a(limit: nil, style: nil)
102102
end
103103
end
104104

105-
def push(type:, content:, name: nil, upload_ids: nil, id: nil)
105+
def push(type:, content:, name: nil, upload_ids: nil, id: nil, thinking: nil)
106106
if !%i[user model tool tool_call system].include?(type)
107107
raise ArgumentError, "type must be either :user, :model, :tool, :tool_call or :system"
108108
end
@@ -112,6 +112,15 @@ def push(type:, content:, name: nil, upload_ids: nil, id: nil)
112112
message[:name] = name.to_s if name
113113
message[:upload_ids] = upload_ids if upload_ids
114114
message[:id] = id.to_s if id
115+
if thinking
116+
message[:thinking] = thinking["thinking"] if thinking["thinking"]
117+
message[:thinking_signature] = thinking["thinking_signature"] if thinking[
118+
"thinking_signature"
119+
]
120+
message[:redacted_thinking_signature] = thinking[
121+
"redacted_thinking_signature"
122+
] if thinking["redacted_thinking_signature"]
123+
end
115124

116125
@raw_messages << message
117126
end

lib/completions/thinking.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ def initialize(message:, signature: nil, redacted: false, partial: false)
1212
@partial = partial
1313
end
1414

15+
# True when this Thinking chunk is a streaming partial rather than the
# final, complete thinking block. Coerces @partial to a strict boolean.
def partial?
  !!@partial
end
18+
1519
def ==(other)
1620
message == other.message && signature == other.signature && redacted == other.redacted &&
1721
partial == other.partial

spec/lib/modules/ai_bot/playground_spec.rb

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -828,6 +828,59 @@
828828
end
829829

830830
describe "#reply_to" do
831+
# Verifies the full round trip: thinking chunks streamed during one reply are
# rendered into the post, persisted via the custom prompt, and replayed as
# :thinking context in the prompt of the next reply.
it "preserves thinking context between replies" do
  thinking_progress =
    DiscourseAi::Completions::Thinking.new(message: "I should say hello", partial: true)
  thinking =
    DiscourseAi::Completions::Thinking.new(
      message: "I should say hello",
      signature: "thinking-signature-123",
      partial: false,
    )

  thinking_redacted =
    DiscourseAi::Completions::Thinking.new(
      message: nil,
      signature: "thinking-redacted-signature-123",
      partial: false,
      redacted: true,
    )

  first_responses = [[thinking_progress, thinking, thinking_redacted, "Hello Sam"]]

  DiscourseAi::Completions::Llm.with_prepared_responses(first_responses) do
    playground.reply_to(third_post)
  end

  new_post = third_post.topic.reload.posts.order(:post_number).last
  expect(new_post.raw).to include("Hello Sam")
  expect(new_post.raw).to include("I should say hello")

  post = Fabricate(:post, topic: third_post.topic, user: user, raw: "Say Cat")

  prompt_detail = nil
  # Capture the prompt to verify thinking context was included
  DiscourseAi::Completions::Llm.with_prepared_responses(["Cat"]) do |_, _, prompts|
    playground.reply_to(post)
    prompt_detail = prompts.first
  end

  last_messages = prompt_detail.messages.last(2)

  expect(last_messages).to eq(
    [
      {
        type: :model,
        content: "Hello Sam",
        thinking: "I should say hello",
        thinking_signature: "thinking-signature-123",
        redacted_thinking_signature: "thinking-redacted-signature-123",
      },
      { type: :user, content: "Say Cat", id: "bruce1" },
    ],
  )
end
883+
831884
it "streams the bot reply through MB and create a new post in the PM with a cooked responses" do
832885
expected_bot_response =
833886
"Hello this is a bot and what you just said is an interesting question"

0 commit comments

Comments (0)