discourse · SamSaffron · Jun 3, 2025 · Jun 2, 2025 · Jun 2, 2025 · Jun 2, 2025
diff --git a/app/jobs/regular/create_ai_reply.rb b/app/jobs/regular/create_ai_reply.rb
@@ -15,7 +15,7 @@ def execute(args)
 
         bot = DiscourseAi::Personas::Bot.as(bot_user, persona: persona.new)
 
-        DiscourseAi::AiBot::Playground.new(bot).reply_to(post)
+        DiscourseAi::AiBot::Playground.new(bot).reply_to(post, feature_name: "bot")
       rescue DiscourseAi::Personas::Bot::BOT_NOT_FOUND
         Rails.logger.warn(
           "Bot not found for post #{post.id} - perhaps persona was deleted or bot was disabled",

diff --git a/lib/personas/forum_researcher.rb b/lib/personas/forum_researcher.rb
@@ -13,43 +13,45 @@ def tools
 
       def system_prompt
         <<~PROMPT
-            You are a helpful Discourse assistant specializing in forum research.
-            You _understand_ and **generate** Discourse Markdown.
-
-            You live in the forum with the URL: {site_url}
-            The title of your site: {site_title}
-            The description is: {site_description}
-            The participants in this conversation are: {participants}
-            The date now is: {time}, much has changed since you were trained.
-            Topic URLs are formatted as: /t/-/TOPIC_ID
-            Post URLs are formatted as: /t/-/TOPIC_ID/POST_NUMBER
-
-            As a forum researcher, guide users through a structured research process:
-            1. UNDERSTAND: First clarify the user's research goal - what insights are they seeking?
-            2. PLAN: Design an appropriate research approach with specific filters
-            3. TEST: Always begin with dry_run:true to gauge the scope of results
-            4. REFINE: If results are too broad/narrow, suggest filter adjustments
-            5. EXECUTE: Run the final analysis only when filters are well-tuned
-            6. SUMMARIZE: Present findings with links to supporting evidence
-
-            BE MINDFUL: specify all research goals in one request to avoid multiple processing runs.
-
-            REMEMBER: Different filters serve different purposes:
-            - Use post date filters (after/before) for analyzing specific posts
-            - Use topic date filters (topic_after/topic_before) for analyzing entire topics
-            - Combine user/group filters with categories/tags to find specialized contributions
-
-            Always ground your analysis with links to original posts on the forum.
-
-            Research workflow best practices:
-            1. Start with a dry_run to gauge the scope (set dry_run:true)
-            2. For temporal analysis, specify explicit date ranges
-            3. For user behavior analysis, combine @username with categories or tags
-
-            - When formatting research results, format backing links clearly:
-               - When it is a good fit, link to the topic with descriptive text.
-               - When it is a good fit, link using markdown footnotes.
-          PROMPT
+          You are a helpful Discourse assistant specializing in forum research.
+          You _understand_ and **generate** Discourse Markdown.
+
+          You live in the forum with the URL: {site_url}
+          The title of your site: {site_title}
+          The description is: {site_description}
+          The participants in this conversation are: {participants}
+          The date now is: {time}, much has changed since you were trained.
+          Topic URLs are formatted as: /t/-/TOPIC_ID
+          Post URLs are formatted as: /t/-/TOPIC_ID/POST_NUMBER
+
+          CRITICAL: Research is extremely expensive. You MUST gather ALL research goals upfront and execute them in a SINGLE request. Never run multiple research operations.
+
+          As a forum researcher, follow this structured process:
+          1. UNDERSTAND: Clarify ALL research goals - what insights are they seeking?
+          2. PLAN: Design ONE comprehensive research approach covering all objectives
+          3. TEST: Always begin with dry_run:true to gauge the scope of results
+          4. REFINE: If results are too broad/narrow, suggest filter adjustments (but don't re-run yet)
+          5. EXECUTE: Run the final analysis ONCE when filters are well-tuned for all goals
+          6. SUMMARIZE: Present findings with links to supporting evidence
+
+          Before any research, ask users to specify:
+          - ALL research questions they want answered
+          - Time periods of interest
+          - Specific users, categories, or tags to focus on
+          - Expected scope (broad overview vs. deep dive)
+
+          Research filter guidelines:
+          - Use post date filters (after/before) for analyzing specific posts
+          - Use topic date filters (topic_after/topic_before) for analyzing entire topics
+          - Combine user/group filters with categories/tags to find specialized contributions
+
+          When formatting results:
+          - Link to topics with descriptive text when relevant
+          - Use markdown footnotes for supporting evidence
+          - Always ground analysis with links to original forum posts
+
+          Remember: ONE research request should answer ALL questions. Plan comprehensively before executing.
+        PROMPT
       end
     end
   end

diff --git a/lib/personas/tool_runner.rb b/lib/personas/tool_runner.rb
@@ -13,6 +13,9 @@ class ToolRunner
       MARSHAL_STACK_DEPTH = 20
       MAX_HTTP_REQUESTS = 20
 
+      MAX_SLEEP_CALLS = 30 # max sleep() calls a single ToolRunner instance may make
+      MAX_SLEEP_DURATION_MS = 60_000 # longest single sleep() allowed, in milliseconds (1 minute)
+
       def initialize(parameters:, llm:, bot_user:, context: nil, tool:, timeout: nil)
         if context && !context.is_a?(DiscourseAi::Personas::BotContext)
           raise ArgumentError, "context must be a BotContext object"
@@ -28,6 +31,7 @@ def initialize(parameters:, llm:, bot_user:, context: nil, tool:, timeout: nil)
         @timeout = timeout || DEFAULT_TIMEOUT
         @running_attached_function = false
 
+        @sleep_calls_made = 0
         @http_requests_made = 0
       end
 
@@ -44,6 +48,7 @@ def mini_racer_context
             attach_index(ctx)
             attach_upload(ctx)
             attach_chain(ctx)
+            attach_sleep(ctx)
             attach_discourse(ctx)
             ctx.eval(framework_script)
             ctx
@@ -73,6 +78,9 @@ def framework_script
         const upload = {
           create: _upload_create,
           getUrl: _upload_get_url,
+          getBase64: function(id, maxPixels) {
+            return _upload_get_base64(id, maxPixels);
+          }
         }
 
         const chain = {
@@ -310,6 +318,33 @@ def attach_chain(mini_racer_context)
         mini_racer_context.attach("_chain_set_custom_raw", ->(raw) { self.custom_raw = raw })
       end
 
+      # Exposes sleep(duration_ms) to the JS context so tools can poll APIs; call count and duration are capped.
+      def attach_sleep(mini_racer_context)
+        mini_racer_context.attach(
+          "sleep",
+          ->(duration_ms) do
+            @sleep_calls_made += 1 # incremented before validation, so rejected calls count too
+            if @sleep_calls_made > MAX_SLEEP_CALLS
+              raise TooManyRequestsError.new("Tool made too many sleep calls")
+            end
+
+            duration_ms = duration_ms.to_i # whole milliseconds; nil becomes 0 and is rejected below
+            if duration_ms > MAX_SLEEP_DURATION_MS
+              raise ArgumentError.new(
+                      "Sleep duration cannot exceed #{MAX_SLEEP_DURATION_MS}ms (1 minute)",
+                    )
+            end
+
+            raise ArgumentError.new("Sleep duration must be positive") if duration_ms <= 0
+
+            in_attached_function do
+              sleep(duration_ms / 1000.0) # Kernel#sleep expects seconds
+              { slept: duration_ms }
+            end
+          end,
+        )
+      end
+
       def attach_discourse(mini_racer_context)
         mini_racer_context.attach(
           "_discourse_get_post",
@@ -571,6 +606,42 @@ def attach_discourse(mini_racer_context)
       end
 
       def attach_upload(mini_racer_context)
+        mini_racer_context.attach(
+          "_upload_get_base64",
+          ->(upload_id_or_url, max_pixels) do
+            in_attached_function do
+              return nil if upload_id_or_url.blank?
+
+              upload = nil
+
+              # Handle both upload ID and short URL
+              if upload_id_or_url.to_s.start_with?("upload://")
+                # Handle short URL format
+                sha1 = Upload.sha1_from_short_url(upload_id_or_url)
+                return nil if sha1.blank?
+                upload = Upload.find_by(sha1: sha1)
+              else
+                # Handle numeric ID
+                upload_id = upload_id_or_url.to_i
+                return nil if upload_id <= 0
+                upload = Upload.find_by(id: upload_id)
+              end
+
+              return nil if upload.nil?
+
+              max_pixels = max_pixels&.to_i
+              max_pixels = nil if max_pixels && max_pixels <= 0
+
+              encoded_uploads =
+                DiscourseAi::Completions::UploadEncoder.encode(
+                  upload_ids: [upload.id],
+                  max_pixels: max_pixels || 10_000_000, # Default to 10M pixels if not specified
+                )
+
+              encoded_uploads.first&.dig(:base64)
+            end
+          end,
+        )
         mini_racer_context.attach(
           "_upload_get_url",
           ->(short_url) do
@@ -629,13 +700,18 @@ def attach_http(mini_racer_context)
 
               in_attached_function do
                 headers = (options && options["headers"]) || {}
+                base64_encode = options && options["base64Encode"]
 
                 result = {}
                 DiscourseAi::Personas::Tools::Tool.send_http_request(
                   url,
                   headers: headers,
                 ) do |response|
-                  result[:body] = response.body
+                  if base64_encode
+                    result[:body] = Base64.strict_encode64(response.body)
+                  else
+                    result[:body] = response.body
+                  end
                   result[:status] = response.code.to_i
                 end
 
@@ -658,6 +734,7 @@ def attach_http(mini_racer_context)
                 in_attached_function do
                   headers = (options && options["headers"]) || {}
                   body = options && options["body"]
+                  base64_encode = options && options["base64Encode"]
 
                   result = {}
                   DiscourseAi::Personas::Tools::Tool.send_http_request(
@@ -666,7 +743,11 @@ def attach_http(mini_racer_context)
                     headers: headers,
                     body: body,
                   ) do |response|
-                    result[:body] = response.body
+                    if base64_encode
+                      result[:body] = Base64.strict_encode64(response.body)
+                    else
+                      result[:body] = response.body
+                    end
                     result[:status] = response.code.to_i
                   end
 

diff --git a/lib/personas/tools/researcher.rb b/lib/personas/tools/researcher.rb
@@ -33,19 +33,24 @@ def filter_description
             <<~TEXT
               Filter string to target specific content.
               - Supports user (@username)
+              - post_type:first - only includes first posts in topics
+              - post_type:reply - only replies in topics
               - date ranges (after:YYYY-MM-DD, before:YYYY-MM-DD for posts; topic_after:YYYY-MM-DD, topic_before:YYYY-MM-DD for topics)
-              - categories (category:category1,category2)
-              - tags (tag:tag1,tag2)
-              - groups (group:group1,group2).
+              - categories (category:category1,category2 or categories:category1,category2)
+              - tags (tag:tag1,tag2 or tags:tag1,tag2)
+              - groups (group:group1,group2 or groups:group1,group2)
               - status (status:open, status:closed, status:archived, status:noreplies, status:single_user)
-              - keywords (keywords:keyword1,keyword2) - specific words to search for in posts
-              - max_results (max_results:10) the maximum number of results to return (optional)
-              - order (order:latest, order:oldest, order:latest_topic, order:oldest_topic) - the order of the results (optional)
-              - topic (topic:topic_id1,topic_id2) - add specific topics to the filter, topics will unconditionally be included
+              - keywords (keywords:keyword1,keyword2) - searches for specific words within post content using full-text search
+              - topic_keywords (topic_keywords:keyword1,keyword2) - searches for keywords within topics, returns all posts from matching topics
+              - topics (topic:topic_id1,topic_id2 or topics:topic_id1,topic_id2) - target specific topics by ID
+              - max_results (max_results:10) - limits the maximum number of results returned (optional)
+              - order (order:latest, order:oldest, order:latest_topic, order:oldest_topic, order:likes) - controls result ordering (optional, defaults to latest posts)
 
-              If multiple tags or categories are specified, they are treated as OR conditions.
+              Multiple filters can be combined with spaces for AND logic. Example: '@sam after:2023-01-01 tag:feature'
 
-              Multiple filters can be combined with spaces. Example: '@sam after:2023-01-01 tag:feature'
+              Use OR to combine filter segments for inclusive logic.
+              Example: 'category:feature,bug OR tag:feature-tag' - includes posts in feature OR bug categories, OR posts with feature-tag tag
+              Example: '@sam category:bug' - includes posts by @sam AND in bug category
             TEXT
           end
 
@@ -145,10 +150,23 @@ def process_filter(filter, goals, post, &blk)
           results = []
 
           formatter.each_chunk { |chunk| results << run_inference(chunk[:text], goals, post, &blk) }
-          { dry_run: false, goals: goals, filter: @filter, results: results }
+
+          if context.cancel_manager&.cancelled?
+            {
+              dry_run: false,
+              goals: goals,
+              filter: @filter,
+              results: "Cancelled by user",
+              cancelled_by_user: true,
+            }
+          else
+            { dry_run: false, goals: goals, filter: @filter, results: results }
+          end
         end
 
         def run_inference(chunk_text, goals, post, &blk)
+          return if context.cancel_manager&.cancelled?
+
           system_prompt = goal_system_prompt(goals)
           user_prompt = goal_user_prompt(goals, chunk_text)