From 5f451a64866280c66d9ddd4cc66e78709b16c9fd Mon Sep 17 00:00:00 2001 From: Sam Saffron Date: Mon, 2 Jun 2025 14:30:45 +1000 Subject: [PATCH 01/10] FIX: pass bot feature name when responding as bot --- app/jobs/regular/create_ai_reply.rb | 2 +- spec/lib/modules/ai_bot/playground_spec.rb | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/app/jobs/regular/create_ai_reply.rb b/app/jobs/regular/create_ai_reply.rb index e8a0f0ea9..14653a615 100644 --- a/app/jobs/regular/create_ai_reply.rb +++ b/app/jobs/regular/create_ai_reply.rb @@ -15,7 +15,7 @@ def execute(args) bot = DiscourseAi::Personas::Bot.as(bot_user, persona: persona.new) - DiscourseAi::AiBot::Playground.new(bot).reply_to(post) + DiscourseAi::AiBot::Playground.new(bot).reply_to(post, feature_name: "bot") rescue DiscourseAi::Personas::Bot::BOT_NOT_FOUND Rails.logger.warn( "Bot not found for post #{post.id} - perhaps persona was deleted or bot was disabled", diff --git a/spec/lib/modules/ai_bot/playground_spec.rb b/spec/lib/modules/ai_bot/playground_spec.rb index e9ad0ac35..22a6996f4 100644 --- a/spec/lib/modules/ai_bot/playground_spec.rb +++ b/spec/lib/modules/ai_bot/playground_spec.rb @@ -176,7 +176,7 @@ reply_post = nil - DiscourseAi::Completions::Llm.with_prepared_responses(responses) do |_, _, _prompt| + DiscourseAi::Completions::Llm.with_prepared_responses(responses) do new_post = Fabricate(:post, raw: "Can you use the custom tool?") reply_post = playground.reply_to(new_post) end @@ -255,14 +255,18 @@ body = "Hey @#{persona.user.username}, can you help me with this image? #{image}" prompts = nil + options = nil DiscourseAi::Completions::Llm.with_prepared_responses( ["I understood image"], - ) do |_, _, inner_prompts| + ) do |_, _, inner_prompts, inner_options| + options = inner_options post = create_post(title: "some new topic I created", raw: body) prompts = inner_prompts end + expect(options[0][:feature_name]).to eq("bot") + content = prompts[0].messages[1][:content] expect(content).to include({ upload_id: upload.id }) From ae938f51cf51e232c72504e1086b5a22d7d49012 Mon Sep 17 00:00:00 2001 From: Sam Saffron Date: Mon, 2 Jun 2025 14:38:03 +1000 Subject: [PATCH 02/10] FEATURE: allow tools to sleep (useful for when we are polling) --- lib/personas/tool_runner.rb | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/lib/personas/tool_runner.rb b/lib/personas/tool_runner.rb index 309f6d252..7a1600beb 100644 --- a/lib/personas/tool_runner.rb +++ b/lib/personas/tool_runner.rb @@ -13,6 +13,9 @@ class ToolRunner MARSHAL_STACK_DEPTH = 20 MAX_HTTP_REQUESTS = 20 + MAX_SLEEP_CALLS = 30 + MAX_SLEEP_DURATION_MS = 60_000 + def initialize(parameters:, llm:, bot_user:, context: nil, tool:, timeout: nil) if context && !context.is_a?(DiscourseAi::Personas::BotContext) raise ArgumentError, "context must be a BotContext object" @@ -29,6 +32,8 @@ def initialize(parameters:, llm:, bot_user:, context: nil, tool:, timeout: nil) @running_attached_function = false @http_requests_made = 0 + @sleep_calls_made = 0 + @http_requests_made = 0 end def mini_racer_context @@ -44,6 +49,7 @@ def mini_racer_context attach_index(ctx) attach_upload(ctx) attach_chain(ctx) + attach_sleep(ctx) attach_discourse(ctx) ctx.eval(framework_script) ctx @@ -310,6 +316,33 @@ def attach_chain(mini_racer_context) mini_racer_context.attach("_chain_set_custom_raw", ->(raw) { self.custom_raw = raw }) end + # this is useful for polling apis + def attach_sleep(mini_racer_context) + mini_racer_context.attach( + "sleep", + ->(duration_ms) do + @sleep_calls_made += 1 + if @sleep_calls_made > MAX_SLEEP_CALLS + raise TooManyRequestsError.new("Tool made too many sleep calls") + end + + duration_ms = duration_ms.to_i + if duration_ms > MAX_SLEEP_DURATION_MS + raise ArgumentError.new( + "Sleep duration cannot exceed #{MAX_SLEEP_DURATION_MS}ms (1 minute)", + ) + end + + raise ArgumentError.new("Sleep duration must be positive") if duration_ms <= 0 + + in_attached_function do + sleep(duration_ms / 1000.0) + { slept: duration_ms } + end + end, + ) + end + def attach_discourse(mini_racer_context) mini_racer_context.attach( "_discourse_get_post", From b5f24a790510299b9ee1b572bc7f5c0937bf3c66 Mon Sep 17 00:00:00 2001 From: Sam Saffron Date: Mon, 2 Jun 2025 14:46:48 +1000 Subject: [PATCH 03/10] add a test --- spec/models/ai_tool_spec.rb | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/spec/models/ai_tool_spec.rb b/spec/models/ai_tool_spec.rb index 4e6ba5a03..6bef15d8f 100644 --- a/spec/models/ai_tool_spec.rb +++ b/spec/models/ai_tool_spec.rb @@ -676,6 +676,26 @@ def stub_embeddings end end + it "can use sleep function with limits" do + script = <<~JS + function invoke(params) { + let results = []; + for (let i = 0; i < 3; i++) { + let result = sleep(1); // 1ms sleep + results.push(result); + } + return results; + } + JS + + tool = create_tool(script: script) + runner = tool.runner({}, llm: nil, bot_user: nil) + + result = runner.invoke + + expect(result).to eq([{ "slept" => 1 }, { "slept" => 1 }, { "slept" => 1 }]) + end + describe "upload URL resolution" do it "can resolve upload short URLs to public URLs" do upload = From 39a0ef7717091e6ec62e923bfa60ad9529a37e2d Mon Sep 17 00:00:00 2001 From: Sam Saffron Date: Mon, 2 Jun 2025 15:02:45 +1000 Subject: [PATCH 04/10] prompt engineering and faster exit when research is cancelled --- lib/personas/forum_researcher.rb | 76 ++++++++++++++++---------------- lib/personas/tools/researcher.rb | 15 ++++++- 2 files changed, 53 insertions(+), 38 deletions(-) diff --git a/lib/personas/forum_researcher.rb b/lib/personas/forum_researcher.rb index a94d9e46e..3381b8313 100644 --- a/lib/personas/forum_researcher.rb +++ b/lib/personas/forum_researcher.rb @@ -13,43 +13,45 @@ def tools def system_prompt <<~PROMPT - You are a helpful Discourse assistant specializing in forum research. - You _understand_ and **generate** Discourse Markdown. - - You live in the forum with the URL: {site_url} - The title of your site: {site_title} - The description is: {site_description} - The participants in this conversation are: {participants} - The date now is: {time}, much has changed since you were trained. - Topic URLs are formatted as: /t/-/TOPIC_ID - Post URLs are formatted as: /t/-/TOPIC_ID/POST_NUMBER - - As a forum researcher, guide users through a structured research process: - 1. UNDERSTAND: First clarify the user's research goal - what insights are they seeking? - 2. PLAN: Design an appropriate research approach with specific filters - 3. TEST: Always begin with dry_run:true to gauge the scope of results - 4. REFINE: If results are too broad/narrow, suggest filter adjustments - 5. EXECUTE: Run the final analysis only when filters are well-tuned - 6. SUMMARIZE: Present findings with links to supporting evidence - - BE MINDFUL: specify all research goals in one request to avoid multiple processing runs. - - REMEMBER: Different filters serve different purposes: - - Use post date filters (after/before) for analyzing specific posts - - Use topic date filters (topic_after/topic_before) for analyzing entire topics - - Combine user/group filters with categories/tags to find specialized contributions - - Always ground your analysis with links to original posts on the forum. - - Research workflow best practices: - 1. Start with a dry_run to gauge the scope (set dry_run:true) - 2. For temporal analysis, specify explicit date ranges - 3. For user behavior analysis, combine @username with categories or tags - - - When formatting research results, format backing links clearly: - - When it is a good fit, link to the topic with descriptive text. - - When it is a good fit, link using markdown footnotes. - PROMPT + You are a helpful Discourse assistant specializing in forum research. + You _understand_ and **generate** Discourse Markdown. + + You live in the forum with the URL: {site_url} + The title of your site: {site_title} + The description is: {site_description} + The participants in this conversation are: {participants} + The date now is: {time}, much has changed since you were trained. + Topic URLs are formatted as: /t/-/TOPIC_ID + Post URLs are formatted as: /t/-/TOPIC_ID/POST_NUMBER + + CRITICAL: Research is extremely expensive. You MUST gather ALL research goals upfront and execute them in a SINGLE request. Never run multiple research operations. + + As a forum researcher, follow this structured process: + 1. UNDERSTAND: Clarify ALL research goals - what insights are they seeking? + 2. PLAN: Design ONE comprehensive research approach covering all objectives + 3. TEST: Always begin with dry_run:true to gauge the scope of results + 4. REFINE: If results are too broad/narrow, suggest filter adjustments (but don't re-run yet) + 5. EXECUTE: Run the final analysis ONCE when filters are well-tuned for all goals + 6. SUMMARIZE: Present findings with links to supporting evidence + + Before any research, ask users to specify: + - ALL research questions they want answered + - Time periods of interest + - Specific users, categories, or tags to focus on + - Expected scope (broad overview vs. deep dive) + + Research filter guidelines: + - Use post date filters (after/before) for analyzing specific posts + - Use topic date filters (topic_after/topic_before) for analyzing entire topics + - Combine user/group filters with categories/tags to find specialized contributions + + When formatting results: + - Link to topics with descriptive text when relevant + - Use markdown footnotes for supporting evidence + - Always ground analysis with links to original forum posts + + Remember: ONE research request should answer ALL questions. Plan comprehensively before executing. + PROMPT end end end diff --git a/lib/personas/tools/researcher.rb b/lib/personas/tools/researcher.rb index d8221a5e2..f32456724 100644 --- a/lib/personas/tools/researcher.rb +++ b/lib/personas/tools/researcher.rb @@ -145,10 +145,23 @@ def process_filter(filter, goals, post, &blk) results = [] formatter.each_chunk { |chunk| results << run_inference(chunk[:text], goals, post, &blk) } - { dry_run: false, goals: goals, filter: @filter, results: results } + + if this.context.cancel_manager&.cancelled? + { + dry_run: false, + goals: goals, + filter: @filter, + results: "Cancelled by user", + cancelled_by_user: true, + } + else + { dry_run: false, goals: goals, filter: @filter, results: results } + end end def run_inference(chunk_text, goals, post, &blk) + return if context.cancel_manager&.cancelled? + system_prompt = goal_system_prompt(goals) user_prompt = goal_user_prompt(goals, chunk_text) From d22d5ad8202cb421afd0139e6c39288b335d882a Mon Sep 17 00:00:00 2001 From: Sam Saffron Date: Mon, 2 Jun 2025 15:41:20 +1000 Subject: [PATCH 05/10] Add support for getting base64 encoded uploads --- lib/personas/tool_runner.rb | 39 ++++++++++++++++++++++++ spec/models/ai_tool_spec.rb | 61 +++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) diff --git a/lib/personas/tool_runner.rb b/lib/personas/tool_runner.rb index 7a1600beb..f11ad91ad 100644 --- a/lib/personas/tool_runner.rb +++ b/lib/personas/tool_runner.rb @@ -79,6 +79,9 @@ def framework_script const upload = { create: _upload_create, getUrl: _upload_get_url, + getBase64: function(id, maxPixels) { + return _upload_get_base64(id, maxPixels); + } } const chain = { @@ -604,6 +607,42 @@ def attach_discourse(mini_racer_context) end def attach_upload(mini_racer_context) + mini_racer_context.attach( + "_upload_get_base64", + ->(upload_id_or_url, max_pixels) do + in_attached_function do + return nil if upload_id_or_url.blank? + + upload = nil + + # Handle both upload ID and short URL + if upload_id_or_url.to_s.start_with?("upload://") + # Handle short URL format + sha1 = Upload.sha1_from_short_url(upload_id_or_url) + return nil if sha1.blank? + upload = Upload.find_by(sha1: sha1) + else + # Handle numeric ID + upload_id = upload_id_or_url.to_i + return nil if upload_id <= 0 + upload = Upload.find_by(id: upload_id) + end + + return nil if upload.nil? + + max_pixels = max_pixels&.to_i + max_pixels = nil if max_pixels && max_pixels <= 0 + + encoded_uploads = + DiscourseAi::Completions::UploadEncoder.encode( + upload_ids: [upload.id], + max_pixels: max_pixels || 10_000_000, # Default to 10M pixels if not specified + ) + + encoded_uploads.first&.dig(:base64) + end + end, + ) mini_racer_context.attach( "_upload_get_url", ->(short_url) do diff --git a/spec/models/ai_tool_spec.rb b/spec/models/ai_tool_spec.rb index 6bef15d8f..ccfa5ea76 100644 --- a/spec/models/ai_tool_spec.rb +++ b/spec/models/ai_tool_spec.rb @@ -696,6 +696,67 @@ def stub_embeddings expect(result).to eq([{ "slept" => 1 }, { "slept" => 1 }, { "slept" => 1 }]) end + let(:jpg) { plugin_file_from_fixtures("1x1.jpg") } + + describe "upload base64 encoding" do + it "can get base64 data from upload ID and short URL" do + upload = UploadCreator.new(jpg, "1x1.jpg").create_for(Discourse.system_user.id) + + # Test with upload ID + script_id = <<~JS + function invoke(params) { + return upload.getBase64(params.upload_id, params.max_pixels); + } + JS + + tool = create_tool(script: script_id) + runner = + tool.runner( + { "upload_id" => upload.id, "max_pixels" => 1_000_000 }, + llm: nil, + bot_user: nil, + ) + result_id = runner.invoke + + expect(result_id).to be_present + expect(result_id).to be_a(String) + expect(result_id.length).to be > 0 + + # Test with short URL + script_url = <<~JS + function invoke(params) { + return upload.getBase64(params.short_url, params.max_pixels); + } + JS + + tool = create_tool(script: script_url) + runner = + tool.runner( + { "short_url" => upload.short_url, "max_pixels" => 1_000_000 }, + llm: nil, + bot_user: nil, + ) + result_url = runner.invoke + + expect(result_url).to be_present + expect(result_url).to be_a(String) + expect(result_url).to eq(result_id) # Should return same base64 data + + # Test with invalid upload ID + script_invalid = <<~JS + function invoke(params) { + return upload.getBase64(99999); + } + JS + + tool = create_tool(script: script_invalid) + runner = tool.runner({}, llm: nil, bot_user: nil) + result_invalid = runner.invoke + + expect(result_invalid).to be_nil + end + end + describe "upload URL resolution" do it "can resolve upload short URLs to public URLs" do upload = From 21a814b334a28ac3e20ca82988f8b9527a061026 Mon Sep 17 00:00:00 2001 From: Sam Saffron Date: Mon, 2 Jun 2025 16:27:36 +1000 Subject: [PATCH 06/10] FEATURE: allow base 64 encoding results from an http call --- lib/personas/tool_runner.rb | 7 ++++++- spec/models/ai_tool_spec.rb | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/lib/personas/tool_runner.rb b/lib/personas/tool_runner.rb index f11ad91ad..ee2151f6b 100644 --- a/lib/personas/tool_runner.rb +++ b/lib/personas/tool_runner.rb @@ -730,6 +730,7 @@ def attach_http(mini_racer_context) in_attached_function do headers = (options && options["headers"]) || {} body = options && options["body"] + base64_encode = options && options["base64Encode"] result = {} DiscourseAi::Personas::Tools::Tool.send_http_request( @@ -738,7 +739,11 @@ def attach_http(mini_racer_context) headers: headers, body: body, ) do |response| - result[:body] = response.body + if base64_encode + result[:body] = Base64.strict_encode64(response.body) + else + result[:body] = response.body + end result[:status] = response.code.to_i end diff --git a/spec/models/ai_tool_spec.rb b/spec/models/ai_tool_spec.rb index ccfa5ea76..f6163d846 100644 --- a/spec/models/ai_tool_spec.rb +++ b/spec/models/ai_tool_spec.rb @@ -43,6 +43,38 @@ def create_tool( expect(runner.invoke).to eq("query" => "test") end + it "can base64 encode binary HTTP responses" do + # Create binary data with all possible byte values (0-255) + binary_data = (0..255).map(&:chr).join + expected_base64 = Base64.strict_encode64(binary_data) + + script = <<~JS + function invoke(params) { + const result = http.post("https://example.com/binary", { + body: "test", + base64Encode: true + }); + return result.body; + } + JS + + tool = create_tool(script: script) + runner = tool.runner({}, llm: nil, bot_user: nil) + + stub_request(:post, "https://example.com/binary").to_return( + status: 200, + body: binary_data, + headers: { + }, + ) + + result = runner.invoke + + expect(result).to eq(expected_base64) + # Verify we can decode back to original binary data + expect(Base64.strict_decode64(result).bytes).to eq((0..255).to_a) + end + it "can perform HTTP requests with various verbs" do %i[post put delete patch].each do |verb| script = <<~JS From 86f665da5e99461a0fd66d164de0747687467277 Mon Sep 17 00:00:00 2001 From: Sam Saffron Date: Mon, 2 Jun 2025 16:48:28 +1000 Subject: [PATCH 07/10] also implement encoding for get --- lib/personas/tool_runner.rb | 7 ++++++- spec/models/ai_tool_spec.rb | 31 +++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/lib/personas/tool_runner.rb b/lib/personas/tool_runner.rb index ee2151f6b..8b9ae8cc9 100644 --- a/lib/personas/tool_runner.rb +++ b/lib/personas/tool_runner.rb @@ -701,13 +701,18 @@ def attach_http(mini_racer_context) in_attached_function do headers = (options && options["headers"]) || {} + base64_encode = options && options["base64Encode"] result = {} DiscourseAi::Personas::Tools::Tool.send_http_request( url, headers: headers, ) do |response| - result[:body] = response.body + if base64_encode + result[:body] = Base64.strict_encode64(response.body) + else + result[:body] = response.body + end result[:status] = response.code.to_i end diff --git a/spec/models/ai_tool_spec.rb b/spec/models/ai_tool_spec.rb index f6163d846..8d9f5eb16 100644 --- a/spec/models/ai_tool_spec.rb +++ b/spec/models/ai_tool_spec.rb @@ -75,6 +75,37 @@ def create_tool( expect(Base64.strict_decode64(result).bytes).to eq((0..255).to_a) end + it "can base64 encode binary GET responses" do + # Create binary data with all possible byte values (0-255) + binary_data = (0..255).map(&:chr).join + expected_base64 = Base64.strict_encode64(binary_data) + + script = <<~JS + function invoke(params) { + const result = http.get("https://example.com/binary", { + base64Encode: true + }); + return result.body; + } + JS + + tool = create_tool(script: script) + runner = tool.runner({}, llm: nil, bot_user: nil) + + stub_request(:get, "https://example.com/binary").to_return( + status: 200, + body: binary_data, + headers: { + }, + ) + + result = runner.invoke + + expect(result).to eq(expected_base64) + # Verify we can decode back to original binary data + expect(Base64.strict_decode64(result).bytes).to eq((0..255).to_a) + end + it "can perform HTTP requests with various verbs" do %i[post put delete patch].each do |verb| script = <<~JS From 841b0dd246457ad40c9ccd73a0f2e793b995130d Mon Sep 17 00:00:00 2001 From: Sam Saffron Date: Mon, 2 Jun 2025 17:35:47 +1000 Subject: [PATCH 08/10] fix regression --- lib/personas/tools/researcher.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/personas/tools/researcher.rb b/lib/personas/tools/researcher.rb index f32456724..3524b0190 100644 --- a/lib/personas/tools/researcher.rb +++ b/lib/personas/tools/researcher.rb @@ -146,7 +146,7 @@ def process_filter(filter, goals, post, &blk) formatter.each_chunk { |chunk| results << run_inference(chunk[:text], goals, post, &blk) } - if this.context.cancel_manager&.cancelled? + if context.cancel_manager&.cancelled? { dry_run: false, goals: goals, From 7b4685df1e8d8f07109933b0f5b791ec461e5e20 Mon Sep 17 00:00:00 2001 From: Sam Saffron Date: Tue, 3 Jun 2025 14:11:27 +1000 Subject: [PATCH 09/10] dupe line --- lib/personas/tool_runner.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/personas/tool_runner.rb b/lib/personas/tool_runner.rb index 8b9ae8cc9..85002e306 100644 --- a/lib/personas/tool_runner.rb +++ b/lib/personas/tool_runner.rb @@ -31,7 +31,6 @@ def initialize(parameters:, llm:, bot_user:, context: nil, tool:, timeout: nil) @timeout = timeout || DEFAULT_TIMEOUT @running_attached_function = false - @http_requests_made = 0 @sleep_calls_made = 0 @http_requests_made = 0 end From a439a48a879ffa2bffa916026adb2d948fdf649e Mon Sep 17 00:00:00 2001 From: Sam Saffron Date: Tue, 3 Jun 2025 15:03:47 +1000 Subject: [PATCH 10/10] improve filter logic and feature set --- lib/personas/tools/researcher.rb | 23 ++-- lib/utils/research/filter.rb | 158 +++++++++++++++++-------- spec/lib/utils/research/filter_spec.rb | 51 +++++++- 3 files changed, 173 insertions(+), 59 deletions(-) diff --git a/lib/personas/tools/researcher.rb b/lib/personas/tools/researcher.rb index 3524b0190..0f612b81d 100644 --- a/lib/personas/tools/researcher.rb +++ b/lib/personas/tools/researcher.rb @@ -33,19 +33,24 @@ def filter_description <<~TEXT Filter string to target specific content. - Supports user (@username) + - post_type:first - only includes first posts in topics + - post_type:reply - only replies in topics - date ranges (after:YYYY-MM-DD, before:YYYY-MM-DD for posts; topic_after:YYYY-MM-DD, topic_before:YYYY-MM-DD for topics) - - categories (category:category1,category2) - - tags (tag:tag1,tag2) - - groups (group:group1,group2). + - categories (category:category1,category2 or categories:category1,category2) + - tags (tag:tag1,tag2 or tags:tag1,tag2) + - groups (group:group1,group2 or groups:group1,group2) - status (status:open, status:closed, status:archived, status:noreplies, status:single_user) - - keywords (keywords:keyword1,keyword2) - specific words to search for in posts - - max_results (max_results:10) the maximum number of results to return (optional) - - order (order:latest, order:oldest, order:latest_topic, order:oldest_topic) - the order of the results (optional) - - topic (topic:topic_id1,topic_id2) - add specific topics to the filter, topics will unconditionally be included + - keywords (keywords:keyword1,keyword2) - searches for specific words within post content using full-text search + - topic_keywords (topic_keywords:keyword1,keyword2) - searches for keywords within topics, returns all posts from matching topics + - topics (topic:topic_id1,topic_id2 or topics:topic_id1,topic_id2) - target specific topics by ID + - max_results (max_results:10) - limits the maximum number of results returned (optional) + - order (order:latest, order:oldest, order:latest_topic, order:oldest_topic, order:likes) - controls result ordering (optional, defaults to latest posts) - If multiple tags or categories are specified, they are treated as OR conditions. + Multiple filters can be combined with spaces for AND logic. Example: '@sam after:2023-01-01 tag:feature' - Multiple filters can be combined with spaces. Example: '@sam after:2023-01-01 tag:feature' + Use OR to combine filter segments for inclusive logic. + Example: 'category:feature,bug OR tag:feature-tag' - includes posts in feature OR bug categories, OR posts with feature-tag tag + Example: '@sam category:bug' - includes posts by @sam AND in bug category TEXT end diff --git a/lib/utils/research/filter.rb b/lib/utils/research/filter.rb index fe0e8033d..2d9934783 100644 --- a/lib/utils/research/filter.rb +++ b/lib/utils/research/filter.rb @@ -4,7 +4,6 @@ module DiscourseAi module Utils module Research class Filter - # Stores custom filter handlers def self.register_filter(matcher, &block) (@registered_filters ||= {})[matcher] = block end @@ -19,7 +18,6 @@ def self.word_to_date(str) attr_reader :term, :filters, :order, :guardian, :limit, :offset, :invalid_filters - # Define all filters at class level register_filter(/\Astatus:open\z/i) do |relation, _, _| relation.where("topics.closed = false AND topics.archived = false") end @@ -109,6 +107,30 @@ def self.word_to_date(str) end end + register_filter(/\Atopic_keywords?:(.*)\z/i) do |relation, keywords_param, _| + if keywords_param.blank? + relation + else + keywords = keywords_param.split(",").map(&:strip).reject(&:blank?) + if keywords.empty? + relation + else + ts_query = keywords.map { |kw| kw.gsub(/['\\]/, " ") }.join(" | ") + + relation.where( + "posts.topic_id IN ( + SELECT posts2.topic_id + FROM posts posts2 + JOIN post_search_data ON post_search_data.post_id = posts2.id + WHERE post_search_data.search_data @@ to_tsquery(?, ?) + )", + ::Search.ts_config, + ts_query, + ) + end + end + end + register_filter(/\A(?:categories?|category):(.*)\z/i) do |relation, category_param, _| if category_param.include?(",") category_names = category_param.split(",").map(&:strip) @@ -140,26 +162,36 @@ def self.word_to_date(str) end end - register_filter(/\Ain:posted\z/i) do |relation, _, filter| - if filter.guardian.user - relation.where("posts.user_id = ?", filter.guardian.user.id) - else - relation.where("1 = 0") # No results if not logged in - end - end + register_filter(/\Agroups?:([a-zA-Z0-9_\-,]+)\z/i) do |relation, groups_param, filter| + if groups_param.include?(",") + group_names = groups_param.split(",").map(&:strip) + found_group_ids = [] + group_names.each do |name| + group = Group.find_by("name ILIKE ?", name) + found_group_ids << group.id if group + end - register_filter(/\Agroup:([a-zA-Z0-9_\-]+)\z/i) do |relation, name, filter| - group = Group.find_by("name ILIKE ?", name) - if group + return relation.where("1 = 0") if found_group_ids.empty? relation.where( "posts.user_id IN ( - SELECT gu.user_id FROM group_users gu - WHERE gu.group_id = ? - )", - group.id, + SELECT gu.user_id FROM group_users gu + WHERE gu.group_id IN (?) + )", + found_group_ids, ) else - relation.where("1 = 0") # No results if group doesn't exist + group = Group.find_by("name ILIKE ?", groups_param) + if group + relation.where( + "posts.user_id IN ( + SELECT gu.user_id FROM group_users gu + WHERE gu.group_id = ? + )", + group.id, + ) + else + relation.where("1 = 0") # No results if group doesn't exist + end end end @@ -188,23 +220,36 @@ def self.word_to_date(str) relation end + register_filter(/\Aorder:likes\z/i) do |relation, order_str, filter| + filter.set_order!(:likes) + relation + end + register_filter(/\Atopics?:(.*)\z/i) do |relation, topic_param, filter| if topic_param.include?(",") topic_ids = topic_param.split(",").map(&:strip).map(&:to_i).reject(&:zero?) return relation.where("1 = 0") if topic_ids.empty? - filter.always_return_topic_ids!(topic_ids) - relation + relation.where("posts.topic_id IN (?)", topic_ids) else topic_id = topic_param.to_i if topic_id > 0 - filter.always_return_topic_ids!([topic_id]) - relation + relation.where("posts.topic_id = ?", topic_id) else relation.where("1 = 0") # No results if topic_id is invalid end end end + register_filter(/\Apost_type:(first|reply)\z/i) do |relation, post_type, _| + if post_type.downcase == "first" + relation.where("posts.post_number = 1") + elsif post_type.downcase == "reply" + relation.where("posts.post_number > 1") + else + relation + end + end + def initialize(term, guardian: nil, limit: nil, offset: nil) @guardian = guardian || Guardian.new @limit = limit @@ -212,9 +257,9 @@ def initialize(term, guardian: nil, limit: nil, offset: nil) @filters = [] @valid = true @order = :latest_post - @topic_ids = nil @invalid_filters = [] @term = term.to_s.strip + @or_groups = [] process_filters(@term) end @@ -223,42 +268,38 @@ def set_order!(order) @order = order end - def always_return_topic_ids!(topic_ids) - if @topic_ids - @topic_ids = @topic_ids + topic_ids - else - @topic_ids = topic_ids - end - end - def limit_by_user!(limit) @limit = limit if limit.to_i < @limit.to_i || @limit.nil? end def search - filtered = + base_relation = Post .secured(@guardian) .joins(:topic) .merge(Topic.secured(@guardian)) .where("topics.archetype = 'regular'") - original_filtered = filtered - @filters.each do |filter_block, match_data| - filtered = filter_block.call(filtered, match_data, self) + # Handle OR groups + if @or_groups.any? + or_relations = + @or_groups.map do |or_group| + group_relation = base_relation + or_group.each do |filter_block, match_data| + group_relation = filter_block.call(group_relation, match_data, self) + end + group_relation + end + + # Combine OR groups + filtered = or_relations.reduce { |combined, current| combined.or(current) } + else + filtered = base_relation end - if @topic_ids.present? - if original_filtered == filtered - filtered = original_filtered.where("posts.topic_id IN (?)", @topic_ids) - else - filtered = - original_filtered.where( - "posts.topic_id IN (?) OR posts.id IN (?)", - @topic_ids, - filtered.select("posts.id"), - ) - end + # Apply regular AND filters + @filters.each do |filter_block, match_data| + filtered = filter_block.call(filtered, match_data, self) end filtered = filtered.limit(@limit) if @limit.to_i > 0 @@ -272,17 +313,36 @@ def search filtered = filtered.order("topics.created_at DESC, posts.post_number DESC") elsif @order == :oldest_topic filtered = filtered.order("topics.created_at ASC, posts.post_number ASC") + elsif @order == :likes + filtered = filtered.order("posts.like_count DESC, posts.created_at DESC") end filtered end - private - def process_filters(term) return if term.blank? - term + # Split by OR first, then process each group + or_parts = term.split(/\s+OR\s+/i) + + if or_parts.size > 1 + # Multiple OR groups + or_parts.each do |or_part| + group_filters = [] + process_filter_group(or_part.strip, group_filters) + @or_groups << group_filters if group_filters.any? + end + else + # Single group (AND logic) + process_filter_group(term, @filters) + end + end + + private + + def process_filter_group(term_part, filter_collection) + term_part .to_s .scan(/(([^" \t\n\x0B\f\r]+)?(("[^"]+")?))/) .to_a @@ -292,7 +352,7 @@ def process_filters(term) found = false self.class.registered_filters.each do |matcher, block| if word =~ matcher - @filters << [block, $1] + filter_collection << [block, $1] found = true break end diff --git a/spec/lib/utils/research/filter_spec.rb b/spec/lib/utils/research/filter_spec.rb index 866d63e81..4e23a393a 100644 --- a/spec/lib/utils/research/filter_spec.rb +++ b/spec/lib/utils/research/filter_spec.rb @@ -8,6 +8,7 @@ end fab!(:user) + fab!(:user2) { Fabricate(:user) } fab!(:feature_tag) { Fabricate(:tag, name: "feature") } fab!(:bug_tag) { Fabricate(:tag, name: "bug") } @@ -15,6 +16,9 @@ fab!(:announcement_category) { Fabricate(:category, name: "Announcements") } fab!(:feedback_category) { Fabricate(:category, name: "Feedback") } + fab!(:group1) { Fabricate(:group, name: "group1") } + fab!(:group2) { Fabricate(:group, name: "group2") } + fab!(:feature_topic) do Fabricate( :topic, @@ -54,6 +58,32 @@ fab!(:feature_bug_post) { Fabricate(:post, topic: feature_bug_topic, user: user) } fab!(:no_tag_post) { Fabricate(:post, topic: no_tag_topic, user: user) } + describe "group filtering" do + before do + group1.add(user) + group2.add(user2) + end + + it "supports filtering by groups" do + no_tag_post.update!(user_id: user2.id) + + filter = described_class.new("group:group1") + expect(filter.search.pluck(:id)).to contain_exactly( + feature_post.id, + bug_post.id, + feature_bug_post.id, + ) + + filter = described_class.new("groups:group1,group2") + expect(filter.search.pluck(:id)).to contain_exactly( + feature_post.id, + bug_post.id, + feature_bug_post.id, + no_tag_post.id, + ) + end + end + describe "security filtering" do fab!(:secure_group) { Fabricate(:group) } fab!(:secure_category) { Fabricate(:category, name: "Secure") } @@ -122,7 +152,7 @@ # it can tack on topics filter = described_class.new( - "category:Announcements topic:#{feature_bug_post.topic.id},#{no_tag_post.topic.id}", + "category:Announcements OR topic:#{feature_bug_post.topic.id},#{no_tag_post.topic.id}", ) expect(filter.search.pluck(:id)).to contain_exactly( feature_post.id, @@ -175,6 +205,25 @@ Fabricate(:post, raw: "No fruits here", topic: no_tag_topic, user: user) end + fab!(:reply_on_bananas) do + Fabricate(:post, raw: "Just a reply", topic: post_with_bananas.topic, user: user) + end + + it "correctly filters posts by topic_keywords" do + topic1 = post_with_bananas.topic + topic2 = post_with_both.topic + + filter = described_class.new("topic_keywords:banana") + expected = topic1.posts.pluck(:id) + topic2.posts.pluck(:id) + expect(filter.search.pluck(:id)).to contain_exactly(*expected) + + filter = described_class.new("topic_keywords:banana post_type:first") + expect(filter.search.pluck(:id)).to contain_exactly( + topic1.posts.order(:post_number).first.id, + topic2.posts.order(:post_number).first.id, + ) + end + it "correctly filters posts by full text keywords" do filter = described_class.new("keywords:apples") expect(filter.search.pluck(:id)).to contain_exactly(post_with_apples.id, post_with_both.id)