Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit c34fcc8

Browse files
authored
FEATURE: forum researcher persona for deep research (#1313)
This commit introduces a new Forum Researcher persona specialized in deep forum content analysis, along with comprehensive improvements to our AI infrastructure.

Key additions:
- New Forum Researcher persona with advanced filtering and analysis capabilities
- Robust filtering system supporting tags, categories, dates, users, and keywords
- LLM formatter to efficiently process and chunk research results

Infrastructure improvements:
- Implemented CancelManager class to centrally manage AI completion cancellations
- Replaced callback-based cancellation with a more robust pattern
- Added systematic cancellation monitoring with callbacks

Other improvements:
- Added configurable default_enabled flag to control which personas are enabled by default
- Updated translation strings for the new researcher functionality
- Added comprehensive specs for the new components
- Renames Researcher -> Web Researcher

This change makes our AI platform more stable while adding powerful research capabilities that can analyze forum trends and surface relevant content.
1 parent 22ccf29 commit c34fcc8

File tree

37 files changed

+1490
-133
lines changed

37 files changed

+1490
-133
lines changed

assets/javascripts/discourse/lib/ai-streamer/progress-handlers.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { later } from "@ember/runloop";
22
import PostUpdater from "./updaters/post-updater";
33

44
const PROGRESS_INTERVAL = 40;
5-
const GIVE_UP_INTERVAL = 60000;
5+
const GIVE_UP_INTERVAL = 600000; // 10 minutes which is our max thinking time for now
66
export const MIN_LETTERS_PER_INTERVAL = 6;
77
const MAX_FLUSH_TIME = 800;
88

config/locales/server.en.yml

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -296,15 +296,18 @@ en:
296296
designer:
297297
name: Designer
298298
description: "AI Bot specialized in generating and editing images"
299+
forum_researcher:
300+
name: Forum Researcher
301+
description: "AI Bot specialized in deep research for the forum"
299302
sql_helper:
300303
name: SQL Helper
301304
description: "AI Bot specialized in helping craft SQL queries on this Discourse instance"
302305
settings_explorer:
303306
name: Settings Explorer
304307
description: "AI Bot specialized in helping explore Discourse site settings"
305308
researcher:
306-
name: Researcher
307-
description: "AI Bot with Google access that can research information for you"
309+
name: Web Researcher
310+
description: "AI Bot with Google access that can both search and read web pages"
308311
creative:
309312
name: Creative
310313
description: "AI Bot with no external integrations specialized in creative tasks"
@@ -327,6 +330,16 @@ en:
327330
summarizing: "Summarizing topic"
328331
searching: "Searching for: '%{query}'"
329332
tool_options:
333+
researcher:
334+
max_results:
335+
name: "Maximum number of results"
336+
description: "Maximum number of results to include in a filter"
337+
include_private:
338+
name: "Include private"
339+
description: "Include private topics in the filters"
340+
max_tokens_per_post:
341+
name: "Maximum tokens per post"
342+
description: "Maximum number of tokens to use for each post in the filter"
330343
create_artifact:
331344
creator_llm:
332345
name: "LLM"
@@ -385,6 +398,7 @@ en:
385398
javascript_evaluator: "Evaluate JavaScript"
386399
create_image: "Creating image"
387400
edit_image: "Editing image"
401+
researcher: "Researching"
388402
tool_help:
389403
read_artifact: "Read a web artifact using the AI Bot"
390404
update_artifact: "Update a web artifact using the AI Bot"
@@ -411,6 +425,7 @@ en:
411425
dall_e: "Generate image using DALL-E 3"
412426
search_meta_discourse: "Search Meta Discourse"
413427
javascript_evaluator: "Evaluate JavaScript"
428+
researcher: "Research forum information using the AI Bot"
414429
tool_description:
415430
read_artifact: "Read a web artifact using the AI Bot"
416431
update_artifact: "Updated a web artifact using the AI Bot"
@@ -445,6 +460,12 @@ en:
445460
other: "Found %{count} <a href='%{url}'>results</a> for '%{query}'"
446461
setting_context: "Reading context for: %{setting_name}"
447462
schema: "%{tables}"
463+
researcher_dry_run:
464+
one: "Proposed research: %{goals}\n\nFound %{count} result for '%{filter}'"
465+
other: "Proposed research: %{goals}\n\nFound %{count} results for '%{filter}'"
466+
researcher:
467+
one: "Researching: %{goals}\n\nFound %{count} result for '%{filter}'"
468+
other: "Researching: %{goals}\n\nFound %{count} results for '%{filter}'"
448469
search_settings:
449470
one: "Found %{count} result for '%{query}'"
450471
other: "Found %{count} results for '%{query}'"

db/fixtures/personas/603_ai_personas.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def from_setting(setting_name)
3333
persona.allowed_group_ids = [Group::AUTO_GROUPS[:trust_level_0]]
3434
end
3535

36-
persona.enabled = !summarization_personas.include?(persona_class)
36+
persona.enabled = persona_class.default_enabled
3737
persona.priority = true if persona_class == DiscourseAi::Personas::General
3838
end
3939

lib/ai_bot/chat_streamer.rb

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,23 @@
66
module DiscourseAi
77
module AiBot
88
class ChatStreamer
9-
attr_accessor :cancel
109
attr_reader :reply,
1110
:guardian,
1211
:thread_id,
1312
:force_thread,
1413
:in_reply_to_id,
1514
:channel,
16-
:cancelled
17-
18-
def initialize(message:, channel:, guardian:, thread_id:, in_reply_to_id:, force_thread:)
15+
:cancel_manager
16+
17+
def initialize(
18+
message:,
19+
channel:,
20+
guardian:,
21+
thread_id:,
22+
in_reply_to_id:,
23+
force_thread:,
24+
cancel_manager: nil
25+
)
1926
@message = message
2027
@channel = channel
2128
@guardian = guardian
@@ -35,6 +42,8 @@ def initialize(message:, channel:, guardian:, thread_id:, in_reply_to_id:, force
3542
guardian: guardian,
3643
thread_id: thread_id,
3744
)
45+
46+
@cancel_manager = cancel_manager
3847
end
3948

4049
def <<(partial)
@@ -111,8 +120,7 @@ def run
111120

112121
streaming = ChatSDK::Message.stream(message_id: reply.id, raw: buffer, guardian: guardian)
113122
if !streaming
114-
cancel.call
115-
@cancelled = true
123+
@cancel_manager.cancel! if @cancel_manager
116124
end
117125
end
118126
end

lib/ai_bot/playground.rb

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,7 @@ def reply_to_chat_message(message, channel, context_post_ids)
331331
),
332332
user: message.user,
333333
skip_tool_details: true,
334+
cancel_manager: DiscourseAi::Completions::CancelManager.new,
334335
)
335336

336337
reply = nil
@@ -347,15 +348,14 @@ def reply_to_chat_message(message, channel, context_post_ids)
347348
thread_id: message.thread_id,
348349
in_reply_to_id: in_reply_to_id,
349350
force_thread: force_thread,
351+
cancel_manager: context.cancel_manager,
350352
)
351353

352354
new_prompts =
353-
bot.reply(context) do |partial, cancel, placeholder, type|
355+
bot.reply(context) do |partial, placeholder, type|
354356
# no support for tools or thinking by design
355357
next if type == :thinking || type == :tool_details || type == :partial_tool
356-
streamer.cancel = cancel
357358
streamer << partial
358-
break if streamer.cancelled
359359
end
360360

361361
reply = streamer.reply
@@ -383,6 +383,7 @@ def reply_to(
383383
auto_set_title: true,
384384
silent_mode: false,
385385
feature_name: nil,
386+
cancel_manager: nil,
386387
&blk
387388
)
388389
# this is a multithreading issue
@@ -471,16 +472,26 @@ def reply_to(
471472

472473
redis_stream_key = "gpt_cancel:#{reply_post.id}"
473474
Discourse.redis.setex(redis_stream_key, MAX_STREAM_DELAY_SECONDS, 1)
475+
476+
cancel_manager ||= DiscourseAi::Completions::CancelManager.new
477+
context.cancel_manager = cancel_manager
478+
context
479+
.cancel_manager
480+
.start_monitor(delay: 0.2) do
481+
context.cancel_manager.cancel! if !Discourse.redis.get(redis_stream_key)
482+
end
483+
484+
context.cancel_manager.add_callback(
485+
lambda { reply_post.update!(raw: reply, cooked: PrettyText.cook(reply)) },
486+
)
474487
end
475488

476489
context.skip_tool_details ||= !bot.persona.class.tool_details
477-
478490
post_streamer = PostStreamer.new(delay: Rails.env.test? ? 0 : 0.5) if stream_reply
479-
480491
started_thinking = false
481492

482493
new_custom_prompts =
483-
bot.reply(context) do |partial, cancel, placeholder, type|
494+
bot.reply(context) do |partial, placeholder, type|
484495
if type == :thinking && !started_thinking
485496
reply << "<details><summary>#{I18n.t("discourse_ai.ai_bot.thinking")}</summary>"
486497
started_thinking = true
@@ -499,15 +510,6 @@ def reply_to(
499510
blk.call(partial)
500511
end
501512

502-
if stream_reply && !Discourse.redis.get(redis_stream_key)
503-
cancel&.call
504-
reply_post.update!(raw: reply, cooked: PrettyText.cook(reply))
505-
# we do not break out, cause if we do
506-
# we will not get results from bot
507-
# leading to broken context
508-
# we need to trust it to cancel at the endpoint
509-
end
510-
511513
if post_streamer
512514
post_streamer.run_later do
513515
Discourse.redis.expire(redis_stream_key, MAX_STREAM_DELAY_SECONDS)
@@ -568,6 +570,8 @@ def reply_to(
568570
end
569571
raise e
570572
ensure
573+
context.cancel_manager.stop_monitor if context&.cancel_manager
574+
571575
# since we are skipping validations and jobs we
572576
# may need to fix participant count
573577
if reply_post && reply_post.topic && reply_post.topic.private_message? &&
@@ -649,7 +653,7 @@ def publish_update(bot_reply_post, payload)
649653
payload,
650654
user_ids: bot_reply_post.topic.allowed_user_ids,
651655
max_backlog_size: 2,
652-
max_backlog_age: 60,
656+
max_backlog_age: MAX_STREAM_DELAY_SECONDS,
653657
)
654658
end
655659
end

lib/completions/cancel_manager.rb

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
# frozen_string_literal: true
2+
3+
# special object that can be used to cancel completions and http requests
4+
module DiscourseAi
  module Completions
    # Coordinates cancellation of an in-flight completion.
    #
    # Holds a `cancelled` flag, a list of callbacks that are invoked on every
    # call to `cancel!`, and an optional background monitor thread that polls
    # a caller-supplied block and cancels as soon as that block returns a
    # truthy value.
    #
    # NOTE: when cancellation is triggered by the monitor thread, callbacks run
    # while the internal (non-reentrant) mutex is held, so callbacks must not
    # call `monitor_thread`, `start_monitor` or `stop_monitor`.
    class CancelManager
      # Seconds to wait for the monitor thread to exit before killing it.
      MONITOR_JOIN_TIMEOUT = 2

      attr_reader :cancelled
      attr_reader :callbacks

      def initialize
        @cancelled = false
        @callbacks = Concurrent::Array.new
        @mutex = Mutex.new
        @monitor_thread = nil
        @stop_monitor = false
      end

      # @return [Thread, nil] the monitor thread, or nil when none is running
      def monitor_thread
        @mutex.synchronize { @monitor_thread }
      end

      # Starts a background thread that sleeps `delay` seconds, then calls the
      # block; a truthy result triggers `cancel!`. The loop ends when the
      # manager is cancelled or `stop_monitor` is called.
      #
      # The block is executed inside a multisite connection for the database
      # that was current on the calling thread when the monitor was started.
      #
      # @param delay [Numeric] seconds to sleep between polls
      # @yieldreturn [Boolean] truthy to request cancellation
      # @return [Thread] the monitor thread
      # @raise [RuntimeError] if a monitor is already running or no block given
      def start_monitor(delay: 0.5, &block)
        @mutex.synchronize do
          raise "Already monitoring" if @monitor_thread
          raise "Expected a block" if !block

          db = RailsMultisite::ConnectionManagement.current_db
          @stop_monitor = false

          @monitor_thread =
            Thread.new do
              begin
                loop do
                  break if @mutex.synchronize { @stop_monitor }
                  sleep delay
                  # re-check after sleeping: we may have been woken up early
                  break if @mutex.synchronize { @stop_monitor || cancelled? }

                  should_cancel = false
                  RailsMultisite::ConnectionManagement.with_connection(db) do
                    should_cancel = block.call
                  end

                  @mutex.synchronize { cancel! if should_cancel }

                  break if cancelled?
                end
              ensure
                @mutex.synchronize { @monitor_thread = nil }
              end
            end
        end
      end

      # Asks the monitor thread (if any) to stop and waits for it to exit,
      # killing it if it does not finish within MONITOR_JOIN_TIMEOUT seconds.
      # Safe to call when no monitor is running or when it already exited.
      #
      # @return [nil]
      def stop_monitor
        thread = monitor_thread
        return if !thread

        @mutex.synchronize { @stop_monitor = true }
        halt_monitor(thread)

        # only clear the slot if it still refers to the thread we stopped
        @mutex.synchronize { @monitor_thread = nil if @monitor_thread.equal?(thread) }
        nil
      end

      # @return [Boolean] whether `cancel!` has been called
      def cancelled?
        @cancelled
      end

      # Registers a callable to be invoked (with no arguments) on `cancel!`.
      def add_callback(cb)
        @callbacks << cb
      end

      # Unregisters a previously added callback.
      def remove_callback(cb)
        @callbacks.delete(cb)
      end

      # Marks the manager as cancelled, stops the monitor thread (unless we are
      # running on it) and invokes every registered callback. Errors raised by
      # callbacks are swallowed.
      def cancel!
        @cancelled = true

        # read without the mutex: the monitor thread calls this method while
        # already holding it, and Mutex is not reentrant
        thread = @monitor_thread
        halt_monitor(thread) if thread && thread != Thread.current

        @callbacks.each do |cb|
          begin
            cb.call
          rescue StandardError
            # ignore cause this may have already been cancelled
          end
        end
      end

      private

      # Wakes the given monitor thread so it notices the stop/cancel flag,
      # waits for it to finish and kills it as a last resort.
      def halt_monitor(thread)
        begin
          # so we do not deadlock waiting for the sleep to elapse
          thread.wakeup
        rescue ThreadError
          # Thread#wakeup raises when the thread is already dead, which
          # happens if the monitor exited on its own after we looked it up
        end

        thread.join(MONITOR_JOIN_TIMEOUT)
        return if !thread.alive?

        # should not happen
        Rails.logger.warn("DiscourseAI: CancelManager monitor thread did not stop in time")
        thread.kill if thread.alive?
      end
    end
  end
end

0 commit comments

Comments
 (0)