@@ -31,26 +31,28 @@ def signature
3131
# Returns the help text that documents the supported filter syntax, built as a `<<~TEXT` heredoc.
# NOTE(review): this span is a rendered diff hunk - rows tagged `NN-` are the removed wording and
# rows tagged `NN+` are the replacement. The heredoc body is runtime text, so it is left untouched.
# NOTE(review): the replacement wording documents `username:` / `usernames:` and no longer
# mentions the `@username` form - confirm the filter parser agrees with the new description.
3232 def filter_description
3333 <<~TEXT
34- Filter string to target specific content.
35- - Supports user (@username)
36- - post_type:first - only includes first posts in topics
37- - post_type:reply - only replies in topics
38- - date ranges (after:YYYY-MM-DD, before:YYYY-MM-DD for posts; topic_after:YYYY-MM-DD, topic_before:YYYY-MM-DD for topics)
39- - categories (category:category1,category2 or categories:category1,category2)
40- - tags (tag:tag1,tag2 or tags:tag1,tag2)
41- - groups (group:group1,group2 or groups:group1,group2)
42- - status (status:open, status:closed, status:archived, status:noreplies, status:single_user)
43- - keywords (keywords:keyword1,keyword2) - searches for specific words within post content using full-text search
44- - topic_keywords (topic_keywords:keyword1,keyword2) - searches for keywords within topics, returns all posts from matching topics
45- - topics (topic:topic_id1,topic_id2 or topics:topic_id1,topic_id2) - target specific topics by ID
46- - max_results (max_results:10) - limits the maximum number of results returned (optional)
47- - order (order:latest, order:oldest, order:latest_topic, order:oldest_topic, order:likes) - controls result ordering (optional, defaults to latest posts)
48-
49- Multiple filters can be combined with spaces for AND logic. Example: '@sam after:2023-01-01 tag:feature'
50-
51- Use OR to combine filter segments for inclusive logic.
52- Example: 'category:feature,bug OR tag:feature-tag' - includes posts in feature OR bug categories, OR posts with feature-tag tag
53- Example: '@sam category:bug' - includes posts by @sam AND in bug category
34+ Filter string to target specific content. Space-separated filters use AND logic, OR creates separate filter groups.
35+
36+ **Filters:**
37+ - username:user1 or usernames:user1,user2 - posts by specific users
38+ - group:group1 or groups:group1,group2 - posts by users in specific groups
39+ - post_type:first|reply - first posts only or replies only
40+ - keywords:word1,word2 - full-text search in post content
41+ - topic_keywords:word1,word2 - full-text search in topics (returns all posts from matching topics)
42+ - topic:123 or topics:123,456 - specific topics by ID
43+ - category:name1 or categories:name1,name2 - posts in categories (by name/slug)
44+ - tag:tag1 or tags:tag1,tag2 - posts in topics with tags
45+ - after:YYYY-MM-DD, before:YYYY-MM-DD - filter by post creation date
46+ - topic_after:YYYY-MM-DD, topic_before:YYYY-MM-DD - filter by topic creation date
47+ - status:open|closed|archived|noreplies|single_user - topic status filters
48+ - max_results:N - limit results (per OR group)
49+ - order:latest|oldest|latest_topic|oldest_topic|likes - sort order
50+
51+ **OR Logic:** Each OR group processes independently - filters don't cross boundaries.
52+
53+ Examples:
54+ - 'username:sam after:2023-01-01' - sam's posts after date
55+ - 'max_results:50 category:bugs OR tag:urgent' - (≤50 bug posts) OR (all urgent posts)
5456 TEXT
5557 end
5658
@@ -60,9 +62,11 @@ def name
6062
# Lists the options this tool accepts, as an array of `option(...)` declarations.
# The table below maps each option name to its declared type; Ruby hashes keep
# insertion order, so the declarations are produced in the order written here.
def accepted_options
  {
    researcher_llm: :llm,
    max_results: :integer,
    include_private: :boolean,
    max_tokens_per_post: :integer,
    max_tokens_per_batch: :integer,
  }.map { |option_name, option_type| option(option_name, type: option_type) }
end
6872 end
@@ -134,17 +138,32 @@ def description_args
134138 protected
135139
136140 MIN_TOKENS_FOR_RESEARCH = 8000
141+ MIN_TOKENS_FOR_POST = 50
142+
137143 def process_filter ( filter , goals , post , &blk )
138- if llm . max_prompt_tokens < MIN_TOKENS_FOR_RESEARCH
144+ if researcher_llm . max_prompt_tokens < MIN_TOKENS_FOR_RESEARCH
139145 raise ArgumentError ,
140146 "LLM max tokens too low for research. Minimum is #{ MIN_TOKENS_FOR_RESEARCH } ."
141147 end
148+
149+ max_tokens_per_batch = options [ :max_tokens_per_batch ] . to_i
150+ if max_tokens_per_batch <= MIN_TOKENS_FOR_RESEARCH
151+ max_tokens_per_batch = researcher_llm . max_prompt_tokens - 2000
152+ end
153+
154+ max_tokens_per_post = options [ :max_tokens_per_post ]
155+ if max_tokens_per_post . nil?
156+ max_tokens_per_post = 2000
157+ elsif max_tokens_per_post < MIN_TOKENS_FOR_POST
158+ max_tokens_per_post = MIN_TOKENS_FOR_POST
159+ end
160+
142161 formatter =
143162 DiscourseAi ::Utils ::Research ::LlmFormatter . new (
144163 filter ,
145- max_tokens_per_batch : llm . max_prompt_tokens - 2000 ,
146- tokenizer : llm . tokenizer ,
147- max_tokens_per_post : options [ : max_tokens_per_post] || 2000 ,
164+ max_tokens_per_batch : max_tokens_per_batch ,
165+ tokenizer : researcher_llm . tokenizer ,
166+ max_tokens_per_post : max_tokens_per_post ,
148167 )
149168
150169 results = [ ]
@@ -164,6 +183,14 @@ def process_filter(filter, goals, post, &blk)
164183 end
165184 end
166185
# Resolves (and memoizes) the LLM used for research inference.
#
# When the :researcher_llm option is present, it is coerced to an integer id and
# looked up via LlmModel.find_by; a found record is converted with #to_llm.
# If the option is blank or no record matches, falls back to self.llm.
def researcher_llm
  return @researcher_llm if @researcher_llm

  configured_id = options[:researcher_llm]
  # Safe navigation keeps a missing record from raising; we fall through to the default instead.
  override = configured_id.present? ? LlmModel.find_by(id: configured_id.to_i)&.to_llm : nil

  @researcher_llm = override || self.llm
end
193+
167194 def run_inference ( chunk_text , goals , post , &blk )
168195 return if context . cancel_manager &.cancelled?
169196
@@ -179,7 +206,7 @@ def run_inference(chunk_text, goals, post, &blk)
179206 )
180207
181208 results = [ ]
182- llm . generate (
209+ researcher_llm . generate (
183210 prompt ,
184211 user : post . user ,
185212 feature_name : context . feature_name ,
0 commit comments