Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit a439a48

Browse files
committed
improve filter logic and feature set
1 parent 7b4685d commit a439a48

File tree

3 files changed

+173
-59
lines changed

3 files changed

+173
-59
lines changed

lib/personas/tools/researcher.rb

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,19 +33,24 @@ def filter_description
3333
<<~TEXT
3434
Filter string to target specific content.
3535
- Supports user (@username)
36+
- post_type:first - only includes first posts in topics
37+
- post_type:reply - only replies in topics
3638
- date ranges (after:YYYY-MM-DD, before:YYYY-MM-DD for posts; topic_after:YYYY-MM-DD, topic_before:YYYY-MM-DD for topics)
37-
- categories (category:category1,category2)
38-
- tags (tag:tag1,tag2)
39-
- groups (group:group1,group2).
39+
- categories (category:category1,category2 or categories:category1,category2)
40+
- tags (tag:tag1,tag2 or tags:tag1,tag2)
41+
- groups (group:group1,group2 or groups:group1,group2)
4042
- status (status:open, status:closed, status:archived, status:noreplies, status:single_user)
41-
- keywords (keywords:keyword1,keyword2) - specific words to search for in posts
42-
- max_results (max_results:10) the maximum number of results to return (optional)
43-
- order (order:latest, order:oldest, order:latest_topic, order:oldest_topic) - the order of the results (optional)
44-
- topic (topic:topic_id1,topic_id2) - add specific topics to the filter, topics will unconditionally be included
43+
- keywords (keywords:keyword1,keyword2) - searches for specific words within post content using full-text search
44+
- topic_keywords (topic_keywords:keyword1,keyword2) - searches for keywords within topics, returns all posts from matching topics
45+
- topics (topic:topic_id1,topic_id2 or topics:topic_id1,topic_id2) - target specific topics by ID
46+
- max_results (max_results:10) - limits the maximum number of results returned (optional)
47+
- order (order:latest, order:oldest, order:latest_topic, order:oldest_topic, order:likes) - controls result ordering (optional, defaults to latest posts)
4548
46-
If multiple tags or categories are specified, they are treated as OR conditions.
49+
Multiple filters can be combined with spaces for AND logic. Example: '@sam after:2023-01-01 tag:feature'
4750
48-
Multiple filters can be combined with spaces. Example: '@sam after:2023-01-01 tag:feature'
51+
Use OR to combine filter segments for inclusive logic.
52+
Example: 'category:feature,bug OR tag:feature-tag' - includes posts in feature OR bug categories, OR posts with feature-tag tag
53+
Example: '@sam category:bug' - includes posts by @sam AND in bug category
4954
TEXT
5055
end
5156

lib/utils/research/filter.rb

Lines changed: 109 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ module DiscourseAi
44
module Utils
55
module Research
66
class Filter
7-
# Stores custom filter handlers
87
def self.register_filter(matcher, &block)
98
(@registered_filters ||= {})[matcher] = block
109
end
@@ -19,7 +18,6 @@ def self.word_to_date(str)
1918

2019
attr_reader :term, :filters, :order, :guardian, :limit, :offset, :invalid_filters
2120

22-
# Define all filters at class level
2321
register_filter(/\Astatus:open\z/i) do |relation, _, _|
2422
relation.where("topics.closed = false AND topics.archived = false")
2523
end
@@ -109,6 +107,30 @@ def self.word_to_date(str)
109107
end
110108
end
111109

110+
# Filter: topic_keywords:kw1,kw2 (topic_keyword: is accepted too).
# Keeps every post whose topic has at least one post matching any of the
# comma-separated keywords in the full-text index (post_search_data).
#
# Block arguments: the current relation, the text captured after the colon,
# and the Filter instance (unused here). Returns the narrowed relation, or
# the relation untouched when no usable keyword was supplied.
register_filter(/\Atopic_keywords?:(.*)\z/i) do |relation, keywords_param, _|
  # Quotes and backslashes are replaced with spaces, as before. The words
  # left inside each keyword are then AND-ed explicitly, because to_tsquery
  # rejects two operands with no operator between them (e.g. "don't" was
  # previously sent as "don t"). Keywords that end up empty are dropped so
  # they cannot leave a dangling "|" in the query.
  ts_terms =
    keywords_param
      .to_s
      .split(",")
      .map do |keyword|
        words = keyword.gsub(/['\\]/, " ").split
        "(#{words.join(" & ")})" if words.any?
      end
      .compact

  if ts_terms.empty?
    relation
  else
    relation.where(
      "posts.topic_id IN (
        SELECT posts2.topic_id
        FROM posts posts2
        JOIN post_search_data ON post_search_data.post_id = posts2.id
        WHERE post_search_data.search_data @@ to_tsquery(?, ?)
      )",
      ::Search.ts_config,
      ts_terms.join(" | "),
    )
  end
end
133+
112134
register_filter(/\A(?:categories?|category):(.*)\z/i) do |relation, category_param, _|
113135
if category_param.include?(",")
114136
category_names = category_param.split(",").map(&:strip)
@@ -140,26 +162,36 @@ def self.word_to_date(str)
140162
end
141163
end
142164

143-
register_filter(/\Ain:posted\z/i) do |relation, _, filter|
144-
if filter.guardian.user
145-
relation.where("posts.user_id = ?", filter.guardian.user.id)
146-
else
147-
relation.where("1 = 0") # No results if not logged in
148-
end
149-
end
165+
# Filter: group:name or groups:name1,name2.
# Keeps posts whose author belongs to at least one of the named groups.
# Names are matched case-insensitively; unknown names are ignored, and the
# filter yields no rows when none of the names resolves to a group.
#
# Block arguments: the current relation, the comma-separated group names
# captured by the matcher, and the Filter instance (unused here).
register_filter(/\Agroups?:([a-zA-Z0-9_\-,]+)\z/i) do |relation, groups_param, _|
  group_names = groups_param.split(",").map { |name| name.strip.downcase }.reject(&:empty?)

  # One exact, case-insensitive lookup for all names. ILIKE is avoided on
  # purpose: "_" is allowed in group names by the matcher above, but is a
  # single-character wildcard in LIKE patterns.
  group_ids =
    if group_names.empty?
      []
    else
      Group.where("LOWER(name) IN (?)", group_names).pluck(:id)
    end

  # `next`, not `return`: this block is stored by register_filter and run
  # later through #call, where `return` would raise LocalJumpError.
  next relation.where("1 = 0") if group_ids.empty?

  relation.where(
    "posts.user_id IN (
      SELECT gu.user_id FROM group_users gu
      WHERE gu.group_id IN (?)
    )",
    group_ids,
  )
end
165197

@@ -188,33 +220,46 @@ def self.word_to_date(str)
188220
relation
189221
end
190222

223+
# Filter: order:likes.
# Adds no condition of its own; it only records :likes as the requested
# ordering on the Filter instance and hands the relation back unchanged.
register_filter(/\Aorder:likes\z/i) do |relation, _order_str, filter|
  relation.tap { filter.set_order!(:likes) }
end
227+
191228
# Filter: topic:ID or topics:ID1,ID2.
# Restricts results to posts in the listed topics. Entries that do not parse
# to a positive integer are discarded; if nothing usable remains the filter
# yields no rows.
#
# Block arguments: the current relation, the comma-separated ids captured
# after the colon, and the Filter instance (unused here).
register_filter(/\Atopics?:(.*)\z/i) do |relation, topic_param, _|
  topic_ids = topic_param.to_s.split(",").map { |id| id.strip.to_i }.select(&:positive?)

  # `next`, not `return`: this block is stored by register_filter and run
  # later through #call, where `return` would raise LocalJumpError.
  next relation.where("1 = 0") if topic_ids.empty?

  relation.where("posts.topic_id IN (?)", topic_ids)
end
207242

243+
# Filter: post_type:first or post_type:reply (case-insensitive).
# "first" keeps only posts with post_number 1; "reply" keeps only posts with
# a higher post_number. Any other value leaves the relation untouched.
register_filter(/\Apost_type:(first|reply)\z/i) do |relation, post_type, _|
  case post_type.downcase
  when "first"
    relation.where("posts.post_number = 1")
  when "reply"
    relation.where("posts.post_number > 1")
  else
    relation
  end
end
252+
208253
# Builds a filter from a raw filter string and parses it immediately.
#
# term     - the filter string; converted with to_s and stripped.
# guardian - permission context; a fresh Guardian is created when omitted.
# limit    - optional maximum number of rows.
# offset   - optional row offset.
def initialize(term, guardian: nil, limit: nil, offset: nil)
  @term = term.to_s.strip
  @guardian = guardian || Guardian.new
  @limit, @offset = limit, offset
  @order = :latest_post
  @valid = true

  # Collections filled in by process_filters below.
  @filters = []
  @or_groups = []
  @invalid_filters = []

  process_filters(@term)
end
@@ -223,42 +268,38 @@ def set_order!(order)
223268
@order = order
224269
end
225270

226-
def always_return_topic_ids!(topic_ids)
227-
if @topic_ids
228-
@topic_ids = @topic_ids + topic_ids
229-
else
230-
@topic_ids = topic_ids
231-
end
232-
end
233-
234271
# Lowers the row limit to a user-requested value.
#
# limit - the requested maximum; compared via to_i.
#
# The request is honoured only when it is positive and either no limit is
# set yet or it is smaller than the current one. Non-positive requests are
# ignored: #search applies a limit only when it is greater than zero, so
# storing 0 here would silently remove an existing cap.
def limit_by_user!(limit)
  requested = limit.to_i
  return unless requested > 0

  @limit = limit if @limit.nil? || requested < @limit.to_i
end
237274

238275
def search
239-
filtered =
276+
base_relation =
240277
Post
241278
.secured(@guardian)
242279
.joins(:topic)
243280
.merge(Topic.secured(@guardian))
244281
.where("topics.archetype = 'regular'")
245-
original_filtered = filtered
246282

247-
@filters.each do |filter_block, match_data|
248-
filtered = filter_block.call(filtered, match_data, self)
283+
# Handle OR groups
284+
if @or_groups.any?
285+
or_relations =
286+
@or_groups.map do |or_group|
287+
group_relation = base_relation
288+
or_group.each do |filter_block, match_data|
289+
group_relation = filter_block.call(group_relation, match_data, self)
290+
end
291+
group_relation
292+
end
293+
294+
# Combine OR groups
295+
filtered = or_relations.reduce { |combined, current| combined.or(current) }
296+
else
297+
filtered = base_relation
249298
end
250299

251-
if @topic_ids.present?
252-
if original_filtered == filtered
253-
filtered = original_filtered.where("posts.topic_id IN (?)", @topic_ids)
254-
else
255-
filtered =
256-
original_filtered.where(
257-
"posts.topic_id IN (?) OR posts.id IN (?)",
258-
@topic_ids,
259-
filtered.select("posts.id"),
260-
)
261-
end
300+
# Apply regular AND filters
301+
@filters.each do |filter_block, match_data|
302+
filtered = filter_block.call(filtered, match_data, self)
262303
end
263304

264305
filtered = filtered.limit(@limit) if @limit.to_i > 0
@@ -272,17 +313,36 @@ def search
272313
filtered = filtered.order("topics.created_at DESC, posts.post_number DESC")
273314
elsif @order == :oldest_topic
274315
filtered = filtered.order("topics.created_at ASC, posts.post_number ASC")
316+
elsif @order == :likes
317+
filtered = filtered.order("posts.like_count DESC, posts.created_at DESC")
275318
end
276319

277320
filtered
278321
end
279322

280-
private
281-
282323
# Parses the raw filter string into filter blocks.
#
# The string is split on the word OR (any letter case, surrounded by
# whitespace). With a single segment, its filters are appended to @filters.
# With several segments, each one is parsed into its own list, and every
# non-empty list is appended to @or_groups.
def process_filters(term)
  return if term.blank?

  segments = term.split(/\s+OR\s+/i)
  return process_filter_group(term, @filters) unless segments.size > 1

  segments.each do |segment|
    collected = []
    process_filter_group(segment.strip, collected)
    @or_groups << collected unless collected.empty?
  end
end
341+
342+
private
343+
344+
def process_filter_group(term_part, filter_collection)
345+
term_part
286346
.to_s
287347
.scan(/(([^" \t\n\x0B\f\r]+)?(("[^"]+")?))/)
288348
.to_a
@@ -292,7 +352,7 @@ def process_filters(term)
292352
found = false
293353
self.class.registered_filters.each do |matcher, block|
294354
if word =~ matcher
295-
@filters << [block, $1]
355+
filter_collection << [block, $1]
296356
found = true
297357
break
298358
end

spec/lib/utils/research/filter_spec.rb

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,17 @@
88
end
99

1010
fab!(:user)
11+
fab!(:user2) { Fabricate(:user) }
1112

1213
fab!(:feature_tag) { Fabricate(:tag, name: "feature") }
1314
fab!(:bug_tag) { Fabricate(:tag, name: "bug") }
1415

1516
fab!(:announcement_category) { Fabricate(:category, name: "Announcements") }
1617
fab!(:feedback_category) { Fabricate(:category, name: "Feedback") }
1718

19+
fab!(:group1) { Fabricate(:group, name: "group1") }
20+
fab!(:group2) { Fabricate(:group, name: "group2") }
21+
1822
fab!(:feature_topic) do
1923
Fabricate(
2024
:topic,
@@ -54,6 +58,32 @@
5458
fab!(:feature_bug_post) { Fabricate(:post, topic: feature_bug_topic, user: user) }
5559
fab!(:no_tag_post) { Fabricate(:post, topic: no_tag_topic, user: user) }
5660

61+
describe "group filtering" do
  # user belongs to group1 only, user2 to group2 only.
  before do
    group1.add(user)
    group2.add(user2)
  end

  it "supports filtering by groups" do
    # Hand one post to user2 so the two groups own different posts.
    no_tag_post.update!(user_id: user2.id)
    group1_post_ids = [feature_post, bug_post, feature_bug_post].map(&:id)

    single_group = described_class.new("group:group1")
    expect(single_group.search.pluck(:id)).to contain_exactly(*group1_post_ids)

    both_groups = described_class.new("groups:group1,group2")
    expect(both_groups.search.pluck(:id)).to contain_exactly(*group1_post_ids, no_tag_post.id)
  end
end
86+
5787
describe "security filtering" do
5888
fab!(:secure_group) { Fabricate(:group) }
5989
fab!(:secure_category) { Fabricate(:category, name: "Secure") }
@@ -122,7 +152,7 @@
122152
# it can tack on topics
123153
filter =
124154
described_class.new(
125-
"category:Announcements topic:#{feature_bug_post.topic.id},#{no_tag_post.topic.id}",
155+
"category:Announcements OR topic:#{feature_bug_post.topic.id},#{no_tag_post.topic.id}",
126156
)
127157
expect(filter.search.pluck(:id)).to contain_exactly(
128158
feature_post.id,
@@ -175,6 +205,25 @@
175205
Fabricate(:post, raw: "No fruits here", topic: no_tag_topic, user: user)
176206
end
177207

208+
# A reply that does not contain the keyword itself, placed in a topic whose
# first post does.
fab!(:reply_on_bananas) do
  Fabricate(:post, raw: "Just a reply", topic: post_with_bananas.topic, user: user)
end

it "correctly filters posts by topic_keywords" do
  matching_topics = [post_with_bananas.topic, post_with_both.topic]

  # Every post of a matching topic is returned, not only the matching post.
  all_posts = described_class.new("topic_keywords:banana")
  expect(all_posts.search.pluck(:id)).to contain_exactly(
    *matching_topics.flat_map { |topic| topic.posts.pluck(:id) },
  )

  # Combined with post_type:first, only each topic's opening post remains.
  first_posts = described_class.new("topic_keywords:banana post_type:first")
  expect(first_posts.search.pluck(:id)).to contain_exactly(
    *matching_topics.map { |topic| topic.posts.order(:post_number).first.id },
  )
end
226+
178227
it "correctly filters posts by full text keywords" do
179228
filter = described_class.new("keywords:apples")
180229
expect(filter.search.pluck(:id)).to contain_exactly(post_with_apples.id, post_with_both.id)

0 commit comments

Comments
 (0)