@@ -4,103 +4,175 @@ module DiscourseAi
44 module Utils
55 module Research
66 class Filter
7- attr_reader :raw_filter , :parsed_components , :current_offset , :batch_size
8-
9- VALID_FILTER_PATTERNS = {
10- user : /\@ (\w +)/ ,
11- before : /before:(\d {4}-\d {2}-\d {2})/ ,
12- after : /after:(\d {4}-\d {2}-\d {2})/ ,
13- category : /category:([a-zA-Z0-9_\- ]+)/ ,
14- tag : /tag:([a-zA-Z0-9_\- ]+)/ ,
15- group : /group:([a-zA-Z0-9_\- ]+)/ ,
16- status : /status:(open|closed|archived|noreplies|single_user)/ ,
17- }
18-
19- DEFAULT_BATCH_SIZE = 20
20-
21- def initialize ( filter_string , batch_size : DEFAULT_BATCH_SIZE )
22- @raw_filter = filter_string . to_s
23- @batch_size = batch_size
24- @current_offset = 0
25- @parsed_components = parse_filter
26- end
27-
28- def parse_filter
29- components = {
30- users : [ ] ,
31- categories : [ ] ,
32- tags : [ ] ,
33- groups : [ ] ,
34- date_range : {
35- } ,
36- status : nil ,
37- raw : @raw_filter ,
38- }
39-
40- # Extract user mentions
41- @raw_filter
42- . scan ( VALID_FILTER_PATTERNS [ :user ] )
43- . each { |match | components [ :users ] << match [ 0 ] }
44-
45- # Extract date ranges
46- if before_match = @raw_filter . match ( VALID_FILTER_PATTERNS [ :before ] )
47- components [ :date_range ] [ :before ] = before_match [ 1 ]
# Registers a handler for search tokens that match +matcher+.
#
# The handler is stored on the class, keyed by +matcher+; registering the same
# matcher again replaces the earlier handler. Returns the stored block.
def self.register_filter(matcher, &block)
  @registered_filters ||= {}
  @registered_filters[matcher] = block
end
11+
# Returns the class-level hash of matcher => handler block, creating it on
# first access so callers always receive a Hash.
def self.registered_filters
  return @registered_filters if @registered_filters

  @registered_filters = {}
end
15+
# Converts the date portion of a before:/after: token into a date by
# delegating to the top-level Search helper. The date filters in this class
# treat a falsy result as "no date constraint".
def self.word_to_date(str)
  ::Search.word_to_date(str)
end
19+
attr_reader(
  :term, # free text left over once filter tokens have been removed
  :filters, # [handler, captured_value] pairs collected while parsing
  :order, # Symbol taken from an "order:xxx" token; nil when none was given
  :guardian, # guardian used to scope posts and topics
  :limit, # optional maximum number of rows
  :offset, # optional number of rows to skip
)
21+
# Status filters: each "status:<name>" token maps to a fixed SQL condition on
# the joined topics table. Registered in the order listed here.
{
  /\Astatus:open\z/i => "topics.closed = false AND topics.archived = false",
  /\Astatus:closed\z/i => "topics.closed = true",
  /\Astatus:archived\z/i => "topics.archived = true",
  /\Astatus:noreplies\z/i => "topics.posts_count = 1",
  /\Astatus:single_user\z/i => "topics.participant_count = 1",
}.each do |pattern, condition|
  register_filter(pattern) { |relation, _, _| relation.where(condition) }
end
42+
# Date filters
# "before:<date>" keeps posts created strictly before the parsed date. When
# the date cannot be parsed the relation is returned unchanged.
register_filter(/\Abefore:(.*)\z/i) do |relation, date_str, _|
  cutoff = Filter.word_to_date(date_str)
  cutoff ? relation.where("posts.created_at < ?", cutoff) : relation
end
4951
50- if after_match = @raw_filter . match ( VALID_FILTER_PATTERNS [ :after ] )
51- components [ :date_range ] [ :after ] = after_match [ 1 ]
# "after:<date>" keeps posts created strictly after the parsed date. When the
# date cannot be parsed the relation is returned unchanged.
register_filter(/\Aafter:(.*)\z/i) do |relation, date_str, _|
  cutoff = Filter.word_to_date(date_str)
  cutoff ? relation.where("posts.created_at > ?", cutoff) : relation
end
5359
54- # Extract categories
55- @raw_filter
56- . scan ( VALID_FILTER_PATTERNS [ :category ] )
57- . each { |match | components [ :categories ] << match [ 0 ] }
# Category filter
# "category:<slug>" matches the slug case-insensitively and keeps topics in
# that category or, when it has any, in its subcategories.
register_filter(/\Acategory:([a-zA-Z0-9_\-]+)\z/i) do |relation, slug, _|
  category = Category.find_by("LOWER(slug) = LOWER(?)", slug)
  next relation.where("1 = 0") unless category # No results if category doesn't exist

  descendant_ids =
    category.subcategory_ids.present? ? Category.subcategory_ids(category.id) : []
  relation.where("topics.category_id IN (?)", [category.id] + descendant_ids)
end
5872
59- # Extract tags
60- @raw_filter
61- . scan ( VALID_FILTER_PATTERNS [ :tag ] )
62- . each { |match | components [ :tags ] << match [ 0 ] }
# Tag filter
# "tag:<name>" keeps posts whose topic carries the named tag.
#
# A per-filter IN subquery is used instead of joining topic_tags: with a join,
# several tag tokens would all constrain the same joined row
# (tag_id = a AND tag_id = b), which can never match. With subqueries each
# tag token is checked independently, so "tag:a tag:b" returns topics that
# have both tags.
register_filter(/\Atag:([a-zA-Z0-9_\-]+)\z/i) do |relation, name, _|
  tag = Tag.find_by_name(name)
  if tag
    relation.where(
      "topics.id IN (
        SELECT tt.topic_id FROM topic_tags tt
        WHERE tt.tag_id = ?
      )",
      tag.id,
    )
  else
    relation.where("1 = 0") # No results if tag doesn't exist
  end
end
6385
64- # Extract groups
65- @raw_filter
66- . scan ( VALID_FILTER_PATTERNS [ :group ] )
67- . each { |match | components [ :groups ] << match [ 0 ] }
# User filter
# "@username" keeps posts written by that user; the lookup uses the
# lower-cased username.
register_filter(/\A\@(\w+)\z/i) do |relation, username, _|
  author = User.find_by(username_lower: username.downcase)
  next relation.where("1 = 0") unless author # No results if user doesn't exist

  relation.where("posts.user_id = ?", author.id)
end
6895
69- # Extract status
70- if status_match = @raw_filter . match ( VALID_FILTER_PATTERNS [ :status ] )
71- components [ :status ] = status_match [ 1 ]
# Posted by current user
# "in:posted" keeps posts written by the user behind the filter's guardian.
register_filter(/\Ain:posted\z/i) do |relation, _, filter|
  current_user = filter.guardian.user
  next relation.where("1 = 0") unless current_user # No results if not logged in

  relation.where("posts.user_id = ?", current_user.id)
end
73104
74- components
# Group filter
# "group:<name>" keeps posts written by members of the named group.
#
# The name is compared with LOWER(...) = LOWER(?) rather than ILIKE: the token
# pattern allows "_", which ILIKE treats as a single-character wildcard, so
# "group:a_b" could otherwise resolve to an unrelated group such as "axb".
register_filter(/\Agroup:([a-zA-Z0-9_\-]+)\z/i) do |relation, name, _|
  group = Group.find_by("LOWER(name) = LOWER(?)", name)
  if group
    relation.where(
      "posts.user_id IN (
        SELECT gu.user_id FROM group_users gu
        WHERE gu.group_id = ?
      )",
      group.id,
    )
  else
    relation.where("1 = 0") # No results if group doesn't exist
  end
end
76119
77- def next_batch
78- previous_offset = @current_offset
79- @current_offset += @batch_size
80- previous_offset
# Builds a filter from a raw search string.
#
# term     - raw query; converted with to_s, then stripped of recognised
#            filter tokens by process_filters (the remainder becomes #term)
# guardian - guardian used for scoping; a new Guardian is built when omitted
# limit    - optional row limit applied by #search
# offset   - optional row offset applied by #search
def initialize(term, guardian: nil, limit: nil, offset: nil)
  @guardian = guardian || Guardian.new
  @limit = limit
  @offset = offset
  # Must exist before process_filters runs, since parsing appends to it.
  @filters = []
  @valid = true

  @term = process_filters(term.to_s)
end
82130
83- def reset_batch
84- @current_offset = 0
# Builds the filtered relation: posts visible to the guardian, joined to
# topics visible to the guardian, with every parsed filter applied in the
# order it appeared. Limit and offset are applied only when they convert to a
# positive integer. Returns the relation without loading it.
def search
  base = Post.secured(@guardian).joins(:topic).merge(Topic.secured(@guardian))

  scoped =
    @filters.reduce(base) do |relation, (handler, captured)|
      handler.call(relation, captured, self)
    end

  scoped = scoped.limit(@limit) if @limit.to_i.positive?
  scoped = scoped.offset(@offset) if @offset.to_i.positive?
  scoped
end
86143
87- def to_query_params
88- params = { }
89- params [ :username ] = parsed_components [ :users ] . first if parsed_components [ :users ] . any?
90- params [ :before ] = parsed_components [ :date_range ] [ :before ] if parsed_components [
91- :date_range
92- ] [
93- :before
94- ]
95- params [ :after ] = parsed_components [ :date_range ] [ :after ] if parsed_components [ :date_range ] [
96- :after
97- ]
98- params [ :category ] = parsed_components [ :categories ] . first if parsed_components [
99- :categories
100- ] . any?
101- params [ :tags ] = parsed_components [ :tags ] . join ( "," ) if parsed_components [ :tags ] . any?
102- params [ :status ] = parsed_components [ :status ] if parsed_components [ :status ]
103- params
144+ private
145+
# Splits +term+ into whitespace-separated tokens (a double-quoted phrase stays
# in one token), consumes the tokens that are recognised and returns the rest
# joined by single spaces.
#
# Side effects: an "order:xxx" token sets @order to the lower-cased symbol;
# each token matching a registered filter appends [handler, first_capture] to
# @filters. Only the first matching registered filter is used per token.
#
# Returns "" when +term+ is blank.
def process_filters(term)
  return "" if term.blank?

  tokens = term.to_s.scan(/(([^" \t\n\x0B\f\r]+)?(("[^"]+")?))/)

  unmatched =
    tokens.filter_map do |(word, _)|
      # scan also yields empty matches between tokens; drop them
      next if word.blank?

      # Check for order:xxx syntax
      if word =~ /\Aorder:(\w+)\z/i
        @order = Regexp.last_match(1).downcase.to_sym
        next
      end

      # Check registered filters, remembering the first capture group of the hit
      captured = nil
      entry =
        self.class.registered_filters.find do |matcher, _|
          next false unless word =~ matcher

          captured = Regexp.last_match(1)
          true
        end

      next word unless entry

      @filters << [entry.last, captured]
      nil
    end

  unmatched.join(" ")
end
105177 end
106178 end
0 commit comments