@@ -7,6 +7,7 @@ class SpamScanner
77 MINIMUM_EDIT_DIFFERENCE = 10
88 EDIT_DELAY_MINUTES = 10
# Posts whose updated_at is older than this are skipped by after_cooked_post.
99 MAX_AGE_TO_SCAN = 1 . day
# Cap on how much of post.raw build_context places into the scan context.
# NOTE(review): build_context slices with the inclusive range
# `0 .. MAX_RAW_SCAN_LENGTH`, which yields MAX_RAW_SCAN_LENGTH + 1 characters
# while its label says "first MAX_RAW_SCAN_LENGTH chars" - confirm whether an
# exclusive range (`...`) was intended.
10+ MAX_RAW_SCAN_LENGTH = 5000
1011
# Name of the post custom field that after_cooked_post checks first; when the
# field is not set on the post, after_cooked_post returns immediately.
1112 SHOULD_SCAN_POST_CUSTOM_FIELD = "discourse_ai_should_scan_post"
1213
@@ -17,6 +18,39 @@ def self.new_post(post)
1718 flag_post_for_scanning ( post )
1819 end
1920
# Makes sure a dedicated account exists for raising spam flags and records
# its id in SiteSetting.ai_spam_detection_user_id.
#
# Does nothing when the setting is already present. Otherwise, inside a single
# transaction, it creates an active, approved, admin, trust-level-4 user with
# a random @invalid.invalid email, syncs its trust-level group membership and
# stores the new id in the site setting.
#
# @return [Integer, nil] the id assigned to the setting, or nil when the
#   setting was already present
# @raise whatever User.create! raises when the record cannot be saved
def self.ensure_flagging_user!
  return if SiteSetting.ai_spam_detection_user_id.present?

  User.transaction do
    # prefer a "high" id for this bot: one below the lowest id above -20
    id = User.where("id > -20").minimum(:id) - 1
    # That slot can already belong to a user whose id is <= -20 (the query
    # above does not see those). In that case fall back to one below the
    # overall minimum, which cannot be taken.
    id = User.minimum(:id) - 1 if User.exists?(id: id)

    user =
      User.create!(
        id: id,
        username: UserNameSuggester.suggest("discourse_ai_spam"),
        name: "Discourse AI Spam Scanner",
        email: "#{SecureRandom.hex(10)}@invalid.invalid",
        active: true,
        approved: true,
        trust_level: TrustLevel[4],
        admin: true,
      )
    Group.user_trust_level_change!(user.id, user.trust_level)

    SiteSetting.ai_spam_detection_user_id = user.id
  end
end
45+
# Resolves the account used to raise spam flags.
#
# Looks up the user whose id is stored in
# SiteSetting.ai_spam_detection_user_id when that setting is present; if the
# setting is blank or no such user exists, Discourse.system_user is used.
#
# @return [User] the configured flagging user, or the system user as fallback
def self.flagging_user
  configured =
    if SiteSetting.ai_spam_detection_user_id.present?
      User.find_by(id: SiteSetting.ai_spam_detection_user_id)
    end

  configured || Discourse.system_user
end
53+
2054 def self . after_cooked_post ( post )
2155 return if !post . custom_fields [ SHOULD_SCAN_POST_CUSTOM_FIELD ]
2256 return if post . updated_at < MAX_AGE_TO_SCAN . ago
@@ -98,11 +132,9 @@ def self.perform_scan(post)
98132
99133 context = build_context ( post )
100134
101- args = { type : :user , content : context }
135+ args = { type : :user , content : context }
102136 upload_ids = post . upload_ids
103- if upload_ids . present?
104- args [ :upload_ids ] = upload_ids . take ( 3 )
105- end
137+ args [ :upload_ids ] = upload_ids . take ( 3 ) if upload_ids . present?
106138
107139 prompt . push ( **args )
108140
@@ -171,12 +203,11 @@ def self.build_context(post)
171203 context << "- Total posts: #{ post . user . post_count } "
172204 context << "- Trust level: #{ post . user . trust_level } "
173205
174- context << "\n Post Content: "
175- context << post . raw
206+ context << "\n Post Content (first #{ MAX_RAW_SCAN_LENGTH } chars): \n "
207+ context << post . raw [ 0 .. MAX_RAW_SCAN_LENGTH ]
176208 context . join ( "\n " )
177209 end
178210
179-
180211 def self . build_system_prompt ( custom_instructions )
181212 base_prompt = +<<~PROMPT
182213 You are a spam detection system. Analyze the following post content and context.
@@ -228,7 +259,7 @@ def self.handle_spam(post, log)
228259
229260 result =
230261 PostActionCreator . new (
231- Discourse . system_user ,
262+ flagging_user ,
232263 post ,
233264 PostActionType . types [ :spam ] ,
234265 reason : reason ,
@@ -237,6 +268,30 @@ def self.handle_spam(post, log)
237268
238269 log . update! ( reviewable : result . reviewable )
239270 SpamRule ::AutoSilence . new ( post . user , post ) . silence_user
271+ # this is required cause tl1 is not auto hidden
272+ # we want to also handle tl1
273+ hide_posts_and_topics ( post . user )
274+ end
275+
# Hides everything the given user posted during the last 24 hours.
#
# Every post by the user created after the cutoff is marked hidden; a
# hidden_reason_id that is already set is kept, otherwise
# :new_user_spam_threshold_reached is recorded. Topics whose first post
# (post_number 1) is among those posts are then made invisible.
#
# @param user [User] author whose recent content is hidden (only user.id is read)
# @return the result of the final Topic update_all call
def self.hide_posts_and_topics(user)
  # Compute the cutoff once so both queries cover exactly the same set of
  # posts; evaluating 24.hours.ago twice lets the two windows drift apart.
  cutoff = 24.hours.ago
  recent_posts = Post.where(user_id: user.id).where("created_at > ?", cutoff)

  recent_posts.update_all(
    [
      "hidden = true, hidden_reason_id = COALESCE(hidden_reason_id, ?)",
      Post.hidden_reasons[:new_user_spam_threshold_reached],
    ],
  )

  # Sub-select of the topics this user started within the same window.
  topic_ids = recent_posts.where(post_number: 1).select(:topic_id)

  Topic.where(id: topic_ids).update_all(visible: false)
end
241296 end
242297 end
0 commit comments