Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 9df12aa

Browse files
committed
Dedicated user for spam scanning
1 parent 07fc403 commit 9df12aa

File tree

4 files changed

+77
-8
lines changed

4 files changed

+77
-8
lines changed

config/settings.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,10 @@ discourse_ai:
326326
default: false
327327
hidden: true
328328

329+
ai_spam_detection_user_id:
330+
default: ""
331+
hidden: true
332+
329333
ai_spam_detection_model_allowed_seeded_models:
330334
default: ""
331335
hidden: true

lib/ai_moderation/entry_point.rb

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@ def inject_into(plugin)
77
plugin.on(:post_created) { |post| SpamScanner.new_post(post) }
88
plugin.on(:post_edited) { |post| SpamScanner.edited_post(post) }
99
plugin.on(:post_process_cooked) { |_doc, post| SpamScanner.after_cooked_post(post) }
10+
11+
plugin.on(:site_setting_changed) do |name, _old_value, new_value|
12+
if name == :ai_spam_detection_enabled && new_value
13+
SpamScanner.ensure_flagging_user!
14+
end
15+
end
1016
end
1117
end
1218
end

lib/ai_moderation/spam_scanner.rb

Lines changed: 63 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ class SpamScanner
77
MINIMUM_EDIT_DIFFERENCE = 10
88
EDIT_DELAY_MINUTES = 10
99
MAX_AGE_TO_SCAN = 1.day
10+
MAX_RAW_SCAN_LENGTH = 5000
1011

1112
SHOULD_SCAN_POST_CUSTOM_FIELD = "discourse_ai_should_scan_post"
1213

@@ -17,6 +18,39 @@ def self.new_post(post)
1718
flag_post_for_scanning(post)
1819
end
1920

21+
# Creates the dedicated spam-scanner account and records its id in
# SiteSetting.ai_spam_detection_user_id. Does nothing when that setting is
# already populated.
#
# The account is created active, approved, admin and at trust level 4, with a
# random address under invalid.invalid. Creation and the setting update run
# inside a single User.transaction block.
def self.ensure_flagging_user!
  return if SiteSetting.ai_spam_detection_user_id.present?

  User.transaction do
    # prefer a "high" id for this bot: one below the lowest id above -20
    # (treat "no such user" as 0 so the candidate becomes -1).
    id = (User.where("id > -20").minimum(:id) || 0) - 1
    # The candidate can be as low as -20, which the query above never looks
    # at. If that id is already taken, go below every existing id instead of
    # letting create! fail on a duplicate primary key.
    id = User.minimum(:id) - 1 if User.exists?(id: id)

    user =
      User.create!(
        id: id,
        username: UserNameSuggester.suggest("discourse_ai_spam"),
        name: "Discourse AI Spam Scanner",
        email: "#{SecureRandom.hex(10)}@invalid.invalid",
        active: true,
        approved: true,
        trust_level: TrustLevel[4],
        admin: true,
      )
    Group.user_trust_level_change!(user.id, user.trust_level)

    SiteSetting.ai_spam_detection_user_id = user.id
  end
end
45+
46+
# Returns the account used to file spam flags: the user whose id is stored in
# SiteSetting.ai_spam_detection_user_id, or Discourse.system_user when the
# setting is blank or no user has that id.
def self.flagging_user
  configured_id = SiteSetting.ai_spam_detection_user_id
  dedicated = User.find_by(id: configured_id) if configured_id.present?
  dedicated || Discourse.system_user
end
53+
2054
def self.after_cooked_post(post)
2155
return if !post.custom_fields[SHOULD_SCAN_POST_CUSTOM_FIELD]
2256
return if post.updated_at < MAX_AGE_TO_SCAN.ago
@@ -98,11 +132,9 @@ def self.perform_scan(post)
98132

99133
context = build_context(post)
100134

101-
args = {type: :user, content: context}
135+
args = { type: :user, content: context }
102136
upload_ids = post.upload_ids
103-
if upload_ids.present?
104-
args[:upload_ids] = upload_ids.take(3)
105-
end
137+
args[:upload_ids] = upload_ids.take(3) if upload_ids.present?
106138

107139
prompt.push(**args)
108140

@@ -171,12 +203,11 @@ def self.build_context(post)
171203
context << "- Total posts: #{post.user.post_count}"
172204
context << "- Trust level: #{post.user.trust_level}"
173205

174-
context << "\nPost Content:"
175-
context << post.raw
206+
context << "\nPost Content (first #{MAX_RAW_SCAN_LENGTH} chars):\n"
207+
context << post.raw[0..MAX_RAW_SCAN_LENGTH]
176208
context.join("\n")
177209
end
178210

179-
180211
def self.build_system_prompt(custom_instructions)
181212
base_prompt = +<<~PROMPT
182213
You are a spam detection system. Analyze the following post content and context.
@@ -228,7 +259,7 @@ def self.handle_spam(post, log)
228259

229260
result =
230261
PostActionCreator.new(
231-
Discourse.system_user,
262+
flagging_user,
232263
post,
233264
PostActionType.types[:spam],
234265
reason: reason,
@@ -237,6 +268,30 @@ def self.handle_spam(post, log)
237268

238269
log.update!(reviewable: result.reviewable)
239270
SpamRule::AutoSilence.new(post.user, post).silence_user
271+
# this is required because tl1 posts are not auto hidden
272+
# we want to also handle tl1
273+
hide_posts_and_topics(post.user)
274+
end
275+
276+
# Marks every post the given user created in the last 24 hours as hidden
# (keeping any hidden_reason_id already set, otherwise using
# new_user_spam_threshold_reached) and makes the topics started by those
# posts invisible.
#
# Both writes go through update_all. Returns the result of the final
# Topic update_all call.
def self.hide_posts_and_topics(user)
  cutoff = 24.hours.ago
  recent_posts = Post.where(user_id: user.id).where("created_at > ?", cutoff)

  hide_sql = "hidden = true, hidden_reason_id = COALESCE(hidden_reason_id, ?)"
  recent_posts.update_all([hide_sql, Post.hidden_reasons[:new_user_spam_threshold_reached]])

  # post_number 1 is the post that opened its topic
  started_topic_ids = recent_posts.where(post_number: 1).select(:topic_id)
  Topic.where(id: started_topic_ids).update_all(visible: false)
end
241296
end
242297
end

spec/lib/modules/ai_moderation/spam_scanner_spec.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,9 @@
160160
before { Jobs.run_immediately! }
161161

162162
it "Correctly handles spam scanning" do
163+
164+
expect(described_class.flagging_user.id).not_to eq(Discourse.system_user.id)
165+
163166
# flag post for scanning
164167
post = post_with_uploaded_image
165168

@@ -188,6 +191,7 @@
188191
expect(post.topic.reload.visible).to eq(false)
189192

190193
expect(log.reviewable).to be_present
194+
expect(log.reviewable.created_by_id).to eq(described_class.flagging_user.id)
191195
end
192196
end
193197
end

0 commit comments

Comments
 (0)