Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 0cb14a3

Browse files
committed
added some code for spam scanner
work in progress
1 parent af986be commit 0cb14a3

File tree

3 files changed

+219
-6
lines changed

3 files changed

+219
-6
lines changed

app/jobs/regular/ai_spam_scan.rb

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# frozen_string_literal: true

module Jobs
  # Background job that hands a single post to the AI spam scanner.
  # Enqueued by DiscourseAi::AiModeration::SpamScanner on post create/edit.
  class AiSpamScan < ::Jobs::Base
    # args[:post_id] — id of the post to scan; the job is a no-op when the
    # id is missing or the post has since been deleted.
    def execute(args)
      post_id = args[:post_id]
      return unless post_id

      post = Post.find_by(id: post_id)
      return unless post

      DiscourseAi::AiModeration::SpamScanner.perform_scan(post)
    end
  end
end

app/models/ai_spam_log.rb

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# frozen_string_literal: true

# Persistence for AI spam-scan outcomes, one row per scanned post.
# Written by DiscourseAi::AiModeration::SpamScanner; tracks per-post scan
# counts so posts are not rescanned indefinitely.
# NOTE(review): schema is defined in a migration not visible here — confirm
# columns (post_id, llm_model, is_spam, scan_count, last_ai_api_audit_log_id).
class AiSpamLog < ActiveRecord::Base
end

lib/ai_moderation/entry_point.rb

Lines changed: 203 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,213 @@
22

33
module DiscourseAi
  module AiModeration
    # Scans new and significantly-edited posts from new/low-trust users for
    # spam via an LLM. Results are persisted to AiSpamLog; a positive verdict
    # silences the author and flags the post for human review.
    class SpamScanner
      # Only users with at most this many posts are eligible for scanning.
      POSTS_TO_SCAN = 3
      # Minimum levenshtein distance between revisions to trigger a rescan.
      MINIMUM_EDIT_DIFFERENCE = 10
      # Cooldown between successive scans of the same post.
      EDIT_DELAY_MINUTES = 10
      # Hard cap on the cumulative number of scans per post.
      MAX_SCANS_PER_POST = 3

      # Enqueues a background scan for a freshly created post.
      def self.new_post(post)
        return if !enabled?
        return if !should_scan_post?(post)

        Jobs.enqueue(:ai_spam_scan, post_id: post.id)
      end

      # Enqueues a (possibly delayed) rescan when a post was edited enough to
      # matter, respecting both the per-post scan cap and the edit cooldown.
      def self.edited_post(post)
        return if !enabled?
        return if !should_scan_post?(post)
        return if scanned_max_times?(post)

        previous_version = post.revisions.last&.modifications&.dig("raw", 0)
        current_version = post.raw

        # Skip if we can't determine the difference or if the change is too small.
        return if !significant_change?(previous_version, current_version)

        last_scan = AiSpamLog.where(post_id: post.id).order(created_at: :desc).first

        if last_scan && last_scan.created_at > EDIT_DELAY_MINUTES.minutes.ago
          # Too soon after the previous scan — schedule the job to run once
          # the cooldown has expired instead of immediately.
          delay_minutes =
            ((last_scan.created_at + EDIT_DELAY_MINUTES.minutes) - Time.current).to_i / 60
          Jobs.enqueue_in(delay_minutes.minutes, :ai_spam_scan, post_id: post.id)
        else
          Jobs.enqueue(:ai_spam_scan, post_id: post.id)
        end
      end

      def self.enabled?
        SiteSetting.ai_spam_detection_enabled && SiteSetting.discourse_ai_enabled
      end

      # A post qualifies for scanning when it exists, its author is a
      # low-trust user with few posts, and it is not a private message.
      def self.should_scan_post?(post)
        return false if post.blank?
        # FIX: guard against a deleted author — post.user can be nil by the
        # time the delayed job runs, which previously raised NoMethodError.
        return false if post.user.blank?
        return false if post.user.trust_level > TrustLevel[1]
        return false if post.user.post_count > POSTS_TO_SCAN
        return false if post.topic.private_message?
        true
      end

      def self.scanned_max_times?(post)
        # FIX: named constant instead of a magic number so the cap cannot
        # silently drift from the documented limit.
        AiSpamLog.where(post_id: post.id).sum(:scan_count) >= MAX_SCANS_PER_POST
      end

      # True when the edit changed the raw content enough to warrant a rescan.
      def self.significant_change?(previous_version, current_version)
        return true if previous_version.nil? # First edit should be scanned

        # Use Discourse's built-in levenshtein implementation; inputs are
        # capped at 1000 chars to bound the O(n*m) comparison.
        distance =
          ScreenedEmail.levenshtein(previous_version.to_s[0...1000], current_version.to_s[0...1000])

        distance >= MINIMUM_EDIT_DIFFERENCE
      end

      # Classifies a post with the configured LLM, logs the outcome, and acts
      # on a positive verdict. Errors are logged, never raised, so a failing
      # LLM call cannot break the enqueueing job.
      def self.perform_scan(post)
        return if !enabled?
        return if !should_scan_post?(post)

        settings = AiModerationSetting.spam
        return if !settings || !settings.llm_model

        llm = settings.llm_model.to_llm
        custom_instructions = settings.custom_instructions.presence

        system_prompt = build_system_prompt(custom_instructions)
        prompt = DiscourseAi::Completions::Prompt.new(system_prompt)

        context = build_context(post)
        prompt.push(type: :user, content: context)

        begin
          result =
            llm.generate(
              prompt,
              temperature: 0.1,
              max_tokens: 100,
              user: Discourse.system_user,
              feature_name: "spam_detection",
              feature_context: {
                post_id: post.id,
              },
            )&.strip

          verdict = result.to_s.downcase
          # FIX: the prompt asks for "SPAM" or "NOT_SPAM", and "not_spam"
          # itself contains "spam" — a plain include?("spam") check flagged
          # every legitimate post. Exclude the negative label explicitly.
          is_spam = verdict.include?("spam") && !verdict.include?("not_spam")

          create_log_entry(post, settings.llm_model, result, is_spam)

          handle_spam(post, result) if is_spam
        rescue StandardError => e
          Rails.logger.error("Error in SpamScanner for post #{post.id}: #{e.message}")
        end
      end

      # Records the scan outcome, tracking the cumulative scan count and the
      # most recent verdict/model for the post.
      def self.create_log_entry(post, llm_model, result, is_spam)
        log = AiSpamLog.find_or_initialize_by(post_id: post.id)

        log.scan_count = log.new_record? ? 1 : log.scan_count + 1
        # FIX: always record the latest verdict and model — previously a
        # rescan kept the stale first-scan values while handle_spam acted on
        # the new verdict, leaving the log inconsistent with moderation.
        log.llm_model = llm_model
        log.is_spam = is_spam

        # Associate the scan with the most recent LLM API audit entry, if any.
        last_audit = DiscourseAi::ApiAuditLog.last
        log.last_ai_api_audit_log_id = last_audit.id if last_audit

        log.save!
      end

      # Silences the author and raises a spam flag queued for human review.
      def self.handle_spam(post, result)
        SpamRule::AutoSilence.new(post.user, post).silence_user

        reason = I18n.t("discourse_ai.spam_detection.flag_reason", result: result)

        PostActionCreator.new(
          Discourse.system_user,
          post,
          PostActionType.types[:spam],
          message: reason,
          queue_for_review: true,
        ).perform
      end

      # Builds the user-turn prompt describing the post, its author and its
      # surrounding topic context.
      def self.build_context(post)
        context = []

        # Clear distinction between reply and new topic
        if post.is_first_post?
          context << "NEW TOPIC POST ANALYSIS"
          context << "- Topic title: #{post.topic.title}"
          context << "- Category: #{post.topic.category&.name}"
        else
          context << "REPLY POST ANALYSIS"
          context << "- In topic: #{post.topic.title}"
          context << "- Topic started by: #{post.topic.user.username}"

          # Include parent post context for replies
          if post.reply_to_post.present?
            parent = post.reply_to_post
            context << "\nReplying to #{parent.user.username}'s post:"
            # Truncate long parent posts to keep the prompt small.
            context << "#{parent.raw[0..500]}..." if parent.raw.length > 500
            context << parent.raw if parent.raw.length <= 500
          end
        end

        context << "\nPost Author Information:"
        context << "- Username: #{post.user.username}"
        context << "- Account age: #{(Time.current - post.user.created_at).to_i / 86_400} days"
        context << "- Total posts: #{post.user.post_count}"
        context << "- Trust level: #{post.user.trust_level}"

        context << "\nPost Content:"
        context << post.raw

        if post.linked_urls.present?
          context << "\nLinks in post:"
          context << post.linked_urls.join(", ")
        end

        context.join("\n")
      end

      # Assembles the system prompt, appending any site-specific custom
      # instructions configured by admins.
      def self.build_system_prompt(custom_instructions)
        base_prompt = <<~PROMPT
          You are a spam detection system. Analyze the following post content and context.
          Respond with "SPAM" if the post is spam, or "NOT_SPAM" if it's legitimate.

          Consider the post type carefully:
          - For REPLY posts: Check if the response is relevant and topical to the thread
          - For NEW TOPIC posts: Check if it's a legitimate topic or spam promotion

          A post is spam if it matches any of these criteria:
          - Contains unsolicited commercial content or promotions
          - Has suspicious or unrelated external links
          - Shows patterns of automated/bot posting
          - Contains irrelevant content or advertisements
          - For replies: Completely unrelated to the discussion thread
          - Uses excessive keywords or repetitive text patterns
          - Shows suspicious formatting or character usage

          Be especially strict with:
          - Replies that ignore the previous conversation
          - Posts containing multiple unrelated external links
          - Generic responses that could be posted anywhere

          Be fair to:
          - New users making legitimate first contributions
          - Non-native speakers making genuine efforts to participate
          - Topic-relevant product mentions in appropriate contexts
        PROMPT

        if custom_instructions.present?
          base_prompt += "\n\nAdditional site-specific instructions:\n#{custom_instructions}"
        end

        base_prompt
      end

      # FIX: a bare `private` marker does not apply to `def self.` methods —
      # the helpers below were unintentionally public. private_class_method
      # actually hides them; intra-class calls via implicit receiver still work.
      private_class_method :build_context, :build_system_prompt, :create_log_entry, :handle_spam
    end
  end
end

0 commit comments

Comments
 (0)