Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 07fc403

Browse files
committed
scan images as well during spam scan
(if llm supports it)
1 parent db678a8 commit 07fc403

File tree

3 files changed: +58 -18 lines changed

lib/ai_moderation/entry_point.rb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,8 @@ module AiModeration
55
class EntryPoint
66
def inject_into(plugin)
77
plugin.on(:post_created) { |post| SpamScanner.new_post(post) }
8-
98
plugin.on(:post_edited) { |post| SpamScanner.edited_post(post) }
9+
plugin.on(:post_process_cooked) { |_doc, post| SpamScanner.after_cooked_post(post) }
1010
end
1111
end
1212
end

lib/ai_moderation/spam_scanner.rb

Lines changed: 33 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -6,12 +6,30 @@ class SpamScanner
66
POSTS_TO_SCAN = 3
77
MINIMUM_EDIT_DIFFERENCE = 10
88
EDIT_DELAY_MINUTES = 10
9+
MAX_AGE_TO_SCAN = 1.day
10+
11+
SHOULD_SCAN_POST_CUSTOM_FIELD = "discourse_ai_should_scan_post"
912

1013
def self.new_post(post)
1114
return if !enabled?
1215
return if !should_scan_post?(post)
1316

14-
Jobs.enqueue(:ai_spam_scan, post_id: post.id)
17+
flag_post_for_scanning(post)
18+
end
19+
20+
def self.after_cooked_post(post)
21+
return if !post.custom_fields[SHOULD_SCAN_POST_CUSTOM_FIELD]
22+
return if post.updated_at < MAX_AGE_TO_SCAN.ago
23+
24+
last_scan = AiSpamLog.where(post_id: post.id).order(created_at: :desc).first
25+
26+
if last_scan && last_scan.created_at > EDIT_DELAY_MINUTES.minutes.ago
27+
delay_minutes =
28+
((last_scan.created_at + EDIT_DELAY_MINUTES.minutes) - Time.current).to_i / 60
29+
Jobs.enqueue_in(delay_minutes.minutes, :ai_spam_scan, post_id: post.id)
30+
else
31+
Jobs.enqueue(:ai_spam_scan, post_id: post.id)
32+
end
1533
end
1634

1735
def self.edited_post(post)
@@ -22,19 +40,14 @@ def self.edited_post(post)
2240
previous_version = post.revisions.last&.modifications&.dig("raw", 0)
2341
current_version = post.raw
2442

25-
# Skip if we can't determine the difference or if change is too small
2643
return if !significant_change?(previous_version, current_version)
2744

28-
last_scan = AiSpamLog.where(post_id: post.id).order(created_at: :desc).first
45+
flag_post_for_scanning(post)
46+
end
2947

30-
if last_scan && last_scan.created_at > EDIT_DELAY_MINUTES.minutes.ago
31-
# Schedule delayed job if too soon after last scan
32-
delay_minutes =
33-
((last_scan.created_at + EDIT_DELAY_MINUTES.minutes) - Time.current).to_i / 60
34-
Jobs.enqueue_in(delay_minutes.minutes, :ai_spam_scan, post_id: post.id)
35-
else
36-
Jobs.enqueue(:ai_spam_scan, post_id: post.id)
37-
end
48+
def self.flag_post_for_scanning(post)
49+
post.custom_fields[SHOULD_SCAN_POST_CUSTOM_FIELD] = "true"
50+
post.save_custom_fields
3851
end
3952

4053
def self.enabled?
@@ -84,7 +97,14 @@ def self.perform_scan(post)
8497
prompt = DiscourseAi::Completions::Prompt.new(system_prompt)
8598

8699
context = build_context(post)
87-
prompt.push(type: :user, content: context)
100+
101+
args = {type: :user, content: context}
102+
upload_ids = post.upload_ids
103+
if upload_ids.present?
104+
args[:upload_ids] = upload_ids.take(3)
105+
end
106+
107+
prompt.push(**args)
88108

89109
begin
90110
result =
@@ -156,6 +176,7 @@ def self.build_context(post)
156176
context.join("\n")
157177
end
158178

179+
159180
def self.build_system_prompt(custom_instructions)
160181
base_prompt = +<<~PROMPT
161182
You are a spam detection system. Analyze the following post content and context.

spec/lib/modules/ai_moderation/spam_scanner_spec.rb

Lines changed: 24 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -109,7 +109,10 @@
109109

110110
describe ".new_post" do
111111
it "enqueues spam scan job for eligible posts" do
112-
expect { described_class.new_post(post) }.to change(Jobs::AiSpamScan.jobs, :size).by(1)
112+
expect {
113+
described_class.new_post(post)
114+
described_class.after_cooked_post(post)
115+
}.to change(Jobs::AiSpamScan.jobs, :size).by(1)
113116
end
114117

115118
it "doesn't enqueue jobs when disabled" do
@@ -127,7 +130,10 @@
127130
},
128131
)
129132

130-
expect { described_class.edited_post(post) }.to change(Jobs::AiSpamScan.jobs, :size).by(1)
133+
expect {
134+
described_class.edited_post(post)
135+
described_class.after_cooked_post(post)
136+
}.to change(Jobs::AiSpamScan.jobs, :size).by(1)
131137
end
132138

133139
it "schedules delayed job when edited too soon after last scan" do
@@ -139,28 +145,41 @@
139145
created_at: 5.minutes.ago,
140146
)
141147

142-
expect { described_class.edited_post(post) }.to change(Jobs::AiSpamScan.jobs, :size).by(1)
148+
expect {
149+
described_class.edited_post(post)
150+
described_class.after_cooked_post(post)
151+
}.to change(Jobs::AiSpamScan.jobs, :size).by(1)
143152
end
144153
end
145154

146155
describe "integration test" do
147156
fab!(:llm_model)
148157
let(:api_audit_log) { Fabricate(:api_audit_log) }
158+
fab!(:post_with_uploaded_image)
149159

150160
before { Jobs.run_immediately! }
151161

152162
it "Correctly handles spam scanning" do
153-
# we need a proper audit log so
163+
# flag post for scanning
164+
post = post_with_uploaded_image
165+
166+
described_class.new_post(post)
167+
154168
prompt = nil
155169
DiscourseAi::Completions::Llm.with_prepared_responses(["spam"]) do |_, _, _prompts|
156-
described_class.new_post(post)
170+
# force a rebake so we actually scan
171+
post.rebake!
157172
prompt = _prompts.first
158173
end
159174

160175
content = prompt.messages[1][:content]
161176
expect(content).to include(post.topic.title)
162177
expect(content).to include(post.raw)
163178

179+
upload_ids = prompt.messages[1][:upload_ids]
180+
expect(upload_ids).to be_present
181+
expect(upload_ids).to eq(post.upload_ids)
182+
164183
log = AiSpamLog.find_by(post: post)
165184

166185
expect(log.payload).to eq(content)

0 commit comments

Comments
 (0)