Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 47f5da7

Browse files
SamSaffron, keegangeorge, and martin-brennan
authored
FEATURE: Add AI-powered spam detection for new user posts (#1004)
This introduces a comprehensive spam detection system that uses LLM models to automatically identify and flag potential spam posts. The system is designed to be both powerful and configurable while preventing false positives.

Key Features:
* Automatically scans first 3 posts from new users (TL0/TL1)
* Creates dedicated AI flagging user to distinguish from system flags
* Tracks false positives/negatives for quality monitoring
* Supports custom instructions to fine-tune detection
* Includes test interface for trying detection on any post

Technical Implementation:
* New database tables:
  - ai_spam_logs: Stores scan history and results
  - ai_moderation_settings: Stores LLM config and custom instructions
* Rate limiting and safeguards:
  - Minimum 10-minute delay between rescans
  - Only scans significant edits (>10 char difference)
  - Maximum 3 scans per post
  - 24-hour maximum age for scannable posts
* Admin UI features:
  - Real-time testing capabilities
  - 7-day statistics dashboard
  - Configurable LLM model selection
  - Custom instruction support

Security and Performance:
* Respects trust levels - only scans TL0/TL1 users
* Skips private messages entirely
* Stops scanning users after 3 successful public posts
* Includes comprehensive test coverage
* Maintains audit log of all scan attempts

---------

Co-authored-by: Keegan George <[email protected]>
Co-authored-by: Martin Brennan <[email protected]>
1 parent ae80494 commit 47f5da7

File tree

27 files changed

+1801
-6
lines changed

27 files changed

+1801
-6
lines changed
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import { service } from "@ember/service";
2+
import { ajax } from "discourse/lib/ajax";
3+
import DiscourseRoute from "discourse/routes/discourse";
4+
5+
export default class DiscourseAiSpamRoute extends DiscourseRoute {
6+
@service store;
7+
8+
model() {
9+
return ajax("/admin/plugins/discourse-ai/ai-spam.json");
10+
}
11+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<AiSpam @model={{this.model}} />
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
# frozen_string_literal: true
2+
3+
module DiscourseAi
  module Admin
    # Admin endpoints for the AI spam-detection settings page: reading the
    # current configuration, updating it, and running a test scan on one post.
    class AiSpamController < ::Admin::AdminController
      requires_plugin "discourse-ai"

      # Matches a string made up solely of decimal digits. Anchored with \A and
      # \z (not ^ and $, which match per line) so that multi-line input cannot
      # be mistaken for a bare post id.
      POST_ID_PATTERN = /\A\d+\z/

      # Renders the current spam-detection configuration as JSON.
      def show
        render json: AiSpamSerializer.new(spam_config, root: false)
      end

      # Updates the spam-detection settings from the permitted params
      # (:llm_model_id, :custom_instructions, :is_enabled) and renders the
      # resulting configuration.
      #
      # Responds with 422 when a negative model id is not listed in the
      # allowed seeded models setting, or when enabling is requested while no
      # LLM model is configured.
      def update
        updated_params = {}

        if allowed_params.key?(:llm_model_id)
          llm_model_id = updated_params[:llm_model_id] = allowed_params[:llm_model_id]
          # NOTE(review): negative ids presumably denote seeded models; they
          # must appear in the allow-list site setting to be selectable.
          if llm_model_id.to_i < 0 &&
               !SiteSetting.ai_spam_detection_model_allowed_seeded_models_map.include?(
                 "custom:#{llm_model_id}",
               )
            return(
              render_json_error(
                I18n.t("discourse_ai.llm.configuration.invalid_seeded_model"),
                status: 422,
              )
            )
          end
        end

        if allowed_params.key?(:custom_instructions)
          updated_params[:data] = { custom_instructions: allowed_params[:custom_instructions] }
        end

        if updated_params.present?
          # not using upsert cause we will not get the correct validation errors
          setting = AiModerationSetting.spam
          if setting
            setting.update!(updated_params)
          else
            AiModerationSetting.create!(updated_params.merge(setting_type: :spam))
          end
        end

        if allowed_params.key?(:is_enabled)
          is_enabled = ActiveModel::Type::Boolean.new.cast(allowed_params[:is_enabled])

          # This check runs after the settings above are persisted, so a single
          # request may both select a model and enable detection.
          if is_enabled && !AiModerationSetting.spam&.llm_model_id
            return(
              render_json_error(
                I18n.t("discourse_ai.llm.configuration.must_select_model"),
                status: 422,
              )
            )
          end

          SiteSetting.ai_spam_detection_enabled = is_enabled
        end

        render json: AiSpamSerializer.new(spam_config, root: false)
      end

      # Runs the spam scanner against a single post and renders its result.
      #
      # params[:post_url] may be either a bare numeric post id or a topic URL;
      # for a topic URL without a post number the first post is used.
      # Raises Discourse::NotFound when no post can be resolved.
      def test
        url = params[:post_url].to_s
        post = Post.find_by(id: url.to_i) if url.match?(POST_ID_PATTERN)

        if !post
          route = UrlHelper.rails_route_from_url(url)
          if route && route[:controller] == "topics"
            post_number = route[:post_number] || 1
            post = Post.with_deleted.find_by(post_number: post_number, topic_id: route[:topic_id])
          end
        end

        raise Discourse::NotFound if !post

        result =
          DiscourseAi::AiModeration::SpamScanner.test_post(
            post,
            custom_instructions: params[:custom_instructions],
            llm_id: params[:llm_id],
          )

        render json: result
      end

      private

      # Parameters accepted by #update.
      def allowed_params
        params.permit(:is_enabled, :llm_model_id, :custom_instructions)
      end

      # Builds the hash consumed by AiSpamSerializer: enabled flag, stored
      # settings, a report starting one week ago and, once at least one post
      # has been scanned, the username of the flagging user.
      def spam_config
        config = {
          enabled: SiteSetting.ai_spam_detection_enabled,
          settings: AiModerationSetting.spam,
        }

        config[:stats] = DiscourseAi::AiModeration::SpamReport.generate(min_date: 1.week.ago)

        if config[:stats].scanned_count > 0
          config[:flagging_username] = DiscourseAi::AiModeration::SpamScanner.flagging_user&.username
        end

        config
      end
    end
  end
end

app/jobs/regular/ai_spam_scan.rb

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# frozen_string_literal: true
2+
3+
module Jobs
  # Background job that hands a single post to the AI spam scanner.
  class AiSpamScan < ::Jobs::Base
    # args[:post_id] - id of the post to scan. The job does nothing when the
    # id is missing or no post with that id is found.
    def execute(args)
      post_id = args[:post_id]
      return if !post_id

      post = Post.find_by(id: post_id)
      DiscourseAi::AiModeration::SpamScanner.perform_scan(post) if post
    end
  end
end
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# frozen_string_literal: true
2+
# Stores the configuration for one kind of AI moderation (one row per
# setting_type): the LLM model to use plus free-form options in `data`.
class AiModerationSetting < ActiveRecord::Base
  belongs_to :llm_model

  validates :llm_model_id, presence: true
  validates :setting_type, presence: true
  validates :setting_type, uniqueness: true

  # Returns the setting row for spam detection, or nil when none exists.
  def self.spam
    find_by(setting_type: :spam)
  end

  # Returns the custom instructions stored under "custom_instructions" in
  # `data`, or nil when none are set. `data` is a nullable jsonb column, so a
  # row saved without it must not raise here.
  def custom_instructions
    data&.dig("custom_instructions")
  end
end
17+
18+
# == Schema Information
19+
#
20+
# Table name: ai_moderation_settings
21+
#
22+
# id :bigint not null, primary key
23+
# setting_type :enum not null
24+
# data :jsonb
25+
# llm_model_id :bigint not null
26+
# created_at :datetime not null
27+
# updated_at :datetime not null
28+
#
29+
# Indexes
30+
#
31+
# index_ai_moderation_settings_on_setting_type (setting_type) UNIQUE
32+
#

app/models/ai_spam_log.rb

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# frozen_string_literal: true
2+
# One row per AI spam scan of a post. Links the scanned post and the LLM model
# used to the related API audit log entry and reviewable.
class AiSpamLog < ActiveRecord::Base
  belongs_to :post
  belongs_to :llm_model
  belongs_to :ai_api_audit_log
  belongs_to :reviewable
end
8+
9+
# == Schema Information
10+
#
11+
# Table name: ai_spam_logs
12+
#
13+
# id :bigint not null, primary key
14+
# post_id :bigint not null
15+
# llm_model_id :bigint not null
16+
# ai_api_audit_log_id :bigint
17+
# reviewable_id :bigint
18+
# is_spam :boolean not null
19+
# payload :string(20000) default(""), not null
20+
# created_at :datetime not null
21+
# updated_at :datetime not null
22+
#
23+
# Indexes
24+
#
25+
# index_ai_spam_logs_on_post_id (post_id)
26+
#

app/models/llm_model.rb

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,11 @@ def self.provider_params
5656
end
5757

5858
def to_llm
59-
DiscourseAi::Completions::Llm.proxy("custom:#{id}")
59+
DiscourseAi::Completions::Llm.proxy(identifier)
60+
end
61+
62+
def identifier
63+
"custom:#{id}"
6064
end
6165

6266
def toggle_companion_user
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# frozen_string_literal: true
2+
3+
# Serializes the spam-detection configuration hash built by the admin
# controller (keys :enabled, :settings, :stats and optionally
# :flagging_username).
class AiSpamSerializer < ApplicationSerializer
  attributes :is_enabled, :llm_id, :custom_instructions, :available_llms, :stats, :flagging_username

  # The stored moderation setting record, or nil when none exists yet.
  def settings
    object[:settings]
  end

  def is_enabled
    object[:enabled]
  end

  def flagging_username
    object[:flagging_username]
  end

  # Id of the LLM model associated with the stored setting, if any.
  def llm_id
    settings&.llm_model&.id
  end

  def custom_instructions
    settings&.custom_instructions
  end

  # LLM choices offered for spam detection, as { id:, name: } hashes.
  def available_llms
    allowed_seeded = SiteSetting.ai_spam_detection_model_allowed_seeded_models_map
    choices = DiscourseAi::Configuration::LlmEnumerator.values(allowed_seeded_llms: allowed_seeded)
    choices.map { |choice| { id: choice[:value], name: choice[:name] } }
  end

  # Report counters coerced to integers.
  def stats
    report = object[:stats]

    {
      scanned_count: report.scanned_count.to_i,
      spam_detected: report.spam_detected.to_i,
      false_positives: report.false_positives.to_i,
      false_negatives: report.false_negatives.to_i,
    }
  end
end

assets/javascripts/discourse/admin-discourse-ai-plugin-route-map.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ export default {
1818
this.route("new");
1919
this.route("show", { path: "/:id" });
2020
});
21+
this.route("discourse-ai-spam", { path: "ai-spam" });
2122
this.route("discourse-ai-usage", { path: "ai-usage" });
2223
},
2324
};

0 commit comments

Comments
 (0)