Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion app/serializers/ai_spam_serializer.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
# frozen_string_literal: true

class AiSpamSerializer < ApplicationSerializer
attributes :is_enabled, :llm_id, :custom_instructions, :available_llms, :stats, :flagging_username
attributes :is_enabled,
:llm_id,
:custom_instructions,
:available_llms,
:stats,
:flagging_username,
:spam_score_type

def is_enabled
object[:enabled]
Expand All @@ -25,6 +31,10 @@ def flagging_username
object[:flagging_username]
end

# Returns the value of ReviewableScore.types[:spam].
# The ai-spam admin component interpolates this into the `score_type`
# query parameter of the false-negatives review link.
def spam_score_type
ReviewableScore.types[:spam]
end

def stats
{
scanned_count: object[:stats].scanned_count.to_i,
Expand Down
35 changes: 24 additions & 11 deletions assets/javascripts/discourse/components/ai-spam.gjs
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,30 @@ export default class AiSpam extends Component {
label: i18n("discourse_ai.spam.spam_detected"),
value: this.stats.spam_detected,
};

const falsePositives = {
label: i18n("discourse_ai.spam.false_positives"),
value: this.stats.false_positives,
tooltip: i18n("discourse_ai.spam.stat_tooltips.incorrectly_flagged"),
};

const falseNegatives = {
label: i18n("discourse_ai.spam.false_negatives"),
value: this.stats.false_negatives,
tooltip: i18n("discourse_ai.spam.stat_tooltips.missed_spam"),
};

if (this.args.model.flagging_username) {
detected.href = getURL(
"/review?flagged_by=" + this.args.model.flagging_username
`/review?flagged_by=${this.args.model.flagging_username}&status=all&sort_order=created_at`
);

falsePositives.href = getURL(
`/review?flagged_by=${this.args.model.flagging_username}&status=rejected&sort_order=created_at`
);

falseNegatives.href = getURL(
`/review?status=approved&sort_order=created_at&additional_filters={"ai_spam_false_negative":true}&order=created&score_type=${this.args.model.spam_score_type}`
);
}
return [
Expand All @@ -136,16 +157,8 @@ export default class AiSpam extends Component {
value: this.stats.scanned_count,
},
detected,
{
label: i18n("discourse_ai.spam.false_positives"),
value: this.stats.false_positives,
tooltip: i18n("discourse_ai.spam.stat_tooltips.incorrectly_flagged"),
},
{
label: i18n("discourse_ai.spam.false_negatives"),
value: this.stats.false_negatives,
tooltip: i18n("discourse_ai.spam.stat_tooltips.missed_spam"),
},
falsePositives,
falseNegatives,
];
}

Expand Down
19 changes: 19 additions & 0 deletions lib/ai_moderation/entry_point.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,25 @@ def inject_into(plugin)
plugin.on(:site_setting_changed) do |name, _old_value, new_value|
SpamScanner.ensure_flagging_user! if name == :ai_spam_detection_enabled && new_value
end

# Registers an `ai_spam_false_negative` custom filter on Reviewable.
#
# The filter is a [name, callable] pair. The callable receives the current
# results relation and the filter value: when the value is truthy it narrows
# the relation to reviewables whose target is a Post that has an
# ai_spam_logs row with is_spam false; otherwise it returns the relation
# unchanged.
custom_filter = [
  :ai_spam_false_negative,
  Proc.new do |results, value|
    if value
      # Every column is table-qualified so the correlation between the
      # subquery and the outer reviewables row is explicit and cannot be
      # re-bound if ai_spam_logs ever gains a same-named column.
      results.where(<<~SQL)
        EXISTS (
          SELECT 1 FROM ai_spam_logs
          WHERE NOT ai_spam_logs.is_spam
            AND ai_spam_logs.post_id = reviewables.target_id
            AND reviewables.target_type = 'Post'
        )
      SQL
    else
      results
    end
  end,
]

Reviewable.add_custom_filter(custom_filter)
end
end
end
Expand Down
33 changes: 17 additions & 16 deletions lib/ai_moderation/spam_report.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,33 +14,34 @@ def self.generate(min_date: 1.week.ago)
asl.post_id,
asl.is_spam,
r.status as reviewable_status,
r.target_type,
r.potential_spam
CASE WHEN EXISTS (
SELECT 1 FROM reviewable_scores rs
JOIN reviewables r1 ON r1.id = rs.reviewable_id
WHERE r1.target_id = asl.post_id
AND r1.target_type = 'Post'
AND rs.reviewable_score_type = :spam_score_type
AND NOT is_spam
AND r1.status IN (:spam)
) THEN true ELSE false END AS missed_spam
FROM ai_spam_logs asl
LEFT JOIN reviewables r ON r.id = asl.reviewable_id
WHERE asl.created_at > :min_date
),
post_reviewables AS (
SELECT
target_id post_id,
COUNT(DISTINCT target_id) as false_negative_count
FROM reviewables
WHERE target_type = 'Post'
AND status IN (:spam)
AND potential_spam
AND target_id IN (SELECT post_id FROM spam_stats)
GROUP BY target_id
)
SELECT
COUNT(*) AS scanned_count,
SUM(CASE WHEN is_spam THEN 1 ELSE 0 END) AS spam_detected,
COUNT(CASE WHEN reviewable_status IN (:ham) THEN 1 END) AS false_positives,
COALESCE(SUM(pr.false_negative_count), 0) AS false_negatives
COUNT(CASE WHEN missed_spam THEN 1 END) AS false_negatives
FROM spam_stats
LEFT JOIN post_reviewables pr USING (post_id)
SQL

DB.query(sql, spam: spam_status, ham: ham_status, min_date: min_date).first
DB.query(
sql,
spam: spam_status,
ham: ham_status,
min_date: min_date,
spam_score_type: ReviewableScore.types[:spam],
).first
end
end
end
Expand Down
47 changes: 47 additions & 0 deletions spec/requests/admin/reviewable_controller_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# frozen_string_literal: true

# Request spec covering the `ai_spam_false_negative` custom filter when it is
# passed to /review.json through the `additional_filters` query parameter.
RSpec.describe ReviewablesController do
  fab!(:post1) { Fabricate(:post) }
  fab!(:post2) { Fabricate(:post) }
  fab!(:admin)
  fab!(:llm_model)

  # Reviewable whose target post has an AiSpamLog with is_spam: false
  # (see ai_spam_log_missed below), so the filter should return it.
  fab!(:reviewable) do
    Reviewable.create!(
      target: post1,
      topic: post1.topic,
      type: ReviewablePost,
      created_by: admin,
      status: Reviewable.statuses[:pending],
    )
  end

  # Control reviewable: no AiSpamLog is created for post2, so the filter
  # should exclude it.
  fab!(:reviewable2) do
    Reviewable.create!(
      target: post2,
      topic: post2.topic,
      type: ReviewablePost,
      created_by: admin,
      status: Reviewable.statuses[:pending],
    )
  end

  fab!(:ai_spam_log_missed) do
    AiSpamLog.create!(is_spam: false, post_id: post1.id, llm_model_id: llm_model.id)
  end

  # we amend the behavior with a custom filter so we need to confirm it works
  it "properly applies custom filter" do
    sign_in(admin)

    get '/review.json?additional_filters={"ai_spam_false_negative":true}'
    expect(response.status).to eq(200)

    json = JSON.parse(response.body)
    # Assert which reviewable came back, not only how many: a bare count of 1
    # would also pass if the filter returned the wrong row.
    expect(json["reviewables"].map { |r| r["id"] }).to contain_exactly(reviewable.id)

    # Without the filter both reviewables are listed.
    get "/review.json"
    expect(response.status).to eq(200)
    json = JSON.parse(response.body)
    expect(json["reviewables"].length).to eq(2)
  end
end
Loading