From e9a6c62d4803e6767599a42907f3e26719436722 Mon Sep 17 00:00:00 2001 From: Sam Saffron Date: Tue, 21 Jan 2025 16:21:49 +1100 Subject: [PATCH] FEATURE: Add user location info to spam scanner context This adds the location derived from the user's registration IP and last known IP, along with the user's email, to the scanning context. This gives the spam scanner another hint about possibly malicious users: for example, an account that registered in India but is replying from Australia, or an email that is clearly a throwaway address. --- lib/ai_moderation/spam_scanner.rb | 42 ++++++++++++++++--- .../ai_moderation/spam_scanner_spec.rb | 26 ++++++++++++ 2 files changed, 63 insertions(+), 5 deletions(-) diff --git a/lib/ai_moderation/spam_scanner.rb b/lib/ai_moderation/spam_scanner.rb index 4f0c49b00..e775246cf 100644 --- a/lib/ai_moderation/spam_scanner.rb +++ b/lib/ai_moderation/spam_scanner.rb @@ -288,11 +288,16 @@ def self.build_context(post, topic = nil) end context << "\nPost Author Information:" - if post.user # during test we may not have a user - context << "- Username: #{post.user.username}" - context << "- Account age: #{(Time.current - post.user.created_at).to_i / 86_400} days" - context << "- Total posts: #{post.user.post_count}" - context << "- Trust level: #{post.user.trust_level}" + if user = post.user # during test we may not have a user + context << "- Username: #{user.username}\n" + context << "- Email: #{user.email}\n" + context << "- Account age: #{(Time.current - user.created_at).to_i / 86_400} days\n" + context << "- Total posts: #{user.post_count}\n" + context << "- Trust level: #{user.trust_level}\n" + if info = location_info(user) + context << "- Registration Location: #{info[:registration]}\n" if info[:registration] + context << "- Last Location: #{info[:last]}\n" if info[:last] + end end context << "\nPost Content (first #{MAX_RAW_SCAN_LENGTH} chars):\n" @@ -300,6 +305,33 @@ def self.build_context(post, topic = nil) context.join("\n") end + def self.location_info(user) + registration, last = nil + if
user.ip_address.present? + info = DiscourseIpInfo.get(user.ip_address, resolve_hostname: true) + last = "#{info[:location]} (#{info[:organization]})" if info && info[:location].present? + end + if user.registration_ip_address.present? + info = DiscourseIpInfo.get(user.registration_ip_address, resolve_hostname: true) + registration = "#{info[:location]} (#{info[:organization]})" if info && + info[:location].present? + end + + rval = nil + if registration || last + rval = { registration: registration } if registration + if last && last != registration + rval ||= {} + rval[:last] = last + end + end + + rval + rescue => e + Discourse.warn_exception(e, message: "Failed to lookup location info") + nil + end + def self.build_system_prompt(custom_instructions) base_prompt = +<<~PROMPT You are a spam detection system. Analyze the following post content and context. diff --git a/spec/lib/modules/ai_moderation/spam_scanner_spec.rb b/spec/lib/modules/ai_moderation/spam_scanner_spec.rb index ea9c1b972..e3d3d97fd 100644 --- a/spec/lib/modules/ai_moderation/spam_scanner_spec.rb +++ b/spec/lib/modules/ai_moderation/spam_scanner_spec.rb @@ -289,4 +289,30 @@ expect(post.user.reload.silenced?).to eq(false) end end + + it "includes location information and email in context" do + user.update!(ip_address: "1.2.3.4", registration_ip_address: "5.6.7.8") + + ip_info_registration = { location: "New York", organization: "ISP1" } + ip_info_last = { location: "London", organization: "ISP2" } + + DiscourseIpInfo + .stubs(:get) + .with("5.6.7.8", resolve_hostname: true) + .returns(ip_info_registration) + DiscourseIpInfo.stubs(:get).with("1.2.3.4", resolve_hostname: true).returns(ip_info_last) + + prompts = nil + DiscourseAi::Completions::Llm.with_prepared_responses( + ["spam", "just because"], + ) do |_, _, _prompts| + prompts = _prompts + described_class.test_post(post) + end + + context = prompts.first.messages[1][:content] + expect(context).to include("Registration Location: New York (ISP1)") 
+ expect(context).to include("Last Location: London (ISP2)") + expect(context).to include(user.email) + end end