Scam detection: flag text-less messages that carry suspicious image attachments.

Adds two required scam-blocker settings, "suspiciousAttachmentsThreshold" and
"suspiciousAttachmentNamePattern". ScamDetector gains isScam(Message): when the
displayed content is blank, the message is flagged if at least the threshold number
of image attachments have a file name (extension included) that fully matches the
pattern; otherwise the existing text-based isScam(String) check is used.
ScamBlocker now passes the whole Message to the detector.

diff --git a/application/config.json.template b/application/config.json.template
index 3e568329a1..02835ca9e0 100644
--- a/application/config.json.template
+++ b/application/config.json.template
@@ -77,7 +77,9 @@
             "crypto",
             "tele"
         ],
-        "isHostSimilarToKeywordDistanceThreshold": 2
+        "isHostSimilarToKeywordDistanceThreshold": 2,
+        "suspiciousAttachmentsThreshold": 3,
+        "suspiciousAttachmentNamePattern": "(image|\\d{1,2})\\.[^.]{0,5}"
     },
     "wolframAlphaAppId": "79J52T-6239TVXHR7",
     "helpSystem": {
diff --git a/application/src/main/java/org/togetherjava/tjbot/config/ScamBlockerConfig.java b/application/src/main/java/org/togetherjava/tjbot/config/ScamBlockerConfig.java
index d95d9f9cce..0321a8a7b8 100644
--- a/application/src/main/java/org/togetherjava/tjbot/config/ScamBlockerConfig.java
+++ b/application/src/main/java/org/togetherjava/tjbot/config/ScamBlockerConfig.java
@@ -23,6 +23,8 @@ public final class ScamBlockerConfig {
     private final Set<String> hostBlacklist;
     private final Set<String> suspiciousHostKeywords;
     private final int isHostSimilarToKeywordDistanceThreshold;
+    private final int suspiciousAttachmentsThreshold;
+    private final String suspiciousAttachmentNamePattern;
 
     @JsonCreator(mode = JsonCreator.Mode.PROPERTIES)
     private ScamBlockerConfig(@JsonProperty(value = "mode", required = true) Mode mode,
@@ -37,7 +39,11 @@ private ScamBlockerConfig(@JsonProperty(value = "mode", required = true) Mode mo
             @JsonProperty(value = "suspiciousHostKeywords",
                     required = true) Set<String> suspiciousHostKeywords,
             @JsonProperty(value = "isHostSimilarToKeywordDistanceThreshold",
-                    required = true) int isHostSimilarToKeywordDistanceThreshold) {
+                    required = true) int isHostSimilarToKeywordDistanceThreshold,
+            @JsonProperty(value = "suspiciousAttachmentsThreshold",
+                    required = true) int suspiciousAttachmentsThreshold,
+            @JsonProperty(value = "suspiciousAttachmentNamePattern",
+                    required = true) String suspiciousAttachmentNamePattern) {
         this.mode = Objects.requireNonNull(mode);
         this.reportChannelPattern = Objects.requireNonNull(reportChannelPattern);
         this.botTrapChannelPattern = Objects.requireNonNull(botTrapChannelPattern);
@@ -46,6 +52,9 @@ private ScamBlockerConfig(@JsonProperty(value = "mode", required = true) Mode mo
         this.hostBlacklist = new HashSet<>(Objects.requireNonNull(hostBlacklist));
         this.suspiciousHostKeywords = new HashSet<>(Objects.requireNonNull(suspiciousHostKeywords));
         this.isHostSimilarToKeywordDistanceThreshold = isHostSimilarToKeywordDistanceThreshold;
+        this.suspiciousAttachmentsThreshold = suspiciousAttachmentsThreshold;
+        this.suspiciousAttachmentNamePattern =
+                Objects.requireNonNull(suspiciousAttachmentNamePattern);
     }
 
     /**
@@ -125,6 +134,26 @@ public int getIsHostSimilarToKeywordDistanceThreshold() {
         return isHostSimilarToKeywordDistanceThreshold;
     }
 
+    /**
+     * Gets the minimum amount of suspicious attachments that are required in a message to flag it
+     * as suspicious for its contained attachments.
+     *
+     * @return the minimum amount of suspicious attachments
+     */
+    public int getSuspiciousAttachmentsThreshold() {
+        return suspiciousAttachmentsThreshold;
+    }
+
+    /**
+     * Gets the REGEX pattern used to identify an attachment file name that is considered
+     * suspicious. The file name includes the extension.
+     *
+     * @return the attachment file name pattern
+     */
+    public String getSuspiciousAttachmentNamePattern() {
+        return suspiciousAttachmentNamePattern;
+    }
+
     /**
      * Mode of a scam blocker. Controls which actions it takes when detecting scam.
      */
diff --git a/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamBlocker.java b/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamBlocker.java
index 057a71cee6..730d7eef14 100644
--- a/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamBlocker.java
+++ b/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamBlocker.java
@@ -137,8 +137,7 @@ public void onMessageReceived(MessageReceivedEvent event) {
         }
 
         Message message = event.getMessage();
-        String content = message.getContentDisplay();
-        if (isSafe && scamDetector.isScam(content)) {
+        if (isSafe && scamDetector.isScam(message)) {
             isSafe = false;
         }
 
diff --git a/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetector.java b/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetector.java
index 33a9a28743..035de5ca0c 100644
--- a/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetector.java
+++ b/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetector.java
@@ -1,11 +1,16 @@
 package org.togetherjava.tjbot.features.moderation.scam;
 
+import net.dv8tion.jda.api.entities.Message;
+
 import org.togetherjava.tjbot.config.Config;
 import org.togetherjava.tjbot.config.ScamBlockerConfig;
 import org.togetherjava.tjbot.features.utils.StringDistances;
 
 import java.net.URI;
+import java.util.Collection;
+import java.util.List;
 import java.util.Locale;
+import java.util.function.Predicate;
 import java.util.regex.Pattern;
 import java.util.stream.Stream;
 
@@ -18,6 +23,7 @@ public final class ScamDetector {
     private static final Pattern TOKENIZER = Pattern.compile("[\\s,]");
 
     private final ScamBlockerConfig config;
+    private final Predicate<String> isSuspiciousAttachmentName;
 
     /**
      * Creates a new instance with the given configuration
@@ -26,6 +32,26 @@ public final class ScamDetector {
      */
     public ScamDetector(Config config) {
         this.config = config.getScamBlocker();
+        isSuspiciousAttachmentName =
+                Pattern.compile(config.getScamBlocker().getSuspiciousAttachmentNamePattern())
+                    .asMatchPredicate();
+    }
+
+    /**
+     * Detects whether the given message classifies as scam or not, using certain heuristics.
+     *
+     * @param message the message to analyze
+     * @return Whether the message classifies as scam
+     */
+    public boolean isScam(Message message) {
+        String content = message.getContentDisplay();
+        List<Message.Attachment> attachments = message.getAttachments();
+
+        if (content.isBlank()) {
+            return areAttachmentsSuspicious(attachments);
+        }
+
+        return isScam(content);
     }
 
     /**
@@ -123,6 +149,16 @@ private boolean containsSuspiciousKeyword(String token) {
         });
     }
 
+    private boolean areAttachmentsSuspicious(Collection<? extends Message.Attachment> attachments) {
+        long suspiciousAttachments =
+                attachments.stream().filter(this::isAttachmentSuspicious).count();
+        return suspiciousAttachments >= config.getSuspiciousAttachmentsThreshold();
+    }
+
+    private boolean isAttachmentSuspicious(Message.Attachment attachment) {
+        return attachment.isImage() && isSuspiciousAttachmentName.test(attachment.getFileName());
+    }
+
     private boolean isHostSimilarToKeyword(String host, String keyword) {
         // NOTE This algorithm is far from optimal.
         // It is good enough for our purpose though and not that complex.
diff --git a/application/src/test/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetectorTest.java b/application/src/test/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetectorTest.java
index ba1ea14f08..19127401b6 100644
--- a/application/src/test/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetectorTest.java
+++ b/application/src/test/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetectorTest.java
@@ -1,5 +1,6 @@
 package org.togetherjava.tjbot.features.moderation.scam;
 
+import net.dv8tion.jda.api.entities.Message;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.DisplayName;
 import org.junit.jupiter.api.Test;
@@ -9,6 +10,8 @@
 import org.togetherjava.tjbot.config.Config;
 import org.togetherjava.tjbot.config.ScamBlockerConfig;
 
+import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 import java.util.Set;
 
@@ -18,6 +21,9 @@
 import static org.mockito.Mockito.when;
 
 final class ScamDetectorTest {
+    private static final int SUSPICIOUS_ATTACHMENTS_THRESHOLD = 3;
+    private static final String SUSPICIOUS_ATTACHMENT_NAME = "scam.png";
+
     private ScamDetector scamDetector;
 
     @BeforeEach
@@ -38,6 +44,10 @@ void setUp() {
         when(scamConfig.getSuspiciousHostKeywords())
             .thenReturn(Set.of("discord", "nitro", "premium", "free", "cheat", "crypto", "tele"));
         when(scamConfig.getIsHostSimilarToKeywordDistanceThreshold()).thenReturn(2);
+        when(scamConfig.getSuspiciousAttachmentsThreshold())
+            .thenReturn(SUSPICIOUS_ATTACHMENTS_THRESHOLD);
+        when(scamConfig.getSuspiciousAttachmentNamePattern())
+            .thenReturn(SUSPICIOUS_ATTACHMENT_NAME);
 
         scamDetector = new ScamDetector(config);
     }
@@ -121,6 +131,94 @@ void websitesWithTooManyDifferencesAreNotSuspicious() {
         assertFalse(isScamResult);
     }
 
+    @Test
+    @DisplayName("Messages containing multiple suspicious attachments are flagged as scam")
+    void detectsSuspiciousAttachments() {
+        // GIVEN an empty message containing suspicious attachments
+        String content = "";
+        Message.Attachment attachment = createImageAttachmentMock(SUSPICIOUS_ATTACHMENT_NAME);
+        List<Message.Attachment> attachments =
+                Collections.nCopies(SUSPICIOUS_ATTACHMENTS_THRESHOLD, attachment);
+        Message message = createMessageMock(content, attachments);
+
+        // WHEN analyzing it
+        boolean isScamResult = scamDetector.isScam(message);
+
+        // THEN flags it as scam
+        assertTrue(isScamResult);
+    }
+
+    @Test
+    @DisplayName("Messages containing text content are not flagged for suspicious attachments")
+    void ignoresAttachmentsIfContentProvided() {
+        // GIVEN a non-empty message containing suspicious attachments
+        String content = "Hello World";
+        Message.Attachment attachment = createImageAttachmentMock(SUSPICIOUS_ATTACHMENT_NAME);
+        List<Message.Attachment> attachments =
+                Collections.nCopies(SUSPICIOUS_ATTACHMENTS_THRESHOLD, attachment);
+        Message message = createMessageMock(content, attachments);
+
+        // WHEN analyzing it
+        boolean isScamResult = scamDetector.isScam(message);
+
+        // THEN flags it as harmless
+        assertFalse(isScamResult);
+    }
+
+    @Test
+    @DisplayName("Messages containing not enough suspicious attachments are not flagged")
+    void ignoresIfNotEnoughSuspiciousAttachments() {
+        // GIVEN an empty message containing some, but not enough suspicious attachments
+        String content = "";
+
+        Message.Attachment badAttachment = createImageAttachmentMock(SUSPICIOUS_ATTACHMENT_NAME);
+        Message.Attachment goodAttachment = createImageAttachmentMock("good.png");
+        int badAttachmentAmount = SUSPICIOUS_ATTACHMENTS_THRESHOLD - 1;
+        List<Message.Attachment> attachments =
+                new ArrayList<>(Collections.nCopies(badAttachmentAmount, badAttachment));
+        attachments.add(goodAttachment);
+
+        Message message = createMessageMock(content, attachments);
+
+        // WHEN analyzing it
+        boolean isScamResult = scamDetector.isScam(message);
+
+        // THEN flags it as harmless
+        assertFalse(isScamResult);
+    }
+
+    @Test
+    @DisplayName("Messages containing harmless attachments are not flagged")
+    void ignoresHarmlessAttachments() {
+        // GIVEN an empty message containing only harmless attachments
+        String content = "";
+        Message.Attachment attachment = createImageAttachmentMock("good.png");
+        List<Message.Attachment> attachments =
+                Collections.nCopies(SUSPICIOUS_ATTACHMENTS_THRESHOLD, attachment);
+        Message message = createMessageMock(content, attachments);
+
+        // WHEN analyzing it
+        boolean isScamResult = scamDetector.isScam(message);
+
+        // THEN flags it as harmless
+        assertFalse(isScamResult);
+    }
+
+    private static Message createMessageMock(String content, List<Message.Attachment> attachments) {
+        Message message = mock(Message.class);
+        when(message.getContentRaw()).thenReturn(content);
+        when(message.getContentDisplay()).thenReturn(content);
+        when(message.getAttachments()).thenReturn(attachments);
+        return message;
+    }
+
+    private static Message.Attachment createImageAttachmentMock(String name) {
+        Message.Attachment attachment = mock(Message.Attachment.class);
+        when(attachment.isImage()).thenReturn(true);
+        when(attachment.getFileName()).thenReturn(name);
+        return attachment;
+    }
+
     private static List<String> provideRealScamMessages() {
         return List.of("""
                 🤩bro steam gived nitro - https://nitro-ds.online/LfgUfMzqYyx12""",