Skip to content

Commit 379e820

Browse files
committed
Block CDN attachment scam
1 parent 27f8563 commit 379e820

File tree

4 files changed

+121
-26
lines changed

4 files changed

+121
-26
lines changed

application/config.json.template

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@
6060
"gradle.org",
6161
"help.gradle.org",
6262
"youtube.com",
63-
"www.youtube.com"
63+
"www.youtube.com",
64+
"cdn.discordapp.com"
6465
],
6566
"hostBlacklist": [
6667
"bit.ly",

application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetector.java

Lines changed: 100 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,16 @@
88
import org.togetherjava.tjbot.config.ScamBlockerConfig;
99
import org.togetherjava.tjbot.features.utils.StringDistances;
1010

11+
import javax.annotation.Nullable;
12+
1113
import java.net.URI;
14+
import java.util.ArrayList;
1215
import java.util.Collection;
1316
import java.util.List;
1417
import java.util.Locale;
18+
import java.util.Objects;
19+
import java.util.Optional;
20+
import java.util.Set;
1521
import java.util.StringJoiner;
1622
import java.util.function.Predicate;
1723
import java.util.regex.Pattern;
@@ -24,6 +30,8 @@
2430
* {@link #isScam(CharSequence)}.
2531
*/
2632
public final class ScamDetector {
33+
private static final Set<String> IMAGE_EXTENSIONS =
34+
Set.of("jpg", "jpeg", "png", "gif", "webp", "tiff", "svg", "apng");
2735
private static final Pattern TOKENIZER = Pattern.compile("[\\s,]");
2836
private final ScamBlockerConfig config;
2937
private final Predicate<String> isSuspiciousAttachmentName;
@@ -59,7 +67,8 @@ public boolean isScam(Message message) {
5967
}
6068

6169
String content = message.getContentDisplay();
62-
List<Message.Attachment> attachments = message.getAttachments();
70+
List<Attachment> attachments =
71+
message.getAttachments().stream().map(Attachment::fromDiscord).toList();
6372

6473
if (content.isBlank()) {
6574
return areAttachmentsSuspicious(attachments);
@@ -76,21 +85,28 @@ public boolean isScam(Message message) {
7685
*/
7786
public boolean isScam(CharSequence message) {
7887
AnalyseResults results = new AnalyseResults();
88+
results.onlyContainsUrls = true;
7989
TOKENIZER.splitAsStream(message).forEach(token -> analyzeToken(token, results));
8090
return isScam(results);
8191
}
8292

8393
private boolean isScam(AnalyseResults results) {
84-
if (results.pingsEveryone && (results.containsSuspiciousKeyword || results.hasUrl
94+
if (results.pingsEveryone && (results.containsSuspiciousKeyword || results.hasUrl()
8595
|| results.containsDollarSign)) {
8696
return true;
8797
}
8898

89-
return Stream
90-
.of(results.containsSuspiciousKeyword, results.hasSuspiciousUrl,
99+
boolean hasTooManySuspiciousFlags = Stream
100+
.of(results.containsSuspiciousKeyword, results.hasSuspiciousUrl(),
91101
results.containsDollarSign)
92102
.filter(flag -> flag)
93103
.count() >= 2;
104+
if (hasTooManySuspiciousFlags) {
105+
return true;
106+
}
107+
108+
return results.onlyContainsUrls && results.areAllUrlsWithAttachments()
109+
&& areAttachmentsSuspicious(results.getUrlAttachments());
94110
}
95111

96112
private void analyzeToken(String token, AnalyseResults results) {
@@ -113,13 +129,18 @@ private void analyzeToken(String token, AnalyseResults results) {
113129

114130
if (token.startsWith("http")) {
115131
analyzeUrl(token, results);
132+
} else {
133+
results.onlyContainsUrls = false;
116134
}
117135
}
118136

119137
private void analyzeUrl(String url, AnalyseResults results) {
120138
String host;
139+
String path;
121140
try {
122-
host = URI.create(url).getHost();
141+
URI uri = URI.create(url);
142+
host = uri.getHost();
143+
path = uri.getPath();
123144
} catch (IllegalArgumentException _) {
124145
// Invalid urls are not scam
125146
return;
@@ -129,20 +150,25 @@ private void analyzeUrl(String url, AnalyseResults results) {
129150
return;
130151
}
131152

132-
results.hasUrl = true;
153+
AnalyseUrlResult result = new AnalyseUrlResult();
154+
results.urls.add(result);
155+
156+
if (path != null && path.startsWith("/attachments")) {
157+
result.containedAttachment = Attachment.fromUrlPath(path);
158+
}
133159

134160
if (config.getHostWhitelist().contains(host)) {
135161
return;
136162
}
137163

138164
if (config.getHostBlacklist().contains(host)) {
139-
results.hasSuspiciousUrl = true;
165+
result.isSuspicious = true;
140166
return;
141167
}
142168

143169
for (String keyword : config.getSuspiciousHostKeywords()) {
144170
if (isHostSimilarToKeyword(host, keyword)) {
145-
results.hasSuspiciousUrl = true;
171+
result.isSuspicious = true;
146172
break;
147173
}
148174
}
@@ -171,14 +197,14 @@ private boolean containsSuspiciousKeyword(String token) {
171197
});
172198
}
173199

174-
private boolean areAttachmentsSuspicious(Collection<? extends Message.Attachment> attachments) {
200+
private boolean areAttachmentsSuspicious(Collection<Attachment> attachments) {
175201
long suspiciousAttachments =
176202
attachments.stream().filter(this::isAttachmentSuspicious).count();
177203
return suspiciousAttachments >= config.getSuspiciousAttachmentsThreshold();
178204
}
179205

180-
private boolean isAttachmentSuspicious(Message.Attachment attachment) {
181-
return attachment.isImage() && isSuspiciousAttachmentName.test(attachment.getFileName());
206+
private boolean isAttachmentSuspicious(Attachment attachment) {
207+
return attachment.isImage() && isSuspiciousAttachmentName.test(attachment.fileName());
182208
}
183209

184210
private boolean isHostSimilarToKeyword(String host, String keyword) {
@@ -212,21 +238,79 @@ private static boolean endsWith(CharSequence text, char suffixToTest) {
212238
return !text.isEmpty() && text.charAt(text.length() - 1) == suffixToTest;
213239
}
214240

215-
private static class AnalyseResults {
241+
private record Attachment(String fileName) {
242+
boolean isImage() {
243+
return getFileExtension().map(IMAGE_EXTENSIONS::contains).orElse(false);
244+
}
245+
246+
private Optional<String> getFileExtension() {
247+
int dot = fileName.lastIndexOf('.');
248+
if (dot == -1) {
249+
return Optional.empty();
250+
}
251+
String extension = fileName.substring(dot + 1);
252+
return Optional.of(extension);
253+
}
254+
255+
static Attachment fromDiscord(Message.Attachment attachment) {
256+
return new Attachment(attachment.getFileName());
257+
}
258+
259+
static Attachment fromUrlPath(String urlPath) {
260+
int fileNameStart = urlPath.lastIndexOf('/');
261+
String fileName = fileNameStart == -1 ? "" : urlPath.substring(fileNameStart + 1);
262+
return new Attachment(fileName);
263+
}
264+
}
265+
266+
private static final class AnalyseUrlResult {
267+
private boolean isSuspicious;
268+
@Nullable
269+
private Attachment containedAttachment;
270+
271+
@Override
272+
public String toString() {
273+
return new StringJoiner(", ", AnalyseUrlResult.class.getSimpleName() + "[", "]")
274+
.add("isSuspicious=" + isSuspicious)
275+
.add("containedAttachment=" + containedAttachment)
276+
.toString();
277+
}
278+
}
279+
280+
private static final class AnalyseResults {
216281
private boolean pingsEveryone;
217282
private boolean containsSuspiciousKeyword;
218283
private boolean containsDollarSign;
219-
private boolean hasUrl;
220-
private boolean hasSuspiciousUrl;
284+
private boolean onlyContainsUrls;
285+
private final Collection<AnalyseUrlResult> urls = new ArrayList<>();
286+
287+
boolean hasUrl() {
288+
return !urls.isEmpty();
289+
}
290+
291+
boolean hasSuspiciousUrl() {
292+
return urls.stream().anyMatch(url -> url.isSuspicious);
293+
}
294+
295+
boolean areAllUrlsWithAttachments() {
296+
return urls.stream().allMatch(url -> url.containedAttachment != null);
297+
}
298+
299+
Collection<Attachment> getUrlAttachments() {
300+
return urls.stream()
301+
.map(url -> url.containedAttachment)
302+
.filter(Objects::nonNull)
303+
.toList();
304+
}
221305

222306
@Override
223307
public String toString() {
224308
return new StringJoiner(", ", AnalyseResults.class.getSimpleName() + "[", "]")
225309
.add("pingsEveryone=" + pingsEveryone)
226310
.add("containsSuspiciousKeyword=" + containsSuspiciousKeyword)
227311
.add("containsDollarSign=" + containsDollarSign)
228-
.add("hasUrl=" + hasUrl)
229-
.add("hasSuspiciousUrl=" + hasSuspiciousUrl)
312+
.add("onlyContainsUrls=" + onlyContainsUrls)
313+
.add("urls=" + urls)
230314
.toString();
231315
}
232316
}

application/src/main/java/org/togetherjava/tjbot/features/utils/LinkPreviews.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,15 @@
1818
import java.util.Collection;
1919
import java.util.List;
2020
import java.util.Optional;
21+
import java.util.Set;
2122
import java.util.concurrent.CompletableFuture;
2223
import java.util.concurrent.TimeUnit;
2324
import java.util.function.Predicate;
2425
import java.util.stream.IntStream;
2526

2627
/**
27-
* Provides means to create previews of links. See
28-
* {@link LinkDetection#extractLinks(String, boolean, boolean)} and
29-
* {@link #createLinkPreviews(List)}.
28+
* Provides means to create previews of links. See {@link LinkDetection#extractLinks(String, Set)}
29+
* and {@link #createLinkPreviews(List)}.
3030
*/
3131
public final class LinkPreviews {
3232
private static final Logger logger = LoggerFactory.getLogger(LinkPreviews.class);

application/src/test/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetectorTest.java

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424

2525
final class ScamDetectorTest {
2626
private static final int SUSPICIOUS_ATTACHMENTS_THRESHOLD = 3;
27-
private static final String SUSPICIOUS_ATTACHMENT_NAME = "scam.png";
27+
private static final String SUSPICIOUS_ATTACHMENT_NAME = "image.png";
2828

2929
private ScamDetector scamDetector;
3030

@@ -38,9 +38,10 @@ void setUp() {
3838
"esex", "steam", "gift", "onlyfans", "bitcoin", "btc", "promo", "trader", "trading",
3939
"whatsapp", "crypto", "^claim", "teen", "adobe", "hack", "steamcommunity",
4040
"freenitro", "^earn$", "^earning", ".exe$", "mrbeast"));
41-
when(scamConfig.getHostWhitelist()).thenReturn(Set.of("discord.com", "discord.media",
42-
"discordapp.com", "discordapp.net", "discordstatus.com", "thehackernews.com",
43-
"gradle.org", "help.gradle.org", "youtube.com", "www.youtube.com"));
41+
when(scamConfig.getHostWhitelist())
42+
.thenReturn(Set.of("discord.com", "discord.media", "discordapp.com", "discordapp.net",
43+
"discordstatus.com", "thehackernews.com", "gradle.org", "help.gradle.org",
44+
"youtube.com", "www.youtube.com", "cdn.discordapp.com"));
4445
when(scamConfig.getHostBlacklist()).thenReturn(Set.of("bit.ly", "discord.gg", "teletype.in",
4546
"t.me", "corematrix.us", "u.to", "steamcommunity.com", "goo.su", "telegra.ph",
4647
"shorturl.at", "cheatings.xyz", "transfer.sh", "tobimoller.space"));
@@ -50,7 +51,7 @@ void setUp() {
5051
when(scamConfig.getSuspiciousAttachmentsThreshold())
5152
.thenReturn(SUSPICIOUS_ATTACHMENTS_THRESHOLD);
5253
when(scamConfig.getSuspiciousAttachmentNamePattern())
53-
.thenReturn(SUSPICIOUS_ATTACHMENT_NAME);
54+
.thenReturn("(image|\\d{1,2})\\.[^.]{0,5}");
5455

5556
when(scamConfig.getTrustedUserRolePattern()).thenReturn("Moderator");
5657

@@ -401,7 +402,16 @@ B2CWorkflow Builder (React Flow)
401402
as a beginner from the digital market, DM me for expert guidance or contact me directly on telegram and start building your financial future.
402403
Telegram username @JohnSmith123""",
403404
"Grab it before it's deleted (available for Windows and macOS): https://www.reddit.com/r/TVBaFreeHub/comments/12345t/ninaatradercrackedfullpowertradingfreefor123/",
404-
"Bro, claim 0.1 BTC now! Use promo code \"mrbeast\" at expmcoins.com screen @everyone");
405+
"Bro, claim 0.1 BTC now! Use promo code \"mrbeast\" at expmcoins.com screen @everyone",
406+
"""
407+
https://cdn.discordapp.com/attachments/1234/5678/image.png?ex=688cd552&is=688b83d2&hm=5787b53f08a488a22df6e3d2d43b4445ed0ced5f790e4f6e6e82810e38dba2aa&
408+
https://cdn.discordapp.com/attachments/1234/5678/image.png?ex=688cd552&is=688b83d2&hm=5787b53f08a488a22df6e3d2d43b4445ed0ced5f790e4f6e6e82810e38dba2aa&
409+
https://cdn.discordapp.com/attachments/1234/5678/image.png?ex=688cd552&is=688b83d2&hm=5787b53f08a488a22df6e3d2d43b4445ed0ced5f790e4f6e6e82810e38dba2aa&""",
410+
"""
411+
https://cdn.discordapp.com/attachments/1234/5678/1.png?ex=688cd552&is=688b83d2&hm=5787b53f08a488a22df6e3d2d43b4445ed0ced5f790e4f6e6e82810e38dba2aa&
412+
https://cdn.discordapp.com/attachments/1234/5678/2.png?ex=688cd552&is=688b83d2&hm=5787b53f08a488a22df6e3d2d43b4445ed0ced5f790e4f6e6e82810e38dba2aa&
413+
https://cdn.discordapp.com/attachments/1234/5678/3.png?ex=688cd552&is=688b83d2&hm=5787b53f08a488a22df6e3d2d43b4445ed0ced5f790e4f6e6e82810e38dba2aa&
414+
https://cdn.discordapp.com/attachments/1234/5678/4.png?ex=688cd552&is=688b83d2&hm=5787b53f08a488a22df6e3d2d43b4445ed0ced5f790e4f6e6e82810e38dba2aa&""");
405415
}
406416

407417
private static List<String> provideRealFalsePositiveMessages() {

0 commit comments

Comments
 (0)