88import org .togetherjava .tjbot .config .ScamBlockerConfig ;
99import org .togetherjava .tjbot .features .utils .StringDistances ;
1010
11+ import javax .annotation .Nullable ;
12+
1113import java .net .URI ;
14+ import java .util .ArrayList ;
1215import java .util .Collection ;
1316import java .util .List ;
1417import java .util .Locale ;
18+ import java .util .Objects ;
19+ import java .util .Optional ;
20+ import java .util .Set ;
1521import java .util .StringJoiner ;
1622import java .util .function .Predicate ;
1723import java .util .regex .Pattern ;
2430 * {@link #isScam(CharSequence)}.
2531 */
2632public final class ScamDetector {
// File extensions (lower case, without the dot) that Attachment.isImage() accepts as images.
33+ private static final Set <String > IMAGE_EXTENSIONS =
34+ Set .of ("jpg" , "jpeg" , "png" , "gif" , "webp" , "tiff" , "svg" , "apng" );
// Separator pattern that isScam(CharSequence) uses to split a message into tokens.
2735 private static final Pattern TOKENIZER = Pattern .compile ("[\\ s,]" );
// Supplies the host whitelist and blacklist, the suspicious host keywords and the
// suspicious-attachments threshold consulted during analysis.
2836 private final ScamBlockerConfig config ;
// Applied to attachment file names by isAttachmentSuspicious.
2937 private final Predicate <String > isSuspiciousAttachmentName ;
@@ -59,7 +67,8 @@ public boolean isScam(Message message) {
5967 }
6068
6169 String content = message .getContentDisplay ();
62- List <Message .Attachment > attachments = message .getAttachments ();
70+ List <Attachment > attachments =
71+ message .getAttachments ().stream ().map (Attachment ::fromDiscord ).toList ();
6372
6473 if (content .isBlank ()) {
6574 return areAttachmentsSuspicious (attachments );
@@ -76,21 +85,28 @@ public boolean isScam(Message message) {
7685 */
7786 public boolean isScam (CharSequence message ) {
7887 AnalyseResults results = new AnalyseResults ();
88+ results .onlyContainsUrls = true ;
7989 TOKENIZER .splitAsStream (message ).forEach (token -> analyzeToken (token , results ));
8090 return isScam (results );
8191 }
8292
8393 private boolean isScam (AnalyseResults results ) {
84- if (results .pingsEveryone && (results .containsSuspiciousKeyword || results .hasUrl
94+ if (results .pingsEveryone && (results .containsSuspiciousKeyword || results .hasUrl ()
8595 || results .containsDollarSign )) {
8696 return true ;
8797 }
8898
89- return Stream
90- .of (results .containsSuspiciousKeyword , results .hasSuspiciousUrl ,
99+ boolean hasTooManySuspiciousFlags = Stream
100+ .of (results .containsSuspiciousKeyword , results .hasSuspiciousUrl () ,
91101 results .containsDollarSign )
92102 .filter (flag -> flag )
93103 .count () >= 2 ;
104+ if (hasTooManySuspiciousFlags ) {
105+ return true ;
106+ }
107+
108+ return results .onlyContainsUrls && results .areAllUrlsWithAttachments ()
109+ && areAttachmentsSuspicious (results .getUrlAttachments ());
94110 }
95111
96112 private void analyzeToken (String token , AnalyseResults results ) {
@@ -113,13 +129,18 @@ private void analyzeToken(String token, AnalyseResults results) {
113129
114130 if (token .startsWith ("http" )) {
115131 analyzeUrl (token , results );
132+ } else {
133+ results .onlyContainsUrls = false ;
116134 }
117135 }
118136
119137 private void analyzeUrl (String url , AnalyseResults results ) {
120138 String host ;
139+ String path ;
121140 try {
122- host = URI .create (url ).getHost ();
141+ URI uri = URI .create (url );
142+ host = uri .getHost ();
143+ path = uri .getPath ();
123144 } catch (IllegalArgumentException _) {
124145 // Invalid urls are not scam
125146 return ;
@@ -129,20 +150,25 @@ private void analyzeUrl(String url, AnalyseResults results) {
129150 return ;
130151 }
131152
132- results .hasUrl = true ;
153+ AnalyseUrlResult result = new AnalyseUrlResult ();
154+ results .urls .add (result );
155+
156+ if (path != null && path .startsWith ("/attachments" )) {
157+ result .containedAttachment = Attachment .fromUrlPath (path );
158+ }
133159
134160 if (config .getHostWhitelist ().contains (host )) {
135161 return ;
136162 }
137163
138164 if (config .getHostBlacklist ().contains (host )) {
139- results . hasSuspiciousUrl = true ;
165+ result . isSuspicious = true ;
140166 return ;
141167 }
142168
143169 for (String keyword : config .getSuspiciousHostKeywords ()) {
144170 if (isHostSimilarToKeyword (host , keyword )) {
145- results . hasSuspiciousUrl = true ;
171+ result . isSuspicious = true ;
146172 break ;
147173 }
148174 }
@@ -171,14 +197,14 @@ private boolean containsSuspiciousKeyword(String token) {
171197 });
172198 }
173199
174- private boolean areAttachmentsSuspicious (Collection <? extends Message . Attachment > attachments ) {
200+ private boolean areAttachmentsSuspicious (Collection <Attachment > attachments ) {
175201 long suspiciousAttachments =
176202 attachments .stream ().filter (this ::isAttachmentSuspicious ).count ();
177203 return suspiciousAttachments >= config .getSuspiciousAttachmentsThreshold ();
178204 }
179205
180- private boolean isAttachmentSuspicious (Message . Attachment attachment ) {
181- return attachment .isImage () && isSuspiciousAttachmentName .test (attachment .getFileName ());
206+ private boolean isAttachmentSuspicious (Attachment attachment ) {
207+ return attachment .isImage () && isSuspiciousAttachmentName .test (attachment .fileName ());
182208 }
183209
184210 private boolean isHostSimilarToKeyword (String host , String keyword ) {
@@ -212,21 +238,79 @@ private static boolean endsWith(CharSequence text, char suffixToTest) {
212238 return !text .isEmpty () && text .charAt (text .length () - 1 ) == suffixToTest ;
213239 }
214240
215- private static class AnalyseResults {
241+ private record Attachment (String fileName ) {
242+ boolean isImage () {
243+ return getFileExtension ().map (IMAGE_EXTENSIONS ::contains ).orElse (false );
244+ }
245+
246+ private Optional <String > getFileExtension () {
247+ int dot = fileName .lastIndexOf ('.' );
248+ if (dot == -1 ) {
249+ return Optional .empty ();
250+ }
251+ String extension = fileName .substring (dot + 1 );
252+ return Optional .of (extension );
253+ }
254+
255+ static Attachment fromDiscord (Message .Attachment attachment ) {
256+ return new Attachment (attachment .getFileName ());
257+ }
258+
259+ static Attachment fromUrlPath (String urlPath ) {
260+ int fileNameStart = urlPath .lastIndexOf ('/' );
261+ String fileName = fileNameStart == -1 ? "" : urlPath .substring (fileNameStart + 1 );
262+ return new Attachment (fileName );
263+ }
264+ }
265+
266+ private static final class AnalyseUrlResult {
267+ private boolean isSuspicious ;
268+ @ Nullable
269+ private Attachment containedAttachment ;
270+
271+ @ Override
272+ public String toString () {
273+ return new StringJoiner (", " , AnalyseUrlResult .class .getSimpleName () + "[" , "]" )
274+ .add ("isSuspicious=" + isSuspicious )
275+ .add ("containedAttachment=" + containedAttachment )
276+ .toString ();
277+ }
278+ }
279+
// Accumulates the findings made while the tokens of one message are analysed.
280+ private static final class AnalyseResults {
216281 private boolean pingsEveryone ;
217282 private boolean containsSuspiciousKeyword ;
218283 private boolean containsDollarSign ;
219- private boolean hasUrl ;
220- private boolean hasSuspiciousUrl ;
// Preset to true by isScam(CharSequence); analyzeToken clears it for a token that does
// not start with "http".
284+ private boolean onlyContainsUrls ;
// One entry per URL that analyzeUrl recorded.
285+ private final Collection <AnalyseUrlResult > urls = new ArrayList <>();
286+
// True once at least one URL has been recorded.
287+ boolean hasUrl () {
288+ return !urls .isEmpty ();
289+ }
290+
// True if any recorded URL was marked suspicious.
291+ boolean hasSuspiciousUrl () {
292+ return urls .stream ().anyMatch (url -> url .isSuspicious );
293+ }
294+
// True if every recorded URL carries an attachment. Note that allMatch on an empty
// stream is true, so this also holds when no URL was recorded at all.
295+ boolean areAllUrlsWithAttachments () {
296+ return urls .stream ().allMatch (url -> url .containedAttachment != null );
297+ }
298+
// Collects the attachments of all recorded URLs, skipping URLs without one.
299+ Collection <Attachment > getUrlAttachments () {
300+ return urls .stream ()
301+ .map (url -> url .containedAttachment )
302+ .filter (Objects ::nonNull )
303+ .toList ();
304+ }
305+
221305
222306 @ Override
223307 public String toString () {
224308 return new StringJoiner (", " , AnalyseResults .class .getSimpleName () + "[" , "]" )
225309 .add ("pingsEveryone=" + pingsEveryone )
226310 .add ("containsSuspiciousKeyword=" + containsSuspiciousKeyword )
227311 .add ("containsDollarSign=" + containsDollarSign )
228- .add ("hasUrl =" + hasUrl )
229- .add ("hasSuspiciousUrl =" + hasSuspiciousUrl )
312+ .add ("onlyContainsUrls =" + onlyContainsUrls )
313+ .add ("urls =" + urls )
230314 .toString ();
231315 }
232316 }
0 commit comments