|
31 | 31 | "badoo": ["badoo.com"], |
32 | 32 | "binance": ["binance.com"], |
33 | 33 | "sharechat": ["sharechat.com"], |
| 34 | + "paypal": ["paypal.com", "paypal.me"], |
| 35 | + "stripe": ["stripe.com", "stripe.io"], |
| 36 | + "payoneer": ["payoneer.com"], |
| 37 | + "banks": [ # General list, can be expanded |
| 38 | + "bankofamerica.com", "chase.com", "wellsfargo.com", "citibank.com", |
| 39 | + "hsbc.com", "barclays.com", "deutsche-bank.com", "santander.com" |
| 40 | + ], |
34 | 41 | "general": ["google.com"] |
35 | 42 | } |
36 | 43 |
|
|
# Lower-case keywords and phrases associated with payment requests.
# NOTE(review): some entries are short tokens ("bill", "bic", "iban") that also
# occur inside ordinary words ("billion", "bicycle"). How this list is matched
# is not visible from here — confirm the consumer matches on word boundaries.
PAYMENT_KEYWORDS = [
    "payment", "invoice", "bill", "outstanding balance", "transfer funds",
    "wire transfer", "gift card", "cryptocurrency", "bitcoin", "western union", "moneygram",
    "urgent payment needed", "settle your account",
    # Fintech specific
    "paypal", "stripe", "payoneer", "cash app", "venmo", "zelle",
    # Bank transfer specific
    "bank transfer", "wire details", "account details", "iban", "swift code", "bic"
]
91 | 102 |
|
92 | 103 |
|
|
107 | 118 | '.link', '.click', '.site', '.live', '.buzz', '.stream', '.download', |
108 | 119 | ] |
109 | 120 |
|
# Compiled patterns for strings that look like financial identifiers, keyed by
# identifier type. Every pattern has exactly one capturing group (the whole
# identifier), so `findall` yields plain strings for every key and `group(1)`
# is always the full match.
FINANCIAL_ADDRESS_PATTERNS = {
    # Bitcoin-style: "1..." or "3..." followed by 25-34 characters, or "bc1..." followed by 25-90.
    "BTC": re.compile(r'\b(1[a-km-zA-HJ-NP-Z1-9]{25,34}|3[a-km-zA-HJ-NP-Z1-9]{25,34}|bc1[a-zA-HJ-NP-Z0-9]{25,90})\b'),
    # Ethereum-style: "0x" followed by exactly 40 hexadecimal digits.
    "ETH": re.compile(r'\b(0x[a-fA-F0-9]{40})\b'),
    # IBAN-style: two upper-case letters, two digits, then 11-30 upper-case
    # alphanumerics. Written without spaces only; grouped forms such as
    # "GB82 WEST 1234 ..." are not matched by this pattern.
    "IBAN": re.compile(r'\b([A-Z]{2}\d{2}[A-Z0-9]{11,30})\b'),
    # SWIFT/BIC-style: six upper-case letters, two alphanumerics, and an
    # optional three-alphanumeric suffix. The suffix group is non-capturing so
    # this pattern has a single group like the others.
    # NOTE(review): any 8- or 11-letter all-caps word (e.g. "PASSWORD") also
    # fits this shape — consider extra validation before scoring a hit.
    "SWIFT_BIC": re.compile(r'\b([A-Z]{4}[A-Z]{2}[A-Z0-9]{2}(?:[A-Z0-9]{3})?)\b'),
}
115 | 128 |
|
116 | 129 | # Pattern for phone numbers |
|
121 | 134 | # Suspicious URL Patterns |
122 | 135 | # These patterns aim to catch URLs that impersonate legitimate domains. |
123 | 136 | SUSPICIOUS_URL_PATTERNS = [ |
124 | | - # Impersonation using subdomains or hyphens |
| 137 | + # Impersonation using subdomains or hyphens for social media and general platforms |
125 | 138 | r"https?://(?:[a-z0-9\-]+\.)*(?:facebook|fb|instagram|whatsapp|tiktok|tinder|snapchat|wechat|telegram|twitter|pinterest|linkedin|line|discord|teams|zoom|amazon|alibaba|youtube|skype|vk|reddit|viber|signal|badoo|binance|sharechat)\.com\.[a-z0-9\-]+\.[a-z]+", |
126 | 139 | r"https?://(?:[a-z0-9\-]+\.)*(?:facebook|fb|instagram|whatsapp|tiktok|tinder|snapchat|wechat|telegram|twitter|pinterest|linkedin|line|discord|teams|zoom|amazon|alibaba|youtube|skype|vk|reddit|viber|signal|badoo|binance|sharechat)-[a-z0-9\-]+\.[a-z]+", |
| 140 | + |
| 141 | + # Impersonation for fintech and banks |
| 142 | + r"https?://(?:[a-z0-9\-]+\.)*(?:paypal|stripe|payoneer|bankofamerica|chase|wellsfargo|citibank|hsbc|barclays)\.com\.[a-z0-9\-]+\.[a-z]+", |
| 143 | + r"https?://(?:[a-z0-9\-]+\.)*(?:paypal|stripe|payoneer|bankofamerica|chase|wellsfargo|citibank|hsbc|barclays)-[a-z0-9\-]+\.[a-z]+", |
| 144 | + |
127 | 145 | # Common URL shorteners |
128 | 146 | r"https?://bit\.ly", |
129 | 147 | r"https?://goo\.gl", |
|
148 | 166 | "PAYMENT_REQUEST": 1.5, |
149 | 167 | "SUSPICIOUS_URL_KEYWORD": 1.0, |
150 | 168 | "SUSPICIOUS_TLD": 2.0, |
151 | | - "CRYPTO_ADDRESS": 2.5, |
| 169 | + "BTC_ADDRESS": 2.5, |
| 170 | + "ETH_ADDRESS": 2.5, |
| 171 | + "IBAN_ADDRESS": 3.0, |
| 172 | + "SWIFT_BIC_ADDRESS": 3.0, |
152 | 173 | "PHONE_NUMBER_UNSOLICITED": 1.0, |
153 | 174 | "SUSPICIOUS_URL_PATTERN": 3.0, # High weight for matching a suspicious URL pattern |
154 | 175 | } |
|
0 commit comments