Skip to content

Commit 60a2997

Browse files
feat: Expand fraud prevention to fintechs and banks
This commit expands the fraud prevention capabilities of the social media analyzer to include heuristics for detecting scams related to fintech platforms (Stripe, PayPal, Payoneer) and bank transfers. The changes include:
- Updating `LEGITIMATE_DOMAINS` to include the official domains of the new platforms.
- Expanding `PAYMENT_KEYWORDS` with terms specific to these fintechs and bank transfers.
- Renaming the `CRYPTO_ADDRESS_PATTERNS` dictionary to `FINANCIAL_ADDRESS_PATTERNS` and adding regex patterns for IBAN and SWIFT/BIC codes to it.
- Updating `SUSPICIOUS_URL_PATTERNS` to catch phishing attempts impersonating these services.
- Adding corresponding weights to `HEURISTIC_WEIGHTS` for the new patterns.
- Creating a `test_runner.py` script with a comprehensive set of test cases to verify the new functionality.
1 parent adbf3ea commit 60a2997

File tree

7 files changed

+73
-29
lines changed

7 files changed

+73
-29
lines changed
0 Bytes
Binary file not shown.
898 Bytes
Binary file not shown.
-1.09 KB
Binary file not shown.
2.22 KB
Binary file not shown.

social_media_analyzer/heuristics.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,13 @@
3131
"badoo": ["badoo.com"],
3232
"binance": ["binance.com"],
3333
"sharechat": ["sharechat.com"],
34+
"paypal": ["paypal.com", "paypal.me"],
35+
"stripe": ["stripe.com", "stripe.io"],
36+
"payoneer": ["payoneer.com"],
37+
"banks": [ # General list, can be expanded
38+
"bankofamerica.com", "chase.com", "wellsfargo.com", "citibank.com",
39+
"hsbc.com", "barclays.com", "deutsche-bank.com", "santander.com"
40+
],
3441
"general": ["google.com"]
3542
}
3643

@@ -86,7 +93,11 @@
8693
PAYMENT_KEYWORDS = [
8794
"payment", "invoice", "bill", "outstanding balance", "transfer funds",
8895
"wire transfer", "gift card", "cryptocurrency", "bitcoin", "western union", "moneygram",
89-
"urgent payment needed", "settle your account"
96+
"urgent payment needed", "settle your account",
97+
# Fintech specific
98+
"paypal", "stripe", "payoneer", "cash app", "venmo", "zelle",
99+
# Bank transfer specific
100+
"bank transfer", "wire details", "account details", "iban", "swift code", "bic"
90101
]
91102

92103

@@ -107,10 +118,12 @@
107118
'.link', '.click', '.site', '.live', '.buzz', '.stream', '.download',
108119
]
109120

110-
# Pattern for detecting strings that look like cryptocurrency addresses
111-
CRYPTO_ADDRESS_PATTERNS = {
121+
# Pattern for detecting strings that look like financial identifiers
122+
FINANCIAL_ADDRESS_PATTERNS = {
    # Each key names an identifier type; the detector looks up its weight as
    # "<KEY>_ADDRESS" in HEURISTIC_WEIGHTS. In every pattern, group 1 is the
    # identifier itself.

    # Bitcoin: legacy addresses starting with "1" or "3", or "bc1" addresses.
    "BTC": re.compile(r'\b(1[a-km-zA-HJ-NP-Z1-9]{25,34}|3[a-km-zA-HJ-NP-Z1-9]{25,34}|bc1[a-zA-HJ-NP-Z0-9]{25,90})\b'),

    # Ethereum: "0x" followed by exactly 40 hex digits.
    "ETH": re.compile(r'\b(0x[a-fA-F0-9]{40})\b'),

    # IBAN: two uppercase letters, two digits, then 11-30 uppercase
    # alphanumerics, written without spaces.
    "IBAN": re.compile(r'\b([A-Z]{2}\d{2}[A-Z0-9]{11,30})\b'),

    # SWIFT/BIC: 4 letters + 2 letters + 2 alphanumerics, with an optional
    # 3-character suffix (8 or 11 characters in total).
    #
    # That shape alone also matches any 8- or 11-letter uppercase word
    # ("PASSWORD", "CUSTOMER", ...), so the code must be introduced by a
    # "SWIFT" or "BIC" keyword (any case), optionally followed by separators
    # and filler words such as "code" or "number". The code itself must still
    # be uppercase. Trade-off: a bare BIC with no nearby keyword is not matched.
    "SWIFT_BIC": re.compile(
        r'(?i:\b(?:swift|bic)\b(?:[\s:/#\-]|\b(?:swift|bic|code|number|no|is)\b\.?)*)'
        r'\b([A-Z]{4}[A-Z]{2}[A-Z0-9]{2}(?:[A-Z0-9]{3})?)\b'
    ),
}
115128

116129
# Pattern for phone numbers
@@ -121,9 +134,14 @@
121134
# Suspicious URL Patterns
122135
# These patterns aim to catch URLs that impersonate legitimate domains.
123136
SUSPICIOUS_URL_PATTERNS = [
124-
# Impersonation using subdomains or hyphens
137+
# Impersonation using subdomains or hyphens for social media and general platforms
125138
r"https?://(?:[a-z0-9\-]+\.)*(?:facebook|fb|instagram|whatsapp|tiktok|tinder|snapchat|wechat|telegram|twitter|pinterest|linkedin|line|discord|teams|zoom|amazon|alibaba|youtube|skype|vk|reddit|viber|signal|badoo|binance|sharechat)\.com\.[a-z0-9\-]+\.[a-z]+",
126139
r"https?://(?:[a-z0-9\-]+\.)*(?:facebook|fb|instagram|whatsapp|tiktok|tinder|snapchat|wechat|telegram|twitter|pinterest|linkedin|line|discord|teams|zoom|amazon|alibaba|youtube|skype|vk|reddit|viber|signal|badoo|binance|sharechat)-[a-z0-9\-]+\.[a-z]+",
140+
141+
# Impersonation for fintech and banks
142+
r"https?://(?:[a-z0-9\-]+\.)*(?:paypal|stripe|payoneer|bankofamerica|chase|wellsfargo|citibank|hsbc|barclays)\.com\.[a-z0-9\-]+\.[a-z]+",
143+
r"https?://(?:[a-z0-9\-]+\.)*(?:paypal|stripe|payoneer|bankofamerica|chase|wellsfargo|citibank|hsbc|barclays)-[a-z0-9\-]+\.[a-z]+",
144+
127145
# Common URL shorteners
128146
r"https?://bit\.ly",
129147
r"https?://goo\.gl",
@@ -148,7 +166,10 @@
148166
"PAYMENT_REQUEST": 1.5,
149167
"SUSPICIOUS_URL_KEYWORD": 1.0,
150168
"SUSPICIOUS_TLD": 2.0,
151-
"CRYPTO_ADDRESS": 2.5,
169+
"BTC_ADDRESS": 2.5,
170+
"ETH_ADDRESS": 2.5,
171+
"IBAN_ADDRESS": 3.0,
172+
"SWIFT_BIC_ADDRESS": 3.0,
152173
"PHONE_NUMBER_UNSOLICITED": 1.0,
153174
"SUSPICIOUS_URL_PATTERN": 3.0, # High weight for matching a suspicious URL pattern
154175
}

social_media_analyzer/scam_detector.py

Lines changed: 5 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
PAYMENT_KEYWORDS,
1010
URL_PATTERN,
1111
SUSPICIOUS_TLDS,
12-
CRYPTO_ADDRESS_PATTERNS,
12+
FINANCIAL_ADDRESS_PATTERNS,
1313
PHONE_NUMBER_PATTERN,
1414
HEURISTIC_WEIGHTS,
1515
LEGITIMATE_DOMAINS,
@@ -109,13 +109,13 @@ def analyze_text_for_scams(text_content, platform=None):
109109
indicators_found.append(f"Suspicious URL found: {url_str} (Reason: {reason})")
110110
urls_analyzed_details.append(url_analysis)
111111

112-
# 3. Crypto Addresses
113-
for crypto_name, pattern in CRYPTO_ADDRESS_PATTERNS.items():
112+
# 3. Financial Identifiers
113+
for id_name, pattern in FINANCIAL_ADDRESS_PATTERNS.items():
114114
if pattern.search(text_content):
115-
message = f"Potential {crypto_name} cryptocurrency address found."
115+
message = f"Potential {id_name} identifier found."
116116
if message not in indicators_found:
117117
indicators_found.append(message)
118-
score += HEURISTIC_WEIGHTS.get("CRYPTO_ADDRESS", 2.5)
118+
score += HEURISTIC_WEIGHTS.get(f"{id_name}_ADDRESS", 2.5)
119119

120120
# 4. Phone Numbers
121121
if PHONE_NUMBER_PATTERN.search(text_content):
@@ -130,22 +130,3 @@ def analyze_text_for_scams(text_content, platform=None):
130130
"urls_analyzed": urls_analyzed_details
131131
}
132132

133-
if __name__ == '__main__':
134-
# Example Usage
135-
test_message = "URGENT: Your Instagram account has unusual activity. Please verify your account now by clicking http://instagram.security-update.com/login to avoid suspension."
136-
analysis_result = analyze_text_for_scams(test_message, platform="instagram")
137-
print(f"--- Analyzing Instagram Scam Message ---")
138-
print(f"Message: {test_message}")
139-
print(f"Score: {analysis_result['score']}")
140-
print("Indicators:")
141-
for indicator in analysis_result['indicators_found']:
142-
print(f" - {indicator}")
143-
144-
test_message_whatsapp = "Hey, check out this link: http://wa.me/1234567890. Also, please send money to my bitcoin wallet 1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa"
145-
analysis_result_whatsapp = analyze_text_for_scams(test_message_whatsapp, platform="whatsapp")
146-
print(f"\n--- Analyzing WhatsApp Message ---")
147-
print(f"Message: {test_message_whatsapp}")
148-
print(f"Score: {analysis_result_whatsapp['score']}")
149-
print("Indicators:")
150-
for indicator in analysis_result_whatsapp['indicators_found']:
151-
print(f" - {indicator}")
test_runner.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
from social_media_analyzer.scam_detector import analyze_text_for_scams
2+
3+
if __name__ == '__main__':
    # Demo driver: feed a handful of sample scam messages to the detector and
    # print what it reports for each one. Nothing is asserted; the output is
    # meant to be read by a person.
    #
    # Each sample is (label, platform, message).
    samples = [
        (
            "Instagram Phishing",
            "instagram",
            "URGENT: Your Instagram account has unusual activity. Please verify your account now by clicking http://instagram.security-update.com/login to avoid suspension.",
        ),
        (
            "WhatsApp Crypto Scam",
            "whatsapp",
            "Hey, check out this link: http://wa.me/1234567890. Also, please send money to my bitcoin wallet 1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa",
        ),
        (
            "PayPal Phishing",
            "paypal",
            "Your PayPal account has been limited. Please log in to http://paypal-support-uk.com/login to restore access. Your invoice is attached.",
        ),
        (
            "Stripe Phishing",
            "stripe",
            "There was a problem with your recent payment on Stripe. Please update your payment information at http://stripe-billing-update.com",
        ),
        (
            "Payoneer Scam",
            "payoneer",
            "Congratulations! You've received a payment of $5000 via Payoneer. Please confirm your details at http://payoneer.rewards.xyz to claim your funds.",
        ),
        (
            "Bank Transfer Fraud",
            "banks",
            "Urgent invoice payment required. Please transfer the amount to IBAN DE89370400440532013000, SWIFT/BIC COBADEFFXXX. Your account will be suspended otherwise.",
        ),
    ]

    for label, platform, message in samples:
        report = analyze_text_for_scams(message, platform=platform)

        # Header and overall score for this sample.
        print(f"\n--- Analyzing: {label} ---")
        print(f"Message: {message}")
        print(f"Score: {report['score']}")

        # One line per indicator the detector raised.
        print("Indicators:")
        for finding in report['indicators_found']:
            print(f" - {finding}")

        # One line per URL the detector inspected.
        print("URLs Analyzed:")
        for entry in report['urls_analyzed']:
            url, flagged, why = entry['url'], entry['is_suspicious'], entry['reason']
            print(f" - URL: {url}, Suspicious: {flagged}, Reason: {why}")

0 commit comments

Comments
 (0)