Skip to content

Commit fbb601e

Browse files
authored
Merge pull request #21 from GYFX35/feat/google-safe-browsing
feat: Integrate Google Safe Browsing API
2 parents 3e2a126 + 9a9ff51 commit fbb601e

File tree

5 files changed

+129
-16
lines changed

5 files changed

+129
-16
lines changed

social_media_analyzer/heuristics.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,7 @@ def generate_suspicious_url_patterns(legitimate_domains):
239239
"SWIFT_BIC_ADDRESS": 3.0,
240240
"PHONE_NUMBER_UNSOLICITED": 1.0,
241241
"SUSPICIOUS_URL_PATTERN": 3.0, # High weight for matching a suspicious URL pattern
242+
"GOOGLE_SAFE_BROWSING_HIT": 10.0, # Very high weight for a positive Google Safe Browsing match
242243
}
243244

244245
if __name__ == '__main__':

social_media_analyzer/main.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
1+
import os
12
from . import fake_profile_detector
23
from . import scam_detector
34
from . import fake_news_detector
45

6+
def get_api_key():
    """Return the Google API key stored in the GOOGLE_API_KEY environment variable.

    Returns None when the variable is not set.
    """
    return os.getenv("GOOGLE_API_KEY")
9+
510
def analyze_news_url():
611
"""Analyzes a news URL for potential fake news."""
712
url_to_check = input("Please enter the full URL of the news article you want to analyze: ").strip()
@@ -22,7 +27,7 @@ def analyze_news_url():
2227
for indicator in result['indicators_found']:
2328
print(f"- {indicator}")
2429

25-
def analyze_website_url():
30+
def analyze_website_url(api_key):
2631
"""Analyzes a website URL for potential scams."""
2732
url_to_check = input("Please enter the full URL you want to analyze: ").strip()
2833
if not url_to_check:
@@ -34,7 +39,7 @@ def analyze_website_url():
3439
url_to_check = 'http://' + url_to_check
3540

3641
print("\n--- Analyzing URL ---")
37-
is_susp, reason = scam_detector.is_url_suspicious(url_to_check, platform="general_web")
42+
is_susp, reason = scam_detector.is_url_suspicious(url_to_check, platform="general_web", api_key=api_key)
3843
if is_susp:
3944
print(f"\n[!] The URL '{url_to_check}' is flagged as IMMEDIATELY SUSPICIOUS.")
4045
print(f"Reason: {reason}")
@@ -58,7 +63,7 @@ def analyze_website_url():
5863
for indicator in content_result['indicators_found']:
5964
print(f"- {indicator}")
6065

61-
def analyze_social_media():
66+
def analyze_social_media(api_key):
6267
"""Handles the analysis of social media platforms."""
6368
platforms = sorted([
6469
"facebook", "instagram", "whatsapp", "tiktok", "tinder", "snapchat",
@@ -108,7 +113,7 @@ def analyze_social_media():
108113
elif analysis_choice == 3:
109114
message = input("Paste the message you want to analyze: ").strip()
110115
if message:
111-
result = scam_detector.analyze_text_for_scams(message, platform)
116+
result = scam_detector.analyze_text_for_scams(message, platform, api_key=api_key)
112117
print("\n--- Scam Analysis Results ---")
113118
print(f"Score: {result['score']} (Higher is more suspicious)")
114119
print("Indicators Found:")
@@ -127,8 +132,13 @@ def analyze_social_media():
127132

128133
def main():
129134
"""Main function to run the security analyzer."""
135+
api_key = get_api_key()
130136
print("--- Universal Security Analyzer ---")
131137
print("This tool helps you analyze social media, messages, and websites for potential scams and fake news.")
138+
if not api_key:
139+
print("\n[!] Google Safe Browsing API key not found.")
140+
print(" To enable real-time URL checking against Google's threat database,")
141+
print(" please set the GOOGLE_API_KEY environment variable.")
132142

133143
while True:
134144
print("\n--- Main Menu ---")
@@ -140,9 +150,9 @@ def main():
140150
try:
141151
choice = int(input("Enter your choice (1-4): "))
142152
if choice == 1:
143-
analyze_social_media()
153+
analyze_social_media(api_key)
144154
elif choice == 2:
145-
analyze_website_url()
155+
analyze_website_url(api_key)
146156
elif choice == 3:
147157
analyze_news_url()
148158
elif choice == 4:
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
requests

social_media_analyzer/scam_detector.py

Lines changed: 58 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import re
22
import urllib.request
3+
import requests
4+
import os
35
from urllib.parse import urlparse
46
from .heuristics import (
57
URGENCY_KEYWORDS,
@@ -17,6 +19,41 @@
1719
SUSPICIOUS_URL_PATTERNS
1820
)
1921

22+
def check_google_safe_browsing(url, api_key):
    """
    Checks a URL against the Google Safe Browsing API (v4 threatMatches:find).

    Args:
        url: The URL to look up.
        api_key: Google API key. When falsy, no request is made.

    Returns a tuple: (is_suspicious, reason)
        is_suspicious is True only when the API reports at least one threat
        match. A missing key, a non-200 status, an unreadable response body
        or a network failure all return False with an explanatory reason.
    """
    if not api_key:
        return False, "Google Safe Browsing API key not configured."

    api_url = "https://safebrowsing.googleapis.com/v4/threatMatches:find"
    payload = {
        "client": {
            "clientId": "social-media-analyzer",
            "clientVersion": "1.0.0",
        },
        "threatInfo": {
            "threatTypes": [
                "MALWARE",
                "SOCIAL_ENGINEERING",
                "UNWANTED_SOFTWARE",
                "POTENTIALLY_HARMFUL_APPLICATION",
            ],
            "platformTypes": ["ANY_PLATFORM"],
            "threatEntryTypes": ["URL"],
            "threatEntries": [{"url": url}],
        },
    }

    try:
        # Pass the key via `params` so it is URL-encoded by the HTTP library
        # instead of being interpolated into the URL by hand.
        response = requests.post(
            api_url, params={"key": api_key}, json=payload, timeout=10
        )
    except requests.RequestException as e:
        # The exception text normally embeds the request URL, which carries
        # the API key in its query string. Report only the exception type so
        # the key cannot leak into user-facing output or logs.
        return False, f"Could not connect to Google Safe Browsing: {type(e).__name__}"

    if response.status_code != 200:
        return False, f"Google Safe Browsing API error: {response.status_code}"

    try:
        data = response.json()
    except ValueError:
        # Body was not valid JSON (JSON decode errors are ValueError subclasses).
        return False, "Google Safe Browsing API error: invalid JSON response"

    matches = data.get("matches") if isinstance(data, dict) else None
    if not isinstance(matches, list) or not matches:
        # No "matches" key, or an empty list, means nothing was flagged.
        return False, "Clean according to Google Safe Browsing."

    first_match = matches[0]
    threat_type = (
        first_match.get("threatType", "UNKNOWN")
        if isinstance(first_match, dict)
        else "UNKNOWN"
    )
    return True, f"Flagged by Google Safe Browsing as {threat_type}."
56+
2057
def get_legitimate_domains(platform=None):
2158
"""
2259
Returns a list of legitimate domains for a given platform,
@@ -35,16 +72,24 @@ def get_domain_from_url(url):
3572
domain = url.split("/")[0].split("?")[0]
3673
return domain.lower()
3774

38-
def is_url_suspicious(url, platform=None):
75+
def is_url_suspicious(url, platform=None, api_key=None):
3976
"""
40-
Checks if a URL is suspicious based on various patterns and lists.
77+
Checks if a URL is suspicious based on various patterns and lists,
78+
including Google Safe Browsing.
4179
Returns a tuple: (bool_is_suspicious, reason_string)
4280
"""
81+
# 1. Google Safe Browsing Check
82+
if api_key:
83+
is_susp, reason = check_google_safe_browsing(url, api_key)
84+
if is_susp:
85+
return True, reason
86+
87+
# 2. Local Heuristics
4388
normalized_url = url.lower()
4489
domain = get_domain_from_url(url)
4590
legitimate_domains = get_legitimate_domains(platform)
4691

47-
# 1. Check if the domain is in the legitimate list for the platform
92+
# Check if the domain is in the legitimate list for the platform
4893
if domain in legitimate_domains:
4994
# Still check for impersonation patterns that might include the legit domain
5095
for pattern in SUSPICIOUS_URL_PATTERNS:
@@ -53,24 +98,24 @@ def is_url_suspicious(url, platform=None):
5398
return True, f"URL impersonates a legitimate domain: {pattern}"
5499
return False, "URL domain is on the legitimate list."
55100

56-
# 2. Check against known suspicious patterns
101+
# Check against known suspicious patterns
57102
for pattern in SUSPICIOUS_URL_PATTERNS:
58103
if re.search(pattern, normalized_url, re.IGNORECASE):
59104
return True, f"URL matches suspicious pattern: {pattern}"
60105

61-
# 3. Check for suspicious TLDs
106+
# Check for suspicious TLDs
62107
suspicious_tld_regex = re.compile(r"\.(" + "|".join(tld.lstrip('.') for tld in SUSPICIOUS_TLDS) + r")$", re.IGNORECASE)
63108
if suspicious_tld_regex.search(domain):
64109
return True, f"URL uses a potentially suspicious TLD."
65110

66-
# 4. Check if a known legitimate service name is part of the domain, but it's not official
111+
# Check if a known legitimate service name is part of the domain, but it's not official
67112
for service in LEGITIMATE_DOMAINS.keys():
68113
if service != "general" and service in domain:
69114
return True, f"URL contains the name of a legitimate service ('{service}') but is not an official domain."
70115

71116
return False, "URL does not match common suspicious patterns."
72117

73-
def analyze_text_for_scams(text_content, platform=None):
118+
def analyze_text_for_scams(text_content, platform=None, api_key=None):
74119
"""
75120
Analyzes a block of text content for various scam indicators.
76121
"""
@@ -103,10 +148,14 @@ def analyze_text_for_scams(text_content, platform=None):
103148
# 2. Regex-based checks
104149
found_urls = URL_PATTERN.findall(text_content)
105150
for url_str in found_urls:
106-
is_susp, reason = is_url_suspicious(url_str, platform)
151+
is_susp, reason = is_url_suspicious(url_str, platform, api_key)
107152
url_analysis = {"url": url_str, "is_suspicious": is_susp, "reason": reason}
108153
if is_susp:
109-
score += HEURISTIC_WEIGHTS.get("SUSPICIOUS_URL_PATTERN", 3.0)
154+
# Increase score significantly if flagged by Google
155+
if "Google Safe Browsing" in reason:
156+
score += HEURISTIC_WEIGHTS.get("GOOGLE_SAFE_BROWSING_HIT", 10.0)
157+
else:
158+
score += HEURISTIC_WEIGHTS.get("SUSPICIOUS_URL_PATTERN", 3.0)
110159
indicators_found.append(f"Suspicious URL found: {url_str} (Reason: {reason})")
111160
urls_analyzed_details.append(url_analysis)
112161

social_media_analyzer/test_runner.py

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1+
import unittest
2+
from unittest.mock import patch, Mock
13
from social_media_analyzer.scam_detector import analyze_text_for_scams
24

3-
if __name__ == '__main__':
5+
def run_manual_tests():
46
# Example Usage
57
test_cases = {
68
"Instagram Phishing": {
@@ -48,3 +50,53 @@
4850
print("URLs Analyzed:")
4951
for url_info in analysis_result['urls_analyzed']:
5052
print(f" - URL: {url_info['url']}, Suspicious: {url_info['is_suspicious']}, Reason: {url_info['reason']}")
53+
54+
class TestScamDetector(unittest.TestCase):
    """Unit tests for the Google Safe Browsing path of analyze_text_for_scams.

    requests.post is patched in every test, so no network traffic occurs.
    """

    @patch('social_media_analyzer.scam_detector.requests.post')
    def test_google_safe_browsing_malicious(self, mock_post):
        """A URL the API reports as MALWARE is flagged with a Safe Browsing reason."""
        # Mock the API response for a malicious URL
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.json.return_value = {
            "matches": [
                {
                    "threatType": "MALWARE",
                    "platformType": "ANY_PLATFORM",
                    "threat": {"url": "http://malware.testing.google.test/testing/malware/"},
                }
            ]
        }
        mock_post.return_value = mock_response

        message = "check this out http://malware.testing.google.test/testing/malware/"
        result = analyze_text_for_scams(message, api_key="fake_key")

        # Make sure the mocked API was actually consulted.
        mock_post.assert_called()
        self.assertTrue(any("Google Safe Browsing" in indicator for indicator in result["indicators_found"]))
        self.assertEqual(result['urls_analyzed'][0]['is_suspicious'], True)
        self.assertIn("MALWARE", result['urls_analyzed'][0]['reason'])

    @patch('social_media_analyzer.scam_detector.requests.post')
    def test_google_safe_browsing_clean(self, mock_post):
        """A URL with no API matches is not attributed to Google Safe Browsing."""
        # Mock the API response for a clean URL
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.json.return_value = {}
        mock_post.return_value = mock_response

        message = "this is a clean site http://www.google.com"
        result = analyze_text_for_scams(message, api_key="fake_key")

        # Without this check the test would pass vacuously if the Safe
        # Browsing lookup were skipped altogether.
        mock_post.assert_called()
        self.assertFalse(any("Google Safe Browsing" in indicator for indicator in result["indicators_found"]))
        self.assertEqual(result['urls_analyzed'][0]['is_suspicious'], False)
90+
91+
if __name__ == '__main__':
    # Script entry point: print the manual example analyses, then run the
    # mocked Google Safe Browsing unit tests and report the outcome.
    run_manual_tests()
    # Run unit tests
    # unittest.makeSuite() was deprecated in Python 3.11 and removed in 3.13;
    # the loader method below is the supported equivalent and returns a suite.
    suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestScamDetector)
    runner = unittest.TextTestRunner()
    print("\n--- Running Unit Tests for Google Safe Browsing Integration ---")
    result = runner.run(suite)
    if result.wasSuccessful():
        print("All tests passed!")
    else:
        print("Some tests failed.")
        # Exit non-zero so shells and CI pipelines can detect the failure.
        raise SystemExit(1)

0 commit comments

Comments
 (0)