GYFX35
diff --git a/‎social_media_analyzer/__pycache__/__init__.cpython-312.pyc‎
0 Bytes b/‎social_media_analyzer/__pycache__/__init__.cpython-312.pyc‎
0 Bytes
diff --git a/‎social_media_analyzer/__pycache__/fake_profile_detector.cpython-312.pyc‎
9.94 KB b/‎social_media_analyzer/__pycache__/fake_profile_detector.cpython-312.pyc‎
9.94 KB
diff --git a/‎social_media_analyzer/__pycache__/heuristics.cpython-312.pyc‎
1.66 KB b/‎social_media_analyzer/__pycache__/heuristics.cpython-312.pyc‎
1.66 KB
diff --git a/‎social_media_analyzer/__pycache__/main.cpython-312.pyc‎
2 KB b/‎social_media_analyzer/__pycache__/main.cpython-312.pyc‎
2 KB
diff --git a/‎social_media_analyzer/__pycache__/scam_detector.cpython-312.pyc‎
1.24 KB b/‎social_media_analyzer/__pycache__/scam_detector.cpython-312.pyc‎
1.24 KB
diff --git a/‎social_media_analyzer/heuristics.py‎
Lines changed: 58 additions & 22 deletions b/‎social_media_analyzer/heuristics.py‎
Lines changed: 58 additions & 22 deletions
diff --git a/‎social_media_analyzer/main.py‎
Lines changed: 107 additions & 57 deletions b/‎social_media_analyzer/main.py‎
Lines changed: 107 additions & 57 deletions
diff --git a/‎social_media_analyzer/scam_detector.py‎
Lines changed: 19 additions & 0 deletions b/‎social_media_analyzer/scam_detector.py‎
Lines changed: 19 additions & 0 deletions
@@ -43,7 +43,22 @@
         "bankofamerica.com", "chase.com", "wellsfargo.com", "citibank.com",
         "hsbc.com", "barclays.com", "deutsche-bank.com", "santander.com"
     ],
-    "general": ["google.com"]
+    "general": ["google.com"],
+    "general_web": [
+        "wikipedia.org", "yahoo.com", "live.com", "microsoft.com",
+        "apple.com", "netflix.com", "twitch.tv", "ebay.com",
+        "craigslist.org", "imdb.com", "nytimes.com", "theguardian.com",
+        "bbc.com", "cnn.com", "espn.com", "walmart.com", "target.com",
+        "bestbuy.com", "homedepot.com", "lowes.com", "costco.com",
+        "stackoverflow.com", "github.com", "gitlab.com", "wordpress.org",
+        "wordpress.com", "blogger.com", "tumblr.com", "medium.com",
+        "quora.com", "flickr.com", "adobe.com", "soundcloud.com",
+        "spotify.com", "dropbox.com", "box.com", "slack.com",
+        "salesforce.com", "oracle.com", "sap.com", "ibm.com", "dell.com",
+        "hp.com", "intel.com", "amd.com", "nvidia.com", "booking.com",
+        "airbnb.com", "expedia.com", "tripadvisor.com", "fedex.com",
+        "ups.com", "usps.com", "dhl.com"
+    ]
 }
 
 
@@ -138,27 +153,48 @@
 
 # Suspicious URL Patterns
 # These patterns aim to catch URLs that impersonate legitimate domains.
-SUSPICIOUS_URL_PATTERNS = [
-    # Impersonation using subdomains or hyphens for social media and general platforms
-    r"https?://(?:[a-z0-9\-]+\.)*(?:facebook|fb|instagram|whatsapp|tiktok|tinder|snapchat|wechat|telegram|twitter|pinterest|linkedin|line|discord|teams|zoom|amazon|alibaba|youtube|skype|vk|reddit|viber|signal|badoo|binance|sharechat)\.com\.[a-z0-9\-]+\.[a-z]+",
-    r"https?://(?:[a-z0-9\-]+\.)*(?:facebook|fb|instagram|whatsapp|tiktok|tinder|snapchat|wechat|telegram|twitter|pinterest|linkedin|line|discord|teams|zoom|amazon|alibaba|youtube|skype|vk|reddit|viber|signal|badoo|binance|sharechat)-[a-z0-9\-]+\.[a-z]+",
-
-    # Impersonation for fintech and banks
-    r"https?://(?:[a-z0-9\-]+\.)*(?:paypal|stripe|payoneer|bankofamerica|chase|wellsfargo|citibank|hsbc|barclays)\.com\.[a-z0-9\-]+\.[a-z]+",
-    r"https?://(?:[a-z0-9\-]+\.)*(?:paypal|stripe|payoneer|bankofamerica|chase|wellsfargo|citibank|hsbc|barclays)-[a-z0-9\-]+\.[a-z]+",
-
-    # Common URL shorteners
-    r"https?://bit\.ly",
-    r"https?://goo\.gl",
-    r"https?://t\.co",
-    # IP Address URLs
-    r"https?://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}",
-    # Generic suspicious keywords in the domain
-    r"https?://[^/]*(?:login|secure|account|update|verify|support|admin)[^/]*\.(?:biz|info|tk|ml|ga|cf|gq|xyz|club|top|loan|work|online|site)",
-    # Very long subdomains or many hyphens
-    r"https?://(?:[a-z0-9\-]+\.){4,}",
-    r"https?://[^/]*\-.*\-.*\-.*[a-z]+",
-]
+def generate_suspicious_url_patterns(legitimate_domains):
+    """
+    Generates regex patterns to detect URLs impersonating legitimate domains.
+
+    Args:
+        legitimate_domains: Mapping of platform name -> iterable of domain
+            strings (e.g. "facebook.com").
+
+    Returns:
+        A list of regex pattern strings (not compiled). The first two are
+        built from the collected service keywords; the remaining ones are
+        fixed patterns (URL shorteners, IP-address URLs, keyword + TLD
+        combinations, many subdomains, many hyphens).
+
+    NOTE(review): this relies on `re` being imported by the enclosing module;
+    that import is not visible in this hunk -- confirm.
+    """
+    all_service_keywords = set()
+    for platform, domains in legitimate_domains.items():
+        # Platform names are used as keywords too, except these three keys.
+        if platform not in ["general", "general_web", "banks"]:
+            all_service_keywords.add(platform)
+        for domain in domains:
+            # Add the core part of the domain, e.g., "facebook" from "facebook.com"
+            keyword = domain.split('.')[0]
+            # First labels of one or two characters are not used as keywords.
+            if keyword != "com" and len(keyword) > 2:
+                all_service_keywords.add(keyword)
+
+    # Remove very generic keywords that might cause false positives
+    all_service_keywords -= {'google', 'apple', 'microsoft'}
+
+    # Create a regex group of all keywords
+    # Longest keywords are placed first in the alternation. Keywords of equal
+    # length keep the set's iteration order, so the exact pattern text may
+    # differ between runs even though the set of alternatives is the same.
+    keyword_group = "|".join(re.escape(k) for k in sorted(list(all_service_keywords), key=len, reverse=True))
+
+    patterns = [
+        # Impersonation using subdomains or hyphens, e.g., "facebook.security-alert.com" or "facebook-login.com"
+        # This now uses the dynamically generated keyword group
+        # NOTE(review): the first pattern accepts any two labels after the
+        # keyword, so a keyword derived from e.g. "bbc.com" would also match
+        # "https://bbc.co.uk" -- confirm this is intended.
+        r"https?://(?:[a-z0-9\-]+\.)*(?:" + keyword_group + r")\.(?:[a-z0-9\-]+)\.(?:[a-z]+)",
+        r"https?://(?:[a-z0-9\-]+\.)*(?:" + keyword_group + r")-(?:[a-z0-9\-]+)\.(?:[a-z]+)",
+
+        # Common URL shorteners
+        r"https?://bit\.ly",
+        r"https?://goo\.gl",
+        r"https?://t\.co",
+        # IP Address URLs
+        r"https?://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}",
+        # Generic suspicious keywords in the domain combined with suspicious TLDs
+        r"https?://[^/]*(?:login|secure|account|update|verify|support|admin)[^/]*\.(?:biz|info|tk|ml|ga|cf|gq|xyz|club|top|loan|work|online|site)",
+        # Very long subdomains (potential phishing)
+        r"https?://(?:[a-z0-9\-]+\.){4,}",
+        # Multiple hyphens in the domain (potential phishing)
+        r"https?://[^/]*\-.*\-.*\-.*[a-z]+",
+    ]
+    return patterns
+
+# Built once, when this module is imported, from LEGITIMATE_DOMAINS.
+SUSPICIOUS_URL_PATTERNS = generate_suspicious_url_patterns(LEGITIMATE_DOMAINS)
 
 
 # --- Scoring Weights ---
 
@@ -1,12 +1,51 @@
 from . import fake_profile_detector
 from . import scam_detector
 
-def main():
-    """Main function to run the social media analyzer."""
-    print("--- Social Media Analyzer ---")
-    print("This tool helps you analyze social media profiles and messages for potential scams.")
+def analyze_website_url():
+    """Analyzes a website URL for potential scams.
+
+    Prompts for a URL, checks the URL string itself with
+    scam_detector.is_url_suspicious, and only if that check passes fetches
+    and analyzes the page via scam_detector.analyze_url_content.
+    All results are printed; nothing is returned.
+    """
+    url_to_check = input("Please enter the full URL you want to analyze: ").strip()
+    if not url_to_check:
+        print("No URL entered.")
+        return
+
+    # Ensure the URL has a scheme
+    # NOTE(review): the prefix test is case-sensitive, so input such as
+    # "HTTP://example.com" would get a second "http://" prepended.
+    if not url_to_check.startswith(('http://', 'https://')):
+        url_to_check = 'http://' + url_to_check
 
-    platforms = ["facebook", "instagram", "whatsapp", "tiktok", "tinder", "snapchat", "wechat", "telegram", "twitter", "pinterest", "linkedin", "line", "discord", "teams", "zoom", "amazon", "alibaba", "youtube", "skype", "vk", "reddit", "email", "viber", "signal", "badoo", "binance", "sharechat", "browser", "messenger", "qzone", "qq", "vimeo", "musical.ly"]
+    print("\n--- Analyzing URL ---")
+    # is_url_suspicious is unpacked as a (flag, reason) pair.
+    is_susp, reason = scam_detector.is_url_suspicious(url_to_check, platform="general_web")
+    if is_susp:
+        print(f"\n[!] The URL '{url_to_check}' is flagged as IMMEDIATELY SUSPICIOUS.")
+        print(f"Reason: {reason}")
+        # We can stop here as the URL itself is a major red flag
+        return
+    else:
+        print(f"\n[+] The URL '{url_to_check}' does not match common suspicious patterns.")
+        print("Now analyzing the website's content...")
+
+    # Analyze the content of the website
+    content_result = scam_detector.analyze_url_content(url_to_check)
+
+    # The result is read as a dict: an "error" key means the fetch/analysis
+    # failed; otherwise "indicators_found" and "score" are reported.
+    print("\n--- Website Content Analysis Results ---")
+    if "error" in content_result:
+        print(f"Could not analyze website content: {content_result['error']}")
+    elif not content_result.get("indicators_found"):
+        print("No specific scam indicators were found in the website content.")
+    else:
+        print(f"Score: {content_result['score']} (Higher is more suspicious)")
+        print("Indicators Found:")
+        for indicator in content_result['indicators_found']:
+            print(f"- {indicator}")
+
+def analyze_social_media():
+    """Handles the analysis of social media platforms."""
+    platforms = sorted([
+        "facebook", "instagram", "whatsapp", "tiktok", "tinder", "snapchat",
+        "wechat", "telegram", "twitter", "pinterest", "linkedin", "line",
+        "discord", "teams", "zoom", "amazon", "alibaba", "youtube", "skype",
+        "vk", "reddit", "email", "viber", "signal", "badoo", "binance",
+        "sharechat", "messenger", "qzone", "qq", "vimeo", "musical.ly"
+    ])
 
     while True:
         print("\nSelect the social media platform you want to analyze:")
@@ -23,61 +62,72 @@ def main():
         except ValueError:
             print("Invalid input. Please enter a number.")
 
-    if platform == "browser":
-        url_to_check = input("Please enter the URL you want to analyze: ").strip()
-        if url_to_check:
-            is_susp, reason = scam_detector.is_url_suspicious(url_to_check)
-            print("\n--- URL Analysis Results ---")
-            if is_susp:
-                print(f"The URL '{url_to_check}' is SUSPICIOUS.")
-                print(f"Reason: {reason}")
-            else:
-                print(f"The URL '{url_to_check}' does not seem suspicious.")
-                print(f"Details: {reason}")
-        else:
-            print("No URL entered.")
-    else:
-        while True:
-            print(f"\nWhat do you want to do for {platform.capitalize()}?")
-            print("1. Analyze a profile for signs of being fake.")
-            print("2. Analyze a profile for identity usurpation.")
-            print("3. Analyze a message for phishing or scam attempts.")
+    while True:
+        print(f"\nWhat do you want to do for {platform.capitalize()}?")
+        print("1. Analyze a profile for signs of being fake.")
+        print("2. Analyze a profile for identity usurpation.")
+        print("3. Analyze a message for phishing or scam attempts.")
 
-            try:
-                analysis_choice = int(input("Enter your choice (1-3): "))
-                if analysis_choice == 1:
-                    profile_url = input(f"Enter the {platform.capitalize()} profile URL to analyze: ").strip()
-                    if profile_url:
-                        fake_profile_detector.analyze_profile_based_on_user_input(profile_url, platform)
-                    else:
-                        print("No profile URL entered.")
-                    break
-                elif analysis_choice == 2:
-                    profile_url = input(f"Enter the {platform.capitalize()} profile URL to analyze for impersonation: ").strip()
-                    if profile_url:
-                        fake_profile_detector.analyze_identity_usurpation(profile_url, platform)
-                    else:
-                        print("No profile URL entered.")
-                    break
-                elif analysis_choice == 3:
-                    message = input("Paste the message you want to analyze: ").strip()
-                    if message:
-                        result = scam_detector.analyze_text_for_scams(message, platform)
-                        print("\n--- Scam Analysis Results ---")
-                        print(f"Score: {result['score']} (Higher is more suspicious)")
-                        print("Indicators Found:")
-                        if result['indicators_found']:
-                            for indicator in result['indicators_found']:
-                                print(f"- {indicator}")
-                        else:
-                            print("No specific scam indicators were found.")
+        try:
+            analysis_choice = int(input("Enter your choice (1-3): "))
+            if analysis_choice == 1:
+                profile_url = input(f"Enter the {platform.capitalize()} profile URL to analyze: ").strip()
+                if profile_url:
+                    fake_profile_detector.analyze_profile_based_on_user_input(profile_url, platform)
+                else:
+                    print("No profile URL entered.")
+                break
+            elif analysis_choice == 2:
+                profile_url = input(f"Enter the {platform.capitalize()} profile URL to analyze for impersonation: ").strip()
+                if profile_url:
+                    fake_profile_detector.analyze_identity_usurpation(profile_url, platform)
+                else:
+                    print("No profile URL entered.")
+                break
+            elif analysis_choice == 3:
+                message = input("Paste the message you want to analyze: ").strip()
+                if message:
+                    result = scam_detector.analyze_text_for_scams(message, platform)
+                    print("\n--- Scam Analysis Results ---")
+                    print(f"Score: {result['score']} (Higher is more suspicious)")
+                    print("Indicators Found:")
+                    if result['indicators_found']:
+                        for indicator in result['indicators_found']:
+                            print(f"- {indicator}")
                     else:
-                        print("No message entered.")
-                    break
+                        print("No specific scam indicators were found.")
                 else:
-                    print("Invalid choice. Please try again.")
-            except ValueError:
-                print("Invalid input. Please enter a number.")
+                    print("No message entered.")
+                break
+            else:
+                print("Invalid choice. Please try again.")
+        except ValueError:
+            print("Invalid input. Please enter a number.")
+
+def main():
+    """Main function to run the security analyzer.
+
+    Shows a three-item menu in a loop: social media analysis, website URL
+    analysis, or exit. The loop ends only when option 3 is chosen.
+    """
+    print("--- Universal Security Analyzer ---")
+    print("This tool helps you analyze social media, messages, and websites for potential scams.")
+
+    while True:
+        print("\n--- Main Menu ---")
+        print("1. Analyze a Social Media Platform")
+        print("2. Analyze a Website URL")
+        print("3. Exit")
+
+        # NOTE(review): the try block also wraps the two analyzer calls, so a
+        # ValueError escaping from either of them would be reported below as
+        # invalid menu input -- confirm this is acceptable.
+        try:
+            choice = int(input("Enter your choice (1-3): "))
+            if choice == 1:
+                analyze_social_media()
+            elif choice == 2:
+                analyze_website_url()
+            elif choice == 3:
+                print("Exiting. Stay safe!")
+                break
+            else:
+                print("Invalid choice. Please try again.")
+        except ValueError:
+            # int() rejects non-numeric input; re-prompt instead of crashing.
+            print("Invalid input. Please enter a number.")
 
 if __name__ == '__main__':
     main()
@@ -1,4 +1,5 @@
 import re
+import urllib.request
 from urllib.parse import urlparse
 from .heuristics import (
     URGENCY_KEYWORDS,
@@ -130,3 +131,21 @@ def analyze_text_for_scams(text_content, platform=None):
         "urls_analyzed": urls_analyzed_details
     }
 
+def analyze_url_content(url):
+    """
+    Fetches the content of a URL and analyzes it for scams.
+
+    Args:
+        url: The URL to fetch. It is passed to urllib unchanged.
+
+    Returns:
+        On a 200 response, whatever analyze_text_for_scams returns for the
+        tag-stripped page text (called with platform="general_web").
+        Otherwise a dict with a single "error" key describing the failure.
+        No exception propagates to the caller; every Exception is converted
+        into an "error" dict.
+
+    NOTE(review): this function does not check the URL scheme itself; it
+    relies on the caller to pass an http(s) URL -- confirm all callers do.
+    """
+    try:
+        # Add a user-agent to avoid being blocked by some websites
+        headers = {'User-Agent': 'Mozilla/5.0'}
+        request = urllib.request.Request(url, headers=headers)
+        # The request is given up after 10 seconds without a response.
+        with urllib.request.urlopen(request, timeout=10) as response:
+            if response.status == 200:
+                # The whole body is read at once (no size cap) and decoded as
+                # UTF-8, silently dropping bytes that do not decode.
+                html_content = response.read().decode('utf-8', errors='ignore')
+                # Simple regex to strip HTML tags, not perfect but avoids new dependencies
+                # Only the tags are removed: text between <script>/<style>
+                # tags stays in text_content and is analyzed as well.
+                text_content = re.sub(r'<[^>]+>', '', html_content)
+                return analyze_text_for_scams(text_content, platform="general_web")
+            else:
+                # NOTE(review): per the urllib.request docs, non-2xx responses
+                # normally raise HTTPError (handled below), so this branch is
+                # presumably reached only for 2xx codes other than 200.
+                return {"error": f"Failed to fetch URL: HTTP status code {response.status}"}
+    except Exception as e:
+        # Covers network and HTTP errors as well as anything raised while
+        # decoding or analyzing the page.
+        return {"error": f"An error occurred: {e}"}