Skip to content

Commit 4f21e9c

Browse files
authored
Merge pull request #23 from GYFX35/teen-protection-features
Integrate teenager protection tools into the social media analyzer.
2 parents 7b864ba + 9b4d3d4 commit 4f21e9c

File tree

5 files changed

+219
-7
lines changed

5 files changed

+219
-7
lines changed

social_media_analyzer/heuristics.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,37 @@
122122
"bank transfer", "wire details", "account details", "iban", "swift code", "bic"
123123
]
124124

125+
# --- Teenager Protection Heuristics ---
# Keyword lists consumed by teen_protection.py. All entries are lower-case;
# matching is performed against lower-cased input text.

# Keywords/phrases related to cyberbullying
CYBERBULLYING_KEYWORDS = [
    "loser", "stupid", "idiot", "hate you", "ugly", "fat",
    "kill yourself", "kys", "go die", "nobody likes you", "freak",
    "weirdo", "everyone hates you", "you're worthless", "pathetic",
    "troll", "noob", "poser", "wannabe", "go away",
    "social reject", "outcast", "misfit", "dork", "nerd"
]

# Keywords/phrases related to inappropriate content (sexual, violent, etc.)
INAPPROPRIATE_CONTENT_KEYWORDS = [
    # Sexually suggestive
    "nude", "sexting", "send nudes", "horny", "slut", "whore", "dick", "pussy",
    "porn", "sexy pic", "private parts", "hook up",
    # Violence
    "kill", "murder", "blood", "gun", "knife", "fight me",
    "i will hurt you", "beat you up", "gonna get you",
    # Drugs/Alcohol
    "drugs", "weed", "cocaine", "pills", "get high", "drunk", "wasted"
]

# Keywords/phrases indicating oversharing of personal information.
# NOTE: the former entry "i go to [school_name]" was leftover template text
# that could never match real input; it is replaced with a matchable phrase.
PRIVACY_RISK_KEYWORDS = [
    "my address is", "i live at", "my phone number is", "call me at",
    "my full name is", "my school is", "i go to school at",
    "my mom's name is", "my dad's name is",
    "i'm home alone", "parents are out", "my password is"
]
155+
125156

126157
# --- Fake News Heuristics ---
127158

@@ -240,6 +271,10 @@ def generate_suspicious_url_patterns(legitimate_domains):
240271
"PHONE_NUMBER_UNSOLICITED": 1.0,
241272
"SUSPICIOUS_URL_PATTERN": 3.0, # High weight for matching a suspicious URL pattern
242273
"GOOGLE_SAFE_BROWSING_HIT": 10.0, # Very high weight for a positive Google Safe Browsing match
274+
# Teenager Protection Weights
275+
"CYBERBULLYING": 2.5,
276+
"INAPPROPRIATE_CONTENT": 3.0,
277+
"PRIVACY_RISK": 3.5,
243278
}
244279

245280
if __name__ == '__main__':

social_media_analyzer/main.py

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from . import fake_profile_detector
33
from . import scam_detector
44
from . import fake_news_detector
5+
from . import teen_protection
56

67
def get_api_key():
78
"""Gets the Google API key from environment variables."""
@@ -130,6 +131,48 @@ def analyze_social_media(api_key):
130131
except ValueError:
131132
print("Invalid input. Please enter a number.")
132133

134+
def analyze_for_teen_risks():
    """Interactive menu for the teenager-protection text analyses.

    Prompts for an analysis type (cyberbullying / inappropriate content /
    privacy risk), reads a block of text, delegates scoring to the
    teen_protection module, and prints the resulting score and indicators.
    """
    print("\n--- Teenager Protection Tools ---")
    print("Select the type of analysis you want to perform:")
    print("1. Analyze text for Cyberbullying")
    print("2. Analyze text for Inappropriate Content")
    print("3. Analyze text for Privacy Risks (Oversharing)")

    # Menu choice -> (banner label, analyzer callable).
    dispatch = {
        1: ("Cyberbullying", teen_protection.analyze_for_cyberbullying),
        2: ("Inappropriate Content", teen_protection.analyze_for_inappropriate_content),
        3: ("Privacy Risks", teen_protection.analyze_for_privacy_risks),
    }

    try:
        choice = int(input("Enter your choice (1-3): "))
    except ValueError:
        print("Invalid input. Please enter a number.")
        return
    if choice not in dispatch:
        print("Invalid choice. Please try again.")
        return

    text_to_analyze = input("Please paste the text you want to analyze: ").strip()
    if not text_to_analyze:
        print("No text entered.")
        return

    label, analyzer = dispatch[choice]
    print(f"\n--- Analyzing for {label} ---")
    result = analyzer(text_to_analyze)

    print(f"Score: {result['score']} (Higher is more suspicious)")
    if result['indicators_found']:
        print("Indicators Found:")
        for indicator in result['indicators_found']:
            print(f"- {indicator}")
    else:
        print("No specific risk indicators were found.")
174+
175+
133176
def main():
134177
"""Main function to run the security analyzer."""
135178
api_key = get_api_key()
@@ -145,17 +188,20 @@ def main():
145188
print("1. Analyze a Social Media Platform")
146189
print("2. Analyze a Website URL for Scams")
147190
print("3. Analyze a News URL for Fake News")
148-
print("4. Exit")
191+
print("4. Teenager Protection Tools")
192+
print("5. Exit")
149193

150194
try:
151-
choice = int(input("Enter your choice (1-4): "))
195+
choice = int(input("Enter your choice (1-5): "))
152196
if choice == 1:
153197
analyze_social_media(api_key)
154198
elif choice == 2:
155199
analyze_website_url(api_key)
156200
elif choice == 3:
157201
analyze_news_url()
158202
elif choice == 4:
203+
analyze_for_teen_risks()
204+
elif choice == 5:
159205
print("Exiting. Stay safe!")
160206
break
161207
else:
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import re

from .heuristics import (
    CYBERBULLYING_KEYWORDS,
    INAPPROPRIATE_CONTENT_KEYWORDS,
    PRIVACY_RISK_KEYWORDS,
    HEURISTIC_WEIGHTS
)
7+
8+
def analyze_text_for_teen_risks(text, analysis_type):
    """
    Analyzes text for a specific type of risk to teenagers.

    Keywords are matched case-insensitively on whole-word/phrase boundaries,
    so e.g. "kill" no longer fires on "skill", "fat" on "father", or
    "drugs" on "drugstore" (the previous substring matching did).

    :param text: The text content to analyze.
    :param analysis_type: The type of analysis to perform ('cyberbullying',
                          'inappropriate_content', 'privacy_risk').
    :return: A dictionary with the score and indicators found. For an
             unrecognized analysis_type the dict additionally carries an
             "error" key, while still providing the normal result keys so
             callers reading result['score'] do not hit a KeyError.
    """
    if not text:
        return {"score": 0.0, "indicators_found": []}

    keyword_map = {
        'cyberbullying': ('CYBERBULLYING', CYBERBULLYING_KEYWORDS),
        'inappropriate_content': ('INAPPROPRIATE_CONTENT', INAPPROPRIATE_CONTENT_KEYWORDS),
        'privacy_risk': ('PRIVACY_RISK', PRIVACY_RISK_KEYWORDS),
    }

    if analysis_type not in keyword_map:
        return {"score": 0.0, "indicators_found": [],
                "error": "Invalid analysis type specified."}

    category, keywords = keyword_map[analysis_type]
    # category is already upper-case, so it is usable as the weight key directly.
    weight = HEURISTIC_WEIGHTS.get(category, 1.0)

    text_lower = text.lower()
    score = 0.0
    indicators_found = []

    for keyword in keywords:
        # \b anchors restrict matches to whole words/phrases; re.escape keeps
        # keywords containing apostrophes or other metacharacters literal.
        if re.search(r"\b" + re.escape(keyword) + r"\b", text_lower):
            message = f"Detected potential {category.replace('_', ' ').lower()} keyword: '{keyword}'"
            if message not in indicators_found:
                indicators_found.append(message)
                score += weight

    return {
        "score": round(score, 2),
        "indicators_found": indicators_found
    }
47+
48+
def analyze_for_cyberbullying(text):
    """Convenience wrapper: score *text* for cyberbullying indicators."""
    return analyze_text_for_teen_risks(text, analysis_type='cyberbullying')


def analyze_for_inappropriate_content(text):
    """Convenience wrapper: score *text* for inappropriate content."""
    return analyze_text_for_teen_risks(text, analysis_type='inappropriate_content')


def analyze_for_privacy_risks(text):
    """Convenience wrapper: score *text* for privacy oversharing."""
    return analyze_text_for_teen_risks(text, analysis_type='privacy_risk')

social_media_analyzer/test_runner.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import unittest
22
from unittest.mock import patch, Mock
3-
from social_media_analyzer.scam_detector import analyze_text_for_scams
3+
from .scam_detector import analyze_text_for_scams
4+
from .test_teen_protection import TestTeenProtection
45

56
def run_manual_tests():
67
# Example Usage
@@ -91,11 +92,13 @@ def test_google_safe_browsing_clean(self, mock_post):
9192
if __name__ == '__main__':
9293
run_manual_tests()
9394
# Run unit tests
94-
suite = unittest.TestSuite()
95-
suite.addTest(unittest.makeSuite(TestScamDetector))
95+
scam_suite = unittest.makeSuite(TestScamDetector)
96+
teen_suite = unittest.makeSuite(TestTeenProtection)
97+
all_tests = unittest.TestSuite([scam_suite, teen_suite])
98+
9699
runner = unittest.TextTestRunner()
97-
print("\n--- Running Unit Tests for Google Safe Browsing Integration ---")
98-
result = runner.run(suite)
100+
print("\n--- Running All Unit Tests ---")
101+
result = runner.run(all_tests)
99102
if result.wasSuccessful():
100103
print("All tests passed!")
101104
else:
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import unittest
2+
from .teen_protection import (
3+
analyze_for_cyberbullying,
4+
analyze_for_inappropriate_content,
5+
analyze_for_privacy_risks
6+
)
7+
8+
class TestTeenProtection(unittest.TestCase):
    """Unit tests for the teen_protection analysis helpers."""

    def test_cyberbullying(self):
        """Test the cyberbullying detection."""
        # Text containing bullying keywords should score and be flagged.
        flagged = analyze_for_cyberbullying("You are such a loser and an idiot.")
        self.assertGreater(flagged['score'], 0)
        for expected in (
            "Detected potential cyberbullying keyword: 'loser'",
            "Detected potential cyberbullying keyword: 'idiot'",
        ):
            self.assertIn(expected, flagged['indicators_found'])

        # Benign text should produce no score and no indicators.
        clean = analyze_for_cyberbullying("Have a great day!")
        self.assertEqual(clean['score'], 0)
        self.assertEqual(clean['indicators_found'], [])

    def test_inappropriate_content(self):
        """Test the inappropriate content detection."""
        flagged = analyze_for_inappropriate_content(
            "Don't send nudes or talk about drugs.")
        self.assertGreater(flagged['score'], 0)
        for expected in (
            "Detected potential inappropriate content keyword: 'send nudes'",
            "Detected potential inappropriate content keyword: 'drugs'",
        ):
            self.assertIn(expected, flagged['indicators_found'])

        clean = analyze_for_inappropriate_content(
            "This is a perfectly normal conversation.")
        self.assertEqual(clean['score'], 0)
        self.assertEqual(clean['indicators_found'], [])

    def test_privacy_risks(self):
        """Test the privacy risk detection."""
        flagged = analyze_for_privacy_risks(
            "My address is 123 Main St and my phone number is 555-1234.")
        self.assertGreater(flagged['score'], 0)
        for expected in (
            "Detected potential privacy risk keyword: 'my address is'",
            "Detected potential privacy risk keyword: 'my phone number is'",
        ):
            self.assertIn(expected, flagged['indicators_found'])

        clean = analyze_for_privacy_risks("I like to talk about my hobbies.")
        self.assertEqual(clean['score'], 0)
        self.assertEqual(clean['indicators_found'], [])

    def test_empty_input(self):
        """Test empty input for all analysis types."""
        # Every analyzer must return a zero score and no indicators for "".
        for analyze in (analyze_for_cyberbullying,
                        analyze_for_inappropriate_content,
                        analyze_for_privacy_risks):
            outcome = analyze("")
            self.assertEqual(outcome['score'], 0)
            self.assertEqual(outcome['indicators_found'], [])
68+
69+
# Allow running this test module directly (e.g. `python test_teen_protection.py`).
if __name__ == '__main__':
    unittest.main()

0 commit comments

Comments
 (0)