# Upper bound on how much of a page is downloaded and scanned, so a huge or
# endless response cannot exhaust memory.
_MAX_RESPONSE_BYTES = 2 * 1024 * 1024

# <script>/<style> elements carry code, not article text; drop them entirely.
_SCRIPT_STYLE_RE = re.compile(
    r'<(script|style)\b[^>]*>.*?</\1\s*>', re.IGNORECASE | re.DOTALL
)
_TAG_RE = re.compile(r'<[^>]+>')
_WHITESPACE_RE = re.compile(r'\s+')


def _extract_hostname(url):
    """Return the lower-cased host of *url* without port or credentials.

    Returns an empty string when the URL has no host or cannot be parsed.
    """
    try:
        hostname = urlparse(url).hostname
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. unbalanced IPv6 brackets).
        return ""
    return (hostname or "").rstrip(".").lower()


def _matches_listed_domain(hostname, listed_domains):
    """Return True if *hostname* is a listed domain or a subdomain of one."""
    for listed in listed_domains:
        listed = listed.lower()
        # The leading dot keeps "notinfowars.com" from matching "infowars.com".
        if hostname == listed or hostname.endswith("." + listed):
            return True
    return False


def _html_to_text(html_content):
    """Reduce an HTML document to lower-cased, whitespace-normalised text."""
    import html  # local import: not among this module's top-level imports

    without_code = _SCRIPT_STYLE_RE.sub(" ", html_content)
    # Tags become spaces so that words in adjacent elements do not fuse.
    without_tags = _TAG_RE.sub(" ", without_code)
    # Decode entities (&#39;, &nbsp;, ...) and fold the typographic apostrophe
    # so phrases such as "you won't believe" can match.
    text = html.unescape(without_tags).replace("\u2019", "'")
    return _WHITESPACE_RE.sub(" ", text).lower()


def _contains_phrase(text, phrase):
    """Return True if *phrase* occurs in *text* as whole word(s)."""
    # Look-arounds instead of \b so phrases that start or end with
    # punctuation still work; prevents "cure" from matching "secure".
    pattern = r'(?<!\w)' + re.escape(phrase.lower()) + r'(?!\w)'
    return re.search(pattern, text) is not None


def analyze_url_for_fake_news(url):
    """
    Analyzes a URL for indicators of fake news.

    The host is first compared against FAKE_NEWS_DOMAINS (exact match or
    subdomain); a hit is reported immediately without fetching the page.
    Otherwise the page is downloaded and its visible text is scanned for
    SENSATIONALIST_KEYWORDS (whole-word matches) and CLICKBAIT_PATTERNS
    (regular expressions), each hit adding its weight from HEURISTIC_WEIGHTS.

    Args:
        url: The address to analyze. "http://" is prepended when no
            http/https scheme is present.

    Returns:
        On success, a dict with "url" (the possibly prefixed URL), "score"
        (rounded to two decimals) and "indicators_found" (a list of
        human-readable findings). On failure, a dict with the single key
        "error".
    """
    url = url.strip()
    # Compare case-insensitively so "HTTPS://..." is not prefixed a second time.
    if not url.lower().startswith(('http://', 'https://')):
        url = 'http://' + url

    domain = _extract_hostname(url)
    if not domain:
        return {"error": "An error occurred: could not determine a host name from the URL"}

    score = 0.0
    indicators_found = []

    # 1. Check against known fake news domains
    if _matches_listed_domain(domain, FAKE_NEWS_DOMAINS):
        score += HEURISTIC_WEIGHTS.get("KNOWN_FAKE_NEWS_DOMAIN", 5.0)
        indicators_found.append(f"Domain '{domain}' is a known source of fake news.")
        return {
            "url": url,
            "score": round(score, 2),
            "indicators_found": indicators_found
        }

    # 2. Fetch the page. Only the network round-trip sits inside the try, so
    #    mistakes in the scoring code below are not disguised as fetch errors.
    try:
        request = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        with urllib.request.urlopen(request, timeout=10) as response:
            if response.status != 200:
                return {"error": f"Failed to fetch URL: HTTP status code {response.status}"}
            raw_body = response.read(_MAX_RESPONSE_BYTES)
    except Exception as e:
        # Deliberate boundary: any fetch failure (DNS, TLS, timeout, HTTP
        # error, invalid URL) is reported to the caller as an error result.
        return {"error": f"An error occurred: {e}"}

    text_content = _html_to_text(raw_body.decode('utf-8', errors='ignore'))

    # 3. Analyze text for sensationalist keywords
    for keyword in SENSATIONALIST_KEYWORDS:
        if _contains_phrase(text_content, keyword):
            score += HEURISTIC_WEIGHTS.get("SENSATIONALIST_KEYWORD", 1.0)
            indicators_found.append(f"Found sensationalist keyword: '{keyword}'")

    # 4. Analyze text for clickbait patterns
    for pattern in CLICKBAIT_PATTERNS:
        if re.search(pattern, text_content, re.IGNORECASE):
            score += HEURISTIC_WEIGHTS.get("CLICKBAIT_PATTERN", 1.5)
            indicators_found.append(f"Found clickbait pattern: '{pattern}'")

    return {
        "url": url,
        "score": round(score, 2),
        "indicators_found": indicators_found
    }
def analyze_news_url():
    """Prompt for a news article URL and print the fake-news analysis of it.

    Reads the URL from standard input, passes it to
    fake_news_detector.analyze_url_for_fake_news and prints either the error,
    a "nothing found" notice, or the score followed by every indicator.
    """
    url_to_check = input(
        "Please enter the full URL of the news article you want to analyze: "
    ).strip()
    if url_to_check == "":
        print("No URL entered.")
        return

    print("\n--- Analyzing News URL for Fake News ---")
    report = fake_news_detector.analyze_url_for_fake_news(url_to_check)

    # Failure reported by the detector: show the message and stop.
    if "error" in report:
        print(f"Could not analyze news URL: {report['error']}")
        return

    # Missing or empty indicator list means nothing suspicious was detected.
    if not report.get("indicators_found"):
        print("No specific fake news indicators were found.")
        return

    print(f"Score: {report['score']} (Higher is more suspicious)")
    print("Indicators Found:")
    for finding in report['indicators_found']:
        print(f"- {finding}")
class TestFakeNewsDetector(unittest.TestCase):
    """Unit tests for analyze_url_for_fake_news.

    urllib.request.urlopen is patched in every test so that no test can
    reach the network, even if the code under test regresses.
    """

    @staticmethod
    def _stub_response(mock_urlopen, body, status=200):
        """Make the patched urlopen yield a response with *body* and *status*."""
        response = Mock()
        response.status = status
        response.read.return_value = body
        # urlopen is used as a context manager, so the stub is what
        # __enter__ hands back.
        mock_urlopen.return_value.__enter__.return_value = response
        return response

    @patch('urllib.request.urlopen')
    def test_fake_news_url(self, mock_urlopen):
        # A listed domain is reported from the domain check alone.
        url = "http://abcnews.com.co/news/breaking-news-report.html"
        result = analyze_url_for_fake_news(url)

        self.assertGreater(result["score"], 0)
        self.assertIn(
            "Domain 'abcnews.com.co' is a known source of fake news.",
            result["indicators_found"],
        )
        # The early return must not fetch the page.
        mock_urlopen.assert_not_called()

    @patch('urllib.request.urlopen')
    def test_legitimate_news_url(self, mock_urlopen):
        # Mock the response for a legitimate news URL
        self._stub_response(mock_urlopen, b'Real NewsThis is a real news story.')

        url = "https://www.bbc.com/news"
        result = analyze_url_for_fake_news(url)

        self.assertEqual(result["score"], 0)
        self.assertEqual(len(result["indicators_found"]), 0)
        # An unlisted domain has to be fetched exactly once.
        mock_urlopen.assert_called_once()

    @patch('urllib.request.urlopen')
    def test_fetch_failure_returns_error(self, mock_urlopen):
        # A network-level failure is reported as an error result, not raised.
        mock_urlopen.side_effect = OSError("connection refused")

        result = analyze_url_for_fake_news("https://www.bbc.com/news")

        self.assertIn("error", result)
        self.assertNotIn("score", result)

    @patch('urllib.request.urlopen')
    def test_non_200_status_returns_error(self, mock_urlopen):
        # A response that is not HTTP 200 is reported as an error result.
        self._stub_response(mock_urlopen, b'', status=204)

        result = analyze_url_for_fake_news("https://www.bbc.com/news")

        self.assertIn("error", result)
        self.assertNotIn("score", result)


if __name__ == '__main__':
    unittest.main()