Commit dae6260

feat: Add NLP techniques and GUI to Social Media Analyzer
This commit introduces two major enhancements to the Social Media Analyzer:

- Integration of NLP techniques for more sophisticated scam and fake news detection.
- A new web-based GUI built with React to replace the command-line interface.

Backend changes:
- Added `nltk` and `textblob` for NLP tasks.
- Integrated sentiment analysis into the scam detector to identify messages with strong negative sentiment.
- Enhanced the fake news detector with Named Entity Recognition (NER) to identify organizations and people mentioned in articles.
- Created a Flask API to expose the analyzer's functionality to the frontend.

Frontend changes:
- Created a new React application with components for:
  - Scam Analyzer
  - Fake News Analyzer
- The GUI allows users to analyze text and URLs in a user-friendly interface.
1 parent fbb601e commit dae6260
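The commit message above describes the new NLP-backed entry points; both can also be exercised directly from Python without the Flask layer. A minimal sketch, assuming the package is importable and the NLTK/TextBlob data has been downloaded (see the setup note under fake_news_detector.py below):

# Hedged sketch of the two analyzer entry points touched by this commit.
# Assumes social_media_analyzer is on the import path and the required NLTK corpora are installed.
from social_media_analyzer.scam_detector import analyze_text_for_scams
from social_media_analyzer.fake_news_detector import analyze_url_for_fake_news

scam_report = analyze_text_for_scams("URGENT: verify your account at http://example.com")
print(scam_report["score"], scam_report["indicators_found"])

news_report = analyze_url_for_fake_news("example.com/some-article")
print(news_report.get("named_entities"))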

8 files changed: 249 additions, 38 deletions

8 files changed

+249
-38
lines changed

social_media_analyzer/fake_news_detector.py

Lines changed: 28 additions & 1 deletion
@@ -1,6 +1,7 @@
 import re
 import urllib.request
 from urllib.parse import urlparse
+import nltk
 from .heuristics import (
     FAKE_NEWS_DOMAINS,
     SENSATIONALIST_KEYWORDS,
@@ -11,6 +12,12 @@
 def analyze_url_for_fake_news(url):
     """
     Analyzes a URL for indicators of fake news.
+
+    NOTE: This function requires the following NLTK data to be downloaded:
+    - 'punkt'
+    - 'averaged_perceptron_tagger'
+    - 'maxent_ne_chunker'
+    - 'words'
     """
     if not url.startswith(('http://', 'https://')):
         url = 'http://' + url
@@ -19,6 +26,10 @@ def analyze_url_for_fake_news(url):
 
     score = 0.0
     indicators_found = []
+    named_entities = {
+        "organizations": [],
+        "persons": [],
+    }
 
     # 1. Check against known fake news domains
     if domain in FAKE_NEWS_DOMAINS:
@@ -51,6 +62,21 @@ def analyze_url_for_fake_news(url):
                     score += HEURISTIC_WEIGHTS.get("CLICKBAIT_PATTERN", 1.5)
                     indicators_found.append(f"Found clickbait pattern: '{pattern}'")
 
+            # 5. Named Entity Recognition
+            tokens = nltk.word_tokenize(text_content)
+            tagged = nltk.pos_tag(tokens)
+            entities = nltk.ne_chunk(tagged)
+
+            for entity in entities:
+                if isinstance(entity, nltk.Tree):
+                    entity_text = " ".join([word for word, tag in entity.leaves()])
+                    if entity.label() == 'ORGANIZATION':
+                        if entity_text not in named_entities["organizations"]:
+                            named_entities["organizations"].append(entity_text)
+                    elif entity.label() == 'PERSON':
+                        if entity_text not in named_entities["persons"]:
+                            named_entities["persons"].append(entity_text)
+
         else:
             return {"error": f"Failed to fetch URL: HTTP status code {response.status}"}
     except Exception as e:
@@ -59,5 +85,6 @@ def analyze_url_for_fake_news(url):
     return {
         "url": url,
         "score": round(score, 2),
-        "indicators_found": indicators_found
+        "indicators_found": indicators_found,
+        "named_entities": named_entities
     }
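The docstring NOTE above lists the NLTK data packages that the new NER step depends on. A minimal one-time setup sketch, using nltk's standard downloader with exactly the package names from that note:

# One-time download of the NLTK data required by the NER step above.
# Package names are taken verbatim from the docstring NOTE.
import nltk

for package in ("punkt", "averaged_perceptron_tagger", "maxent_ne_chunker", "words"):
    nltk.download(package)

Running this once (for example in a setup script) avoids a LookupError the first time analyze_url_for_fake_news reaches the nltk.word_tokenize / nltk.ne_chunk calls.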
Lines changed: 2 additions & 0 deletions
@@ -1 +1,3 @@
 requests
+nltk
+textblob

social_media_analyzer/scam_detector.py

Lines changed: 13 additions & 4 deletions
@@ -3,6 +3,7 @@
 import requests
 import os
 from urllib.parse import urlparse
+from textblob import TextBlob
 from .heuristics import (
     URGENCY_KEYWORDS,
     SENSITIVE_INFO_KEYWORDS,
@@ -127,7 +128,15 @@ def analyze_text_for_scams(text_content, platform=None, api_key=None):
     indicators_found = []
     urls_analyzed_details = []
 
-    # 1. Keyword-based checks
+    # 1. Sentiment Analysis
+    blob = TextBlob(text_content)
+    if blob.sentiment.polarity < -0.5:
+        message = "Strong negative sentiment detected in text."
+        if message not in indicators_found:
+            indicators_found.append(message)
+            score += HEURISTIC_WEIGHTS.get("NEGATIVE_SENTIMENT", 2.0)
+
+    # 2. Keyword-based checks
     keyword_checks = {
         "URGENCY": URGENCY_KEYWORDS,
         "SENSITIVE_INFO": SENSITIVE_INFO_KEYWORDS,
@@ -145,7 +154,7 @@ def analyze_text_for_scams(text_content, platform=None, api_key=None):
                 indicators_found.append(message)
                 score += HEURISTIC_WEIGHTS.get(category, 1.0)
 
-    # 2. Regex-based checks
+    # 3. Regex-based checks
     found_urls = URL_PATTERN.findall(text_content)
     for url_str in found_urls:
         is_susp, reason = is_url_suspicious(url_str, platform, api_key)
@@ -159,15 +168,15 @@ def analyze_text_for_scams(text_content, platform=None, api_key=None):
             indicators_found.append(f"Suspicious URL found: {url_str} (Reason: {reason})")
         urls_analyzed_details.append(url_analysis)
 
-    # 3. Financial Identifiers
+    # 4. Financial Identifiers
     for id_name, pattern in FINANCIAL_ADDRESS_PATTERNS.items():
         if pattern.search(text_content):
             message = f"Potential {id_name} identifier found."
             if message not in indicators_found:
                 indicators_found.append(message)
                 score += HEURISTIC_WEIGHTS.get(f"{id_name}_ADDRESS", 2.5)
 
-    # 4. Phone Numbers
+    # 5. Phone Numbers
     if PHONE_NUMBER_PATTERN.search(text_content):
         message = "Phone number detected in text."
         if message not in indicators_found:
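The new first check only fires when TextBlob's polarity score drops below -0.5, so mildly negative wording is not flagged. A small illustration of that threshold; polarity values are approximate and can shift between TextBlob versions:

# Illustrates the -0.5 polarity threshold used by the new sentiment check above.
# Exact polarity values depend on the TextBlob lexicon, so treat the numbers as approximate.
from textblob import TextBlob

samples = [
    "This is a terrible, awful, no good, very bad message.",
    "This is slightly inconvenient.",
    "This is a wonderful, amazing, great message.",
]
for sample in samples:
    polarity = TextBlob(sample).sentiment.polarity
    print(f"{polarity:+.2f}  flagged={polarity < -0.5}  {sample!r}")

The first sample mirrors the negative-sentiment unit test added in this commit, and the weight falls back to 2.0 unless a NEGATIVE_SENTIMENT entry exists in HEURISTIC_WEIGHTS.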
Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
+import unittest
+from .scam_detector import analyze_text_for_scams
+
+class TestScamDetector(unittest.TestCase):
+
+    def test_sentiment_analysis(self):
+        # Test case for negative sentiment
+        text_negative = "This is a terrible, awful, no good, very bad message."
+        result_negative = analyze_text_for_scams(text_negative)
+        self.assertIn("Strong negative sentiment detected in text.", [indicator for indicator in result_negative["indicators_found"]])
+
+        # Test case for positive sentiment
+        text_positive = "This is a wonderful, amazing, great message."
+        result_positive = analyze_text_for_scams(text_positive)
+        self.assertNotIn("Strong negative sentiment detected in text.", [indicator for indicator in result_positive["indicators_found"]])
+
+    def test_keyword_matching(self):
+        # Test case for urgency keyword
+        text_urgency = "URGENT: Your account has been compromised."
+        result_urgency = analyze_text_for_scams(text_urgency)
+        self.assertIn("Presence of 'Urgency' keyword: 'urgent'", [indicator for indicator in result_urgency["indicators_found"]])
+
+        # Test case for stemming
+        text_stemming = "I need you to verify your account immediately."
+        result_stemming = analyze_text_for_scams(text_stemming)
+        self.assertIn("Presence of 'Sensitive Info' keyword: 'verify your account'", [indicator for indicator in result_stemming["indicators_found"]])
+
+if __name__ == '__main__':
+    unittest.main()
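Because the test module imports scam_detector with a relative import, it has to be run with package-aware discovery from the repository root. A hedged runner sketch; the test file's name is not shown in this diff, so the test_*.py pattern is an assumption:

# Hypothetical test runner, executed from the repository root.
# Assumes the new test module follows the conventional test_*.py naming (its path is not shown above).
import unittest

suite = unittest.defaultTestLoader.discover(".", pattern="test_*.py")
unittest.TextTestRunner(verbosity=2).run(suite)

Note that test_keyword_matching asserts the exact indicator message format produced by the keyword checks in scam_detector.py, so any change to that wording will break these assertions.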

src/App.jsx

Lines changed: 14 additions & 17 deletions
@@ -1,27 +1,24 @@
+import React, { useState } from 'react';
 import './App.css';
+import ScamAnalyzer from './ScamAnalyzer';
+import FakeNewsAnalyzer from './FakeNewsAnalyzer';
 
 function App() {
+  const [view, setView] = useState('scam');
+
   return (
     <div className="App">
       <header className="App-header">
-        <img src="Octocat.png" className="App-logo" alt="logo" />
-        <p>
-          GitHub Codespaces <span className="heart">♥️</span> React
-        </p>
-        <p className="small">
-          Edit <code>src/App.jsx</code> and save to reload.
-        </p>
-        <p>
-          <a
-            className="App-link"
-            href="https://reactjs.org"
-            target="_blank"
-            rel="noopener noreferrer"
-          >
-            Learn React
-          </a>
-        </p>
+        <h1>Universal Security Analyzer</h1>
+        <nav>
+          <button onClick={() => setView('scam')}>Scam Analyzer</button>
+          <button onClick={() => setView('fake-news')}>Fake News Analyzer</button>
+        </nav>
       </header>
+      <main>
+        {view === 'scam' && <ScamAnalyzer />}
+        {view === 'fake-news' && <FakeNewsAnalyzer />}
+      </main>
     </div>
   );
 }

src/FakeNewsAnalyzer.jsx

Lines changed: 81 additions & 0 deletions
@@ -0,0 +1,81 @@
+import React, { useState } from 'react';
+
+function FakeNewsAnalyzer() {
+  const [url, setUrl] = useState('');
+  const [result, setResult] = useState(null);
+  const [loading, setLoading] = useState(false);
+
+  const handleAnalyze = () => {
+    setLoading(true);
+    fetch('/analyze/fake-news', {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify({ url }),
+    })
+      .then((res) => res.json())
+      .then((data) => {
+        setResult(data);
+        setLoading(false);
+      })
+      .catch((error) => {
+        console.error('Error:', error);
+        setLoading(false);
+      });
+  };
+
+  return (
+    <div>
+      <h2>Fake News Analyzer</h2>
+      <input
+        type="text"
+        value={url}
+        onChange={(e) => setUrl(e.target.value)}
+        placeholder="Enter a news URL to analyze..."
+        size="50"
+      />
+      <br />
+      <button onClick={handleAnalyze} disabled={loading}>
+        {loading ? 'Analyzing...' : 'Analyze'}
+      </button>
+      {result && (
+        <div>
+          <h3>Analysis Results</h3>
+          {result.error ? (
+            <p>Error: {result.error}</p>
+          ) : (
+            <>
+              <p>Score: {result.score}</p>
+              <h4>Indicators Found:</h4>
+              <ul>
+                {result.indicators_found.map((indicator, index) => (
+                  <li key={index}>{indicator}</li>
+                ))}
+              </ul>
+              {result.named_entities && (
+                <>
+                  <h4>Named Entities Found:</h4>
+                  <h5>Organizations:</h5>
+                  <ul>
+                    {result.named_entities.organizations.map((org, index) => (
+                      <li key={index}>{org}</li>
+                    ))}
+                  </ul>
+                  <h5>Persons:</h5>
+                  <ul>
+                    {result.named_entities.persons.map((person, index) => (
+                      <li key={index}>{person}</li>
+                    ))}
+                  </ul>
+                </>
+              )}
+            </>
+          )}
+        </div>
+      )}
+    </div>
+  );
+}
+
+export default FakeNewsAnalyzer;

src/ScamAnalyzer.jsx

Lines changed: 58 additions & 0 deletions
@@ -0,0 +1,58 @@
+import React, { useState } from 'react';
+
+function ScamAnalyzer() {
+  const [text, setText] = useState('');
+  const [result, setResult] = useState(null);
+  const [loading, setLoading] = useState(false);
+
+  const handleAnalyze = () => {
+    setLoading(true);
+    fetch('/analyze/scam', {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify({ text }),
+    })
+      .then((res) => res.json())
+      .then((data) => {
+        setResult(data);
+        setLoading(false);
+      })
+      .catch((error) => {
+        console.error('Error:', error);
+        setLoading(false);
+      });
+  };
+
+  return (
+    <div>
+      <h2>Scam Analyzer</h2>
+      <textarea
+        rows="10"
+        cols="50"
+        value={text}
+        onChange={(e) => setText(e.target.value)}
+        placeholder="Paste a message to analyze for scams..."
+      />
+      <br />
+      <button onClick={handleAnalyze} disabled={loading}>
+        {loading ? 'Analyzing...' : 'Analyze'}
+      </button>
+      {result && (
+        <div>
+          <h3>Analysis Results</h3>
+          <p>Score: {result.score}</p>
+          <h4>Indicators Found:</h4>
+          <ul>
+            {result.indicators_found.map((indicator, index) => (
+              <li key={index}>{indicator}</li>
+            ))}
+          </ul>
+        </div>
+      )}
+    </div>
+  );
+}
+
+export default ScamAnalyzer;

text_message_analyzer/app.py

Lines changed: 24 additions & 16 deletions
@@ -1,28 +1,36 @@
 from flask import Flask, request, jsonify
+from social_media_analyzer import scam_detector, fake_news_detector
+import os
 
 app = Flask(__name__)
 
-@app.route("/")
-def hello():
-    return "Hello, World!"
+def get_api_key():
+    """Gets the Google API key from environment variables."""
+    return os.environ.get("GOOGLE_API_KEY")
 
-@app.route('/analyze', methods=['POST'])
-def analyze():
+@app.route('/analyze/scam', methods=['POST'])
+def analyze_scam():
     data = request.get_json()
     if not data or 'text' not in data:
-        return jsonify({'error': 'Invalid input, "text" field is required.'}), 400
+        return jsonify({"error": "Missing 'text' in request body"}), 400
 
     text_to_analyze = data['text']
+    api_key = get_api_key()
 
-    # Placeholder analysis logic
-    is_suspicious = 'phishing' in text_to_analyze.lower()
+    result = scam_detector.analyze_text_for_scams(text_to_analyze, api_key=api_key)
+    return jsonify(result)
+
+@app.route('/analyze/fake-news', methods=['POST'])
+def analyze_fake_news():
+    data = request.get_json()
+    if not data or 'url' not in data:
+        return jsonify({"error": "Missing 'url' in request body"}), 400
+
+    url_to_analyze = data['url']
+
+    result = fake_news_detector.analyze_url_for_fake_news(url_to_analyze)
+    return jsonify(result)
 
-    return jsonify({
-        'text': text_to_analyze,
-        'analysis': {
-            'is_suspicious': is_suspicious
-        }
-    })
 
-if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=8080)
+if __name__ == '__main__':
+    app.run(debug=True)
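With the Flask app running, the two new routes accept JSON POST bodies. A client sketch using the requests library already listed in the requirements; the base URL assumes Flask's default development port (5000), since this commit removes the explicit host/port arguments:

# Hedged client sketch for the two endpoints added in this commit.
# BASE assumes Flask's default dev server address; adjust if the app is served elsewhere.
import requests

BASE = "http://127.0.0.1:5000"

scam = requests.post(f"{BASE}/analyze/scam", json={"text": "URGENT: verify your account now"})
report = scam.json()
print(report["score"], report["indicators_found"])

news = requests.post(f"{BASE}/analyze/fake-news", json={"url": "example.com/breaking-story"})
print(news.json())

Since the React components fetch relative paths such as /analyze/scam, the frontend dev server has to proxy those routes to this Flask app (or the built frontend must be served from the same origin) for the GUI to work end to end.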
