Commit dae6260

feat: Add NLP techniques and GUI to Social Media Analyzer
This commit introduces two major enhancements to the Social Media Analyzer:

- Integration of NLP techniques for more sophisticated scam and fake news detection.
- A new web-based GUI built with React to replace the command-line interface.

Backend changes:
- Added `nltk` and `textblob` for NLP tasks.
- Integrated sentiment analysis into the scam detector to identify messages with strong negative sentiment.
- Enhanced the fake news detector with Named Entity Recognition (NER) to identify organizations and people mentioned in articles.
- Created a Flask API to expose the analyzer's functionality to the frontend.

Frontend changes:
- Created a new React application with components for:
  - Scam Analyzer
  - Fake News Analyzer
- The GUI allows users to analyze text and URLs in a user-friendly interface.
1 parent fbb601e commit dae6260
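The commit message above describes the new NLP-backed entry points; both can also be exercised directly from Python without the Flask layer. A minimal sketch, assuming the package is importable and the NLTK/TextBlob data has been downloaded (see the setup note under fake_news_detector.py below):

# Hedged sketch of the two analyzer entry points touched by this commit.
# Assumes social_media_analyzer is on the import path and the required NLTK corpora are installed.
from social_media_analyzer.scam_detector import analyze_text_for_scams
from social_media_analyzer.fake_news_detector import analyze_url_for_fake_news

scam_report = analyze_text_for_scams("URGENT: verify your account at http://example.com")
print(scam_report["score"], scam_report["indicators_found"])

news_report = analyze_url_for_fake_news("example.com/some-article")
print(news_report.get("named_entities"))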

8 files changed: 249 additions, 38 deletions

8 files changed

+249
-38
lines changed

social_media_analyzer/fake_news_detector.py

Lines changed: 28 additions & 1 deletion
@@ -1,6 +1,7 @@
 import re
 import urllib.request
 from urllib.parse import urlparse
+import nltk
 from .heuristics import (
     FAKE_NEWS_DOMAINS,
     SENSATIONALIST_KEYWORDS,
@@ -11,6 +12,12 @@
 def analyze_url_for_fake_news(url):
     """
     Analyzes a URL for indicators of fake news.
+
+    NOTE: This function requires the following NLTK data to be downloaded:
+    - 'punkt'
+    - 'averaged_perceptron_tagger'
+    - 'maxent_ne_chunker'
+    - 'words'
     """
     if not url.startswith(('http://', 'https://')):
         url = 'http://' + url
@@ -19,6 +26,10 @@ def analyze_url_for_fake_news(url):
 
     score = 0.0
     indicators_found = []
+    named_entities = {
+        "organizations": [],
+        "persons": [],
+    }
 
     # 1. Check against known fake news domains
     if domain in FAKE_NEWS_DOMAINS:
@@ -51,6 +62,21 @@ def analyze_url_for_fake_news(url):
                     score += HEURISTIC_WEIGHTS.get("CLICKBAIT_PATTERN", 1.5)
                     indicators_found.append(f"Found clickbait pattern: '{pattern}'")
 
+            # 5. Named Entity Recognition
+            tokens = nltk.word_tokenize(text_content)
+            tagged = nltk.pos_tag(tokens)
+            entities = nltk.ne_chunk(tagged)
+
+            for entity in entities:
+                if isinstance(entity, nltk.Tree):
+                    entity_text = " ".join([word for word, tag in entity.leaves()])
+                    if entity.label() == 'ORGANIZATION':
+                        if entity_text not in named_entities["organizations"]:
+                            named_entities["organizations"].append(entity_text)
+                    elif entity.label() == 'PERSON':
+                        if entity_text not in named_entities["persons"]:
+                            named_entities["persons"].append(entity_text)
+
         else:
             return {"error": f"Failed to fetch URL: HTTP status code {response.status}"}
     except Exception as e:
@@ -59,5 +85,6 @@ def analyze_url_for_fake_news(url):
     return {
         "url": url,
         "score": round(score, 2),
-        "indicators_found": indicators_found
+        "indicators_found": indicators_found,
+        "named_entities": named_entities
     }
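The docstring NOTE above lists the NLTK data packages that the new NER step depends on. A minimal one-time setup sketch, using nltk's standard downloader with exactly the package names from that note:

# One-time download of the NLTK data required by the NER step above.
# Package names are taken verbatim from the docstring NOTE.
import nltk

for package in ("punkt", "averaged_perceptron_tagger", "maxent_ne_chunker", "words"):
    nltk.download(package)

Running this once (for example in a setup script) avoids a LookupError the first time analyze_url_for_fake_news reaches the nltk.word_tokenize / nltk.ne_chunk calls.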
Lines changed: 2 additions & 0 deletions
@@ -1 +1,3 @@
 requests
+nltk
+textblob

social_media_analyzer/scam_detector.py

Lines changed: 13 additions & 4 deletions
@@ -3,6 +3,7 @@
 import requests
 import os
 from urllib.parse import urlparse
+from textblob import TextBlob
 from .heuristics import (
     URGENCY_KEYWORDS,
     SENSITIVE_INFO_KEYWORDS,
@@ -127,7 +128,15 @@ def analyze_text_for_scams(text_content, platform=None, api_key=None):
     indicators_found = []
     urls_analyzed_details = []
 
-    # 1. Keyword-based checks
+    # 1. Sentiment Analysis
+    blob = TextBlob(text_content)
+    if blob.sentiment.polarity < -0.5:
+        message = "Strong negative sentiment detected in text."
+        if message not in indicators_found:
+            indicators_found.append(message)
+            score += HEURISTIC_WEIGHTS.get("NEGATIVE_SENTIMENT", 2.0)
+
+    # 2. Keyword-based checks
     keyword_checks = {
         "URGENCY": URGENCY_KEYWORDS,
         "SENSITIVE_INFO": SENSITIVE_INFO_KEYWORDS,
@@ -145,7 +154,7 @@ def analyze_text_for_scams(text_content, platform=None, api_key=None):
                 indicators_found.append(message)
                 score += HEURISTIC_WEIGHTS.get(category, 1.0)
 
-    # 2. Regex-based checks
+    # 3. Regex-based checks
     found_urls = URL_PATTERN.findall(text_content)
     for url_str in found_urls:
         is_susp, reason = is_url_suspicious(url_str, platform, api_key)
@@ -159,15 +168,15 @@ def analyze_text_for_scams(text_content, platform=None, api_key=None):
             indicators_found.append(f"Suspicious URL found: {url_str} (Reason: {reason})")
         urls_analyzed_details.append(url_analysis)
 
-    # 3. Financial Identifiers
+    # 4. Financial Identifiers
     for id_name, pattern in FINANCIAL_ADDRESS_PATTERNS.items():
         if pattern.search(text_content):
             message = f"Potential {id_name} identifier found."
             if message not in indicators_found:
                 indicators_found.append(message)
                 score += HEURISTIC_WEIGHTS.get(f"{id_name}_ADDRESS", 2.5)
 
-    # 4. Phone Numbers
+    # 5. Phone Numbers
     if PHONE_NUMBER_PATTERN.search(text_content):
         message = "Phone number detected in text."
         if message not in indicators_found:
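The new first check only fires when TextBlob's polarity score drops below -0.5, so mildly negative wording is not flagged. A small illustration of that threshold; polarity values are approximate and can shift between TextBlob versions:

# Illustrates the -0.5 polarity threshold used by the new sentiment check above.
# Exact polarity values depend on the TextBlob lexicon, so treat the numbers as approximate.
from textblob import TextBlob

samples = [
    "This is a terrible, awful, no good, very bad message.",
    "This is slightly inconvenient.",
    "This is a wonderful, amazing, great message.",
]
for sample in samples:
    polarity = TextBlob(sample).sentiment.polarity
    print(f"{polarity:+.2f}  flagged={polarity < -0.5}  {sample!r}")

The first sample mirrors the negative-sentiment unit test added in this commit, and the weight falls back to 2.0 unless a NEGATIVE_SENTIMENT entry exists in HEURISTIC_WEIGHTS.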
Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
+import unittest
+from .scam_detector import analyze_text_for_scams
+
+class TestScamDetector(unittest.TestCase):
+
+    def test_sentiment_analysis(self):
+        # Test case for negative sentiment
+        text_negative = "This is a terrible, awful, no good, very bad message."
+        result_negative = analyze_text_for_scams(text_negative)
+        self.assertIn("Strong negative sentiment detected in text.", [indicator for indicator in result_negative["indicators_found"]])
+
+        # Test case for positive sentiment
+        text_positive = "This is a wonderful, amazing, great message."
+        result_positive = analyze_text_for_scams(text_positive)
+        self.assertNotIn("Strong negative sentiment detected in text.", [indicator for indicator in result_positive["indicators_found"]])
+
+    def test_keyword_matching(self):
+        # Test case for urgency keyword
+        text_urgency = "URGENT: Your account has been compromised."
+        result_urgency = analyze_text_for_scams(text_urgency)
+        self.assertIn("Presence of 'Urgency' keyword: 'urgent'", [indicator for indicator in result_urgency["indicators_found"]])
+
+        # Test case for stemming
+        text_stemming = "I need you to verify your account immediately."
+        result_stemming = analyze_text_for_scams(text_stemming)
+        self.assertIn("Presence of 'Sensitive Info' keyword: 'verify your account'", [indicator for indicator in result_stemming["indicators_found"]])
+
+if __name__ == '__main__':
+    unittest.main()
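Because the test module imports scam_detector with a relative import, it has to be run with package-aware discovery from the repository root. A hedged runner sketch; the test file's name is not shown in this diff, so the test_*.py pattern is an assumption:

# Hypothetical test runner, executed from the repository root.
# Assumes the new test module follows the conventional test_*.py naming (its path is not shown above).
import unittest

suite = unittest.defaultTestLoader.discover(".", pattern="test_*.py")
unittest.TextTestRunner(verbosity=2).run(suite)

Note that test_keyword_matching asserts the exact indicator message format produced by the keyword checks in scam_detector.py, so any change to that wording will break these assertions.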

src/App.jsx

Lines changed: 14 additions & 17 deletions
@@ -1,27 +1,24 @@
+import React, { useState } from 'react';
 import './App.css';
+import ScamAnalyzer from './ScamAnalyzer';
+import FakeNewsAnalyzer from './FakeNewsAnalyzer';
 
 function App() {
+  const [view, setView] = useState('scam');
+
   return (
     <div className="App">
       <header className="App-header">
-        <img src="Octocat.png" className="App-logo" alt="logo" />
-        <p>
-          GitHub Codespaces <span className="heart">♥️</span> React
-        </p>
-        <p className="small">
-          Edit <code>src/App.jsx</code> and save to reload.
-        </p>
-        <p>
-          <a
-            className="App-link"
-            href="https://reactjs.org"
-            target="_blank"
-            rel="noopener noreferrer"
-          >
-            Learn React
-          </a>
-        </p>
+        <h1>Universal Security Analyzer</h1>
+        <nav>
+          <button onClick={() => setView('scam')}>Scam Analyzer</button>
+          <button onClick={() => setView('fake-news')}>Fake News Analyzer</button>
+        </nav>
       </header>
+      <main>
+        {view === 'scam' && <ScamAnalyzer />}
+        {view === 'fake-news' && <FakeNewsAnalyzer />}
+      </main>
     </div>
   );
 }

src/FakeNewsAnalyzer.jsx

Lines changed: 81 additions & 0 deletions
@@ -0,0 +1,81 @@
+import React, { useState } from 'react';
+
+function FakeNewsAnalyzer() {
+  const [url, setUrl] = useState('');
+  const [result, setResult] = useState(null);
+  const [loading, setLoading] = useState(false);
+
+  const handleAnalyze = () => {
+    setLoading(true);
+    fetch('/analyze/fake-news', {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify({ url }),
+    })
+      .then((res) => res.json())
+      .then((data) => {
+        setResult(data);
+        setLoading(false);
+      })
+      .catch((error) => {
+        console.error('Error:', error);
+        setLoading(false);
+      });
+  };
+
+  return (
+    <div>
+      <h2>Fake News Analyzer</h2>
+      <input
+        type="text"
+        value={url}
+        onChange={(e) => setUrl(e.target.value)}
+        placeholder="Enter a news URL to analyze..."
+        size="50"
+      />
+      <br />
+      <button onClick={handleAnalyze} disabled={loading}>
+        {loading ? 'Analyzing...' : 'Analyze'}
+      </button>
+      {result && (
+        <div>
+          <h3>Analysis Results</h3>
+          {result.error ? (
+            <p>Error: {result.error}</p>
+          ) : (
+            <>
+              <p>Score: {result.score}</p>
+              <h4>Indicators Found:</h4>
+              <ul>
+                {result.indicators_found.map((indicator, index) => (
+                  <li key={index}>{indicator}</li>
+                ))}
+              </ul>
+              {result.named_entities && (
+                <>
+                  <h4>Named Entities Found:</h4>
+                  <h5>Organizations:</h5>
+                  <ul>
+                    {result.named_entities.organizations.map((org, index) => (
+                      <li key={index}>{org}</li>
+                    ))}
+                  </ul>
+                  <h5>Persons:</h5>
+                  <ul>
+                    {result.named_entities.persons.map((person, index) => (
+                      <li key={index}>{person}</li>
+                    ))}
+                  </ul>
+                </>
+              )}
+            </>
+          )}
+        </div>
+      )}
+    </div>
+  );
+}
+
+export default FakeNewsAnalyzer;

src/ScamAnalyzer.jsx

Lines changed: 58 additions & 0 deletions
@@ -0,0 +1,58 @@
+import React, { useState } from 'react';
+
+function ScamAnalyzer() {
+  const [text, setText] = useState('');
+  const [result, setResult] = useState(null);
+  const [loading, setLoading] = useState(false);
+
+  const handleAnalyze = () => {
+    setLoading(true);
+    fetch('/analyze/scam', {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify({ text }),
+    })
+      .then((res) => res.json())
+      .then((data) => {
+        setResult(data);
+        setLoading(false);
+      })
+      .catch((error) => {
+        console.error('Error:', error);
+        setLoading(false);
+      });
+  };
+
+  return (
+    <div>
+      <h2>Scam Analyzer</h2>
+      <textarea
+        rows="10"
+        cols="50"
+        value={text}
+        onChange={(e) => setText(e.target.value)}
+        placeholder="Paste a message to analyze for scams..."
+      />
+      <br />
+      <button onClick={handleAnalyze} disabled={loading}>
+        {loading ? 'Analyzing...' : 'Analyze'}
+      </button>
+      {result && (
+        <div>
+          <h3>Analysis Results</h3>
+          <p>Score: {result.score}</p>
+          <h4>Indicators Found:</h4>
+          <ul>
+            {result.indicators_found.map((indicator, index) => (
+              <li key={index}>{indicator}</li>
+            ))}
+          </ul>
+        </div>
+      )}
+    </div>
+  );
+}
+
+export default ScamAnalyzer;

text_message_analyzer/app.py

Lines changed: 24 additions & 16 deletions
@@ -1,28 +1,36 @@
 from flask import Flask, request, jsonify
+from social_media_analyzer import scam_detector, fake_news_detector
+import os
 
 app = Flask(__name__)
 
-@app.route("/")
-def hello():
-    return "Hello, World!"
+def get_api_key():
+    """Gets the Google API key from environment variables."""
+    return os.environ.get("GOOGLE_API_KEY")
 
-@app.route('/analyze', methods=['POST'])
-def analyze():
+@app.route('/analyze/scam', methods=['POST'])
+def analyze_scam():
     data = request.get_json()
     if not data or 'text' not in data:
-        return jsonify({'error': 'Invalid input, "text" field is required.'}), 400
+        return jsonify({"error": "Missing 'text' in request body"}), 400
 
     text_to_analyze = data['text']
+    api_key = get_api_key()
 
-    # Placeholder analysis logic
-    is_suspicious = 'phishing' in text_to_analyze.lower()
+    result = scam_detector.analyze_text_for_scams(text_to_analyze, api_key=api_key)
+    return jsonify(result)
+
+@app.route('/analyze/fake-news', methods=['POST'])
+def analyze_fake_news():
+    data = request.get_json()
+    if not data or 'url' not in data:
+        return jsonify({"error": "Missing 'url' in request body"}), 400
+
+    url_to_analyze = data['url']
+
+    result = fake_news_detector.analyze_url_for_fake_news(url_to_analyze)
+    return jsonify(result)
 
-    return jsonify({
-        'text': text_to_analyze,
-        'analysis': {
-            'is_suspicious': is_suspicious
-        }
-    })
 
-if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=8080)
+if __name__ == '__main__':
+    app.run(debug=True)
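With the Flask app running, the two new routes accept JSON POST bodies. A client sketch using the requests library already listed in the requirements; the base URL assumes Flask's default development port (5000), since this commit removes the explicit host/port arguments:

# Hedged client sketch for the two endpoints added in this commit.
# BASE assumes Flask's default dev server address; adjust if the app is served elsewhere.
import requests

BASE = "http://127.0.0.1:5000"

scam = requests.post(f"{BASE}/analyze/scam", json={"text": "URGENT: verify your account now"})
report = scam.json()
print(report["score"], report["indicators_found"])

news = requests.post(f"{BASE}/analyze/fake-news", json={"url": "example.com/breaking-story"})
print(news.json())

Since the React components fetch relative paths such as /analyze/scam, the frontend dev server has to proxy those routes to this Flask app (or the built frontend must be served from the same origin) for the GUI to work end to end.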
