-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtest_sentiment_classifier.py
More file actions
77 lines (63 loc) · 2.91 KB
/
test_sentiment_classifier.py
File metadata and controls
77 lines (63 loc) · 2.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/usr/bin/env python
"""Test sentiment classifier predictions"""
import pandas as pd
import joblib
import sys
# Load the saved classifier bundle and report what it contains.
# The bundle is read like a mapping: 'model', 'label_encoder' and
# 'feature_names' are required keys, while 'class_distribution' is optional
# and is reported as 'N/A' when missing.
print("Loading sentiment classifier...")
classifier_path = "models/predictive/sentiment_classifier.joblib"
classifier_data = joblib.load(classifier_path)

print("\nClassifier details:")
# Each value is looked up immediately before its own print so that a missing
# key still lets the earlier detail lines reach the console.
model_type = type(classifier_data["model"])
print(f" Model type: {model_type}")
label_classes = classifier_data["label_encoder"].classes_
print(f" Classes: {label_classes}")
feature_count = len(classifier_data["feature_names"])
print(f" Feature names: {feature_count} features")
training_distribution = classifier_data.get("class_distribution", "N/A")
print(f" Class distribution during training: {training_distribution}")
# Read the articles CSV, then hand it to the classifier to build the frame
# used by the rest of the script (presumably one row per day - confirm
# against SentimentClassifier.prepare_features_from_articles).
print("\nLoading articles...")
articles_csv = "data/processed/articles_with_sentiment.csv"
articles_df = pd.read_csv(articles_csv)
article_total = len(articles_df)
print(f" Total articles: {article_total}")

# The project class is imported right before its first use.
from models.predictive.sentiment_classifier import SentimentClassifier

print("\nPreparing daily features...")
# Loads from the same path that was already opened with joblib above.
classifier = SentimentClassifier.load(classifier_path)
daily_df = classifier.prepare_features_from_articles(articles_df)
day_total = len(daily_df)
print(f" Daily data: {day_total} days")
# Print one summary line for each of the five most recent rows of daily_df.
# Columns read: date, dominant_sentiment, sentiment_score, positive_pct,
# neutral_pct, negative_pct. 'date' must support strftime and
# 'dominant_sentiment' must be a string (it is padded with the ':8s' spec).
print("\nLast 5 days actual sentiment:")
recent_days = daily_df.tail(5)
for _, day in recent_days.iterrows():
    day_label = day["date"].strftime("%Y-%m-%d")
    print(
        f" {day_label}: {day['dominant_sentiment']:8s} "
        f"(score={day['sentiment_score']:+.3f}) "
        f"P={day['positive_pct']:.0%} "
        f"N={day['neutral_pct']:.0%} "
        f"Neg={day['negative_pct']:.0%}"
    )
# Ask the classifier for the next 7 days and print each prediction.
# Each prediction is read as a mapping with 'date' (must support strftime),
# 'predicted_sentiment' (a string label), 'confidence' (a number, shown as a
# percentage) and 'probabilities' (a mapping from label to probability).
print("\nPredicting next 7 days...")
predictions = classifier.predict_next_days(daily_df, days=7)

# Built once instead of on every loop iteration. A label that is not in this
# table gets a placeholder icon rather than raising KeyError, in line with the
# tolerant probs.get(..., 0) lookups used for the probability line.
SENTIMENT_EMOJI = {'positive': '😊', 'neutral': '😐', 'negative': '😞'}
UNKNOWN_SENTIMENT_EMOJI = '❓'

print("\nPredictions:")
for pred in predictions:
    date_str = pred['date'].strftime('%Y-%m-%d (%A)')
    sentiment = pred['predicted_sentiment']
    conf = pred['confidence']
    probs = pred['probabilities']
    emoji = SENTIMENT_EMOJI.get(sentiment, UNKNOWN_SENTIMENT_EMOJI)
    print(f"\n{date_str}:")
    print(f" {emoji} {sentiment.upper()} ({conf:.0%} confidence)")
    # Classes absent from the probability mapping are shown as 0%.
    print(
        f" Probs: P={probs.get('positive', 0):.0%}"
        f" | N={probs.get('neutral', 0):.0%}"
        f" | Neg={probs.get('negative', 0):.0%}"
    )
# Summarize the predictions and warn when every one of them is the same class.
print("\n" + "=" * 60)
print("Analysis:")
print("=" * 60)

# Count predicted classes
from collections import Counter

pred_classes = [p['predicted_sentiment'] for p in predictions]
pred_counts = Counter(pred_classes)
print(f"Predicted class distribution: {dict(pred_counts)}")

# Check if model is just predicting majority class
if not pred_counts:
    # max() raises ValueError on an empty Counter, so an empty prediction
    # list is reported explicitly instead of crashing the script.
    majority_class = None
    print("\nNo predictions were returned - skipping majority-class check.")
else:
    majority_class = max(pred_counts, key=pred_counts.get)
    if pred_counts[majority_class] == len(predictions):
        print(f"\n⚠️ WARNING: Model predicts only '{majority_class}' - possible issues:")
        print(" 1. Model may be overfitting to majority class")
        print(" 2. Features may not have enough variation")
        print(" 3. Class imbalance during training")
        # The saved bundle is read earlier with a 'N/A' fallback for its
        # class distribution, so the same fallback is used here in case the
        # loaded classifier does not expose the attribute.
        trained_on = getattr(classifier, 'class_distribution', 'N/A')
        print(f" 4. Training class distribution was: {trained_on}")