-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsidequest_analysis.py
More file actions
114 lines (86 loc) · 3.51 KB
/
sidequest_analysis.py
File metadata and controls
114 lines (86 loc) · 3.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import pandas as pd
import matplotlib.pyplot as plt
from textblob import TextBlob
from collections import Counter
# Path of the scraped review CSV, resolved relative to the current working
# directory. The functions below read the 'url' and 'review' columns.
file_path = 'sidequest_review_scrape.csv'
# Loaded once at module level; every analysis step below works on this frame.
df = pd.read_csv(file_path)
#Extract ratings
def extract_ratings(df):
    """Return a mapping of game URL to its star rating.

    A row counts as a rating row when its 'review' text contains the marker
    "Rating :"; the first whitespace-separated token after the marker is
    parsed as a float.

    Args:
        df: DataFrame with 'url' and 'review' columns.

    Returns:
        dict mapping each URL seen in ``df`` to a float rating, or to ``None``
        when the game has no rating row or the rating could not be parsed.
    """
    marker = "Rating :"
    ratings = {}
    for url, review in zip(df['url'], df['review']):
        # Empty CSV cells are read as NaN (a float), so guard before using
        # the `in` operator on the cell.
        if isinstance(review, str) and marker in review:
            try:
                # Parse what follows the marker itself rather than what
                # follows the first colon, so a colon earlier in the text
                # cannot shift the parsed token.
                ratings[url] = float(review.partition(marker)[2].split()[0])
            except (ValueError, IndexError):
                # Non-numeric token, or nothing at all after the marker.
                ratings[url] = None
        else:
            # Register the game as unrated without clobbering a rating that
            # was already recorded for it.
            ratings.setdefault(url, None)
    return ratings
#Sentiment Analysis
def analyze_sentiment(text):
    """Label *text* by the sign of its TextBlob polarity score.

    Returns 'Positive' for a polarity above zero, 'Negative' for a polarity
    below zero, and 'Neutral' otherwise.
    """
    score = TextBlob(text).sentiment.polarity
    if score > 0:
        return 'Positive'
    if score < 0:
        return 'Negative'
    return 'Neutral'
def extract_sentiments(df):
    """Return a mapping of game URL to the sentiment labels of its reviews.

    Rows are grouped by 'url'. Within each group the first two rows are
    skipped, rows containing the "Rating :" marker are ignored, and every
    remaining text review is classified with ``analyze_sentiment``.

    Args:
        df: DataFrame with 'url' and 'review' columns.

    Returns:
        dict mapping each URL to a list of sentiment labels (possibly empty).
    """
    sentiments = {}
    for url, group in df.groupby('url'):
        # Skip title and rating.
        # NOTE(review): this assumes every game's first two rows are its
        # title and its rating; a game scraped without a rating row would
        # lose its first real review here - confirm against the scraper.
        game_reviews = group['review'].tolist()[2:]
        sentiments[url] = [
            analyze_sentiment(review)
            for review in game_reviews
            # Empty CSV cells are read as NaN (a float); they carry no text
            # to analyze and would make the `in` test raise TypeError.
            if isinstance(review, str) and "Rating :" not in review
        ]
    return sentiments
#Plot Ratings onto a Graph
def plot_ratings(ratings):
    """Print the number of unrated games and scatter-plot the rating counts.

    Args:
        ratings: dict mapping game URL to a rating, with ``None`` marking a
            game that has no usable rating.

    Each distinct rating becomes one point: x is the rating, y is the number
    of games holding that rating.
    """
    rated_values = [value for value in ratings.values() if value is not None]
    unrated_count = len(ratings) - len(rated_values)
    print(f"Number of unrated games: {unrated_count}")

    # Tally how many games share each distinct rating value.
    tally = Counter(rated_values)
    stars = list(tally)
    game_counts = [tally[star] for star in stars]

    plt.figure(figsize=(8, 5))
    plt.scatter(stars, game_counts, color='skyblue', alpha=0.7, edgecolors="w", s=100)
    plt.xlabel("Rating (Stars)")
    plt.ylabel("Number of Games")
    plt.title("Distribution of Game Ratings on Sidequest")
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Y ticks every 2 games, reaching at least the tallest point.
    tallest = max(game_counts, default=0)
    plt.yticks(range(0, tallest + 2, 2))
    plt.show()
def print_sentiment_counts(sentiments):
    """Print the total number of Positive, Negative and Neutral labels.

    Args:
        sentiments: dict mapping game URL to a list of sentiment labels.
    """
    tally = Counter()
    for labels in sentiments.values():
        tally.update(labels)
    # A Counter reports 0 for labels that never occurred.
    print(f"Positive: {tally['Positive']}")
    print(f"Negative: {tally['Negative']}")
    print(f"Neutral: {tally['Neutral']}")
# Add sentiment, title, or rating as a new column to the csv
def add_sentiment_column(df, output_file_path='sidequest_review_with_sentiment.csv'):
    """Add a 'Review Type' column to ``df`` and write the result to a CSV.

    Each row is labelled as follows:
      * "Game Title" - the first row of every run of rows sharing a 'url';
      * "<rating> stars" - a row containing the "Rating :" marker whose
        following token parses as a float;
      * "Rating not parsed" - a marker row whose rating cannot be parsed;
      * "No review text" - a row whose 'review' cell is not a string
        (for example an empty CSV cell read as NaN);
      * otherwise the label returned by ``analyze_sentiment``.

    Args:
        df: DataFrame with 'url' and 'review' columns. It is modified in
            place: the 'Review Type' column is added or overwritten.
        output_file_path: destination of the CSV, written without the index.
    """
    marker = "Rating :"

    def label_review_type(review, is_first_row):
        if is_first_row:
            return "Game Title"
        if not isinstance(review, str):
            # No text to inspect or analyze; the `in` test below would raise.
            return "No review text"
        if marker in review:
            try:
                # Parse what follows the marker itself, not the first colon.
                rating = float(review.partition(marker)[2].split()[0])
            except (ValueError, IndexError):
                # Non-numeric token, or nothing at all after the marker.
                return "Rating not parsed"
            return f"{rating} stars"
        return analyze_sentiment(review)

    # Walk the rows positionally and remember the previous URL, so the
    # "first row of a game" test does not depend on the frame having a
    # default 0..n-1 index.
    no_previous = object()
    previous_url = no_previous
    labels = []
    for url, review in zip(df['url'], df['review']):
        is_first_row = previous_url is no_previous or url != previous_url
        labels.append(label_review_type(review, is_first_row))
        previous_url = url
    df['Review Type'] = labels

    df.to_csv(output_file_path, index=False)
    print(f"New CSV created with added sentiment information at: {output_file_path}")
# Script driver: runs the whole analysis on the module-level frame `df`.
# Per-URL rating (None where no rating could be read).
ratings = extract_ratings(df)
# Per-URL list of sentiment labels for the review rows.
sentiments = extract_sentiments(df)
# Prints the unrated-game count and shows the rating scatter plot.
plot_ratings(ratings)
# Prints the Positive / Negative / Neutral totals.
print_sentiment_counts(sentiments)
# Adds the 'Review Type' column to `df` and writes the annotated CSV; kept
# last so the steps above work on the frame as it was loaded.
add_sentiment_column(df)