-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCollaborativeFiltering.py
More file actions
38 lines (34 loc) · 1.97 KB
/
CollaborativeFiltering.py
File metadata and controls
38 lines (34 loc) · 1.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from surprise import SVD, Dataset, Reader
from surprise.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
class CollaborativeFiltering:
    """Game recommender backed by a Surprise ``SVD`` model.

    On construction the user-behaviour log is pivoted, joined to the games
    catalogue, label-encoded and used to train the model; afterwards
    :meth:`recommend_games` ranks the games a user has no row for.

    Attributes set by the constructor:
        user_behavior: the input log; replaced by its pivoted form during
            data preparation.
        games: the games catalogue as passed in.
        model: the ``SVD`` instance that gets fitted.
        user_encoder / game_encoder: ``LabelEncoder`` objects fitted on the
            "user-id" and "game_title" columns respectively.
        df_long: frame with columns "user_id_encoded", "game_id_encoded",
            "play" that the model is trained on.
        trainset: the Surprise trainset the model was fitted with.
    """

    def __init__(self, user_behavior, games):
        """Store the inputs, then prepare the data and train immediately.

        Args:
            user_behavior: DataFrame in long form. The code reads the
                columns "user-id", "game_title", "action" and "value".
            games: DataFrame describing the catalogue. The code reads the
                columns "name" and "appid".
        """
        self.user_behavior = user_behavior
        self.games = games
        self.model = SVD()
        self.user_encoder = LabelEncoder()
        self.game_encoder = LabelEncoder()
        self.df_long = None
        self.trainset = None
        self._prepare_data()
        self._train_model()

    def _prepare_data(self):
        """Build ``self.df_long`` (encoded user, encoded game, play value)."""
        # One row per (user, game), one column per distinct "action" value;
        # missing combinations become 0. Note this overwrites the raw log
        # held in self.user_behavior.
        self.user_behavior = self.user_behavior.pivot_table(
            index=["user-id", "game_title"],
            columns="action",
            values="value",
            fill_value=0,
        ).reset_index()

        merged_data = self.user_behavior.merge(
            self.games, left_on="game_title", right_on="name", how="left"
        )
        # Rows whose title found no catalogue match have no appid/name
        # after the left merge; discard them.
        merged_data.dropna(subset=["appid", "name"], inplace=True)

        merged_data["user_id_encoded"] = self.user_encoder.fit_transform(
            merged_data["user-id"]
        )
        merged_data["game_id_encoded"] = self.game_encoder.fit_transform(
            merged_data["game_title"]
        )

        # Column order matters: Dataset.load_from_df is given this frame
        # as-is in _train_model.
        # NOTE(review): assumes "play" is one of the "action" values so the
        # pivot produced a "play" column - confirm against the input data.
        self.df_long = merged_data[["user_id_encoded", "game_id_encoded", "play"]]

    def _train_model(self):
        """Fit ``self.model`` on a random 80% split of ``self.df_long``."""
        play_values = self.df_long["play"]
        # The rating scale is taken from the observed range of "play".
        reader = Reader(rating_scale=(play_values.min(), play_values.max()))
        data = Dataset.load_from_df(self.df_long, reader)
        # Only the training part is kept; the 20% test part is discarded.
        self.trainset, _ = train_test_split(data, test_size=0.2)
        self.model.fit(self.trainset)

    def recommend_games(self, user_id, top_n=5):
        """Return the ``top_n`` highest-estimated games the user has no row for.

        Args:
            user_id: id of the user. It is compared directly against the
                "user_id_encoded" column and passed to ``model.predict``.
                NOTE(review): this means the *encoded* id is expected; a raw
                "user-id" value would match no history - confirm callers
                encode it with ``user_encoder`` first.
            top_n: maximum number of games to return.

        Returns:
            The result of ``game_encoder.inverse_transform`` applied to the
            selected encoded game ids, ordered by descending estimate.
        """
        user_rows = self.df_long[self.df_long["user_id_encoded"] == user_id]
        # A set gives O(1) membership checks; testing against the NumPy
        # array rescanned it for every candidate game.
        played = set(user_rows["game_id_encoded"].unique())

        all_games = self.df_long["game_id_encoded"].unique()
        games_to_predict = [game for game in all_games if game not in played]

        predictions = [self.model.predict(user_id, game) for game in games_to_predict]
        # list.sort is stable, so equal estimates keep catalogue order.
        predictions.sort(key=lambda pred: pred.est, reverse=True)

        top_games = [pred.iid for pred in predictions[:top_n]]
        return self.game_encoder.inverse_transform(top_games)