-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathContentBasedFiltering.py
More file actions
29 lines (26 loc) · 1.49 KB
/
ContentBasedFiltering.py
File metadata and controls
29 lines (26 loc) · 1.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer, normalize
import faiss
class ContentBasedFiltering:
    """Recommend games whose metadata is most similar to a given game.

    Each row of ``games`` is turned into a feature vector built from its
    genres, categories, developer and publisher. The vectors are
    L2-normalized and stored in a FAISS inner-product index, so the search
    score between two rows is their cosine similarity.
    """

    def __init__(self, games):
        """Store ``games`` and build the similarity index.

        Args:
            games: DataFrame with at least the columns ``name``, ``appid``,
                ``genres``, ``categories``, ``developer`` and ``publisher``.
                It is kept by reference and gains the helper columns
                ``genres_list``, ``developer_encoded`` and
                ``publisher_encoded``.
        """
        self.games = games
        self.index = None
        self._prepare_data()

    @staticmethod
    def _split_labels(value):
        """Split a ';'-delimited string into labels; non-strings give []."""
        return value.split(";") if isinstance(value, str) else []

    def _prepare_data(self):
        """Encode the game metadata and load it into ``self.index``."""
        self.games["genres_list"] = self.games["genres"].apply(self._split_labels)
        # NOTE(review): categories are assumed to use the same ';' delimiter
        # as genres -- confirm against the dataset.
        categories_list = self.games["categories"].apply(self._split_labels)

        # Binarize the *lists* of labels. Fitting on the raw strings would
        # treat every single character as its own label.
        genres_encoded = MultiLabelBinarizer().fit_transform(
            self.games["genres_list"]
        )
        categories_encoded = MultiLabelBinarizer().fit_transform(categories_list)

        # NOTE(review): these are ordinal category codes used as raw feature
        # magnitudes, so they can dominate the normalized vector. One-hot or
        # hashed encodings may be a better fit -- left unchanged here.
        self.games["developer_encoded"] = (
            self.games["developer"].astype("category").cat.codes
        )
        self.games["publisher_encoded"] = (
            self.games["publisher"].astype("category").cat.codes
        )

        game_features = np.hstack(
            [
                genres_encoded,
                categories_encoded,
                self.games[["developer_encoded", "publisher_encoded"]].values,
            ]
        )
        # FAISS expects a C-contiguous float32 matrix.
        game_features = np.ascontiguousarray(game_features, dtype=np.float32)

        # In-place normalization; with unit vectors the inner product used by
        # IndexFlatIP is the cosine similarity.
        faiss.normalize_L2(game_features)
        self.index = faiss.IndexFlatIP(game_features.shape[1])
        self.index.add(game_features)

    def find_similar_games(self, game_name, top_n=5):
        """Return up to ``top_n`` games most similar to ``game_name``.

        Args:
            game_name: Exact value of the ``name`` column to look up. If
                several rows share the name, the first one is used.
            top_n: Maximum number of neighbours to return.

        Returns:
            A list of ``[name, appid]`` pairs ordered from most to least
            similar, never containing the queried row itself. It is shorter
            than ``top_n`` when the index holds fewer other games.

        Raises:
            IndexError: If no row is named ``game_name``.
        """
        # Work with row *positions*: FAISS ids and ``iloc`` are positional,
        # whereas the DataFrame index labels may be arbitrary.
        positions = np.flatnonzero((self.games["name"] == game_name).to_numpy())
        if positions.size == 0:
            raise IndexError(f"no game named {game_name!r}")
        game_pos = int(positions[0])

        if top_n <= 0:
            return []

        # Ask for one extra hit to make room for the query row itself, but
        # never more than the index holds (FAISS pads missing hits with -1).
        k = min(top_n + 1, self.index.ntotal)
        query = self.index.reconstruct(game_pos).reshape(1, -1)
        _, indices = self.index.search(query, k)

        # Drop the query row wherever it ranks (ties with identical vectors
        # mean it is not guaranteed to be first) and any -1 padding.
        neighbours = [
            int(i) for i in indices[0] if i >= 0 and i != game_pos
        ][:top_n]
        return self.games.iloc[neighbours][["name", "appid"]].values.tolist()