Skip to content

Commit 39d94c9

Browse files
Create santhipriya
1 parent e8d6fe2 commit 39d94c9

File tree

1 file changed

+97
-0
lines changed

1 file changed

+97
-0
lines changed

santhipriya

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from surprise import SVD, Dataset, Reader, accuracy
from surprise.model_selection import train_test_split

# Sample movie dataset: one row per movie, keyed by 'movieId'.
movies = pd.DataFrame({
    'movieId': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'title': ['Inception', 'Interstellar', 'The Dark Knight', 'Parasite', 'Joker',
              'The Shawshank Redemption', 'The Godfather', 'Pulp Fiction', 'Forrest Gump', 'The Lord of the Rings: The Return of the King'],
    'genre': ['Sci-Fi', 'Sci-Fi', 'Action', 'Thriller', 'Drama',
              'Drama', 'Crime', 'Crime', 'Drama', 'Fantasy']
})

# Sample user ratings: one row per (userId, movieId) pair, ratings are in 1..5.
ratings = pd.DataFrame({
    'userId': [1, 2, 1, 2, 3, 1, 3, 2, 1, 3, 2, 3],
    'movieId': [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3],  # Added more ratings for demonstration
    'rating': [5, 4, 4, 5, 3, 5, 4, 5, 4, 3, 4, 5]
})

# 1. Collaborative Filtering (Matrix Factorization)
# Reader, Dataset, train_test_split, SVD and accuracy are scikit-surprise names.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
# No random_state is passed, so the split (and therefore the RMSE) varies per run.
trainset, testset = train_test_split(data, test_size=0.25)  # Increased test size for better evaluation

# The model is fitted on the training split only; get_recommendations() uses it as-is.
model = SVD()
model.fit(trainset)

# Evaluate the model (optional)
predictions = model.test(testset)
# verbose=False: accuracy.rmse() prints "RMSE: ..." on its own by default, which
# would report the metric twice next to the line below.
print(f"Collaborative Filtering RMSE: {accuracy.rmse(predictions, verbose=False)}")


# 2. Content-Based Filtering
# Each movie is represented by the TF-IDF vector of its genre label.
tfidf = TfidfVectorizer(stop_words="english")
movie_matrix = tfidf.fit_transform(movies["genre"])
# Pairwise similarity between all movies; row/column i corresponds to the
# i-th row (by position) of `movies`.
cosine_sim = cosine_similarity(movie_matrix)


# 3. Hybrid Recommendation System
def get_recommendations(user_id, top_n=5):
    """
    Gets movie recommendations for a user using a hybrid approach.

    Collaborative-filtering (CF) candidates are the movies the user has not
    rated, ranked by the rating `model` predicts for them. Content-based (CB)
    candidates are the movies most similar to the user's highest-rated movie.
    CF candidates are prioritised; CB candidates only fill slots that CF
    leaves empty.

    Args:
        user_id (int): The ID of the user.
        top_n (int, optional): The number of recommendations to return. Defaults to 5.

    Returns:
        list: A list of movie titles recommended for the user. For a user with
        no ratings only the CF candidates are returned.
    """
    user_ratings = ratings[ratings['userId'] == user_id]

    # 1. Get Collaborative Filtering Recommendations
    # Keep the title next to each candidate so that the final list can be
    # returned as titles (a frame with only movieId/predicted_rating would
    # yield NaN titles after the concat below).
    unrated_mask = ~movies['movieId'].isin(user_ratings['movieId'])
    cf_recs = movies.loc[unrated_mask, ['movieId', 'title']].copy()

    # Predict ratings for unrated movies
    cf_recs['predicted_rating'] = [
        model.predict(user_id, movie_id).est for movie_id in cf_recs['movieId']
    ]
    cf_recs = cf_recs.sort_values(by=['predicted_rating'], ascending=False).head(top_n)

    candidate_frames = [cf_recs]

    # 2. Get Content-Based Recommendations (based on user's highest-rated movie)
    # Skipped for users without ratings: idxmax() raises on an empty Series.
    if not user_ratings.empty:
        # idxmax() returns the first row holding the maximum rating.
        highest_rated_movie_id = user_ratings.loc[user_ratings['rating'].idxmax(), 'movieId']
        highest_rated_movie_title = movies.loc[
            movies['movieId'] == highest_rated_movie_id, 'title'
        ].values[0]

        # Get content-based recommendations for the highest-rated movie
        cb_recs_titles = recommend_content(highest_rated_movie_title)
        # NOTE(review): these picks are not filtered against the user's rated
        # movies, so an already-rated title can be used as a filler.
        candidate_frames.append(
            movies.loc[movies['title'].isin(cb_recs_titles), ['movieId', 'title']]
        )

    # 3. Combine and Rank Recommendations
    # drop_duplicates keeps the first occurrence, i.e. the CF row with its
    # predicted rating when a movie comes from both methods.
    all_recs = pd.concat(candidate_frames).drop_duplicates(subset=['movieId'])

    # Rank recommendations by predicted rating. CB-only rows have no predicted
    # rating (NaN) and sort last, so CF recommendations are prioritised.
    final_recs = all_recs.sort_values(by=['predicted_rating'], ascending=False).head(top_n)

    return final_recs['title'].tolist()


# Content-Based Recommendation Function
def recommend_content(movie_title):
    """
    Returns the titles of the movies most similar to the given movie.

    Similarity is read from the precomputed `cosine_sim` matrix, whose rows
    follow the positional order of `movies`.

    Args:
        movie_title (str): Title of the reference movie, as stored in `movies`.

    Returns:
        list: Up to 3 titles, most similar first. The reference movie itself
        is never included.

    Raises:
        IndexError: If no movie with this title exists in `movies`.
    """
    # Resolve the row by position (not by index label) because `cosine_sim`
    # and `movies.iloc` are both positional.
    position = (movies["title"] == movie_title).to_numpy().nonzero()[0][0]

    # Exclude the reference movie explicitly: with tied scores (e.g. two movies
    # of the same genre) it is not guaranteed to be the first entry after
    # sorting, so slicing off element 0 could drop a real match instead.
    similar_movies = [
        (i, score) for i, score in enumerate(cosine_sim[position]) if i != position
    ]
    similar_movies.sort(key=lambda pair: pair[1], reverse=True)

    # Get top 3 similar movies
    return [movies.iloc[i]["title"] for i, _ in similar_movies[:3]]


# Example Usage
user_id = 1
recommendations = get_recommendations(user_id, top_n=5)
print(f"Movie recommendations for user {user_id}: {recommendations}")

0 commit comments

Comments
 (0)