16
16
# In[2]:
17
17
18
18
19
# Load the job-postings CSV export into a DataFrame; the path is relative to
# the working directory.
data = pd.read_csv('naukri_com-jobs__20190701_20190830__30k_data.csv')
20
20
21
21
22
22
# In[3]:
28
28
# In[4]:
29
29
30
30
31
# Both frames are built from the same five columns of `data`; each selection
# produces its own DataFrame object.
user_profiles = data[['Uniq Id', 'Role Category',
                      'Location', 'Job Experience Required', 'Key Skills']]
job_postings = data[['Uniq Id', 'Role Category',
                     'Location', 'Job Experience Required', 'Key Skills']]
33
35
34
36
35
37
# ### User Profile
48
50
49
51
# One-hot encode every column except the identifier, so each row becomes a
# numeric feature vector.
user_profiles_matrix = pd.get_dummies(user_profiles.drop('Uniq Id', axis=1))
# NOTE(review): `normalize` and `cosine_similarity` are not defined in the
# visible part of the file -- presumably the scikit-learn helpers; confirm
# against the import cell.
user_profiles_matrix = normalize(user_profiles_matrix)  # Normalize the matrix
# Pairwise similarity of every profile row against every other profile row.
similarity_matrix = cosine_similarity(
    user_profiles_matrix, user_profiles_matrix)
52
55
53
56
54
57
# ### Job recommendation
59
62
# Define the number of nearest neighbors to consider
60
63
k = 5  # read by get_job_recommendations when it slices the top matches
61
64
65
+
62
66
def get_job_recommendations(user_id, num_neighbors=None, *, profiles=None,
                            similarities=None, postings=None):
    """Recommend role categories held by the profiles most similar to a user.

    The profiles with the highest similarity to ``user_id`` are looked up,
    their role categories are collected, roles the user already has are
    removed, and the remainder is ranked by how many times it occurred among
    the neighbours.

    Args:
        user_id: Value of the 'Uniq Id' column identifying the user.
        num_neighbors: How many nearest profiles to consult. Defaults to the
            module-level ``k``.
        profiles: DataFrame with a 'Uniq Id' column whose row order matches
            the rows of ``similarities``. Defaults to the module-level
            ``user_profiles``.
        similarities: Square array of pairwise similarities, indexed by row
            position. Defaults to the module-level ``similarity_matrix``.
        postings: DataFrame with 'Uniq Id' and 'Role Category' columns used
            to look up roles. Defaults to the module-level ``data``.

    Returns:
        A list of role categories, most frequent first. Ties keep the order
        in which the roles were first seen, i.e. nearest neighbour first.
        The list is empty when no neighbour offers a new role.

    Raises:
        IndexError: If ``user_id`` does not occur in ``profiles``.
    """
    from collections import Counter

    # Fall back to the notebook's module-level tables when not supplied.
    if num_neighbors is None:
        num_neighbors = k
    if profiles is None:
        profiles = user_profiles
    if similarities is None:
        similarities = similarity_matrix
    if postings is None:
        postings = data

    # Locate the user by row *position*: the similarity matrix is positional,
    # so an index label would point at the wrong row whenever the frame does
    # not carry a default 0..n-1 index.
    positions = (profiles['Uniq Id'] == user_id).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise IndexError(f"no user profile found for Uniq Id {user_id!r}")
    user_row = int(positions[0])

    # Most similar first. Drop the user's own row explicitly instead of
    # assuming it sorts to the front: another profile tied at the same
    # similarity may be ordered ahead of it.
    ranked_rows = similarities[user_row].argsort()[::-1]
    neighbour_rows = ranked_rows[ranked_rows != user_row][:num_neighbors]

    def roles_of(uniq_id):
        # Missing role values are skipped so they are never recommended.
        matching = postings.loc[postings['Uniq Id'] == uniq_id, 'Role Category']
        return matching.dropna().tolist()

    already_held = set(roles_of(user_id))

    # Count occurrences *before* any de-duplication; collapsing to a set
    # first would make every count 1 and the ranking meaningless.
    role_counts = Counter()
    for row in neighbour_rows:
        neighbour_id = profiles.iloc[row]['Uniq Id']
        role_counts.update(
            role for role in roles_of(neighbour_id)
            if role not in already_held
        )

    return [role for role, _ in role_counts.most_common()]
81
90
91
+
82
92
# Example usage
83
93
# Look up recommendations for one known 'Uniq Id' from the dataset.
user_id = '9be62c49a0b7ebe982a4af1edaa7bc5f'
recommended_roles = get_job_recommendations(user_id)
@@ -90,11 +100,4 @@ def get_job_recommendations(user_id):
90
100
# In[ ]:
91
101
92
102
93
-
94
-
95
-
96
103
# In[ ]:
97
-
98
-
99
-
100
-
0 commit comments