Skip to content

Commit 64ab802

Browse files
Fixed the zero-skills card error, fixed an encoding error during file read/write, and updated the models.
1 parent 315693a commit 64ab802

File tree

5 files changed

+450
-44
lines changed

5 files changed

+450
-44
lines changed

Backend/KNN_user_domains.pkl

-21 KB
Binary file not shown.

Backend/KNN_user_skills.pkl

-5.8 KB
Binary file not shown.

Backend/pipelining.py

Lines changed: 59 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from sklearn.neighbors import NearestNeighbors
66
import joblib
77

8+
89
def get_skills_n_domains(skill_domain_dict):
910
json.dump(skill_domain_dict, open('skill_domain_dict.json', 'w'))
1011
skills = set()
@@ -28,19 +29,19 @@ def get_skills_n_domains(skill_domain_dict):
2829
for skill in skills:
2930
oneHotSkillDomainList = []
3031
for domain in domains:
31-
if(skill in skill_domain_map[domain]):
32-
oneHotSkillDomainList.append(1)
33-
else:
34-
oneHotSkillDomainList.append(0)
35-
oneHotSkillDomainList.insert(0,skill)
32+
if(skill in skill_domain_map[domain]):
33+
oneHotSkillDomainList.append(1)
34+
else:
35+
oneHotSkillDomainList.append(0)
36+
oneHotSkillDomainList.insert(0, skill)
3637
SkillDomains.append(oneHotSkillDomainList)
3738
# print(SkillDomains)
38-
39+
3940
columns = []
4041
columns.extend(domains)
41-
columns.insert(0,'SkillName')
42-
df_skill_n_domains = pd.DataFrame(SkillDomains,columns=columns)
43-
df_skill_n_domains.to_csv('skill_n_domain.csv',index=False)
42+
columns.insert(0, 'SkillName')
43+
df_skill_n_domains = pd.DataFrame(SkillDomains, columns=columns)
44+
df_skill_n_domains.to_csv('skill_n_domain.csv', index=False)
4445

4546
SkillDomainsOneHot = []
4647
for skill in skills:
@@ -53,45 +54,50 @@ def get_skills_n_domains(skill_domain_dict):
5354
SkillDomainsOneHot.append(onehot)
5455
# print(SkillDomainsOneHot)
5556

56-
5757
return skills, SkillDomainsOneHot
5858

59+
5960
def get_user_names(user_skill_dict):
    """Return the usernames (keys) of ``user_skill_dict`` as a list.

    Args:
        user_skill_dict: Mapping of username -> list of skills. Only the
            keys are read; the skill lists are ignored.

    Returns:
        A new list holding every key of the mapping, in the mapping's
        iteration order.
    """
    # Iterating a dict yields its keys, so there is no need to unpack
    # (and discard) the values via ``.items()``.
    return list(user_skill_dict)
6465

66+
6567
def create_models(UserSkills, UserDomains):
    """Fit and persist the skill-based and domain-based neighbour models.

    Both inputs are converted to SciPy CSR sparse matrices and used to fit
    one ``NearestNeighbors`` model each (cosine metric, ``n_neighbors=10``,
    ``n_jobs=-1``). The fitted models are then written with ``joblib`` to
    ``KNN_user_skills.pkl`` and ``KNN_user_domains.pkl`` in the current
    working directory, which is where ``recommendUsers`` loads them from.

    Args:
        UserSkills: User-by-skill matrix (anything ``csr_matrix`` accepts).
        UserDomains: User-by-domain matrix (anything ``csr_matrix`` accepts).

    Returns:
        None. The only outputs are the two ``.pkl`` files.
    """
    user_skill_features = csr_matrix(UserSkills)
    user_domain_features = csr_matrix(UserDomains)

    skill_based_model = NearestNeighbors(
        metric='cosine', n_neighbors=10, n_jobs=-1)
    domain_based_model = NearestNeighbors(
        metric='cosine', n_neighbors=10, n_jobs=-1)

    # Fit both models before writing either file, so a failure while
    # fitting never leaves only one of the two model files refreshed.
    skill_based_model.fit(user_skill_features)
    domain_based_model.fit(user_domain_features)

    joblib.dump(skill_based_model, 'KNN_user_skills.pkl')
    joblib.dump(domain_based_model, 'KNN_user_domains.pkl')
7781

7882

7983
def save_usernames_insequence(usernames):
    """Write ``usernames`` to ``usernames.txt``, one name per line.

    The file is created (or truncated) in the current working directory and
    encoded as UTF-8. Names are written in the order given, each followed
    by a single newline.

    Args:
        usernames: Iterable of ``str`` usernames.

    Returns:
        None.
    """
    with open('usernames.txt', 'w', encoding="utf-8") as f:
        # One buffered ``writelines`` call instead of a ``write`` per name.
        f.writelines(username + '\n' for username in usernames)
8387

88+
8489
def read_usernames_insequence():
    """Read the usernames stored in ``usernames.txt``.

    The file is opened from the current working directory as UTF-8 text,
    matching how ``save_usernames_insequence`` writes it.

    Returns:
        A list with one entry per line of the file, in file order, with the
        line-terminating newline removed.

    Raises:
        FileNotFoundError: If ``usernames.txt`` does not exist.
    """
    with open('usernames.txt', 'r', encoding="utf-8") as f:
        # Iterate the file directly rather than materialising readlines();
        # in text mode each line carries at most one trailing '\n'.
        return [line.rstrip('\n') for line in f]
8893

94+
8995
def user_data_matrix(user_skill_dict, Allskills):
90-
# user_skill_dict = sorted(user_skill_dict)
96+
# user_skill_dict = sorted(user_skill_dict)
9197
UserSkills = []
9298
UserNames = []
93-
for username,skill_list in user_skill_dict.items():
94-
if(username!=None):
99+
for username, skill_list in user_skill_dict.items():
100+
if(username != None):
95101
UserNames.append(username.lower())
96102
oneHotSkillList = []
97103
for skill in Allskills:
@@ -100,16 +106,18 @@ def user_data_matrix(user_skill_dict, Allskills):
100106
else:
101107
oneHotSkillList.append(0)
102108
UserSkills.append(oneHotSkillList)
103-
#write usernames in same sequence to text file and read also from text file
109+
# write usernames in same sequence to text file and read also from text file
104110
save_usernames_insequence(UserNames)
105111
return UserSkills
106112

113+
107114
def weights(UserSkills, SkillDomains):
    """Project the user-skill matrix onto domains.

    Computes ``np.dot(UserSkills, SkillDomains)`` after converting both
    inputs to ``float64`` arrays.

    Args:
        UserSkills: Array-like whose last axis is indexed by skill
            (``update_models`` passes one row per user).
        SkillDomains: Array-like with one row per skill and one column per
            domain; its row count must match the skill axis of
            ``UserSkills``.

    Returns:
        ``numpy.ndarray`` of dtype ``float64`` holding the dot product.

    Raises:
        ValueError: If the two shapes are not aligned for ``np.dot``.
    """
    # asarray avoids a redundant copy when the caller already supplies
    # float64 ndarrays; np.dot always allocates a fresh result anyway.
    user_skill_matrix = np.asarray(UserSkills, dtype=np.float64)
    skill_domain_matrix = np.asarray(SkillDomains, dtype=np.float64)
    return np.dot(user_skill_matrix, skill_domain_matrix)
112119

120+
113121
def get_target_user_data(target_user_skills):
114122
target_skills = []
115123
df_skill_n_domain = pd.read_csv('skill_n_domain.csv')
@@ -119,18 +127,22 @@ def get_target_user_data(target_user_skills):
119127
target_skills.append(1)
120128
else:
121129
target_skills.append(0)
122-
target_skills = np.array(target_skills,dtype=np.float64)
123-
SkillDomains = np.array(df_skill_n_domain.iloc[:,1:].values,dtype=np.float64)
130+
target_skills = np.array(target_skills, dtype=np.float64)
131+
SkillDomains = np.array(
132+
df_skill_n_domain.iloc[:, 1:].values, dtype=np.float64)
124133
target_domains = np.dot(target_skills, SkillDomains)
125134
return target_skills, target_domains
126135

136+
127137
def recommendUsers(target_user_skills, target_user_domains, UserNames):
128138
skill_based_model = joblib.load('KNN_user_skills.pkl', mmap_mode='r')
129139
domain_based_model = joblib.load('KNN_user_domains.pkl', mmap_mode='r')
130140

131-
#TODO: See this return distances parameter and how to access these distances
132-
skills_based_similar_user_distances, skills_based_similar_users = skill_based_model.kneighbors([target_user_skills],10)
133-
domains_based_similar_user_distances, domains_based_similar_users = domain_based_model.kneighbors([target_user_domains],10)
141+
# TODO: See this return distances parameter and how to access these distances
142+
skills_based_similar_user_distances, skills_based_similar_users = skill_based_model.kneighbors([
143+
target_user_skills], 10)
144+
domains_based_similar_user_distances, domains_based_similar_users = domain_based_model.kneighbors([
145+
target_user_domains], 10)
134146

135147
# skill_based_user_names = []
136148
# domain_based_user_names = []
@@ -140,7 +152,7 @@ def recommendUsers(target_user_skills, target_user_domains, UserNames):
140152
# skill_based_user_names.append(UserNames[usr_indx])
141153
# for usr_indx in domains_based_similar_users[0]:
142154
# domain_based_user_names.append(UserNames[usr_indx])
143-
155+
144156
Suggestions = list()
145157
for usr_indx in skills_based_similar_users[0]:
146158
if(UserNames[usr_indx] not in Suggestions):
@@ -150,24 +162,30 @@ def recommendUsers(target_user_skills, target_user_domains, UserNames):
150162
Suggestions.append(UserNames[usr_indx])
151163
return Suggestions
152164

153-
#=======================================================================================================================
165+
# =======================================================================================================================
154166
# MAIN LOGIC
155-
#=======================================================================================================================
167+
# =======================================================================================================================
168+
169+
156170
def update_models(skill_domain_dict, user_skill_dict):
    """Rebuild every persisted artefact from the raw dictionaries.

    Runs the full training pipeline:

    1. ``get_skills_n_domains`` derives the skill collection and the
       skill-by-domain one-hot matrix (and writes its own JSON/CSV files).
    2. ``user_data_matrix`` one-hot encodes each user's skills against that
       skill collection and saves the usernames in row order.
    3. ``weights`` multiplies the two matrices to get user-by-domain scores.
    4. ``create_models`` fits and dumps both nearest-neighbour models.

    Args:
        skill_domain_dict: Skill/domain mapping, passed unchanged to
            ``get_skills_n_domains``.
        user_skill_dict: Mapping of username -> skills, passed unchanged to
            ``user_data_matrix``.

    Returns:
        None.
    """
    skills, SkillDomains = get_skills_n_domains(skill_domain_dict)
    # user-skill data matrix: one one-hot row per user
    UserSkills = user_data_matrix(user_skill_dict, skills)
    UserDomains = weights(UserSkills, SkillDomains)
    create_models(UserSkills, UserDomains)
    # TODO: return a status value so callers can tell whether the rebuild
    # completed successfully.
177+
162178

163179
def predict(target_user_skills):
    """Return suggested usernames for a target user.

    Relies on artefacts written earlier by ``update_models``:
    ``usernames.txt``, ``skill_n_domain.csv`` and the two ``.pkl`` models.

    Args:
        target_user_skills: The target user's skills, passed unchanged to
            ``get_target_user_data``. The original docstring describes it as
            ``dict{"username": [skill1, skill2, ...]}`` -- NOTE(review):
            confirm against ``get_target_user_data``, whose body is the
            authority on the expected shape.

    Returns:
        The list of suggested usernames produced by ``recommendUsers``.
    """
    # Names are read back in the order they were saved, so the indices
    # returned by the neighbour models line up with this list.
    UserNames = read_usernames_insequence()
    encoded_target_user_skills, encoded_target_user_domains = (
        get_target_user_data(target_user_skills))
    suggestions = recommendUsers(
        encoded_target_user_skills, encoded_target_user_domains, UserNames)
    return suggestions
171189

172190

173191
"""

0 commit comments

Comments
 (0)