-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathdata_structure.py
More file actions
113 lines (79 loc) · 1.99 KB
/
data_structure.py
File metadata and controls
113 lines (79 loc) · 1.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
from collections import defaultdict
import math
import sys
import numpy as np
class Paper:
    """Record describing one paper: its title, date and abstract."""

    def __init__(self):
        # Set every field at construction time so a fresh ``Paper()`` can be
        # read immediately; a method merely named after the class is never
        # invoked automatically by Python.
        self.title = ""
        self.date = 0
        self.abstract = ""

    def Paper(self):
        """Reset all fields to their defaults.

        Retained so that code which called this method explicitly keeps
        working; construction itself is handled by ``__init__``.
        """
        self.__init__()
class Professor:
    """Record describing one professor.

    Attributes are the professor's name, homepage URL, university and a list
    of fields.
    """

    def __init__(self):
        # Set every field at construction time so a fresh ``Professor()`` can
        # be read immediately; a method merely named after the class is never
        # invoked automatically by Python.
        self.name = ""
        self.homepage_url = ""
        self.university = ""
        # A new list per instance, so professors never share one field list.
        self.field = []

    def Professor(self):
        """Reset all fields to their defaults.

        Retained so that code which called this method explicitly keeps
        working; construction itself is handled by ``__init__``.
        """
        self.__init__()
# Module-level lookup tables shared by the functions in this file. All of them
# start out empty.

# Looked up by professor name in printProf.
prof_to_info_map = {}
# Presumably professor -> ids of their papers; not read anywhere in the
# visible code -- TODO confirm.
prof_to_paperids_map = {}
# handle_query sorts the items of this map by value, highest first.
profs_to_score_map = {}
# Read by get_score_for_paper, which converts the stored value to a NumPy array.
paperid_to_vector_map = {}
# Read by get_score_for_paper, which tests query tokens for membership in the
# stored value.
paperid_to_words_of_title = {}
# NOTE(review): the function ``inverted_index(doc)`` defined further down
# reuses this name, so after import the name refers to the function and this
# dict is no longer reachable.
inverted_index = {}
# Presumably document lengths; not read anywhere in the visible code --
# TODO confirm.
docLen = {}
def getData():
    """Load the data used by the rest of the module.

    Not implemented yet: this placeholder does nothing and returns ``None``.
    It needs a real body (this docstring) because a ``def`` followed only by
    a comment is a syntax error that prevents the module from importing.
    """
    # TODO: load the data (the original note here read "from test").
def preprocesss(doc_str):
    """Placeholder for turning ``doc_str`` into a list of tokens.

    The tokenisation has not been written yet, so the call currently does
    nothing and returns ``None``.
    """
    return None
def inverted_index(doc):
    """Placeholder, presumably for indexing ``doc``.

    Nothing is implemented yet: the argument is ignored and ``None`` is
    returned.
    """
    return None
def construct_vector_space_model(doc_str):
    """Placeholder for building the word-weight vector of ``doc_str``.

    The model has not been written yet, so the call currently does nothing
    and returns ``None``.
    """
    return None
def get_score_for_paper(query, paperid):
    """Score one paper against a query.

    The query is tokenised with ``preprocesss``. Every token that also occurs
    in the paper's title words is repeated once after the original tokens, so
    it appears twice in the text handed to ``construct_vector_space_model``.
    The score is the dot product of the resulting query vector and the
    paper's stored vector.

    Args:
        query: Raw query text.
        paperid: Key into ``paperid_to_vector_map`` and
            ``paperid_to_words_of_title``.

    Returns:
        The value of ``np.dot(query_vector, paper_vector)``.

    Raises:
        KeyError: If ``paperid`` is missing from either lookup table.
    """
    paper_vector = np.array(paperid_to_vector_map[paperid])
    title_words = paperid_to_words_of_title[paperid]

    # Copy the tokens so the list returned by preprocesss is not modified.
    tokens = list(preprocesss(query))
    # Only the original tokens are examined; the repeats go after them.
    boosted = tokens + [word for word in tokens if word in title_words]

    query_vector = np.array(construct_vector_space_model(" ".join(boosted)))
    return np.dot(query_vector, paper_vector)
def handle_query(query):
    """Rank professors for ``query`` and print the best matches.

    Calls ``get_score_for_profs(query)``, orders the entries of
    ``profs_to_score_map`` from highest to lowest score, and prints (via
    ``printProf``) at most ten professors accepted by
    ``prof_in_constraints``.

    Args:
        query: The query text. It is forwarded as given; it is no longer
            replaced by an empty string before use.
    """
    # TODO: the university/field constraints are not derived from the query
    # yet, so empty strings are passed to prof_in_constraints.
    university = ""
    field = ""
    max_results = 10

    # Presumably this fills profs_to_score_map -- verify against its
    # definition, which is not in this part of the file.
    get_score_for_profs(query)

    # sorted() already yields (professor, score) pairs in ranked order; no
    # round-trip through a dict is needed.
    ranked = sorted(
        profs_to_score_map.items(), key=lambda item: item[1], reverse=True
    )

    shown = 0
    for prof, _score in ranked:
        if not prof_in_constraints(prof, university, field):
            continue
        printProf(prof)
        shown += 1
        if shown == max_results:
            break
def printProf(profName):
    """Print a professor's name followed by their homepage URL.

    Args:
        profName: Key into ``prof_to_info_map``.

    Raises:
        KeyError: If ``profName`` is not in ``prof_to_info_map``.
    """
    print(profName)
    # Assumes the map values are Professor objects (TODO confirm): Professor
    # stores the link as ``homepage_url`` and has no ``home`` attribute.
    print(prof_to_info_map[profName].homepage_url)
def main():
    """Load the data, then answer queries read from standard input.

    Each non-empty line is passed to ``handle_query``. An empty line or
    end-of-input ends the loop.
    """
    getData()
    # TODO: building the inverted index and the vector space model is not
    # wired in yet.

    # Read a fresh query on every iteration; previously the query was fixed
    # to "" before the loop, so the loop body could never run.
    while True:
        try:
            query = input("Query (empty line to quit): ")
        except EOFError:
            break
        if query == "":
            break
        handle_query(query)