Skip to content

Commit 542e0a5

Browse files
committed
refactored
1 parent dfe5b53 commit 542e0a5

File tree

2 files changed

+30
-12
lines changed

2 files changed

+30
-12
lines changed

db_manager.py

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,25 +19,32 @@ def get_similarity_search_with_average_score(db_dir: str, doc_id: str, terms: st
1919
average_score = scores_series.mean()
2020
return average_score
2121

22-
if __name__ == "__main__":
23-
if len(sys.argv) != 4:
24-
print("USAGE: " + sys.argv[0] + " <db_dir> <doclist> <terms>")
25-
sys.exit(1)
26-
27-
db_dir=sys.argv[1]
28-
doclist=sys.argv[2]
29-
terms=sys.argv[3]
22+
def get_similarity_search_results(doclist: str, db_dir: str, terms: str, top_k: int):
    """Rank the documents listed in ``doclist`` by their average similarity score.

    Every non-blank line of ``doclist`` is treated as one document id and is
    scored with ``get_similarity_search_with_average_score``.

    Args:
        doclist: Path to a text file holding one document id per line.
        db_dir: Database directory, forwarded unchanged to the scoring function.
        terms: Search terms, forwarded unchanged to the scoring function.
        top_k: Passed as the fourth positional argument of the scoring
            function and also used as the maximum number of ids returned.

    Returns:
        A list of at most ``top_k`` document ids, ordered by ascending score.
    """
    with open(doclist, "r") as file:
        # Skip blank lines (e.g. a trailing empty line at the end of the list)
        # so an empty string is never scored as if it were a document id.
        entries = [line.strip() for line in file if line.strip()]

    scores = [
        get_similarity_search_with_average_score(db_dir, entry, terms, top_k)
        for entry in entries
    ]

    df = pd.DataFrame({'title': entries, 'score': scores})
    # ascending=True puts the lowest scores first; head() then keeps top_k rows.
    return df.sort_values(by='score', ascending=True).head(top_k)['title'].tolist()
37+
38+
39+
if __name__ == "__main__":
    # Command-line entry point: <db_dir> <doclist> <terms> <num>
    usage = "USAGE: " + sys.argv[0] + " <db_dir> <doclist> <terms> <num>"
    if len(sys.argv) != 5:
        print(usage)
        sys.exit(1)

    db_dir = sys.argv[1]
    doclist = sys.argv[2]
    terms = sys.argv[3]
    try:
        num = int(sys.argv[4])
    except ValueError:
        # A non-numeric <num> is a usage error; report it the same way as a
        # wrong argument count instead of dying with a traceback.
        print(usage)
        sys.exit(1)

    docs = get_similarity_search_results(doclist, db_dir, terms, num)
    print(docs)

tools/get_terms.bash

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/bin/bash
# Print the value of every line containing "Term" in the given file,
# with double quotes removed and all values joined onto a single line.

if [ $# -ne 1 ]
then
    echo "Usage: $0 <reflection path>"
    exit 1
fi

reflection_path="${1}"

# Pipeline: keep lines containing "Term", take the second ':'-separated field,
# strip double quotes, then delete the newlines between the remaining lines.
# The path is quoted so names containing spaces or glob characters are passed
# to grep as a single argument.
grep Term "${reflection_path}" | awk -F: '{print $2}' | sed 's/\"//g'| sed ':a;N;$!ba;s/\n//g'

0 commit comments

Comments
 (0)