-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprocess_incoming.py
More file actions
37 lines (28 loc) · 1.14 KB
/
process_incoming.py
File metadata and controls
37 lines (28 loc) · 1.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import joblib
import requests
def create_embedding(text_list):
    """Request embeddings for a batch of texts from the local Ollama server.

    Posts ``text_list`` as the ``input`` field to the ``/api/embed`` endpoint
    on ``http://localhost:11434`` using the ``bge-m3`` model.

    Args:
        text_list: The texts to embed; sent unchanged in the JSON payload.

    Returns:
        The value of the ``"embeddings"`` field of the JSON response
        (presumably one vector per input text -- confirm against the API docs).

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
    """
    # https://github.com/ollama/ollama/blob/main/docs/api.md#generate-embeddings
    r = requests.post(
        "http://localhost:11434/api/embed",
        json={"model": "bge-m3", "input": text_list},
    )
    # Surface HTTP failures directly instead of an opaque KeyError on the
    # missing "embeddings" field below.
    r.raise_for_status()
    return r.json()["embeddings"]
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load embeddings.joblib from a trusted source.
df = joblib.load('embeddings.joblib')

incoming_query = input("Ask a Question: ")
question_embedding = create_embedding([incoming_query])[0]

# Cosine similarity of the question against every stored embedding; the
# stored vectors are stacked into one 2-D array and the result flattened
# to a 1-D array of scores.
similarities = cosine_similarity(
    np.vstack(df['embedding']), [question_embedding]
).flatten()

top_results = 30
# Positions of the highest-scoring rows, best match first.
max_indx = similarities.argsort()[::-1][0:top_results]

# argsort yields positions, not index labels, so select with iloc; loc would
# pick the wrong rows (or raise KeyError) if df's index is not 0..n-1.
new_df = df.iloc[max_indx]

for index, item in new_df.iterrows():
    print(index, item["title"], item["number"], item["text"], item["start"], item["end"])