Skip to content

Commit 4e2c4a2

Browse files
committed
Used for making dummy text for example #1333
1 parent 15ee937 commit 4e2c4a2

File tree

1 file changed

+66
-0
lines changed

1 file changed

+66
-0
lines changed

NLP/dummysentence.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import os
2+
from sentence_transformers import SentenceTransformer, util
3+
4+
# Model identifier handed to SentenceTransformer when no local copy exists.
MODEL_NAME = 'all-MiniLM-L6-v2'
# Folder (relative to the current working directory) where the model is saved.
MODEL_FOLDER = 'model'
6+
7+
def load_file(file_path):
    """Read a text file and return its non-blank lines, stripped.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list with one string per non-blank line, in file order, with
        leading and trailing whitespace removed.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # 'utf-8-sig' decodes plain UTF-8 unchanged but also drops a leading
    # byte-order mark, which str.strip() would otherwise leave attached to
    # the first sentence.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        stripped_lines = (line.strip() for line in file)
        return [line for line in stripped_lines if line]
10+
11+
def load_or_download_model():
    """Return the sentence-embedding model, saving a local copy on first use.

    The model is looked up at ``MODEL_FOLDER/MODEL_NAME``. When that path
    exists the model is constructed from it; otherwise it is constructed
    from ``MODEL_NAME``, saved to that path for later runs, and returned.

    Returns:
        A ``SentenceTransformer`` instance.
    """
    model_path = os.path.join(MODEL_FOLDER, MODEL_NAME)

    # No local copy yet: build the model by name and persist it.
    if not os.path.exists(model_path):
        print(f"Downloading model {MODEL_NAME}")
        fetched = SentenceTransformer(MODEL_NAME)
        os.makedirs(MODEL_FOLDER, exist_ok=True)
        fetched.save(model_path)
        print(f"Model saved to {model_path}")
        return fetched

    # A copy saved by an earlier run is available: load it from disk.
    print(f"Loading model from {model_path}")
    return SentenceTransformer(model_path)
23+
24+
def find_similar_sentences(query, file_path, top_n=5):
    """Return the sentences in a file that are most similar to a query.

    Args:
        query: Text to compare the file's sentences against.
        file_path: Path of a text file; it is read with ``load_file``, so
            every non-blank line counts as one sentence.
        top_n: Maximum number of results to return. ``None`` returns every
            sentence; zero or a negative value returns an empty list.

    Returns:
        A list of ``(sentence, score)`` tuples ordered from most to least
        similar, at most ``top_n`` long. Each ``score`` is the element taken
        from the similarity row produced by ``util.pytorch_cos_sim``. The
        list is empty when the file has no non-blank lines.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    # Read the file before touching the model so a bad path fails fast,
    # without first loading (or downloading) the model.
    sentences = load_file(file_path)

    # Nothing to rank, or nothing requested: skip the model entirely. A
    # negative top_n would otherwise slice off the *last* results instead of
    # returning none.
    if not sentences or (top_n is not None and top_n <= 0):
        return []

    model = load_or_download_model()

    # Encode the candidate sentences and the single query.
    sentence_embeddings = model.encode(sentences)
    query_embedding = model.encode([query])

    # One query was encoded, so row 0 holds its score against each sentence.
    cosine_scores = util.pytorch_cos_sim(query_embedding, sentence_embeddings)[0]

    ranked = sorted(
        zip(sentences, cosine_scores),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return ranked[:top_n]
42+
43+
def main():
    """Run the interactive sentence-similarity search.

    Prompts for a query and a text-file name, looks up the most similar
    sentences with ``find_similar_sentences`` and prints each one with its
    score. Errors are reported on stdout rather than raised.
    """
    print("Welcome to the Sentence Similarity Search Tool!")

    # Get user input for query
    query = input("Enter your query: ")

    # Get user input for file path
    file_name = input("Enter the name of your text file (without .txt extension): ").strip()
    # Only append the extension when the user left it off, so typing
    # "notes.txt" does not turn into "notes.txt.txt".
    if file_name.lower().endswith(".txt"):
        file_path = file_name
    else:
        file_path = f"{file_name}.txt"

    try:
        results = find_similar_sentences(query, file_path)

        print(f"\nTop 5 similar sentences for query: '{query}'\n")
        for sentence, score in results:
            print(f"Similarity: {score:.4f}")
            print(f"Sentence: {sentence}\n")
    except FileNotFoundError:
        print(f"Error: The file '{file_path}' was not found. Please check the file name and try again.")
    except Exception as e:
        # Top-level boundary of an interactive tool: report and exit cleanly.
        print(f"An error occurred: {str(e)}")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)