Skip to content

Commit e05a0bd

Browse files
committed
Now it can filter the sentence #1333
1 parent a6abccd commit e05a0bd

File tree

1 file changed

+48
-0
lines changed

1 file changed

+48
-0
lines changed

NLP/Sentence_Similarity.py

Lines changed: 48 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,48 @@
1+
from sentence_transformers import SentenceTransformer, util
2+
3+
def load_file(file_path):
    """Read a UTF-8 text file and return its non-blank lines.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list with one entry per line that still has content after
        leading and trailing whitespace is stripped, in file order.
    """
    kept = []
    with open(file_path, mode='r', encoding='utf-8') as handle:
        for raw_line in handle:
            text = raw_line.strip()
            # Whitespace-only lines collapse to "" and are dropped.
            if text:
                kept.append(text)
    return kept
6+
7+
def find_similar_sentences(query, file_path, top_n=5):
    """Rank the sentences of a text file by cosine similarity to ``query``.

    Args:
        query: Text to compare every sentence against.
        file_path: Path of the text file to search; it is read with
            ``load_file``, so each non-blank line is one candidate sentence.
        top_n: Upper bound on the number of results returned (default 5).

    Returns:
        A list of ``(sentence, score)`` pairs sorted by score, highest
        first, truncated to ``top_n`` entries. Each score is an element of
        the similarity row returned by ``util.pytorch_cos_sim``. The list is
        empty when the file contains no sentences.

    Raises:
        FileNotFoundError: Propagated from opening ``file_path``.
    """
    # Read the file before constructing the model so that a bad path fails
    # immediately instead of after the model has been loaded.
    sentences = load_file(file_path)
    if not sentences:
        # Nothing to rank: skip model loading and avoid handing an empty
        # batch to the encoder and the similarity computation.
        return []

    # Load the pre-trained model
    model = SentenceTransformer('all-MiniLM-L6-v2')

    # Encode the candidate sentences and the query
    sentence_embeddings = model.encode(sentences)
    query_embedding = model.encode([query])

    # Only one query was encoded, so take the first (only) row of scores.
    cosine_scores = util.pytorch_cos_sim(query_embedding, sentence_embeddings)[0]

    # Pair each sentence with its score and keep the best top_n.
    ranked = sorted(zip(sentences, cosine_scores), key=lambda pair: pair[1], reverse=True)
    return ranked[:top_n]
25+
26+
def main():
    """Run the interactive sentence similarity search on the console.

    Prompts for a query and a text file path, then prints the sentences
    returned by ``find_similar_sentences`` together with their scores.
    A missing file and any other error are reported as printed messages
    rather than raised.
    """
    print("Welcome to the Sentence Similarity Search Tool!")

    # Ask for the query first, then for the file to search.
    query = input("Enter your query: ")
    file_path = input("Enter the path to your text file: ")

    try:
        matches = find_similar_sentences(query, file_path)

        heading = f"\nTop 5 similar sentences for query: '{query}'\n"
        print(heading)
        for text, similarity in matches:
            print(f"Similarity: {similarity:.4f}")
            print(f"Sentence: {text}\n")
    except FileNotFoundError:
        print(f"Error: The file '{file_path}' was not found. Please check the file path and try again.")
    except Exception as exc:
        # Top-level boundary of the tool: report the problem instead of a traceback.
        print(f"An error occurred: {str(exc)}")
46+
47+
# Start the interactive tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)