1
+ import os
1
2
from sentence_transformers import SentenceTransformer , util
2
3
4
+ MODEL_NAME = 'all-MiniLM-L6-v2'
5
+ MODEL_FOLDER = 'model'
6
+
3
7
def load_file(file_path):
    """Return the non-blank lines of a UTF-8 text file, whitespace-stripped.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list with one entry per line that still has content after
        stripping leading and trailing whitespace, in file order.
    """
    kept = []
    with open(file_path, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            text = raw_line.strip()
            # Lines that are empty or whitespace-only are dropped.
            if text:
                kept.append(text)
    return kept
6
10
11
def load_or_download_model():
    """Return a SentenceTransformer, reusing a locally saved copy if present.

    The local copy is looked up at ``MODEL_FOLDER/MODEL_NAME``. When that
    path exists the model is constructed from it; otherwise the model is
    constructed from ``MODEL_NAME``, saved to that path, and returned.

    Returns:
        The ``SentenceTransformer`` instance.
    """
    model_path = os.path.join(MODEL_FOLDER, MODEL_NAME)

    # Fast path: a previously saved copy is already on disk.
    if os.path.exists(model_path):
        print(f"Loading model from { model_path } ")
        return SentenceTransformer(model_path)

    # No local copy yet: build from the model name, then persist it so the
    # next call takes the fast path above.
    print(f"Downloading model { MODEL_NAME } ")
    fetched = SentenceTransformer(MODEL_NAME)
    os.makedirs(MODEL_FOLDER, exist_ok=True)
    fetched.save(model_path)
    print(f"Model saved to { model_path } ")
    return fetched
23
+
7
24
def find_similar_sentences (query , file_path , top_n = 5 ):
8
25
# Load the pre-trained model
9
- model = SentenceTransformer ( 'all-MiniLM-L6-v2' )
26
+ model = load_or_download_model ( )
10
27
11
28
# Load and encode the sentences from the file
12
29
sentences = load_file (file_path )
@@ -23,14 +40,27 @@ def find_similar_sentences(query, file_path, top_n=5):
23
40
24
41
return top_results
25
42
43
def validate_file_path(file_path):
    """Normalize a user-supplied path to a ``.txt`` file and check it exists.

    Args:
        file_path: Path as typed by the user, with or without the ``.txt``
            extension.

    Returns:
        The path, with ``.txt`` appended when it did not already end in
        ``.txt`` (compared case-insensitively).

    Raises:
        FileNotFoundError: If the resulting path is not an existing regular
            file (it is missing, or it is a directory).
    """
    # Compare case-insensitively so "notes.TXT" is not turned into
    # "notes.TXT.txt".
    if not file_path.lower().endswith('.txt'):
        file_path += '.txt'
    # isfile() rather than exists(): a directory would pass exists() and
    # only fail later, when the caller tries to open it for reading.
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"The file '{file_path}' does not exist.")
    return file_path
49
+
26
50
def main ():
27
51
print ("Welcome to the Sentence Similarity Search Tool!" )
28
52
29
53
# Get user input for query
30
54
query = input ("Enter your query: " )
31
55
32
- # Get user input for file path
33
- file_path = input ("Enter the path to your text file: " )
56
+ # Get user input for file path and validate it
57
+ while True :
58
+ file_path = input ("Enter the path to your text file without extension: " )
59
+ try :
60
+ file_path = validate_file_path (file_path )
61
+ break
62
+ except FileNotFoundError as e :
63
+ print (f"Error: { str (e )} Please try again." )
34
64
35
65
try :
36
66
results = find_similar_sentences (query , file_path )
@@ -39,8 +69,6 @@ def main():
39
69
for sentence , score in results :
40
70
print (f"Similarity: { score :.4f} " )
41
71
print (f"Sentence: { sentence } \n " )
42
- except FileNotFoundError :
43
- print (f"Error: The file '{ file_path } ' was not found. Please check the file path and try again." )
44
72
except Exception as e :
45
73
print (f"An error occurred: { str (e )} " )
46
74
0 commit comments