From e05a0bd510661f381cb607394c8bcdab1c634133 Mon Sep 17 00:00:00 2001
From: A-Akhil
Date: Sun, 20 Oct 2024 16:29:27 +0530
Subject: [PATCH 1/5] Now it can filter sentences #1333

---
 NLP/Sentence_Similarity.py | 48 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 NLP/Sentence_Similarity.py

diff --git a/NLP/Sentence_Similarity.py b/NLP/Sentence_Similarity.py
new file mode 100644
index 000000000..686f8672a
--- /dev/null
+++ b/NLP/Sentence_Similarity.py
@@ -0,0 +1,48 @@
+from sentence_transformers import SentenceTransformer, util
+
+def load_file(file_path):
+    with open(file_path, 'r', encoding='utf-8') as file:
+        return [line.strip() for line in file if line.strip()]
+
+def find_similar_sentences(query, file_path, top_n=5):
+    # Load the pre-trained model
+    model = SentenceTransformer('all-MiniLM-L6-v2')
+
+    # Load and encode the sentences from the file
+    sentences = load_file(file_path)
+    sentence_embeddings = model.encode(sentences)
+
+    # Encode the query
+    query_embedding = model.encode([query])
+
+    # Calculate cosine similarities
+    cosine_scores = util.pytorch_cos_sim(query_embedding, sentence_embeddings)[0]
+
+    # Get top N results
+    top_results = sorted(zip(sentences, cosine_scores), key=lambda x: x[1], reverse=True)[:top_n]
+
+    return top_results
+
+def main():
+    print("Welcome to the Sentence Similarity Search Tool!")
+
+    # Get user input for query
+    query = input("Enter your query: ")
+
+    # Get user input for file path
+    file_path = input("Enter the path to your text file: ")
+
+    try:
+        results = find_similar_sentences(query, file_path)
+
+        print(f"\nTop 5 similar sentences for query: '{query}'\n")
+        for sentence, score in results:
+            print(f"Similarity: {score:.4f}")
+            print(f"Sentence: {sentence}\n")
+    except FileNotFoundError:
+        print(f"Error: The file '{file_path}' was not found. Please check the file path and try again.")
+    except Exception as e:
+        print(f"An error occurred: {str(e)}")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
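
A quick way to exercise this script without the interactive prompts, for anyone reviewing the patch: a minimal sketch, assuming the patch is applied as NLP/Sentence_Similarity.py and sentence-transformers is installed; the corpus file name and sentences below are made up for illustration.

    # Hypothetical smoke test for patch 1; run from the NLP/ directory.
    from Sentence_Similarity import find_similar_sentences

    corpus = [
        "The cat sat on the mat.",
        "Stocks fell sharply on Friday.",
        "A kitten dozed on the rug.",
    ]
    with open("sentences.txt", "w", encoding="utf-8") as f:  # made-up test file
        f.write("\n".join(corpus))

    for sentence, score in find_similar_sentences("a cat on a rug", "sentences.txt", top_n=2):
        print(f"{score:.4f}  {sentence}")
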
From 15ee93732b422c3fd562ef8bb18218d3da946f54 Mon Sep 17 00:00:00 2001
From: A-Akhil
Date: Sun, 20 Oct 2024 16:33:12 +0530
Subject: [PATCH 2/5] Now it saves the model in a local directory #1333

---
 NLP/Sentence_Similarity.py | 38 +++++++++++++++++++++++++++++++++-----
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/NLP/Sentence_Similarity.py b/NLP/Sentence_Similarity.py
index 686f8672a..d3d536092 100644
--- a/NLP/Sentence_Similarity.py
+++ b/NLP/Sentence_Similarity.py
@@ -1,12 +1,29 @@
+import os
 from sentence_transformers import SentenceTransformer, util
 
+MODEL_NAME = 'all-MiniLM-L6-v2'
+MODEL_FOLDER = 'model'
+
 def load_file(file_path):
     with open(file_path, 'r', encoding='utf-8') as file:
         return [line.strip() for line in file if line.strip()]
 
+def load_or_download_model():
+    model_path = os.path.join(MODEL_FOLDER, MODEL_NAME)
+    if os.path.exists(model_path):
+        print(f"Loading model from {model_path}")
+        return SentenceTransformer(model_path)
+    else:
+        print(f"Downloading model {MODEL_NAME}")
+        model = SentenceTransformer(MODEL_NAME)
+        os.makedirs(MODEL_FOLDER, exist_ok=True)
+        model.save(model_path)
+        print(f"Model saved to {model_path}")
+        return model
+
 def find_similar_sentences(query, file_path, top_n=5):
     # Load the pre-trained model
-    model = SentenceTransformer('all-MiniLM-L6-v2')
+    model = load_or_download_model()
 
     # Load and encode the sentences from the file
     sentences = load_file(file_path)
@@ -23,14 +40,27 @@ def find_similar_sentences(query, file_path, top_n=5):
 
     return top_results
 
+def validate_file_path(file_path):
+    if not file_path.endswith('.txt'):
+        file_path += '.txt'
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"The file '{file_path}' does not exist.")
+    return file_path
+
 def main():
     print("Welcome to the Sentence Similarity Search Tool!")
 
     # Get user input for query
     query = input("Enter your query: ")
 
-    # Get user input for file path
-    file_path = input("Enter the path to your text file: ")
+    # Get user input for file path and validate it
+    while True:
+        file_path = input("Enter the path to your text file without extension: ")
+        try:
+            file_path = validate_file_path(file_path)
+            break
+        except FileNotFoundError as e:
+            print(f"Error: {str(e)} Please try again.")
 
     try:
         results = find_similar_sentences(query, file_path)
@@ -39,8 +69,6 @@ def main():
         print(f"\nTop 5 similar sentences for query: '{query}'\n")
         for sentence, score in results:
             print(f"Similarity: {score:.4f}")
             print(f"Sentence: {sentence}\n")
-    except FileNotFoundError:
-        print(f"Error: The file '{file_path}' was not found. Please check the file path and try again.")
     except Exception as e:
         print(f"An error occurred: {str(e)}")
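
A note on the caching added in this patch: sentence-transformers already caches downloads itself (under ~/.cache by default), and recent releases expose a cache_folder argument on the constructor, so pinning the model into the repo can likely be done in one line. A sketch of that alternative, not what this patch does:

    from sentence_transformers import SentenceTransformer

    # Sketch: let the library manage the local copy; 'model' mirrors MODEL_FOLDER above.
    model = SentenceTransformer('all-MiniLM-L6-v2', cache_folder='model')

The manual os.path.exists() check in the patch works too, and it makes the save location explicit.
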
From 4e2c4a26c0b87ecd2822dcb36ff1a1dc24d54029 Mon Sep 17 00:00:00 2001
From: A-Akhil
Date: Sun, 20 Oct 2024 16:35:37 +0530
Subject: [PATCH 3/5] Used for making dummy text for examples #1333

---
 NLP/dummysentence.py | 66 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 NLP/dummysentence.py

diff --git a/NLP/dummysentence.py b/NLP/dummysentence.py
new file mode 100644
index 000000000..b07d7b46b
--- /dev/null
+++ b/NLP/dummysentence.py
@@ -0,0 +1,66 @@
+import os
+from sentence_transformers import SentenceTransformer, util
+
+MODEL_NAME = 'all-MiniLM-L6-v2'
+MODEL_FOLDER = 'model'
+
+def load_file(file_path):
+    with open(file_path, 'r', encoding='utf-8') as file:
+        return [line.strip() for line in file if line.strip()]
+
+def load_or_download_model():
+    model_path = os.path.join(MODEL_FOLDER, MODEL_NAME)
+    if os.path.exists(model_path):
+        print(f"Loading model from {model_path}")
+        return SentenceTransformer(model_path)
+    else:
+        print(f"Downloading model {MODEL_NAME}")
+        model = SentenceTransformer(MODEL_NAME)
+        os.makedirs(MODEL_FOLDER, exist_ok=True)
+        model.save(model_path)
+        print(f"Model saved to {model_path}")
+        return model
+
+def find_similar_sentences(query, file_path, top_n=5):
+    # Load the pre-trained model
+    model = load_or_download_model()
+
+    # Load and encode the sentences from the file
+    sentences = load_file(file_path)
+    sentence_embeddings = model.encode(sentences)
+
+    # Encode the query
+    query_embedding = model.encode([query])
+
+    # Calculate cosine similarities
+    cosine_scores = util.pytorch_cos_sim(query_embedding, sentence_embeddings)[0]
+
+    # Get top N results
+    top_results = sorted(zip(sentences, cosine_scores), key=lambda x: x[1], reverse=True)[:top_n]
+
+    return top_results
+
+def main():
+    print("Welcome to the Sentence Similarity Search Tool!")
+
+    # Get user input for query
+    query = input("Enter your query: ")
+
+    # Get user input for file path
+    file_name = input("Enter the name of your text file (without .txt extension): ")
+    file_path = f"{file_name}.txt"
+
+    try:
+        results = find_similar_sentences(query, file_path)
+
+        print(f"\nTop 5 similar sentences for query: '{query}'\n")
+        for sentence, score in results:
+            print(f"Similarity: {score:.4f}")
+            print(f"Sentence: {sentence}\n")
+    except FileNotFoundError:
+        print(f"Error: The file '{file_path}' was not found. Please check the file name and try again.")
+    except Exception as e:
+        print(f"An error occurred: {str(e)}")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
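
Since both scripts expect a plain text file with one sentence per line, here is a minimal sketch for generating a dummy corpus to test against; the file name and sentences are invented for illustration.

    # Write a tiny one-sentence-per-line corpus for manual testing.
    sentences = [
        "The quick brown fox jumps over the lazy dog.",
        "Sentence embeddings map text to dense vectors.",
        "It rained all day in the mountains.",
        "A fast auburn fox leapt over a sleepy hound.",
    ]
    with open("dummy.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(sentences))
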
From 59748260db26e3f81f62b11311a5c18aa957903a Mon Sep 17 00:00:00 2001
From: A-Akhil
Date: Sun, 20 Oct 2024 16:38:09 +0530
Subject: [PATCH 4/5] Added some more similarity measurement methods #1333

---
 NLP/Sentence_Similarity.py | 58 +++++++++++++++++++++++++-------------
 1 file changed, 39 insertions(+), 19 deletions(-)

diff --git a/NLP/Sentence_Similarity.py b/NLP/Sentence_Similarity.py
index d3d536092..538b2eee0 100644
--- a/NLP/Sentence_Similarity.py
+++ b/NLP/Sentence_Similarity.py
@@ -1,4 +1,5 @@
 import os
+import numpy as np
 from sentence_transformers import SentenceTransformer, util
 
 MODEL_NAME = 'all-MiniLM-L6-v2'
@@ -21,23 +22,34 @@ def load_or_download_model():
         print(f"Model saved to {model_path}")
         return model
 
-def find_similar_sentences(query, file_path, top_n=5):
-    # Load the pre-trained model
-    model = load_or_download_model()
+def cosine_similarity(query_embedding, sentence_embeddings):
+    return util.pytorch_cos_sim(query_embedding, sentence_embeddings)[0]
 
-    # Load and encode the sentences from the file
-    sentences = load_file(file_path)
-    sentence_embeddings = model.encode(sentences)
+def euclidean_distance(query_embedding, sentence_embeddings):
+    return -np.linalg.norm(query_embedding - sentence_embeddings, axis=1)
 
-    # Encode the query
-    query_embedding = model.encode([query])
+def manhattan_distance(query_embedding, sentence_embeddings):
+    return -np.sum(np.abs(query_embedding - sentence_embeddings), axis=1)
 
-    # Calculate cosine similarities
-    cosine_scores = util.pytorch_cos_sim(query_embedding, sentence_embeddings)[0]
+def dot_product(query_embedding, sentence_embeddings):
+    return np.dot(sentence_embeddings, query_embedding.T).flatten()
 
-    # Get top N results
-    top_results = sorted(zip(sentences, cosine_scores), key=lambda x: x[1], reverse=True)[:top_n]
+similarity_functions = {
+    '1': ('Cosine Similarity', cosine_similarity),
+    '2': ('Euclidean Distance', euclidean_distance),
+    '3': ('Manhattan Distance', manhattan_distance),
+    '4': ('Dot Product', dot_product)
+}
+
+def find_similar_sentences(query, file_path, similarity_func, top_n=5):
+    model = load_or_download_model()
+    sentences = load_file(file_path)
+    sentence_embeddings = model.encode(sentences)
+    query_embedding = model.encode([query])
+
+    similarity_scores = similarity_func(query_embedding, sentence_embeddings)
+    top_results = sorted(zip(sentences, similarity_scores), key=lambda x: x[1], reverse=True)[:top_n]
 
     return top_results
 
 def validate_file_path(file_path):
@@ -48,12 +60,10 @@ def validate_file_path(file_path):
     return file_path
 
 def main():
-    print("Welcome to the Sentence Similarity Search Tool!")
+    print("Welcome to the Enhanced Sentence Similarity Search Tool!")
 
-    # Get user input for query
     query = input("Enter your query: ")
 
-    # Get user input for file path and validate it
     while True:
         file_path = input("Enter the path to your text file without extension: ")
         try:
@@ -61,13 +71,23 @@ def main():
             break
         except FileNotFoundError as e:
             print(f"Error: {str(e)} Please try again.")
+
+    print("\nChoose a similarity measurement method:")
+    for key, (name, _) in similarity_functions.items():
+        print(f"{key}. {name}")
+
+    while True:
+        choice = input("Enter the number of your choice: ")
+        if choice in similarity_functions:
+            similarity_name, similarity_func = similarity_functions[choice]
+            break
+        print("Invalid choice. Please try again.")
 
     try:
-        results = find_similar_sentences(query, file_path)
-
-        print(f"\nTop 5 similar sentences for query: '{query}'\n")
+        results = find_similar_sentences(query, file_path, similarity_func)
+        print(f"\nTop 5 similar sentences for query: '{query}' using {similarity_name}\n")
         for sentence, score in results:
-            print(f"Similarity: {score:.4f}")
+            print(f"Similarity Score: {score:.4f}")
             print(f"Sentence: {sentence}\n")
     except Exception as e:
         print(f"An error occurred: {str(e)}")
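
The convention in this patch is worth spelling out: the distance measures are negated so that a single descending sort ranks nearest-first for every method. A toy check with hand-picked 2-D vectors (not real model embeddings):

    import numpy as np

    query = np.array([[1.0, 0.0]])               # shape (1, d), like model.encode([query])
    sents = np.array([[0.9, 0.1], [0.0, 1.0]])   # two candidate sentence vectors

    euclid = -np.linalg.norm(query - sents, axis=1)   # [-0.1414..., -1.4142...]
    manhat = -np.sum(np.abs(query - sents), axis=1)   # [-0.2, -2.0]
    dot    = np.dot(sents, query.T).flatten()         # [ 0.9,  0.0]

    # The nearer first sentence scores highest under all three measures,
    # so sorted(..., reverse=True) ranks it first, same as cosine similarity.
    print(euclid, manhat, dot)
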
From 2dd3790824918ab2bac8f5e3ab627ebe622d974f Mon Sep 17 00:00:00 2001
From: A-Akhil
Date: Sun, 20 Oct 2024 18:06:24 +0530
Subject: [PATCH 5/5] Added many similarity search algorithms #1333

---
 NLP/multi_similarity_tool.py | 270 +++++++++++++++++++++++++++++++++++
 1 file changed, 270 insertions(+)
 create mode 100644 NLP/multi_similarity_tool.py

diff --git a/NLP/multi_similarity_tool.py b/NLP/multi_similarity_tool.py
new file mode 100644
index 000000000..4047b798b
--- /dev/null
+++ b/NLP/multi_similarity_tool.py
@@ -0,0 +1,270 @@
+import os
+import numpy as np
+from scipy.spatial.distance import euclidean, cityblock, minkowski, chebyshev, canberra, braycurtis, jensenshannon, hamming  # Add other distance functions as needed
+from sentence_transformers import SentenceTransformer, util  # Import SentenceTransformer
+from scipy.stats import pearsonr, spearmanr
+from scipy.special import kl_div
+from scipy.spatial.distance import jensenshannon
+
+
+MODEL_NAME = 'all-MiniLM-L6-v2'
+MODEL_FOLDER = 'model'
+
+def load_file(file_path):
+    with open(file_path, 'r', encoding='utf-8') as file:
+        return [line.strip() for line in file if line.strip()]
+
+def load_or_download_model():
+    model_path = os.path.join(MODEL_FOLDER, MODEL_NAME)
+    if os.path.exists(model_path):
+        print(f"Loading model from {model_path}")
+        return SentenceTransformer(model_path)
+    else:
+        print(f"Downloading model {MODEL_NAME}")
+        model = SentenceTransformer(MODEL_NAME)
+        os.makedirs(MODEL_FOLDER, exist_ok=True)
+        model.save(model_path)
+        print(f"Model saved to {model_path}")
+        return model
+
+def cosine_similarity(query_embedding, sentence_embeddings):
+    return util.pytorch_cos_sim(query_embedding, sentence_embeddings)[0]
+
+def euclidean_distance(query_embedding, sentence_embeddings):
+    return -np.array([euclidean(query_embedding, sent_emb) for sent_emb in sentence_embeddings])
+
+def manhattan_distance(query_embedding, sentence_embeddings):
+    return -np.array([cityblock(query_embedding, sent_emb) for sent_emb in sentence_embeddings])
+
+def dot_product(query_embedding, sentence_embeddings):
+    return np.dot(sentence_embeddings, query_embedding.T).flatten()
+
+def pearson_correlation(query_embedding, sentence_embeddings):
+    return np.array([pearsonr(query_embedding.flatten(), sent_emb.flatten())[0] for sent_emb in sentence_embeddings])
+
+def jaccard_similarity(query_embedding, sentence_embeddings):
+    # Simplified Jaccard similarity for continuous values
+    return np.array([np.sum(np.minimum(query_embedding, sent_emb)) / np.sum(np.maximum(query_embedding, sent_emb)) for sent_emb in sentence_embeddings])
+
+def hamming_distance(query_embedding, sentence_embeddings):
+    # Simplified Hamming distance for continuous values
+    return -np.array([np.sum(query_embedding != sent_emb) for sent_emb in sentence_embeddings])
+
+def minkowski_distance(query_embedding, sentence_embeddings, p=3):
+    return -np.array([minkowski(query_embedding, sent_emb, p) for sent_emb in sentence_embeddings])
+
+def chebyshev_distance(query_embedding, sentence_embeddings):
+    return -np.array([chebyshev(query_embedding, sent_emb) for sent_emb in sentence_embeddings])
+
+def canberra_distance(query_embedding, sentence_embeddings):
+    return -np.array([canberra(query_embedding, sent_emb) for sent_emb in sentence_embeddings])
+
+def bray_curtis_distance(query_embedding, sentence_embeddings):
+    return -np.array([braycurtis(query_embedding, sent_emb) for sent_emb in sentence_embeddings])
+
+def mahalanobis_distance(query_embedding, sentence_embeddings):
+    # Placeholder: Requires covariance matrix calculation
+    return -np.array([euclidean(query_embedding, sent_emb) for sent_emb in sentence_embeddings])
+
+def dice_similarity(query_embedding, sentence_embeddings):
+    return np.array([2 * np.sum(np.minimum(query_embedding, sent_emb)) / (np.sum(query_embedding) + np.sum(sent_emb)) for sent_emb in sentence_embeddings])
+
+def tanimoto_similarity(query_embedding, sentence_embeddings):
+    return np.array([np.sum(np.minimum(query_embedding, sent_emb)) / np.sum(np.maximum(query_embedding, sent_emb)) for sent_emb in sentence_embeddings])
+
+def spearman_correlation(query_embedding, sentence_embeddings):
+    return np.array([spearmanr(query_embedding.flatten(), sent_emb.flatten())[0] for sent_emb in sentence_embeddings])
+
+def wasserstein_distance(query_embedding, sentence_embeddings):
+    # Placeholder: Requires more complex implementation
+    return -np.array([np.sum(np.abs(np.sort(query_embedding) - np.sort(sent_emb))) for sent_emb in sentence_embeddings])
+
+def kl_divergence(query_embedding, sentence_embeddings):
+    return -np.array([np.sum(kl_div(query_embedding + 1e-10, sent_emb + 1e-10)) for sent_emb in sentence_embeddings])
+
+
+def haversine_distance(query_embedding, sentence_embeddings):
+    # Placeholder: Not applicable for high-dimensional embeddings
+    return euclidean_distance(query_embedding, sentence_embeddings)  # already negated by euclidean_distance
+
+def cosine_distance(query_embedding, sentence_embeddings):
+    return cosine_similarity(query_embedding, sentence_embeddings) - 1  # negated distance, so larger = more similar
+
+def sorensen_dice_coefficient(query_embedding, sentence_embeddings):
+    return dice_similarity(query_embedding, sentence_embeddings)
+
+def levenshtein_distance(query_embedding, sentence_embeddings):
+    # Placeholder: Not directly applicable to embeddings
+    return euclidean_distance(query_embedding, sentence_embeddings)  # already negated
+
+def jaro_winkler_distance(query_embedding, sentence_embeddings):
+    # Placeholder: Not directly applicable to embeddings
+    return euclidean_distance(query_embedding, sentence_embeddings)  # already negated
+
+def rogers_tanimoto_similarity(query_embedding, sentence_embeddings):
+    # Simplified for continuous values
+    return np.array([np.sum(np.minimum(query_embedding, sent_emb)) / np.sum(np.maximum(query_embedding, sent_emb)) for sent_emb in sentence_embeddings])
+
+def yule_similarity(query_embedding, sentence_embeddings):
+    # Placeholder: Not directly applicable to embeddings
+    return cosine_similarity(query_embedding, sentence_embeddings)
+
+def kulczynski_similarity(query_embedding, sentence_embeddings):
+    return np.array([np.sum(np.minimum(query_embedding, sent_emb)) / np.minimum(np.sum(query_embedding), np.sum(sent_emb)) for sent_emb in sentence_embeddings])
+
+def gower_distance(query_embedding, sentence_embeddings):
+    # Simplified Gower distance
+    return -np.array([np.mean(np.abs(query_embedding - sent_emb)) for sent_emb in sentence_embeddings])
+
+def russell_rao_similarity(query_embedding, sentence_embeddings):
+    # Simplified for continuous values
+    return np.array([np.sum(np.minimum(query_embedding, sent_emb)) / len(query_embedding) for sent_emb in sentence_embeddings])
+
+def ochiai_similarity(query_embedding, sentence_embeddings):
+    return np.array([np.sum(np.minimum(query_embedding, sent_emb)) / np.sqrt(np.sum(query_embedding) * np.sum(sent_emb)) for sent_emb in sentence_embeddings])
+
+def matching_coefficient(query_embedding, sentence_embeddings):
+    # Simplified for continuous values
+    return np.array([np.sum(query_embedding == sent_emb) / len(query_embedding) for sent_emb in sentence_embeddings])
+
+def tversky_index(query_embedding, sentence_embeddings, alpha=0.5, beta=0.5):
+    return np.array([np.sum(np.minimum(query_embedding, sent_emb)) / (np.sum(np.minimum(query_embedding, sent_emb)) + alpha * np.sum(np.maximum(0, query_embedding - sent_emb)) + beta * np.sum(np.maximum(0, sent_emb - query_embedding))) for sent_emb in sentence_embeddings])
+
+def sorensen_similarity(query_embedding, sentence_embeddings):
+    return dice_similarity(query_embedding, sentence_embeddings)
+
+def overlap_coefficient(query_embedding, sentence_embeddings):
+    return np.array([np.sum(np.minimum(query_embedding, sent_emb)) / np.minimum(np.sum(query_embedding), np.sum(sent_emb)) for sent_emb in sentence_embeddings])
+
+def edit_distance(query_embedding, sentence_embeddings):
+    # Placeholder: Not directly applicable to embeddings
+    return euclidean_distance(query_embedding, sentence_embeddings)  # already negated
+
+def sokal_michener_distance(query_embedding, sentence_embeddings):
+    # Simplified for continuous values; negated so larger = more similar
+    return -np.array([np.sum(np.abs(query_embedding - sent_emb)) / len(query_embedding) for sent_emb in sentence_embeddings])
+
+def tschebyshev_distance(query_embedding, sentence_embeddings):
+    return chebyshev_distance(query_embedding, sentence_embeddings)
+
+def dice_hamming_distance(query_embedding, sentence_embeddings):
+    dice = dice_similarity(query_embedding, sentence_embeddings)
+    hamming = hamming_distance(query_embedding, sentence_embeddings)
+    return (dice + hamming) / 2
+
+def improved_jensen_distance(query_embedding, sentence_embeddings, epsilon=1e-10):
+    # Add a small epsilon to avoid division by zero
+    query_embedding = query_embedding + epsilon
+    sentence_embeddings = sentence_embeddings + epsilon
+
+    # Normalize the query embedding
+    query_sum = np.sum(query_embedding)
+    query_embedding = query_embedding / query_sum
+
+    # Normalize each sentence embedding
+    sentence_embeddings_normalized = sentence_embeddings / np.sum(sentence_embeddings, axis=1, keepdims=True)
+
+    # Compute Jensen-Shannon distance for each sentence embedding
+    distances = np.array([jensenshannon(query_embedding, sent_emb) for sent_emb in sentence_embeddings_normalized])
+
+    # Replace any NaN or inf values with a large finite number
+    distances = np.nan_to_num(distances, nan=np.finfo(float).max, posinf=np.finfo(float).max)
+
+    return -distances  # negated so larger = more similar, matching the other measures
+
+def log_likelihood(query_embedding, sentence_embeddings):
+    # Placeholder: Requires probability distributions
+    return cosine_similarity(query_embedding, sentence_embeddings)
+
+similarity_functions = {
+    '1': ('Cosine Similarity', cosine_similarity),
+    '2': ('Euclidean Distance', euclidean_distance),
+    '3': ('Manhattan Distance', manhattan_distance),
+    '4': ('Dot Product', dot_product),
+    '5': ('Pearson Correlation', pearson_correlation),
+    '6': ('Jaccard Similarity', jaccard_similarity),
+    '7': ('Hamming Distance', hamming_distance),
+    '8': ('Minkowski Distance', minkowski_distance),
+    '9': ('Chebyshev Distance', chebyshev_distance),
+    '10': ('Canberra Distance', canberra_distance),
+    '11': ('Bray-Curtis Distance', bray_curtis_distance),
+    '12': ('Dice Similarity', dice_similarity),
+    '13': ('Tanimoto Similarity', tanimoto_similarity),
+    '14': ('Spearman Correlation', spearman_correlation),
+    '15': ('Wasserstein Distance', wasserstein_distance),
+    '16': ('KL Divergence', kl_divergence),
+    '17': ('Cosine Distance', cosine_distance),
+    '18': ('Sorensen-Dice Coefficient', sorensen_dice_coefficient),
+    '19': ('Levenshtein Distance', levenshtein_distance),
+    '20': ('Jaro-Winkler Distance', jaro_winkler_distance),
+    '21': ('Rogers-Tanimoto Similarity', rogers_tanimoto_similarity),
+    '22': ('Yule Similarity', yule_similarity),
+    '23': ('Kulczynski Similarity', kulczynski_similarity),
+    '24': ('Gower Distance', gower_distance),
+    '25': ('Russell-Rao Similarity', russell_rao_similarity),
+    '26': ('Matching Coefficient', matching_coefficient),
+    '27': ('Tversky Index', tversky_index),
+    '28': ('Sørensen Similarity', sorensen_similarity),
+    '29': ('Overlap Coefficient', overlap_coefficient),
+    '30': ('Edit Distance', edit_distance),
+    '31': ('Sokal-Michener Distance', sokal_michener_distance),
+    '32': ('Tschebyshev Distance', tschebyshev_distance),
+    '33': ('Dice-Hamming Distance', dice_hamming_distance),
+    '34': ('Jensen Distance', improved_jensen_distance),
+    '35': ('Log Likelihood', log_likelihood),
+}
+
+
+def find_similar_sentences(query, file_path, similarity_func, top_n=5):
+    model = load_or_download_model()
+    sentences = load_file(file_path)
+    sentence_embeddings = model.encode(sentences)
+    query_embedding = model.encode([query])[0]  # Flatten the query embedding
+
+    similarity_scores = similarity_func(query_embedding, sentence_embeddings)
+    top_results = sorted(zip(sentences, similarity_scores), key=lambda x: x[1], reverse=True)[:top_n]
+
+    return top_results
+
+def validate_file_path(file_path):
+    if not file_path.endswith('.txt'):
+        file_path += '.txt'
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"The file '{file_path}' does not exist.")
+    return file_path
+
+def main():
+    print("Welcome to the Comprehensive Sentence Similarity Search Tool!")
+
+    query = input("Enter your query: ")
+
+    while True:
+        file_path = input("Enter the path to your text file without extension: ")
+        try:
+            file_path = validate_file_path(file_path)
+            break
+        except FileNotFoundError as e:
+            print(f"Error: {str(e)} Please try again.")
+
+    print("\nChoose a similarity measurement method:")
+    for key, (name, _) in similarity_functions.items():
+        print(f"{key}. {name}")
+
+    while True:
+        choice = input("Enter the number of your choice: ")
+        if choice in similarity_functions:
+            similarity_name, similarity_func = similarity_functions[choice]
+            break
+        print("Invalid choice. Please try again.")
+
+    try:
+        results = find_similar_sentences(query, file_path, similarity_func)
+        print(f"\nTop 5 similar sentences for query: '{query}' using {similarity_name}\n")
+        for sentence, score in results:
+            print(f"Similarity Score: {score:.4f}")
+            print(f"Sentence: {sentence}\n")
+    except Exception as e:
+        print(f"An error occurred: {str(e)}")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
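
One identity that makes the Tversky index above easy to sanity-check: with this min/max formulation on non-negative vectors, alpha = beta = 0.5 reduces to Dice and alpha = beta = 1 reduces to Jaccard, because min(a, b) + max(0, a - b) = a elementwise. A small self-contained check; the vectors are invented, and note that real MiniLM embeddings can have negative components, where these set-style measures lose their interpretation:

    import numpy as np

    def tversky(a, b, alpha, beta):
        # Continuous Tversky index, same formulation as tversky_index() above
        inter = np.sum(np.minimum(a, b))
        return inter / (inter + alpha * np.sum(np.maximum(0, a - b))
                              + beta * np.sum(np.maximum(0, b - a)))

    a = np.array([0.2, 0.7, 0.1])
    b = np.array([0.5, 0.4, 0.1])

    dice    = 2 * np.sum(np.minimum(a, b)) / (np.sum(a) + np.sum(b))
    jaccard = np.sum(np.minimum(a, b)) / np.sum(np.maximum(a, b))

    assert np.isclose(tversky(a, b, 0.5, 0.5), dice)
    assert np.isclose(tversky(a, b, 1.0, 1.0), jaccard)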