From e05a0bd510661f381cb607394c8bcdab1c634133 Mon Sep 17 00:00:00 2001
From: A-Akhil
Date: Sun, 20 Oct 2024 16:29:27 +0530
Subject: [PATCH 1/5] Now it can filter sentences #1333

---
 NLP/Sentence_Similarity.py | 48 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 NLP/Sentence_Similarity.py

diff --git a/NLP/Sentence_Similarity.py b/NLP/Sentence_Similarity.py
new file mode 100644
index 000000000..686f8672a
--- /dev/null
+++ b/NLP/Sentence_Similarity.py
@@ -0,0 +1,48 @@
+from sentence_transformers import SentenceTransformer, util
+
+def load_file(file_path):
+    with open(file_path, 'r', encoding='utf-8') as file:
+        return [line.strip() for line in file if line.strip()]
+
+def find_similar_sentences(query, file_path, top_n=5):
+    # Load the pre-trained model
+    model = SentenceTransformer('all-MiniLM-L6-v2')
+
+    # Load and encode the sentences from the file
+    sentences = load_file(file_path)
+    sentence_embeddings = model.encode(sentences)
+
+    # Encode the query
+    query_embedding = model.encode([query])
+
+    # Calculate cosine similarities
+    cosine_scores = util.pytorch_cos_sim(query_embedding, sentence_embeddings)[0]
+
+    # Get top N results
+    top_results = sorted(zip(sentences, cosine_scores), key=lambda x: x[1], reverse=True)[:top_n]
+
+    return top_results
+
+def main():
+    print("Welcome to the Sentence Similarity Search Tool!")
+
+    # Get user input for query
+    query = input("Enter your query: ")
+
+    # Get user input for file path
+    file_path = input("Enter the path to your text file: ")
+
+    try:
+        results = find_similar_sentences(query, file_path)
+
+        print(f"\nTop 5 similar sentences for query: '{query}'\n")
+        for sentence, score in results:
+            print(f"Similarity: {score:.4f}")
+            print(f"Sentence: {sentence}\n")
+    except FileNotFoundError:
+        print(f"Error: The file '{file_path}' was not found. Please check the file path and try again.")
+    except Exception as e:
+        print(f"An error occurred: {str(e)}")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
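
A quick way to exercise this script without the interactive prompts, for anyone reviewing the patch: a minimal sketch, assuming the patch is applied as NLP/Sentence_Similarity.py and sentence-transformers is installed; the corpus file name and sentences below are made up for illustration.

    # Hypothetical smoke test for patch 1; run from the NLP/ directory.
    from Sentence_Similarity import find_similar_sentences

    corpus = [
        "The cat sat on the mat.",
        "Stocks fell sharply on Friday.",
        "A kitten dozed on the rug.",
    ]
    with open("sentences.txt", "w", encoding="utf-8") as f:  # made-up test file
        f.write("\n".join(corpus))

    for sentence, score in find_similar_sentences("a cat on a rug", "sentences.txt", top_n=2):
        print(f"{score:.4f}  {sentence}")
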
From 15ee93732b422c3fd562ef8bb18218d3da946f54 Mon Sep 17 00:00:00 2001
From: A-Akhil
Date: Sun, 20 Oct 2024 16:33:12 +0530
Subject: [PATCH 2/5] Now it saves the model in a local directory #1333

---
 NLP/Sentence_Similarity.py | 38 +++++++++++++++++++++++++++++++++-----
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/NLP/Sentence_Similarity.py b/NLP/Sentence_Similarity.py
index 686f8672a..d3d536092 100644
--- a/NLP/Sentence_Similarity.py
+++ b/NLP/Sentence_Similarity.py
@@ -1,12 +1,29 @@
+import os
 from sentence_transformers import SentenceTransformer, util
 
+MODEL_NAME = 'all-MiniLM-L6-v2'
+MODEL_FOLDER = 'model'
+
 def load_file(file_path):
     with open(file_path, 'r', encoding='utf-8') as file:
         return [line.strip() for line in file if line.strip()]
 
+def load_or_download_model():
+    model_path = os.path.join(MODEL_FOLDER, MODEL_NAME)
+    if os.path.exists(model_path):
+        print(f"Loading model from {model_path}")
+        return SentenceTransformer(model_path)
+    else:
+        print(f"Downloading model {MODEL_NAME}")
+        model = SentenceTransformer(MODEL_NAME)
+        os.makedirs(MODEL_FOLDER, exist_ok=True)
+        model.save(model_path)
+        print(f"Model saved to {model_path}")
+        return model
+
 def find_similar_sentences(query, file_path, top_n=5):
     # Load the pre-trained model
-    model = SentenceTransformer('all-MiniLM-L6-v2')
+    model = load_or_download_model()
 
     # Load and encode the sentences from the file
     sentences = load_file(file_path)
@@ -23,14 +40,27 @@ def find_similar_sentences(query, file_path, top_n=5):
 
     return top_results
 
+def validate_file_path(file_path):
+    if not file_path.endswith('.txt'):
+        file_path += '.txt'
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"The file '{file_path}' does not exist.")
+    return file_path
+
 def main():
     print("Welcome to the Sentence Similarity Search Tool!")
 
     # Get user input for query
     query = input("Enter your query: ")
 
-    # Get user input for file path
-    file_path = input("Enter the path to your text file: ")
+    # Get user input for file path and validate it
+    while True:
+        file_path = input("Enter the path to your text file without extension: ")
+        try:
+            file_path = validate_file_path(file_path)
+            break
+        except FileNotFoundError as e:
+            print(f"Error: {str(e)} Please try again.")
 
     try:
         results = find_similar_sentences(query, file_path)
@@ -39,8 +69,6 @@ def main():
         print(f"\nTop 5 similar sentences for query: '{query}'\n")
         for sentence, score in results:
             print(f"Similarity: {score:.4f}")
             print(f"Sentence: {sentence}\n")
-    except FileNotFoundError:
-        print(f"Error: The file '{file_path}' was not found. Please check the file path and try again.")
     except Exception as e:
         print(f"An error occurred: {str(e)}")
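
A note on the caching added in this patch: sentence-transformers already caches downloads itself (under ~/.cache by default), and recent releases expose a cache_folder argument on the constructor, so pinning the model into the repo can likely be done in one line. A sketch of that alternative, not what this patch does:

    from sentence_transformers import SentenceTransformer

    # Sketch: let the library manage the local copy; 'model' mirrors MODEL_FOLDER above.
    model = SentenceTransformer('all-MiniLM-L6-v2', cache_folder='model')

The manual os.path.exists() check in the patch works too, and it makes the save location explicit.
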
From 4e2c4a26c0b87ecd2822dcb36ff1a1dc24d54029 Mon Sep 17 00:00:00 2001
From: A-Akhil
Date: Sun, 20 Oct 2024 16:35:37 +0530
Subject: [PATCH 3/5] Used for making dummy text for examples #1333

---
 NLP/dummysentence.py | 66 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 NLP/dummysentence.py

diff --git a/NLP/dummysentence.py b/NLP/dummysentence.py
new file mode 100644
index 000000000..b07d7b46b
--- /dev/null
+++ b/NLP/dummysentence.py
@@ -0,0 +1,66 @@
+import os
+from sentence_transformers import SentenceTransformer, util
+
+MODEL_NAME = 'all-MiniLM-L6-v2'
+MODEL_FOLDER = 'model'
+
+def load_file(file_path):
+    with open(file_path, 'r', encoding='utf-8') as file:
+        return [line.strip() for line in file if line.strip()]
+
+def load_or_download_model():
+    model_path = os.path.join(MODEL_FOLDER, MODEL_NAME)
+    if os.path.exists(model_path):
+        print(f"Loading model from {model_path}")
+        return SentenceTransformer(model_path)
+    else:
+        print(f"Downloading model {MODEL_NAME}")
+        model = SentenceTransformer(MODEL_NAME)
+        os.makedirs(MODEL_FOLDER, exist_ok=True)
+        model.save(model_path)
+        print(f"Model saved to {model_path}")
+        return model
+
+def find_similar_sentences(query, file_path, top_n=5):
+    # Load the pre-trained model
+    model = load_or_download_model()
+
+    # Load and encode the sentences from the file
+    sentences = load_file(file_path)
+    sentence_embeddings = model.encode(sentences)
+
+    # Encode the query
+    query_embedding = model.encode([query])
+
+    # Calculate cosine similarities
+    cosine_scores = util.pytorch_cos_sim(query_embedding, sentence_embeddings)[0]
+
+    # Get top N results
+    top_results = sorted(zip(sentences, cosine_scores), key=lambda x: x[1], reverse=True)[:top_n]
+
+    return top_results
+
+def main():
+    print("Welcome to the Sentence Similarity Search Tool!")
+
+    # Get user input for query
+    query = input("Enter your query: ")
+
+    # Get user input for file path
+    file_name = input("Enter the name of your text file (without .txt extension): ")
+    file_path = f"{file_name}.txt"
+
+    try:
+        results = find_similar_sentences(query, file_path)
+
+        print(f"\nTop 5 similar sentences for query: '{query}'\n")
+        for sentence, score in results:
+            print(f"Similarity: {score:.4f}")
+            print(f"Sentence: {sentence}\n")
+    except FileNotFoundError:
+        print(f"Error: The file '{file_path}' was not found. Please check the file name and try again.")
+    except Exception as e:
+        print(f"An error occurred: {str(e)}")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
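
Since both scripts expect a plain text file with one sentence per line, here is a minimal sketch for generating a dummy corpus to test against; the file name and sentences are invented for illustration.

    # Write a tiny one-sentence-per-line corpus for manual testing.
    sentences = [
        "The quick brown fox jumps over the lazy dog.",
        "Sentence embeddings map text to dense vectors.",
        "It rained all day in the mountains.",
        "A fast auburn fox leapt over a sleepy hound.",
    ]
    with open("dummy.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(sentences))
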
From 59748260db26e3f81f62b11311a5c18aa957903a Mon Sep 17 00:00:00 2001
From: A-Akhil
Date: Sun, 20 Oct 2024 16:38:09 +0530
Subject: [PATCH 4/5] Added some more similarity measurement methods #1333

---
 NLP/Sentence_Similarity.py | 58 +++++++++++++++++++++++++-------------
 1 file changed, 39 insertions(+), 19 deletions(-)

diff --git a/NLP/Sentence_Similarity.py b/NLP/Sentence_Similarity.py
index d3d536092..538b2eee0 100644
--- a/NLP/Sentence_Similarity.py
+++ b/NLP/Sentence_Similarity.py
@@ -1,4 +1,5 @@
 import os
+import numpy as np
 from sentence_transformers import SentenceTransformer, util
 
 MODEL_NAME = 'all-MiniLM-L6-v2'
@@ -21,23 +22,34 @@ def load_or_download_model():
         print(f"Model saved to {model_path}")
         return model
 
-def find_similar_sentences(query, file_path, top_n=5):
-    # Load the pre-trained model
-    model = load_or_download_model()
+def cosine_similarity(query_embedding, sentence_embeddings):
+    return util.pytorch_cos_sim(query_embedding, sentence_embeddings)[0]
 
-    # Load and encode the sentences from the file
-    sentences = load_file(file_path)
-    sentence_embeddings = model.encode(sentences)
+def euclidean_distance(query_embedding, sentence_embeddings):
+    return -np.linalg.norm(query_embedding - sentence_embeddings, axis=1)
 
-    # Encode the query
-    query_embedding = model.encode([query])
+def manhattan_distance(query_embedding, sentence_embeddings):
+    return -np.sum(np.abs(query_embedding - sentence_embeddings), axis=1)
 
-    # Calculate cosine similarities
-    cosine_scores = util.pytorch_cos_sim(query_embedding, sentence_embeddings)[0]
+def dot_product(query_embedding, sentence_embeddings):
+    return np.dot(sentence_embeddings, query_embedding.T).flatten()
 
-    # Get top N results
-    top_results = sorted(zip(sentences, cosine_scores), key=lambda x: x[1], reverse=True)[:top_n]
+similarity_functions = {
+    '1': ('Cosine Similarity', cosine_similarity),
+    '2': ('Euclidean Distance', euclidean_distance),
+    '3': ('Manhattan Distance', manhattan_distance),
+    '4': ('Dot Product', dot_product)
+}
+
+def find_similar_sentences(query, file_path, similarity_func, top_n=5):
+    model = load_or_download_model()
+    sentences = load_file(file_path)
+    sentence_embeddings = model.encode(sentences)
+    query_embedding = model.encode([query])
+
+    similarity_scores = similarity_func(query_embedding, sentence_embeddings)
+    top_results = sorted(zip(sentences, similarity_scores), key=lambda x: x[1], reverse=True)[:top_n]
 
     return top_results
 
 def validate_file_path(file_path):
@@ -48,12 +60,10 @@ def validate_file_path(file_path):
     return file_path
 
 def main():
-    print("Welcome to the Sentence Similarity Search Tool!")
+    print("Welcome to the Enhanced Sentence Similarity Search Tool!")
 
-    # Get user input for query
     query = input("Enter your query: ")
 
-    # Get user input for file path and validate it
     while True:
         file_path = input("Enter the path to your text file without extension: ")
         try:
@@ -61,13 +71,23 @@ def main():
             break
         except FileNotFoundError as e:
             print(f"Error: {str(e)} Please try again.")
+
+    print("\nChoose a similarity measurement method:")
+    for key, (name, _) in similarity_functions.items():
+        print(f"{key}. {name}")
+
+    while True:
+        choice = input("Enter the number of your choice: ")
+        if choice in similarity_functions:
+            similarity_name, similarity_func = similarity_functions[choice]
+            break
+        print("Invalid choice. Please try again.")
 
     try:
-        results = find_similar_sentences(query, file_path)
-
-        print(f"\nTop 5 similar sentences for query: '{query}'\n")
+        results = find_similar_sentences(query, file_path, similarity_func)
+        print(f"\nTop 5 similar sentences for query: '{query}' using {similarity_name}\n")
         for sentence, score in results:
-            print(f"Similarity: {score:.4f}")
+            print(f"Similarity Score: {score:.4f}")
             print(f"Sentence: {sentence}\n")
     except Exception as e:
         print(f"An error occurred: {str(e)}")
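
The convention in this patch is worth spelling out: the distance measures are negated so that a single descending sort ranks nearest-first for every method. A toy check with hand-picked 2-D vectors (not real model embeddings):

    import numpy as np

    query = np.array([[1.0, 0.0]])               # shape (1, d), like model.encode([query])
    sents = np.array([[0.9, 0.1], [0.0, 1.0]])   # two candidate sentence vectors

    euclid = -np.linalg.norm(query - sents, axis=1)   # [-0.1414..., -1.4142...]
    manhat = -np.sum(np.abs(query - sents), axis=1)   # [-0.2, -2.0]
    dot    = np.dot(sents, query.T).flatten()         # [ 0.9,  0.0]

    # The nearer first sentence scores highest under all three measures,
    # so sorted(..., reverse=True) ranks it first, same as cosine similarity.
    print(euclid, manhat, dot)
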
From 2dd3790824918ab2bac8f5e3ab627ebe622d974f Mon Sep 17 00:00:00 2001
From: A-Akhil
Date: Sun, 20 Oct 2024 18:06:24 +0530
Subject: [PATCH 5/5] Added many similarity search algorithms #1333

---
 NLP/multi_similarity_tool.py | 270 +++++++++++++++++++++++++++++++++++
 1 file changed, 270 insertions(+)
 create mode 100644 NLP/multi_similarity_tool.py

diff --git a/NLP/multi_similarity_tool.py b/NLP/multi_similarity_tool.py
new file mode 100644
index 000000000..4047b798b
--- /dev/null
+++ b/NLP/multi_similarity_tool.py
@@ -0,0 +1,270 @@
+import os
+import numpy as np
+from scipy.spatial.distance import euclidean, cityblock, minkowski, chebyshev, canberra, braycurtis, jensenshannon, hamming  # Add other distance functions as needed
+from sentence_transformers import SentenceTransformer, util  # Import SentenceTransformer
+from scipy.stats import pearsonr, spearmanr
+from scipy.special import kl_div
+from scipy.spatial.distance import jensenshannon
+
+
+MODEL_NAME = 'all-MiniLM-L6-v2'
+MODEL_FOLDER = 'model'
+
+def load_file(file_path):
+    with open(file_path, 'r', encoding='utf-8') as file:
+        return [line.strip() for line in file if line.strip()]
+
+def load_or_download_model():
+    model_path = os.path.join(MODEL_FOLDER, MODEL_NAME)
+    if os.path.exists(model_path):
+        print(f"Loading model from {model_path}")
+        return SentenceTransformer(model_path)
+    else:
+        print(f"Downloading model {MODEL_NAME}")
+        model = SentenceTransformer(MODEL_NAME)
+        os.makedirs(MODEL_FOLDER, exist_ok=True)
+        model.save(model_path)
+        print(f"Model saved to {model_path}")
+        return model
+
+def cosine_similarity(query_embedding, sentence_embeddings):
+    return util.pytorch_cos_sim(query_embedding, sentence_embeddings)[0]
+
+def euclidean_distance(query_embedding, sentence_embeddings):
+    return -np.array([euclidean(query_embedding, sent_emb) for sent_emb in sentence_embeddings])
+
+def manhattan_distance(query_embedding, sentence_embeddings):
+    return -np.array([cityblock(query_embedding, sent_emb) for sent_emb in sentence_embeddings])
+
+def dot_product(query_embedding, sentence_embeddings):
+    return np.dot(sentence_embeddings, query_embedding.T).flatten()
+
+def pearson_correlation(query_embedding, sentence_embeddings):
+    return np.array([pearsonr(query_embedding.flatten(), sent_emb.flatten())[0] for sent_emb in sentence_embeddings])
+
+def jaccard_similarity(query_embedding, sentence_embeddings):
+    # Simplified Jaccard similarity for continuous values
+    return np.array([np.sum(np.minimum(query_embedding, sent_emb)) / np.sum(np.maximum(query_embedding, sent_emb)) for sent_emb in sentence_embeddings])
+
+def hamming_distance(query_embedding, sentence_embeddings):
+    # Simplified Hamming distance for continuous values
+    return -np.array([np.sum(query_embedding != sent_emb) for sent_emb in sentence_embeddings])
+
+def minkowski_distance(query_embedding, sentence_embeddings, p=3):
+    return -np.array([minkowski(query_embedding, sent_emb, p) for sent_emb in sentence_embeddings])
+
+def chebyshev_distance(query_embedding, sentence_embeddings):
+    return -np.array([chebyshev(query_embedding, sent_emb) for sent_emb in sentence_embeddings])
+
+def canberra_distance(query_embedding, sentence_embeddings):
+    return -np.array([canberra(query_embedding, sent_emb) for sent_emb in sentence_embeddings])
+
+def bray_curtis_distance(query_embedding, sentence_embeddings):
+    return -np.array([braycurtis(query_embedding, sent_emb) for sent_emb in sentence_embeddings])
+
+def mahalanobis_distance(query_embedding, sentence_embeddings):
+    # Placeholder: Requires covariance matrix calculation
+    return -np.array([euclidean(query_embedding, sent_emb) for sent_emb in sentence_embeddings])
+
+def dice_similarity(query_embedding, sentence_embeddings):
+    return np.array([2 * np.sum(np.minimum(query_embedding, sent_emb)) / (np.sum(query_embedding) + np.sum(sent_emb)) for sent_emb in sentence_embeddings])
+
+def tanimoto_similarity(query_embedding, sentence_embeddings):
+    return np.array([np.sum(np.minimum(query_embedding, sent_emb)) / np.sum(np.maximum(query_embedding, sent_emb)) for sent_emb in sentence_embeddings])
+
+def spearman_correlation(query_embedding, sentence_embeddings):
+    return np.array([spearmanr(query_embedding.flatten(), sent_emb.flatten())[0] for sent_emb in sentence_embeddings])
+
+def wasserstein_distance(query_embedding, sentence_embeddings):
+    # Placeholder: Requires more complex implementation
+    return -np.array([np.sum(np.abs(np.sort(query_embedding) - np.sort(sent_emb))) for sent_emb in sentence_embeddings])
+
+def kl_divergence(query_embedding, sentence_embeddings):
+    return -np.array([np.sum(kl_div(query_embedding + 1e-10, sent_emb + 1e-10)) for sent_emb in sentence_embeddings])
+
+
+def haversine_distance(query_embedding, sentence_embeddings):
+    # Placeholder: Not applicable for high-dimensional embeddings
+    return euclidean_distance(query_embedding, sentence_embeddings)  # already negated by euclidean_distance
+
+def cosine_distance(query_embedding, sentence_embeddings):
+    return cosine_similarity(query_embedding, sentence_embeddings) - 1  # negated distance, so larger = more similar
+
+def sorensen_dice_coefficient(query_embedding, sentence_embeddings):
+    return dice_similarity(query_embedding, sentence_embeddings)
+
+def levenshtein_distance(query_embedding, sentence_embeddings):
+    # Placeholder: Not directly applicable to embeddings
+    return euclidean_distance(query_embedding, sentence_embeddings)  # already negated
+
+def jaro_winkler_distance(query_embedding, sentence_embeddings):
+    # Placeholder: Not directly applicable to embeddings
+    return euclidean_distance(query_embedding, sentence_embeddings)  # already negated
+
+def rogers_tanimoto_similarity(query_embedding, sentence_embeddings):
+    # Simplified for continuous values
+    return np.array([np.sum(np.minimum(query_embedding, sent_emb)) / np.sum(np.maximum(query_embedding, sent_emb)) for sent_emb in sentence_embeddings])
+
+def yule_similarity(query_embedding, sentence_embeddings):
+    # Placeholder: Not directly applicable to embeddings
+    return cosine_similarity(query_embedding, sentence_embeddings)
+
+def kulczynski_similarity(query_embedding, sentence_embeddings):
+    return np.array([np.sum(np.minimum(query_embedding, sent_emb)) / np.minimum(np.sum(query_embedding), np.sum(sent_emb)) for sent_emb in sentence_embeddings])
+
+def gower_distance(query_embedding, sentence_embeddings):
+    # Simplified Gower distance
+    return -np.array([np.mean(np.abs(query_embedding - sent_emb)) for sent_emb in sentence_embeddings])
+
+def russell_rao_similarity(query_embedding, sentence_embeddings):
+    # Simplified for continuous values
+    return np.array([np.sum(np.minimum(query_embedding, sent_emb)) / len(query_embedding) for sent_emb in sentence_embeddings])
+
+def ochiai_similarity(query_embedding, sentence_embeddings):
+    return np.array([np.sum(np.minimum(query_embedding, sent_emb)) / np.sqrt(np.sum(query_embedding) * np.sum(sent_emb)) for sent_emb in sentence_embeddings])
+
+def matching_coefficient(query_embedding, sentence_embeddings):
+    # Simplified for continuous values
+    return np.array([np.sum(query_embedding == sent_emb) / len(query_embedding) for sent_emb in sentence_embeddings])
+
+def tversky_index(query_embedding, sentence_embeddings, alpha=0.5, beta=0.5):
+    return np.array([np.sum(np.minimum(query_embedding, sent_emb)) / (np.sum(np.minimum(query_embedding, sent_emb)) + alpha * np.sum(np.maximum(0, query_embedding - sent_emb)) + beta * np.sum(np.maximum(0, sent_emb - query_embedding))) for sent_emb in sentence_embeddings])
+
+def sorensen_similarity(query_embedding, sentence_embeddings):
+    return dice_similarity(query_embedding, sentence_embeddings)
+
+def overlap_coefficient(query_embedding, sentence_embeddings):
+    return np.array([np.sum(np.minimum(query_embedding, sent_emb)) / np.minimum(np.sum(query_embedding), np.sum(sent_emb)) for sent_emb in sentence_embeddings])
+
+def edit_distance(query_embedding, sentence_embeddings):
+    # Placeholder: Not directly applicable to embeddings
+    return euclidean_distance(query_embedding, sentence_embeddings)  # already negated
+
+def sokal_michener_distance(query_embedding, sentence_embeddings):
+    # Simplified for continuous values; negated so larger = more similar
+    return -np.array([np.sum(np.abs(query_embedding - sent_emb)) / len(query_embedding) for sent_emb in sentence_embeddings])
+
+def tschebyshev_distance(query_embedding, sentence_embeddings):
+    return chebyshev_distance(query_embedding, sentence_embeddings)
+
+def dice_hamming_distance(query_embedding, sentence_embeddings):
+    dice = dice_similarity(query_embedding, sentence_embeddings)
+    hamming = hamming_distance(query_embedding, sentence_embeddings)
+    return (dice + hamming) / 2
+
+def improved_jensen_distance(query_embedding, sentence_embeddings, epsilon=1e-10):
+    # Add a small epsilon to avoid division by zero
+    query_embedding = query_embedding + epsilon
+    sentence_embeddings = sentence_embeddings + epsilon
+
+    # Normalize the query embedding
+    query_sum = np.sum(query_embedding)
+    query_embedding = query_embedding / query_sum
+
+    # Normalize each sentence embedding
+    sentence_embeddings_normalized = sentence_embeddings / np.sum(sentence_embeddings, axis=1, keepdims=True)
+
+    # Compute Jensen-Shannon distance for each sentence embedding
+    distances = np.array([jensenshannon(query_embedding, sent_emb) for sent_emb in sentence_embeddings_normalized])
+
+    # Replace any NaN or inf values with a large finite number
+    distances = np.nan_to_num(distances, nan=np.finfo(float).max, posinf=np.finfo(float).max)
+
+    return -distances  # negated so larger = more similar, matching the other measures
+
+def log_likelihood(query_embedding, sentence_embeddings):
+    # Placeholder: Requires probability distributions
+    return cosine_similarity(query_embedding, sentence_embeddings)
+
+similarity_functions = {
+    '1': ('Cosine Similarity', cosine_similarity),
+    '2': ('Euclidean Distance', euclidean_distance),
+    '3': ('Manhattan Distance', manhattan_distance),
+    '4': ('Dot Product', dot_product),
+    '5': ('Pearson Correlation', pearson_correlation),
+    '6': ('Jaccard Similarity', jaccard_similarity),
+    '7': ('Hamming Distance', hamming_distance),
+    '8': ('Minkowski Distance', minkowski_distance),
+    '9': ('Chebyshev Distance', chebyshev_distance),
+    '10': ('Canberra Distance', canberra_distance),
+    '11': ('Bray-Curtis Distance', bray_curtis_distance),
+    '12': ('Dice Similarity', dice_similarity),
+    '13': ('Tanimoto Similarity', tanimoto_similarity),
+    '14': ('Spearman Correlation', spearman_correlation),
+    '15': ('Wasserstein Distance', wasserstein_distance),
+    '16': ('KL Divergence', kl_divergence),
+    '17': ('Cosine Distance', cosine_distance),
+    '18': ('Sorensen-Dice Coefficient', sorensen_dice_coefficient),
+    '19': ('Levenshtein Distance', levenshtein_distance),
+    '20': ('Jaro-Winkler Distance', jaro_winkler_distance),
+    '21': ('Rogers-Tanimoto Similarity', rogers_tanimoto_similarity),
+    '22': ('Yule Similarity', yule_similarity),
+    '23': ('Kulczynski Similarity', kulczynski_similarity),
+    '24': ('Gower Distance', gower_distance),
+    '25': ('Russell-Rao Similarity', russell_rao_similarity),
+    '26': ('Matching Coefficient', matching_coefficient),
+    '27': ('Tversky Index', tversky_index),
+    '28': ('Sørensen Similarity', sorensen_similarity),
+    '29': ('Overlap Coefficient', overlap_coefficient),
+    '30': ('Edit Distance', edit_distance),
+    '31': ('Sokal-Michener Distance', sokal_michener_distance),
+    '32': ('Tschebyshev Distance', tschebyshev_distance),
+    '33': ('Dice-Hamming Distance', dice_hamming_distance),
+    '34': ('Jensen Distance', improved_jensen_distance),
+    '35': ('Log Likelihood', log_likelihood),
+}
+
+
+def find_similar_sentences(query, file_path, similarity_func, top_n=5):
+    model = load_or_download_model()
+    sentences = load_file(file_path)
+    sentence_embeddings = model.encode(sentences)
+    query_embedding = model.encode([query])[0]  # Flatten the query embedding
+
+    similarity_scores = similarity_func(query_embedding, sentence_embeddings)
+    top_results = sorted(zip(sentences, similarity_scores), key=lambda x: x[1], reverse=True)[:top_n]
+
+    return top_results
+
+def validate_file_path(file_path):
+    if not file_path.endswith('.txt'):
+        file_path += '.txt'
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"The file '{file_path}' does not exist.")
+    return file_path
+
+def main():
+    print("Welcome to the Comprehensive Sentence Similarity Search Tool!")
+
+    query = input("Enter your query: ")
+
+    while True:
+        file_path = input("Enter the path to your text file without extension: ")
+        try:
+            file_path = validate_file_path(file_path)
+            break
+        except FileNotFoundError as e:
+            print(f"Error: {str(e)} Please try again.")
+
+    print("\nChoose a similarity measurement method:")
+    for key, (name, _) in similarity_functions.items():
+        print(f"{key}. {name}")
+
+    while True:
+        choice = input("Enter the number of your choice: ")
+        if choice in similarity_functions:
+            similarity_name, similarity_func = similarity_functions[choice]
+            break
+        print("Invalid choice. Please try again.")
+
+    try:
+        results = find_similar_sentences(query, file_path, similarity_func)
+        print(f"\nTop 5 similar sentences for query: '{query}' using {similarity_name}\n")
+        for sentence, score in results:
+            print(f"Similarity Score: {score:.4f}")
+            print(f"Sentence: {sentence}\n")
+    except Exception as e:
+        print(f"An error occurred: {str(e)}")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
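
One identity that makes the Tversky index above easy to sanity-check: with this min/max formulation on non-negative vectors, alpha = beta = 0.5 reduces to Dice and alpha = beta = 1 reduces to Jaccard, because min(a, b) + max(0, a - b) = a elementwise. A small self-contained check; the vectors are invented, and note that real MiniLM embeddings can have negative components, where these set-style measures lose their interpretation:

    import numpy as np

    def tversky(a, b, alpha, beta):
        # Continuous Tversky index, same formulation as tversky_index() above
        inter = np.sum(np.minimum(a, b))
        return inter / (inter + alpha * np.sum(np.maximum(0, a - b))
                              + beta * np.sum(np.maximum(0, b - a)))

    a = np.array([0.2, 0.7, 0.1])
    b = np.array([0.5, 0.4, 0.1])

    dice    = 2 * np.sum(np.minimum(a, b)) / (np.sum(a) + np.sum(b))
    jaccard = np.sum(np.minimum(a, b)) / np.sum(np.maximum(a, b))

    assert np.isclose(tversky(a, b, 0.5, 0.5), dice)
    assert np.isclose(tversky(a, b, 1.0, 1.0), jaccard)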