diff --git a/.gitignore b/.gitignore index b3d2c52..58e3177 100644 --- a/.gitignore +++ b/.gitignore @@ -48,4 +48,8 @@ var/ # pytest *pytest_cache +# Credentials key_openai.txt + +# Models saved locally +models/ diff --git a/README.md b/README.md index 2d4053b..e46fde3 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,50 @@ To run REMO, you will need the following: 2. Interact with the API using a REST client or web browser: `http://localhost:8000` +## Models + +### Embedding Model + +REMO currently uses the +Universal Sentence Encoder v5 for generating embeddings. + +#### Loading from TensorFlow Hub + +This is the default option. + +When + +```python +ARE_YOU_TESTING__LOAD_MODEL_LOCAL = False +``` + +in file `utils.py`, the model is loaded from TensorFlow Hub. + +#### Loading from a local file + +Downloading the model from TensorFlow Hub every time you need to spin up +the microservice would be expensive and time-consuming. + +1. Download the `.tar.gz` file from + TensorFlow Hub: https://tfhub.dev/google/universal-sentence-encoder-large/5 + + ![img.png](docs/images/embedding_local_1.png) + +2. Extract the file to the folder + ``` + models/universal-sentence-encoder-large_5/ + ``` + with + ```shell + tar -xvzf universal-sentence-encoder-large_5.tar.gz + ``` + +3. Set + ```python + ARE_YOU_TESTING__LOAD_MODEL_LOCAL = True + ``` + in file `utils.py`. + ## API Endpoints - **POST /add_message**: Add a new message to REMO. Speaker, timestamp, and content required. 
"""REMO (Rolling Episodic Memory Organizer) FastAPI microservice.

Exposes endpoints to add chat messages, search the memory tree, and
trigger full or incremental tree maintenance.  All storage, embedding,
and summarization work is delegated to `utils`.
"""
from fastapi import FastAPI
import utils
import os
import uvicorn

app = FastAPI()

# Messages and summaries are stored relative to the working directory.
root_folder = os.getcwd()
# root_folder = 'C:/raven_private/REMO/'
max_cluster_size = 5

# REMO = Rolling Episodic Memory Organizer


# NOTE: handlers are deliberately plain `def` (not `async def`): the
# utils calls do blocking file/model/network I/O, and FastAPI runs sync
# handlers in a threadpool instead of blocking the event loop.
@app.post("/add_message")
def add_message(
    message: str,
    speaker: str,
    timestamp: float,
):
    """Persist a single chat message as a raw L1 log entry."""
    new_message = utils.create_message(message, speaker, timestamp)
    print("\n\nADD MESSAGE -", new_message)
    utils.save_message(root_folder, new_message)
    return {"detail": "Message added"}


@app.get("/search")
def search(query: str):
    """Search the memory tree; returns the taxonomy of relevant nodes."""
    print("\n\nSEARCH -", query)
    taxonomy = utils.search_tree(root_folder, query)
    return {"results": taxonomy}


@app.post("/rebuild_tree")
def rebuild_tree():
    """Rebuild the whole summary tree from the raw logs."""
    print("\n\nREBUILD TREE")
    utils.rebuild_tree(root_folder, max_cluster_size)
    return {"detail": "Tree rebuilding completed"}


@app.post("/maintain_tree")
def maintain_tree():
    """Incrementally integrate new messages into the existing tree."""
    print("\n\nMAINTAIN TREE")
    utils.maintain_tree(root_folder)
    return {"detail": "Tree maintenance completed"}


if __name__ == '__main__':
    uvicorn.run(app, host='0.0.0.0', port=8000)
"""Storage, embedding, and summarization helpers for REMO."""
import os
from pathlib import Path

import numpy as np
import yaml
import shutil
import openai
from time import time, sleep
from typing import Dict, Any, List

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans

import tensorflow as tf
import tensorflow_hub as hub

# Toggle: load the Universal Sentence Encoder from a local directory
# (models/universal-sentence-encoder-large_5) instead of TF Hub.
ARE_YOU_TESTING__LOAD_MODEL_LOCAL = True

# BUGFIX: `Path().parent.absolute()` is a no-op on "." and merely yields
# the current working directory, so the local model was only found when
# the service happened to be started from the repo root.  Derive the
# repo root from this file's location instead (utils.py lives one level
# below the repo root, in remo_memory/).
ROOT_REPO_PATH = Path(__file__).resolve().parent.parent

if ARE_YOU_TESTING__LOAD_MODEL_LOCAL:
    embedding_model = tf.saved_model.load(
        str(ROOT_REPO_PATH / "models/universal-sentence-encoder-large_5")
    )
else:
    embedding_model = hub.load(
        "https://tfhub.dev/google/universal-sentence-encoder-large/5"
    )


def open_file(filepath):
    """Return the text content of *filepath* (undecodable bytes ignored)."""
    with open(filepath, "r", encoding="utf-8", errors="ignore") as infile:
        return infile.read()


def save_file(filepath, content):
    """Write *content* to *filepath*, overwriting any existing file."""
    with open(filepath, "w", encoding="utf-8") as outfile:
        outfile.write(content)


def save_yaml(filepath, data):
    """Serialize *data* to *filepath* as YAML (unicode preserved)."""
    with open(filepath, "w", encoding="utf-8") as file:
        yaml.dump(data, file, allow_unicode=True)


def load_yaml(filepath):
    """Load and return the YAML document stored at *filepath*."""
    with open(filepath, "r", encoding="utf-8") as file:
        # safe_load: these files are plain dicts written by save_yaml,
        # while FullLoader can instantiate arbitrary Python objects.
        return yaml.safe_load(file)


def create_message(
    message: str, speaker: str, timestamp: float
) -> Dict[str, Any]:
    """Bundle a chat message into REMO's message dictionary."""
    return {"content": message, "speaker": speaker, "timestamp": timestamp}


def save_message(root_folder, message: Dict[str, Any]):
    """Persist *message* as chat_<timestamp>_<speaker>.yaml in L1_raw_logs."""
    timestamp, speaker = message["timestamp"], message["speaker"]
    filename = f"chat_{timestamp}_{speaker}.yaml"
    filepath = os.path.join(root_folder, "L1_raw_logs", filename)
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    save_yaml(filepath, message)
L3_summaries) has > max_cluster_size files, repeat process, creating new taxonomical ranks + # If top rank (e.g. L3_summaries) has > max_cluster_size files, + # repeat process, creating new taxonomical ranks current_rank = 3 while True: # calculate clusters at new rank - clusters = cluster_elements(root_folder, f"L{current_rank}_summaries", max_cluster_size) - + clusters = cluster_elements( + root_folder, f"L{current_rank}_summaries", max_cluster_size + ) + # summarize those clusters - create_summaries(root_folder, clusters, f"L{current_rank + 1}_summaries", f"L{current_rank}_summaries") + create_summaries( + root_folder, + clusters, + f"L{current_rank + 1}_summaries", + f"L{current_rank}_summaries", + ) current_rank += 1 - - # if clusters less than max cluster size, we are done :) + + # if clusters less than max cluster size, we are done :) if len(clusters) <= max_cluster_size: break @@ -141,26 +188,34 @@ def process_missing_messages(root_folder: str): file1_data = load_yaml(file1_path) file2_data = load_yaml(file2_path) - context = file1_data['content'] - response = file2_data['content'] - speaker = file2_data['speaker'] - timestamp = file2_data['timestamp'] + context = file1_data["content"] + response = file2_data["content"] + speaker = file2_data["speaker"] + timestamp = file2_data["timestamp"] combined_text = context + " --- " + response embedding = embedding_model([combined_text]).numpy().tolist() message_pair_data = { - 'content': combined_text, - 'speaker': speaker, - 'timestamp': timestamp, - 'vector': embedding + "content": combined_text, + "speaker": speaker, + "timestamp": timestamp, + "vector": embedding, } # Save message pair in L2_message_pairs folder - message_pair_path = os.path.join(message_pairs_dir, message_pair_filename) + message_pair_path = os.path.join( + message_pairs_dir, message_pair_filename + ) save_yaml(message_pair_path, message_pair_data) -def create_summaries(root_folder: str, clusters: List[List[str]], target_folder: str, 
def maintain_tree(root_folder: str):
    """Incrementally fold newly arrived messages into the summary tree.

    Pairs up unprocessed L1 messages into L2 message pairs, then merges
    each brand-new pair into the closest L3 summary (or creates a new
    summary when nothing is similar enough).
    """
    l2_message_pairs_dir = os.path.join(root_folder, "L2_message_pairs")

    # exist_ok avoids the check-then-create race of the previous
    # `if not os.path.exists(...)` pattern.
    os.makedirs(l2_message_pairs_dir, exist_ok=True)

    # Snapshot L2 before and after pairing so we integrate only the
    # message pairs generated by this run.
    l2_files_before = set(os.listdir(l2_message_pairs_dir))
    process_missing_messages(root_folder)
    l2_files_after = set(os.listdir(l2_message_pairs_dir))

    new_message_pairs = l2_files_after - l2_files_before

    # Check each new L2 pair against the existing L3 summaries.
    integrate_new_elements(
        root_folder, "L3_summaries", new_message_pairs, 0.75
    )


def integrate_new_elements(
    root_folder: str,
    target_folder: str,
    new_elements: List[str],
    threshold: float,
):
    """Merge each new L2 message pair into the *target_folder* summaries.

    For every element, find the most cosine-similar existing summary;
    if its similarity exceeds *threshold*, that summary is re-summarized
    with the new content appended, otherwise a fresh summary file is
    created for the element.
    """
    target_dir = os.path.join(root_folder, target_folder)
    os.makedirs(target_dir, exist_ok=True)

    for new_element in new_elements:
        new_element_path = os.path.join(
            root_folder, "L2_message_pairs", new_element
        )
        new_element_data = load_yaml(new_element_path)
        new_element_vector = np.array(
            new_element_data["vector"]
        ).reshape(1, -1)

        # Linear scan for the nearest existing summary.
        max_similarity = -1
        closest_file = None
        for file in os.listdir(target_dir):
            file_path = os.path.join(target_dir, file)
            file_data = load_yaml(file_path)
            file_vector = np.array(file_data["vector"]).reshape(1, -1)
            similarity = cosine_similarity(
                new_element_vector, file_vector
            )[0][0]
            if similarity > max_similarity:
                max_similarity = similarity
                closest_file = file

        if max_similarity > threshold:
            # Close enough: fold the new content into the existing
            # summary and refresh its text, embedding, and timestamp.
            closest_file_path = os.path.join(target_dir, closest_file)
            closest_file_data = load_yaml(closest_file_path)
            closest_file_data["files"].append(new_element)

            combined_content = (
                closest_file_data["content"]
                + " --- "
                + new_element_data["content"]
            )
            updated_summary = quick_summarize(combined_content)

            closest_file_data["content"] = updated_summary
            closest_file_data["vector"] = (
                embedding_model([updated_summary]).numpy().tolist()
            )
            closest_file_data["timestamp"] = time()

            save_yaml(closest_file_path, closest_file_data)
        else:
            # Nothing similar: start a brand-new summary for the element.
            new_summary = quick_summarize(new_element_data["content"])
            new_summary_data = {
                "content": new_summary,
                "vector": embedding_model([new_summary]).numpy().tolist(),
                "files": [new_element],
                "timestamp": time(),
            }

            # NOTE(review): naming by current directory size can collide
            # if summaries are ever deleted — consider a timestamp-based
            # name instead; kept as-is to preserve behavior.
            new_summary_filename = (
                f"summary_{len(os.listdir(target_dir))}.yaml"
            )
            save_yaml(
                os.path.join(target_dir, new_summary_filename),
                new_summary_data,
            )
"\n\n%s" + "\n\nDETAILED SUMMARY:" + ) + % text + ) response = gpt3_completion(prompt) return response else: # Split the text into evenly sized chunks num_chunks = int(np.ceil(len(text) / max_chunk_size)) chunk_size = int(np.ceil(len(text) / num_chunks)) - text_chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)] + text_chunks = [ + text[i : i + chunk_size] for i in range(0, len(text), chunk_size) + ] # Summarize each chunk summaries = [] for chunk in text_chunks: - prompt = 'Write a detailed summary of the following:\n\n%s\n\nDETAILED SUMMARY:' % chunk + prompt = ( + ( + "Write a detailed summary of the following:" + "\n\n%s" + "\n\nDETAILED SUMMARY:" + ) + % chunk + ) response = gpt3_completion(prompt) summaries.append(response) @@ -325,12 +438,24 @@ def quick_summarize(text): return final_summary -def gpt3_completion(prompt, engine='text-davinci-003', temp=0.0, top_p=1.0, tokens=1000, freq_pen=0.0, pres_pen=0.0, stop=['asdfasdfasdf']): - openai.api_key = open_file('key_openai.txt') +def gpt3_completion( + prompt, + engine="text-davinci-003", + temp=0.0, + top_p=1.0, + tokens=1000, + freq_pen=0.0, + pres_pen=0.0, + stop=["asdfasdfasdf"], +): + openai.api_key = open_file("key_openai.txt") + max_retry = 5 retry = 0 - prompt = prompt.encode(encoding='ASCII',errors='ignore').decode() + prompt = prompt.encode(encoding="ASCII", errors="ignore").decode() + while True: + try: response = openai.Completion.create( engine=engine, @@ -340,18 +465,26 @@ def gpt3_completion(prompt, engine='text-davinci-003', temp=0.0, top_p=1.0, toke top_p=top_p, frequency_penalty=freq_pen, presence_penalty=pres_pen, - stop=stop) - text = response['choices'][0]['text'].strip() - #text = re.sub('[\r\n]+', '\n', text) - #text = re.sub('[\t ]+', ' ', text) - filename = '%s_gpt3.txt' % time() - if not os.path.exists('gpt3_logs'): - os.makedirs('gpt3_logs') - save_file('gpt3_logs/%s' % filename, prompt + '\n\n==========\n\n' + text) + stop=stop, + ) + text = 
response["choices"][0]["text"].strip() + # text = re.sub('[\r\n]+', '\n', text) + # text = re.sub('[\t ]+', ' ', text) + + filename = "%s_gpt3.txt" % time() + if not os.path.exists("gpt3_logs"): + os.makedirs("gpt3_logs") + + save_file( + "gpt3_logs/%s" % filename, + prompt + "\n\n==========\n\n" + text, + ) + return text + except Exception as oops: retry += 1 if retry >= max_retry: return "GPT3 error: %s" % oops - print('Error communicating with OpenAI:', oops) + print("Error communicating with OpenAI:", oops) sleep(1)