diff --git a/reference architectures/RAG/RAG_CreateAndQuery.ipynb b/reference architectures/RAG/RAG_CreateAndQuery.ipynb
new file mode 100644
index 00000000..edd64099
--- /dev/null
+++ b/reference architectures/RAG/RAG_CreateAndQuery.ipynb
@@ -0,0 +1,419 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "61086d65-39a7-4feb-9204-4f04a468fd15",
+ "metadata": {},
+ "source": [
+ "First, let's import the necessary libraries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "25735dd7-5411-47d8-8e9d-4f4598e6faa0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import gradio as gr # for the chat interface\n",
+ "import boto3 # to read the files in the S3 Object Storage\n",
+ "from PyPDF2 import PdfReader # to open the pdfs and extract the texts\n",
+ "import requests # to do the API calls\n",
+ "import psycopg2 # to connect to postgreSQL\n",
+ "from psycopg2.extras import execute_values \n",
+ "from psycopg2 import OperationalError\n",
+ "from markdownify import markdownify as md # to open and split the markdown files\n",
+ "import time # for the wait function"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9a113252-8886-4fb3-87d5-d6f925399714",
+ "metadata": {},
+ "source": [
+ "Then we set the variables and credentials needed"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6446c156-c826-4c57-8a84-79bb6c6e3ec6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# connection info for the S3 bucket which stores the documents to ingest\n",
+ "S3_endpoint = \"https://s3.gra.io.cloud.ovh.net/\" \n",
+ "S3_container_name = \"rag-knowledge-files\"\n",
+ "S3_secret_key = \"yourS3secretkey\"\n",
+ "S3_access_key = \"yourS3accesskey\"\n",
+ "\n",
+ "# connection info for PostgreSQL database with PGVector extension to store the embeddings\n",
+ "postgre_host = 'xxxx.database.cloud.ovh.net'\n",
+ "postgre_db = 'defaultdb'\n",
+ "postgre_port = '20184'\n",
+ "sslmode = \"require\"\n",
+ "postgre_password = \"xxxx\"\n",
+ "postgre_user = \"xxxx\"\n",
+ "\n",
+ "# connection info for AI endpoint models (LLM and Embedding)\n",
+ "ovhcloud_embedding_endpoint_url = 'https://multilingual-e5-base.endpoints.kepler.ai.cloud.ovh.net/api/text2vec'\n",
+ "ovhcloud_llm_endpoint_url = 'https://mixtral-8x22b-instruct-v01.endpoints.kepler.ai.cloud.ovh.net/api/openai_compat/v1/chat/completions'\n",
+ "ovhcloud_ai_endpoint_api_key = \"xxxx\"\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "94b6e15e-3e1e-4f07-97e7-2375952f3c7b",
+ "metadata": {},
+ "source": [
+ "Let's connect to the PostgreSQL DB and initialize the vector database"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6eb58ea0-cb2d-4c8d-973e-d1bef4800872",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Connect to PostgreSQL\n",
+ "try:\n",
+ " conn = psycopg2.connect(\n",
+ " dbname=\"defaultdb\",\n",
+ " user=postgre_user,\n",
+ " password=postgre_password,\n",
+ " host=postgre_host,\n",
+ " port=postgre_port,\n",
+ " sslmode=sslmode\n",
+ " )\n",
+ " print(\"Connection to PostgreSQL DB successful\")\n",
+ "except OperationalError as e:\n",
+ " print(f\"The error '{e}' occurred\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ecc49cc3-18af-463d-b1b9-37d45cca4215",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cur = conn.cursor()\n",
+ "# check if the pgvector extension exists, if not activate it. Without this Postgre will not accept vectors and you won’t be able to do vector similarity search\n",
+ "cur.execute(\"CREATE EXTENSION IF NOT EXISTS vector\")\n",
+ "conn.commit()\n",
+ "\n",
+ "# check if the embeddings table exists, if not create it\n",
+ "# embeddings table stores the embeddings but also the document name, the page number and the text itself\n",
+ "cur.execute(\"CREATE TABLE IF NOT EXISTS embeddings (id SERIAL PRIMARY KEY, document_name TEXT, page_number INT, text TEXT, embedding VECTOR)\")\n",
+ "conn.commit()\n",
+ "# we make sure the table is empty. This means every time we launch this job it will regenerate the full table based on all the documents in the object storage.\n",
+ "cur.execute(\"TRUNCATE TABLE embeddings;\")\n",
+ "conn.commit()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "44cf9bd6-4c08-49e7-b7cf-3524ac7c201c",
+ "metadata": {},
+ "source": [
+ "The first dataset we will ingest into the knowledge base is a list of PDF files containing the English \"Terms & Conditions\" for OVHcloud services. They can be downloaded here: https://www.ovhcloud.com/en-ie/terms-and-conditions/contracts/.\n",
+ "The documents have been uploaded as PDFs into an S3 Object Storage bucket, so we will connect to S3 using boto3 (we could also have mounted the S3 bucket in the notebook; this is simply another way)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a72fb79f-e5d6-4573-a1e2-2e8afc65aa98",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Connect to S3 and print the list of files\n",
+ "s3 = boto3.client('s3',\n",
+ " endpoint_url=S3_endpoint,\n",
+ " aws_access_key_id=S3_access_key,\n",
+ " aws_secret_access_key=S3_secret_key)\n",
+ "bucket = s3.list_objects(Bucket=S3_container_name)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0f1985a3-9ef0-4828-b1f1-1edced71ea46",
+ "metadata": {},
+ "source": [
+ "Now for each pdf in the bucket we will extract the text from each page. The text from each page will be sent to the embedding model to calculate the embedding vector. The vector is then inserted in the PGVector table including the text itself and some metadata."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6037e1cd-1e36-4ce0-9d99-f724b2b08deb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# only select the pdf files and then for each count the number of pages and display the nb\n",
+ "for obj in bucket['Contents']:\n",
+ " if obj['Key'].endswith('.pdf'):\n",
+ " # Download the PDF file\n",
+ " s3.download_file(S3_container_name, obj['Key'], 'temp.pdf')\n",
+ "\n",
+ " # Read the PDF file\n",
+ " with open('temp.pdf', 'rb') as file:\n",
+ " reader = PdfReader(file)\n",
+ " print(f\"Number of pages in {obj['Key']}:\", len(reader.pages))\n",
+ " # for each page, extract the text and send it to the embedding endpoint\n",
+ " for page_num in range(len(reader.pages)):\n",
+ " page = reader.pages[page_num]\n",
+ " text = page.extract_text()\n",
+ " # Generate embeddings\n",
+ " response = requests.post(ovhcloud_embedding_endpoint_url, json={\"text\": text}, headers={'Authorization': f'Bearer {ovhcloud_ai_endpoint_api_key}'})\n",
+ " embeddings = response.json()\n",
+ " data = [(obj['Key'], page_num, text, embeddings)]\n",
+ " # Insert embeddings into PostgreSQL\n",
+ " try:\n",
+ " execute_values(\n",
+ " cur, \n",
+ " \"INSERT INTO embeddings (document_name, page_number, text, embedding) VALUES %s\", \n",
+ " data\n",
+ " )\n",
+ " conn.commit()\n",
+ " except Exception as e:\n",
+ " print(f\"Error inserting data for {obj['Key']} page {page_num}: {e}\")\n",
+ " print(\"INSERT INTO embeddings (document_name, page_number, text, embedding) VALUES %s\" % data)\n",
+ " conn.rollback()\n",
+ "                # 3 second wait in order not to reach the API rate limit\n",
+ " time.sleep(3)\n",
+ "\n",
+ " # close the file and then delete the temp file\n",
+ " file.close()\n",
+ " # delete the temp file temp.pdf\n",
+ " os.remove('temp.pdf')\n",
+ "# check how many rows are in the embeddings table and print it\n",
+ "cur.execute(\"SELECT COUNT(*) FROM embeddings\")\n",
+ "print(\"DB now has\", cur.fetchone()[0], \" embeddings\")\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c1fe7ed2-4437-4f72-a872-d6117a821f79",
+ "metadata": {},
+ "source": [
+ "Now that we have added all the \"Terms & Conditions\" documents from the S3 bucket, let's add a different source of information: the OVHcloud documentation.\n",
+ "The documentation is hosted in this GitHub repository (https://github.com/ovh/docs) as markdown files. Each page exists in several languages, so we will take the English pages: guide.en-ie.md"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "84da017a-3b72-494d-9252-2a17cf538790",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Function to get all 'guide.en-ie.md' files in a directory and its subdirectories\n",
+ "def get_guide_md_files(directory):\n",
+ " guide_files = []\n",
+ " for root, _, files in os.walk(directory):\n",
+ " for file in files:\n",
+ " if file == 'guide.en-ie.md':\n",
+ " guide_files.append(os.path.join(root, file))\n",
+ " return guide_files\n",
+ "\n",
+ "# Function to extract sections from markdown text\n",
+ "def extract_md_sections(md_text):\n",
+ " sections = []\n",
+ " current_section = []\n",
+ " lines = md_text.split('\\n')\n",
+ " \n",
+ " for line in lines:\n",
+ " if line.startswith('#'):\n",
+ " if current_section:\n",
+ " sections.append('\\n'.join(current_section))\n",
+ " current_section = []\n",
+ " current_section.append(line)\n",
+ " \n",
+ " if current_section:\n",
+ " sections.append('\\n'.join(current_section))\n",
+ " \n",
+ " return sections\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f35fddfd-32a7-4aa5-a32c-2a0820383e71",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "# Local directory containing markdown files\n",
+ "local_directory = 'docs/pages/'\n",
+ "\n",
+ "# Process 'guide.en-ie.md' files in the local directory\n",
+ "guide_files = get_guide_md_files(local_directory)\n",
+ "\n",
+ "for guide_file in guide_files:\n",
+ " with open(guide_file, 'r', encoding='utf-8') as file:\n",
+ " md_text = file.read()\n",
+ " \n",
+ " sections = extract_md_sections(md_text)\n",
+ " \n",
+ " for section_num, section_text in enumerate(sections):\n",
+ " # Add folder structure at the beginning of the text\n",
+ " folder_structure = os.path.relpath(guide_file, local_directory)\n",
+ " text_with_structure = f\"{folder_structure}\\n\\n{section_text}\"\n",
+ " \n",
+ " # Generate embeddings\n",
+ " response = requests.post(ovhcloud_embedding_endpoint_url, json={\"text\": text_with_structure}, headers={'Authorization': f'Bearer {ovhcloud_ai_endpoint_api_key}'})\n",
+ " embeddings = response.json()\n",
+ " \n",
+ " data = [(folder_structure, section_num, text_with_structure, embeddings)]\n",
+ " # Insert embeddings into PostgreSQL\n",
+ " try:\n",
+ " execute_values( \n",
+ " cur, \n",
+ " \"INSERT INTO embeddings (document_name, page_number, text, embedding) VALUES %s\", \n",
+ " data\n",
+ " )\n",
+ " conn.commit()\n",
+ " except Exception as e:\n",
+ " print(f\"Error inserting data for {guide_file} section {section_num}: {e}\")\n",
+ " conn.rollback()\n",
+ " \n",
+ " # 3-second wait to avoid reaching API rate limits\n",
+ " time.sleep(3)\n",
+ "\n",
+ "# Check the number of rows in the embeddings table\n",
+ "cur.execute(\"SELECT COUNT(*) FROM embeddings\")\n",
+ "print(\"DB now has\", cur.fetchone()[0], \" embeddings\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d4bddce2-6fdc-4542-b235-9b6665bd0edf",
+ "metadata": {},
+ "source": [
+ "We are done adding documents in the DB so we close the connection."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "48fe9e97-b536-4731-aef6-7748ae08857b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "# Close the connection\n",
+ "cur.close()\n",
+ "conn.close()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "618867a5-0a84-42ba-9460-4fec50f17d49",
+ "metadata": {},
+ "source": [
+ "Now that we have created the knowledge database, let's create a way to query it.\n",
+ "We will be using a chat interface from gradio. This chat interface needs a function that will be called each time a new message is sent by a user."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4795b131-e4b8-4a5a-8273-b0f91a89af7e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def rag_response(message, history):\n",
+ " print(history)\n",
+ " # Get the embeddings of the query text\n",
+ " response = requests.post(ovhcloud_embedding_endpoint_url, json={\"text\": message}, headers={'Authorization': f'Bearer {ovhcloud_ai_endpoint_api_key}'})\n",
+ " query_embedding = response.json()\n",
+ "\n",
+ " # Connect to the PostgreSQL db and run a similarity query on this embedding to get the top 5 most similar texts\n",
+ " with psycopg2.connect(dbname=postgre_db, user=postgre_user, password=postgre_password, host=postgre_host, port=postgre_port, sslmode=sslmode) as conn:\n",
+ " with conn.cursor() as cur:\n",
+ " cur.execute(\"SELECT document_name, page_number, embedding <-> %s::vector as distance, text FROM embeddings ORDER BY distance LIMIT 5\", (query_embedding,))\n",
+ " results = cur.fetchall()\n",
+ "\n",
+ " # Build the context string for the LLM model by concatenating the top 5 most relevant texts\n",
+ " system_prompt = \"You are a nice chatbot and you have to answer the user question based on the context provided below and no prior knowledge. If the answer was found in a context document, provide the document name and page number. \\n \"\n",
+ " system_prompt += ''.join(f\"\\n Document: {result[0]}, Page: {result[1]}, Text: {result[3]} \\n\" for result in results)\n",
+ " system_prompt += \"\"\n",
+ "\n",
+ " # Build the history with validation for role\n",
+ " valid_roles = {\"user\", \"assistant\", \"system\", \"tool\"}\n",
+ " messages = [{\"role\": \"system\", \"content\": system_prompt}] + [\n",
+ " {\"role\": role, \"content\": content} for role, content in history if role in valid_roles\n",
+ " ] + [{\"role\": \"user\", \"content\": message}]\n",
+ "\n",
+ " # Call the LLM model API with the user question and system prompt\n",
+ " response = requests.post(ovhcloud_llm_endpoint_url, json={\"max_tokens\": 512, \"messages\": messages, \"model\": \"Mixtral-8x22B-Instruct-v0.1\", \"temperature\": 0}, headers={\"Content-Type\": \"application/json\", \"Authorization\": f\"Bearer {ovhcloud_ai_endpoint_api_key}\"})\n",
+ "\n",
+ " # Handle response\n",
+ " return response.json()[\"choices\"][0][\"message\"][\"content\"] if response.status_code == 200 else f\"{response.status_code} {response.text}\"\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ddec6ed8-c41e-4789-8ed1-f83f2c014d1a",
+ "metadata": {},
+ "source": [
+ "Now let's launch the chat interface"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "11e312a8-5ebe-4673-b39f-f9fb01c44172",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gr.ChatInterface(rag_response).launch(share=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "215fedfe",
+ "metadata": {},
+ "source": [
+ "You can test the function directly in the notebook:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5824605b-42f2-4e5c-8b9c-8054869667df",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "message=\"Customer is asking if in our AI Deploy service he can access to docker containers using SSH ?\"\n",
+ "history=\"\"\n",
+ "rag_response(message, history)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Conda",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/reference architectures/RAG/README.md b/reference architectures/RAG/README.md
new file mode 100644
index 00000000..02a19733
--- /dev/null
+++ b/reference architectures/RAG/README.md
@@ -0,0 +1 @@
+These are the files containing the code used in the Reference Architecture for Retrieval Augmented Generation posted in [OVHcloud Blog](https://blog.ovhcloud.com/reference-architecture-retrieval-augmented-generation-rag/)
\ No newline at end of file
diff --git a/reference architectures/RAG/generate_knowledge_base.py b/reference architectures/RAG/generate_knowledge_base.py
new file mode 100644
index 00000000..3e93f82f
--- /dev/null
+++ b/reference architectures/RAG/generate_knowledge_base.py
@@ -0,0 +1,177 @@
+import boto3
+from PyPDF2 import PdfReader
+import requests
+import psycopg2
+from psycopg2.extras import execute_values
+from psycopg2 import OperationalError
+import os
+from markdownify import markdownify as md
+import time # for the wait function
+
+# Settings that won't change much and are not sensitive
+S3_endpoint = "https://s3.gra.io.cloud.ovh.net/"
+S3_container_name = "rag-knowledge-files"
+ovhcloud_embedding_endpoint_url = 'https://multilingual-e5-base.endpoints.kepler.ai.cloud.ovh.net/api/text2vec'
+ovhcloud_llm_endpoint_url = 'https://mixtral-8x22b-instruct-v01.endpoints.kepler.ai.cloud.ovh.net/api/openai_compat/v1/chat/completions'
+postgre_host = 'postgresql-5840cdf3-oa2f926d2.database.cloud.ovh.net'
+postgre_db = 'defaultdb'
+postgre_port = '20184'
+sslmode = "require"
+
+# Sensitive settings that could change: read from environment variables (os.getenv returns None when a variable is unset)
+S3_secret_key = os.getenv('S3_SECRET_KEY')
+S3_access_key = os.getenv('S3_ACCESS_KEY')
+ovhcloud_ai_endpoint_api_key = os.getenv('OVHCLOUD_AI_ENDPOINT_API_KEY')
+postgre_password = os.getenv("POSTGRE_PASSWORD")
+postgre_user = os.getenv("POSTGRE_USER")
+
+# Connect to PostgreSQL (database name comes from postgre_db). If the connection fails,
+# report the error and stop here instead of crashing later with a NameError on conn.
+try:
+    conn = psycopg2.connect(
+        dbname=postgre_db,
+        user=postgre_user,
+        password=postgre_password,
+        host=postgre_host,
+        port=postgre_port,
+        sslmode=sslmode,
+    )
+    print("Connection to PostgreSQL DB successful")
+except OperationalError as e:
+    raise SystemExit(f"The error '{e}' occurred")
+cur = conn.cursor()
+
+# check if the pgvector extension exists, if not activate it. Without this Postgre will not accept vectors and you won’t be able to do vector similarity search
+cur.execute("CREATE EXTENSION IF NOT EXISTS vector")
+conn.commit()
+
+# check if the embeddings table exists, if not create it
+# embeddings table stores the embeddings but also the document name, the page number and the text itself
+cur.execute("CREATE TABLE IF NOT EXISTS embeddings (id SERIAL PRIMARY KEY, document_name TEXT, page_number INT, text TEXT, embedding VECTOR)")
+conn.commit()
+# we make sure the table is empty. This means every time we launch this job it will regenerate the full table based on all the documents in the object storage.
+cur.execute("TRUNCATE TABLE embeddings;")
+conn.commit()
+
+# Connect to S3 and fetch the object listing of the bucket (the listing is stored in bucket, nothing is printed here)
+s3 = boto3.client('s3',
+ endpoint_url=S3_endpoint,
+ aws_access_key_id=S3_access_key,
+ aws_secret_access_key=S3_secret_key)
+bucket = s3.list_objects(Bucket=S3_container_name)
+
+# Ingest every PDF in the bucket: one embedding row per page (page_number is 0-based).
+# .get() keeps an empty bucket (no 'Contents' key in the listing) from raising KeyError.
+for obj in bucket.get('Contents', []):
+    if not obj['Key'].endswith('.pdf'):
+        continue
+    # Download the PDF file to a local scratch file
+    s3.download_file(S3_container_name, obj['Key'], 'temp.pdf')
+    try:
+        # the with-block closes the file, so no explicit close() is needed
+        with open('temp.pdf', 'rb') as file:
+            reader = PdfReader(file)
+            print(f"Number of pages in {obj['Key']}:", len(reader.pages))
+            for page_num, page in enumerate(reader.pages):
+                text = page.extract_text()
+                # Generate embeddings
+                response = requests.post(ovhcloud_embedding_endpoint_url, json={"text": text}, headers={'Authorization': f'Bearer {ovhcloud_ai_endpoint_api_key}'})
+                # 3 second wait in order not to reach the API rate limit
+                time.sleep(3)
+                if not response.ok:
+                    # an error payload is not a vector: report it and skip this page
+                    print(f"Error embedding {obj['Key']} page {page_num}: {response.status_code} {response.text}")
+                    continue
+                data = [(obj['Key'], page_num, text, response.json())]
+                # Insert embeddings into PostgreSQL
+                try:
+                    execute_values(
+                        cur,
+                        "INSERT INTO embeddings (document_name, page_number, text, embedding) VALUES %s",
+                        data
+                    )
+                    conn.commit()
+                except Exception as e:
+                    print(f"Error inserting data for {obj['Key']} page {page_num}: {e}")
+                    conn.rollback()
+    finally:
+        # delete the temp file even if reading the PDF failed
+        os.remove('temp.pdf')
+# check how many rows are in the embeddings table and print it
+cur.execute("SELECT COUNT(*) FROM embeddings")
+print("PDF files added - DB now has", cur.fetchone()[0], " embeddings")
+
+def get_guide_md_files(directory):
+    """Return the paths of every 'guide.en-ie.md' file under directory, searched recursively."""
+    return [
+        os.path.join(dirpath, filename)
+        for dirpath, _, filenames in os.walk(directory)
+        for filename in filenames
+        if filename == 'guide.en-ie.md'
+    ]
+
+
+def extract_md_sections(md_text):
+    """Split markdown text into sections, each starting at a heading line.
+
+    A line starting with '#' opens a new section, except inside a fenced code
+    block (``` or ~~~) where it is a code comment; '' yields [''].
+    """
+    sections, current_section, in_fence = [], [], False
+    for line in md_text.split('\n'):
+        if line.lstrip().startswith(('```', '~~~')):
+            in_fence = not in_fence
+        elif line.startswith('#') and not in_fence and current_section:
+            sections.append('\n'.join(current_section))
+            current_section = []
+        current_section.append(line)
+    if current_section:
+        sections.append('\n'.join(current_section))
+    return sections
+
+
+# Local directory containing markdown files
+local_directory = 'docs/pages/'
+
+# Process 'guide.en-ie.md' files in the local directory: one embedding row per section
+guide_files = get_guide_md_files(local_directory)
+
+for guide_file in guide_files:
+    with open(guide_file, 'r', encoding='utf-8') as file:
+        md_text = file.read()
+    sections = extract_md_sections(md_text)
+    # The relative path is the same for every section of a file, so compute it once
+    folder_structure = os.path.relpath(guide_file, local_directory)
+
+    for section_num, section_text in enumerate(sections):
+        # Add folder structure at the beginning of the text
+        text_with_structure = f"{folder_structure}\n\n{section_text}"
+        # Generate embeddings
+        response = requests.post(ovhcloud_embedding_endpoint_url, json={"text": text_with_structure},
+                                 headers={'Authorization': f'Bearer {ovhcloud_ai_endpoint_api_key}'})
+        # 3-second wait to avoid reaching API rate limits
+        time.sleep(3)
+        if not response.ok:
+            # an error payload is not a vector: report it and skip this section
+            print(f"Error embedding {guide_file} section {section_num}: {response.status_code} {response.text}")
+            continue
+        data = [(folder_structure, section_num, text_with_structure, response.json())]
+        # Insert embeddings into PostgreSQL
+        try:
+            execute_values(
+                cur,
+                "INSERT INTO embeddings (document_name, page_number, text, embedding) VALUES %s",
+                data
+            )
+            conn.commit()
+        except Exception as e:
+            print(f"Error inserting data for {guide_file} section {section_num}: {e}")
+            conn.rollback()
+
+# check how many rows are in the embeddings table and print it
+cur.execute("SELECT COUNT(*) FROM embeddings")
+print("Markdown Docs added - DB now has", cur.fetchone()[0], " embeddings")
+
+# Close the cursor and then the connection it was created from
+cur.close()
+conn.close()
diff --git a/reference architectures/RAG/query_knowledge_base.py b/reference architectures/RAG/query_knowledge_base.py
new file mode 100644
index 00000000..de7f2d4d
--- /dev/null
+++ b/reference architectures/RAG/query_knowledge_base.py
@@ -0,0 +1,48 @@
+import os
+import requests
+import psycopg2
+import gradio as gr
+
+# Fixed (non-sensitive) settings
+ovhcloud_embedding_endpoint_url = 'https://multilingual-e5-base.endpoints.kepler.ai.cloud.ovh.net/api/text2vec'
+ovhcloud_llm_endpoint_url = 'https://mixtral-8x22b-instruct-v01.endpoints.kepler.ai.cloud.ovh.net/api/openai_compat/v1/chat/completions'
+postgre_host = 'postgresql-5840cdf3-oa2f926d2.database.cloud.ovh.net'
+postgre_db = 'defaultdb'
+postgre_port = '20184'
+sslmode = "require"
+
+# Sensitive settings, read from environment variables (os.getenv returns None when a variable is unset)
+ovhcloud_ai_endpoint_api_key = os.getenv('OVHCLOUD_AI_ENDPOINT_API_KEY')
+postgre_password = os.getenv("POSTGRE_PASSWORD")
+postgre_user = os.getenv("POSTGRE_USER")
+
+def rag_response(message, history):
+    """Answer message with the LLM, using the 5 closest knowledge-base texts as context.
+
+    history: gradio chat history ([user, assistant] pairs or role/content dicts).
+    Returns the answer, or "<status> <body>" when an endpoint call fails."""
+    response = requests.post(ovhcloud_embedding_endpoint_url, json={"text": message}, headers={'Authorization': f'Bearer {ovhcloud_ai_endpoint_api_key}'})
+    if response.status_code != 200:
+        return f"{response.status_code} {response.text}"
+    query_embedding = response.json()
+    # Connect to the postgresql db and run a similarity query on this embedding to get the top 5 most similar texts
+    with psycopg2.connect(dbname=postgre_db, user=postgre_user, password=postgre_password, host=postgre_host, port=postgre_port, sslmode=sslmode) as conn:
+        with conn.cursor() as cur:
+            cur.execute("SELECT document_name, page_number, embedding <-> %s::vector as distance, text FROM embeddings ORDER BY distance LIMIT 5", (query_embedding,))
+            results = cur.fetchall()
+    conn.close()  # psycopg2's with-block only ends the transaction, it does not close the connection
+    # Build the context string for the llm model by concatenating the top 5 most relevant texts
+    system_prompt = "You are nice chatbot and you have to answer the user question based on the context provided bellow and not prior knowledge. If the answer was found in a context document, list the document name and page number. \n "
+    system_prompt += ''.join(f"\n Document: {result[0]}, Page: {result[1]}, Text: {result[3]} \n" for result in results)
+    # gradio history items are whole turns, not (role, content) tuples: expand each turn into chat messages
+    messages = [{"role": "system", "content": system_prompt}]
+    for turn in history or []:
+        pairs = [(turn["role"], turn["content"])] if isinstance(turn, dict) else zip(("user", "assistant"), turn)
+        messages += [{"role": role, "content": content} for role, content in pairs if content]
+    messages.append({"role": "user", "content": message})
+    # Call the llm model api with the user question and system prompt
+    response = requests.post(ovhcloud_llm_endpoint_url, json={"max_tokens": 512, "messages": messages, "model": "Mixtral-8x22B-Instruct-v0.1", "temperature": 0}, headers={"Content-Type": "application/json", "Authorization": f"Bearer {ovhcloud_ai_endpoint_api_key}"})
+    return response.json()["choices"][0]["message"]["content"] if response.status_code == 200 else f"{response.status_code} {response.text}"
+
+# Launch the gradio chat interface with rag_response as the handler, bound to 0.0.0.0 (all interfaces) on port 8080
+gr.ChatInterface(rag_response).launch(server_name="0.0.0.0", server_port=8080)
\ No newline at end of file
diff --git a/reference architectures/RAG/requirements-generate.txt b/reference architectures/RAG/requirements-generate.txt
new file mode 100644
index 00000000..a48dd58f
--- /dev/null
+++ b/reference architectures/RAG/requirements-generate.txt
@@ -0,0 +1,5 @@
+boto3
+requests
+PyPDF2
+psycopg2-binary
+markdownify
diff --git a/reference architectures/RAG/requirements-query.txt b/reference architectures/RAG/requirements-query.txt
new file mode 100644
index 00000000..9f5387fc
--- /dev/null
+++ b/reference architectures/RAG/requirements-query.txt
@@ -0,0 +1,3 @@
+requests
+psycopg2-binary
+gradio
\ No newline at end of file