diff --git a/app/Dockerfile b/app/Dockerfile
index 1c4a434..dd377da 100644
--- a/app/Dockerfile
+++ b/app/Dockerfile
@@ -34,10 +34,12 @@ COPY requirements.txt /app/
 RUN pip install --no-cache-dir -r requirements.txt
 
 # Set the environment variable for the sentence transformers model
-ENV SENTENCE_TRANSFORMERS_HOME="/app/sentence-transformers"
+ENV SENTENCE_TRANSFORMERS_HOME="/root/.cache/sentence_transformers"
+
+COPY ./embeddings.py /app/embeddings.py
 
 # Preload the sentence transformer model to cache
-RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')"
+RUN python embeddings.py
 
 # Copy the application source code into the container
 COPY . /app
@@ -49,4 +51,4 @@ EXPOSE 5050
 ENV FLASK_APP=app.py
 
 # Run the application using uWSGI
-CMD ["uwsgi", "--lazy-apps", "--http", "0.0.0.0:5050", "--wsgi-file", "app.py", "--callable", "app", "--processes", "4", "--threads", "2"]
+CMD ["uwsgi", "--http", "0.0.0.0:5050", "--wsgi-file", "app.py", "--callable", "app", "--processes", "4", "--threads", "2"]
diff --git a/app/embeddings.py b/app/embeddings.py
new file mode 100644
index 0000000..5d93b00
--- /dev/null
+++ b/app/embeddings.py
@@ -0,0 +1,14 @@
+from sentence_transformers import SentenceTransformer
+
+
+def load_model():
+    """
+    Load the 'all-MiniLM-L6-v2' SentenceTransformer model by name.
+    The library caches the downloaded weights under the directory given by
+    the SENTENCE_TRANSFORMERS_HOME environment variable when it is set
+    (the Dockerfile sets it so the image build pre-populates the cache).
+    """
+    return SentenceTransformer("all-MiniLM-L6-v2")
+
+
+if __name__ == "__main__":
+    load_model()
diff --git a/app/rag_system.py b/app/rag_system.py
index a6b88a5..e8930fd 100644
--- a/app/rag_system.py
+++ b/app/rag_system.py
@@ -6,6 +6,7 @@ from sentence_transformers import SentenceTransformer
 import numpy as np
 from sklearn.metrics.pairwise import cosine_similarity
+from embeddings import load_model
 import traceback
 
 openai.api_base = os.getenv("OPENAI_BASE_URL")
 
@@ -15,7 +16,7 @@ class RAGSystem:
     def __init__(self, knowledge_base_path='./data/knowledge_base.json'):
         self.knowledge_base_path = knowledge_base_path
         self.knowledge_base = self.load_knowledge_base()
-        self.model = SentenceTransformer('all-MiniLM-L6-v2')
+        self.model = load_model()
         self.doc_embeddings = self.embed_knowledge_base()
         self.conversation_history = []
 
diff --git a/compose.dev.yaml b/compose.dev.yaml
index d6dc813..3fda574 100644
--- a/compose.dev.yaml
+++ b/compose.dev.yaml
@@ -16,7 +16,6 @@ services:
       SEGMENT_WRITE_KEY: ${SEGMENT_WRITE_KEY} # Set your Segment write key here or in the .env file
       SESSION_COOKIE_SECURE: 0
       OPENAI_BASE_URL: "http://llm:5051/api/v1"
-      SENTENCE_TRANSFORMERS_HOME: /app/sentence-transformers
     volumes:
       - type: bind
         source: ./app
diff --git a/compose.yaml b/compose.yaml
index fdb4f1c..a5b8726 100644
--- a/compose.yaml
+++ b/compose.yaml
@@ -26,7 +26,6 @@ services:
       INTERCOM_TOKEN:
       INTERCOM_ADMIN_ID:
       REDIS_URL: redis://redis:6379/0
-      SENTENCE_TRANSFORMERS_HOME: /app/sentence-transformers
     deploy:
       resources:
         reservations: