
Commit a485225

wip commit
1 parent 257adbe commit a485225

7 files changed: +34 −34 lines changed


.gitignore

Lines changed: 1 addition & 0 deletions
@@ -4,3 +4,4 @@ sentence-transformers
 .tmp/*
 !.tmp/prebuild.sh
 node_modules
+venv

app/.dockerignore

Lines changed: 2 additions & 0 deletions
@@ -2,3 +2,5 @@ myenv
 .direnv
 .envrc
 __pycache__
+venv
+sentence-transformers
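
Both new entries keep local development artifacts out of the Docker build context: the virtualenv and, presumably, a locally downloaded sentence-transformers model cache (see the SENTENCE_TRANSFORMERS_HOME setting in compose.yaml below).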

app/Dockerfile

Lines changed: 7 additions & 7 deletions
@@ -12,10 +12,10 @@ RUN apt-get update && apt-get install -y \
     git \
     && rm -rf /var/lib/apt/lists/*

-# Install Go for ARM architecture (latest supported version 1.21)
-RUN curl -OL https://golang.org/dl/go1.21.1.linux-arm64.tar.gz && \
-    tar -C /usr/local -xzf go1.21.1.linux-arm64.tar.gz && \
-    rm go1.21.1.linux-arm64.tar.gz
+# Install Go for x86 architecture (latest supported version 1.21)
+RUN curl -OL https://golang.org/dl/go1.21.1.linux-amd64.tar.gz && \
+    tar -C /usr/local -xzf go1.21.1.linux-amd64.tar.gz && \
+    rm go1.21.1.linux-amd64.tar.gz

 # Set Go environment variables
 ENV PATH="/usr/local/go/bin:${PATH}"

@@ -42,11 +42,11 @@ RUN chmod +x .tmp/prebuild.sh
 # Expose port 5050 for the Flask application
 EXPOSE 5050

-# Run test file
-RUN python test_intercom.py
+# # Run test file
+# RUN python test_intercom.py

 # Define environment variable for Flask
 ENV FLASK_APP=app.py

 # Run the application using uWSGI
-CMD ["uwsgi", "--http", "0.0.0.0:5050", "--wsgi-file", "app.py", "--callable", "app", "--processes", "4", "--threads", "2"]
+CMD ["uwsgi", "--lazy-apps", "--http", "0.0.0.0:5050", "--wsgi-file", "app.py", "--callable", "app", "--processes", "4", "--threads", "2"]

app/rag_system.py

Lines changed: 12 additions & 15 deletions
@@ -30,17 +30,14 @@ def embed_knowledge_base(self):
     def normalize_query(self, query):
         return query.lower().strip()

-    def get_query_embedding(self, query, use_cpu=True):
+    def get_query_embedding(self, query):
         normalized_query = self.normalize_query(query)
         query_embedding = self.model.encode([normalized_query], convert_to_tensor=True)
-        if use_cpu:
-            query_embedding = query_embedding.cpu()
+        query_embedding = query_embedding.cpu()
         return query_embedding

-    def get_doc_embeddings(self, use_cpu=True):
-        if use_cpu:
-            return self.doc_embeddings.cpu()
-        return self.doc_embeddings
+    def get_doc_embeddings(self):
+        return self.doc_embeddings.cpu()

     def compute_document_scores(self, query_embedding, doc_embeddings, high_match_threshold):
         text_similarities = cosine_similarity(query_embedding, doc_embeddings)[0]

@@ -66,12 +63,9 @@ def compute_document_scores(self, query_embedding, doc_embeddings, high_match_th
         return result

-    def retrieve(self, query, similarity_threshold=0.4, high_match_threshold=0.8, max_docs=5, use_cpu=True):
-        # Note: Set use_cpu=True to run on CPU, which is useful for testing or environments without a GPU.
-        # Set use_cpu=False to leverage GPU for better performance in production.
-
-        query_embedding = self.get_query_embedding(query, use_cpu)
-        doc_embeddings = self.get_doc_embeddings(use_cpu)
+    def retrieve(self, query, similarity_threshold=0.4, high_match_threshold=0.8, max_docs=5):
+        query_embedding = self.get_query_embedding(query)
+        doc_embeddings = self.get_doc_embeddings()

         doc_scores = self.compute_document_scores(query_embedding, doc_embeddings, high_match_threshold)
         retrieved_docs = self.get_top_docs(doc_scores, similarity_threshold, max_docs)

@@ -149,11 +143,11 @@ def answer_query_stream(self, query):

         collected_messages = []
         for chunk in stream:
-            if chunk['choices'][0]['finish_reason'] is not None:
-                break
             content = chunk['choices'][0]['delta'].get('content', '')
             collected_messages.append(content)
             yield content
+            if chunk['choices'][0].get('finish_reason') is not None:
+                break

         if len(citations) > 0:
             yield "\n\nReferences:\n" + "\n".join(citations)

@@ -193,3 +187,6 @@ def get_context(self, retrieved_docs):
         for doc in retrieved_docs:
             retrieved_text.append(f"{doc['about']}. {doc['text']}")
         return "\n\n".join(retrieved_text)
+
+# # Instantiate the RAGSystem
+# rag_system = RAGSystem()
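
Aside from dropping the use_cpu flag (embeddings are now unconditionally moved to the CPU, matching the CPU-only torch wheel pinned in requirements.txt below), the notable fix here is the streaming loop: the old code checked finish_reason and broke out before reading the chunk's delta, so any content carried on the final chunk was silently dropped, and the bare ['finish_reason'] lookup could raise KeyError on chunks without that key. A minimal sketch of the corrected pattern, with hypothetical chunks standing in for a real OpenAI stream:

# Minimal sketch of the reordered streaming loop; the fake chunks below are
# hypothetical stand-ins for a real OpenAI ChatCompletion stream.
def stream_answer(stream):
    collected_messages = []
    for chunk in stream:
        # Yield the delta first, so content carried on the final chunk
        # (the one bearing finish_reason) is not dropped...
        content = chunk['choices'][0]['delta'].get('content', '')
        collected_messages.append(content)
        yield content
        # ...then stop once the API signals completion; .get() also tolerates
        # chunks that omit the finish_reason key entirely.
        if chunk['choices'][0].get('finish_reason') is not None:
            break

fake_stream = [
    {'choices': [{'delta': {'content': 'Hello'}, 'finish_reason': None}]},
    {'choices': [{'delta': {'content': ' world'}, 'finish_reason': 'stop'}]},
]
print(''.join(stream_answer(fake_stream)))  # -> Hello world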

app/requirements.txt

Lines changed: 2 additions & 1 deletion
@@ -5,7 +5,8 @@ scikit-learn==1.2.2
 segment-analytics-python==2.3.3
 numpy==1.24.4
 sentence-transformers==2.3.1
-torch==2.0.1
+--find-links https://download.pytorch.org/whl/cpu/torch_stable.html
+torch==2.0.1+cpu
 huggingface_hub==0.15.1
 openai==0.28.0
 PyYAML==6.0.2
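
pip honors --find-links inside a requirements file, so the resolver can locate the +cpu wheels, which ship without the CUDA runtime and keep the image considerably smaller. A quick sanity check after pip install -r requirements.txt (a sketch; the exact version string depends on the wheel that gets picked):

import torch

# CPU-only wheels carry a "+cpu" local version tag and report no CUDA support.
print(torch.__version__)          # e.g. "2.0.1+cpu"
print(torch.cuda.is_available())  # False on the CPU-only build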

app/test_rag_system.py

Lines changed: 5 additions & 8 deletions
@@ -53,8 +53,7 @@ def test_get_doc_embeddings(self):
     def test_retrieve_fallback(self):
         # test a query that should return the fallback response
         query = "Hello"
-        # set use_cpu to True, as testing has no GPU calculations
-        result = self.rag_system.retrieve(query, use_cpu=True)
+        result = self.rag_system.retrieve(query)
         self.assertIsInstance(result, list)
         self.assertGreater(len(result), 0)
         self.assertEqual(len(result), 1) # should return one result for fallback

@@ -67,8 +66,7 @@ def test_retrieve_fallback(self):
     def test_retrieve_actual_response(self):
         # test a query that should return an actual response from the knowledge base
         query = "What is Defang?"
-        # set use_cpu to True, as testing has no GPU calculations
-        result = self.rag_system.retrieve(query, use_cpu=True)
+        result = self.rag_system.retrieve(query)
         self.assertIsInstance(result, list)
         self.assertGreater(len(result), 0)
         self.assertLessEqual(len(result), 5) # should return up to max_docs (5)

@@ -80,9 +78,8 @@ def test_retrieve_actual_response(self):

     def test_compute_document_scores(self):
         query = "Does Defang have an MCP sample?"
-        # get embeddings and move them to CPU, as testing has no GPU calculations
-        query_embedding = self.rag_system.get_query_embedding(query, use_cpu=True)
-        doc_embeddings = self.rag_system.get_doc_embeddings(use_cpu=True)
+        query_embedding = self.rag_system.get_query_embedding(query)
+        doc_embeddings = self.rag_system.get_doc_embeddings()

         # call function and get results
         result = self.rag_system.compute_document_scores(query_embedding, doc_embeddings, high_match_threshold=0.8)

@@ -105,4 +102,4 @@ def test_compute_document_scores(self):
     print("Test for compute_document_scores passed successfully!")

 if __name__ == '__main__':
-    unittest.main()
+    unittest.main()

compose.yaml

Lines changed: 5 additions & 3 deletions
@@ -1,11 +1,12 @@
 services:
   app:
     restart: always
-    domainname: ask.defang.io
+    # domainname: ask.defang.io
     x-defang-dns-role: arn:aws:iam::258338292852:role/dnsadmin-39a19c3
+    platform: linux/amd64
     build:
       context: ./app
-      shm_size: "30gb"
+      dockerfile: Dockerfile
     ports:
       - target: 5050
         published: 5050

@@ -21,10 +22,11 @@ services:
       SESSION_COOKIE_SECURE: 1
       OPENAI_API_KEY: ${OPENAI_API_KEY} # Set your OpenAI API key here or in the .env file
       OPENAI_BASE_URL: "http://llm/api/v1"
-      MODEL: "anthropic.claude-3-haiku-20240307-v1:0"
+      MODEL: "ai/claude3-haiku"
       INTERCOM_TOKEN:
       INTERCOM_ADMIN_ID:
       REDIS_URL: redis://redis:6379/0
+      SENTENCE_TRANSFORMERS_HOME: /app/sentence-transformers
     deploy:
       resources:
         reservations:
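
These changes tie the rest of the commit together: platform: linux/amd64 matches the amd64 Go binary now installed in the Dockerfile, and SENTENCE_TRANSFORMERS_HOME is the cache directory the sentence-transformers library consults, so pointing it at /app/sentence-transformers presumably lets a model cache baked into the image (via the .tmp/prebuild.sh step) be reused at startup instead of re-downloaded.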
