Skip to content

Commit 0cf1cc6

Browse files
committed
Update Docker Compose configuration and refactor vectorstore_factory for improved readability and logging
- Change WEBAPP_WEBHOOK_HOST and URL_APP to use localhost for local development.
- Update SOCKET_URL to use host.docker.internal for better compatibility.
- Refactor vectorstore_factory function to enhance readability with consistent formatting and improved error handling.
- Safely log Qdrant arguments to handle potential Unicode characters.
1 parent ffcb35e commit 0cf1cc6

File tree

2 files changed

+98
-30
lines changed

2 files changed

+98
-30
lines changed

agent-backend/src/vectorstores/factory.py

Lines changed: 94 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,23 @@
33
from models.vectordatabase import VectorDatabase
44
from langchain_core.embeddings import Embeddings
55

6-
def vectorstore_factory(embedding_model: Embeddings, collection_name: str, tool, type: str, api_key: str = None, url: str = None, namespace: str = None, byoVectorDb: bool = False):
6+
7+
def vectorstore_factory(
8+
embedding_model: Embeddings,
9+
collection_name: str,
10+
tool,
11+
type: str,
12+
api_key: str = None,
13+
url: str = None,
14+
namespace: str = None,
15+
byoVectorDb: bool = False,
16+
):
717
if byoVectorDb is False:
818
type = VectorDatabase.Pinecone
919
elif type is None:
10-
raise ValueError("Vector database type must be specified when byoVectorDb is True")
20+
raise ValueError(
21+
"Vector database type must be specified when byoVectorDb is True"
22+
)
1123

1224
match type:
1325
case VectorDatabase.Qdrant:
@@ -26,35 +38,51 @@ def vectorstore_factory(embedding_model: Embeddings, collection_name: str, tool,
2638
from qdrant_client.http.api_client import ApiClient, AsyncApiClient
2739
from httpx import AsyncClient, Client, Request, Response
2840
from typing import Callable, Awaitable, Any
41+
2942
Send = Callable[[Request], Response]
3043
SendAsync = Callable[[Request], Awaitable[Response]]
44+
3145
class BaseAsyncMiddleware:
32-
async def __call__(self, request: Request, call_next: SendAsync) -> Response:
46+
async def __call__(
47+
self, request: Request, call_next: SendAsync
48+
) -> Response:
3349
return await call_next(request)
50+
3451
class BaseMiddleware:
3552
def __call__(self, request: Request, call_next: Send) -> Response:
3653
return call_next(request)
54+
3755
def custom_init(cls, host: str = None, **kwargs: Any) -> None:
3856
cls.host = host
3957
cls.middleware: MiddlewareT = BaseMiddleware()
40-
if 'vector_name' in kwargs:
41-
kwargs.pop('vector_name')
42-
kwargs.pop('filter')
58+
if "vector_name" in kwargs:
59+
kwargs.pop("vector_name")
60+
kwargs.pop("filter")
4361
cls._client = Client(**kwargs)
62+
4463
ApiClient.__init__ = custom_init
64+
4565
def a_custom_imit(self, host: str = None, **kwargs: Any) -> None:
4666
self.host = host
4767
self.middleware: AsyncMiddlewareT = BaseAsyncMiddleware()
48-
if 'vector_name' in kwargs:
49-
kwargs.pop('vector_name')
50-
kwargs.pop('filter')
68+
if "vector_name" in kwargs:
69+
kwargs.pop("vector_name")
70+
kwargs.pop("filter")
5171
self._async_client = AsyncClient(**kwargs)
72+
5273
AsyncApiClient.__init__ = a_custom_imit
5374

5475
# monkey patching langchain for this to work properly with our Qdrant metadata layout (not nested)
5576
from langchain_community.vectorstores.qdrant import Qdrant
5677
from langchain_community.docstore.document import Document
57-
def custom_document_from_scored_point(cls, scored_point, collection_name, content_payload_key, metadata_payload_key):
78+
79+
def custom_document_from_scored_point(
80+
cls,
81+
scored_point,
82+
collection_name,
83+
content_payload_key,
84+
metadata_payload_key,
85+
):
5886
metadata = scored_point.payload or {}
5987
# Check if metadata is a dictionary and handle it appropriately
6088
if isinstance(metadata, dict):
@@ -70,7 +98,11 @@ def custom_document_from_scored_point(cls, scored_point, collection_name, conten
7098
page_content=scored_point.payload.get(content_payload_key),
7199
metadata=metadata,
72100
)
73-
Qdrant._document_from_scored_point = classmethod(custom_document_from_scored_point)
101+
102+
Qdrant._document_from_scored_point = classmethod(
103+
custom_document_from_scored_point
104+
)
105+
74106
def custom_visit_comparison(self, comparison):
75107
try:
76108
from qdrant_client.http import models as rest
@@ -93,23 +125,29 @@ def custom_visit_comparison(self, comparison):
93125
)
94126
else:
95127
kwargs = {comparison.comparator.value: comparison.value}
96-
return rest.FieldCondition(key=attribute, range=rest.Range(**kwargs))
128+
return rest.FieldCondition(
129+
key=attribute, range=rest.Range(**kwargs)
130+
)
131+
97132
QdrantTranslator.visit_comparison = custom_visit_comparison
98133

99134
# Create qdrant filters from tool ragFilters
100135
from tools.retrievers.filters import create_qdrant_filters
136+
101137
my_filters = create_qdrant_filters(tool.ragFilters)
102138

103139
# Monkey patch similarity_search_with_score_by_vector to inject our filters
104140
from typing import List, Optional, Tuple, Dict, Union
105141
from qdrant_client.conversions import common_types
142+
106143
DictFilter = Dict[str, Union[str, int, bool, dict, list]]
107144
MetadataFilter = Union[DictFilter, common_types.Filter]
145+
108146
def similarity_search_with_score_by_vector_with_filter(
109147
self,
110148
embedding: List[float],
111149
k: int = 4,
112-
filter: Optional[MetadataFilter] = None, # NOTE: unused
150+
filter: Optional[MetadataFilter] = None, # NOTE: unused
113151
search_params: Optional[common_types.SearchParams] = None,
114152
offset: int = 0,
115153
score_threshold: Optional[float] = None,
@@ -144,25 +182,50 @@ def similarity_search_with_score_by_vector_with_filter(
144182
)
145183
for result in results
146184
]
147-
Qdrant.similarity_search_with_score_by_vector = similarity_search_with_score_by_vector_with_filter
148-
print("Using arguments for Qdrant:", url, api_key, collection_name)
149-
185+
186+
Qdrant.similarity_search_with_score_by_vector = (
187+
similarity_search_with_score_by_vector_with_filter
188+
)
189+
# Safely log Qdrant arguments, handling potential Unicode characters
190+
try:
191+
safe_url = str(url) if url else "None"
192+
safe_api_key = str(api_key) if api_key else "None"
193+
safe_collection_name = (
194+
str(collection_name) if collection_name else "None"
195+
)
196+
print(
197+
f"Using arguments for Qdrant: {safe_url} {safe_api_key} {safe_collection_name}"
198+
)
199+
except UnicodeEncodeError as e:
200+
print(
201+
f"Using arguments for Qdrant: [URL: {'None' if url is None else 'Present'}] [API_KEY: {'None' if api_key is None else 'Present'}] [COLLECTION: {'None' if collection_name is None else 'Present'}] (Unicode encoding error: {e})"
202+
)
203+
150204
return Qdrant.from_existing_collection(
151205
embedding=embedding_model,
152206
path=None,
153207
collection_name=collection_name,
154208
# vector_name=embedding_model.model,
155-
url=url[:-5] if url and url.endswith(':6334') else url if url is not None else QDRANT_HOST,
156-
api_key=api_key
209+
url=(
210+
url[:-5]
211+
if url and url.endswith(":6334")
212+
else url if url is not None else QDRANT_HOST
213+
),
214+
api_key=api_key,
157215
)
158216
case VectorDatabase.Pinecone:
159217

160-
from langchain_community.vectorstores.pinecone import Pinecone, _import_pinecone,_is_pinecone_v3
218+
from langchain_community.vectorstores.pinecone import (
219+
Pinecone,
220+
_import_pinecone,
221+
_is_pinecone_v3,
222+
)
161223
from typing import List, Optional, Tuple, Dict, Union
162224
from langchain_community.docstore.document import Document
163225
from tools.retrievers.filters import create_pinecone_filters
164-
226+
165227
my_filters = create_pinecone_filters(tool.ragFilters)
228+
166229
def similarity_search_by_vector_with_score_with_filter(
167230
self,
168231
embedding: List[float],
@@ -188,24 +251,29 @@ def similarity_search_by_vector_with_score_with_filter(
188251
if self._text_key in metadata:
189252
text = metadata.pop(self._text_key)
190253
score = res["score"]
191-
docs.append((Document(page_content=text, metadata=metadata), score))
254+
docs.append(
255+
(Document(page_content=text, metadata=metadata), score)
256+
)
192257
else:
193258
logger.warning(
194259
f"Found document with no `{self._text_key}` key. Skipping."
195260
)
196261
return docs
197262

198-
Pinecone.similarity_search_by_vector_with_score = similarity_search_by_vector_with_score_with_filter
263+
Pinecone.similarity_search_by_vector_with_score = (
264+
similarity_search_by_vector_with_score_with_filter
265+
)
199266

200267
if api_key is None:
201268
from init.env_variables import HOSTED_PINECONE_API_KEY
202-
os.environ['PINECONE_API_KEY'] = HOSTED_PINECONE_API_KEY
269+
270+
os.environ["PINECONE_API_KEY"] = HOSTED_PINECONE_API_KEY
203271
else:
204-
os.environ['PINECONE_API_KEY'] = api_key
272+
os.environ["PINECONE_API_KEY"] = api_key
205273

206274
return Pinecone.from_existing_index(
207-
index_name=collection_name, #TODO: make customisable
275+
index_name=collection_name, # TODO: make customisable
208276
embedding=embedding_model,
209-
text_key="page_content", #TODO: check
277+
text_key="page_content", # TODO: check
210278
namespace=namespace,
211279
)

docker-compose.yml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -79,8 +79,8 @@ services:
7979
- AGENT_BACKEND_SOCKET_TOKEN=changeme
8080
- PROJECT_ID=${PROJECT_ID}
8181
- FROM_EMAIL_ADDRESS=noreply@agentcloud.dev
82-
- WEBAPP_WEBHOOK_HOST=http://webapp_next:3000
83-
- URL_APP=http://127.0.0.1:3000
82+
- WEBAPP_WEBHOOK_HOST=http://localhost:3000
83+
- URL_APP=http://localhost:3000
8484
- DEBUG=webapp:*
8585
- STRIPE_WEBHOOK_SECRET=
8686
- STRIPE_ACCOUNT_SECRET=
@@ -174,7 +174,7 @@ services:
174174
- LOCAL=True
175175
- MAX_THREADS=50
176176
- BASE_PATH=/app
177-
- SOCKET_URL=http://webapp_next:3000/
177+
- SOCKET_URL=http://host.docker.internal:3000
178178
- DB_URL=mongodb://docker_mongo:27017/test
179179
- REDIS_HOST=docker_redis
180180
- QDRANT_HOST=http://qdrant
@@ -224,7 +224,7 @@ services:
224224
- QDRANT_PORT=6334
225225
- REDIS_HOST=docker_redis
226226
- REDIS_PORT=6379
227-
- WEBAPP_HOST=webapp_next
227+
- WEBAPP_HOST=localhost
228228
- THREAD_PERCENTAGE_UTILISATION=0.8
229229
- USE_GPU=false
230230
- LOGGING_LEVEL=warn

0 commit comments

Comments
 (0)