Update Docker Compose configuration and refactor vectorstore_factory for improved readability and logging

iandjx · iandjx · commit 0cf1cc630e80 · 2025-07-21T10:02:59.000+08:00
- Change WEBAPP_WEBHOOK_HOST and URL_APP to use localhost for local development.
- Update SOCKET_URL to use host.docker.internal for better compatibility.
- Refactor vectorstore_factory function to enhance readability with consistent formatting and improved error handling.
- Safely log Qdrant arguments to handle potential Unicode characters.
diff --git a/agent-backend/src/vectorstores/factory.py b/agent-backend/src/vectorstores/factory.py
@@ -3,11 +3,23 @@
 from models.vectordatabase import VectorDatabase
 from langchain_core.embeddings import Embeddings
 
-def vectorstore_factory(embedding_model: Embeddings, collection_name: str, tool, type: str, api_key: str = None, url: str = None, namespace: str = None, byoVectorDb: bool = False):
+
+def vectorstore_factory(
+    embedding_model: Embeddings,
+    collection_name: str,
+    tool,
+    type: str,
+    api_key: str = None,
+    url: str = None,
+    namespace: str = None,
+    byoVectorDb: bool = False,
+):
     if byoVectorDb is False:
         type = VectorDatabase.Pinecone
     elif type is None:
-        raise ValueError("Vector database type must be specified when byoVectorDb is True")
+        raise ValueError(
+            "Vector database type must be specified when byoVectorDb is True"
+        )
 
     match type:
         case VectorDatabase.Qdrant:
@@ -26,35 +38,51 @@ def vectorstore_factory(embedding_model: Embeddings, collection_name: str, tool,
             from qdrant_client.http.api_client import ApiClient, AsyncApiClient
             from httpx import AsyncClient, Client, Request, Response
             from typing import Callable, Awaitable, Any
+
             Send = Callable[[Request], Response]
             SendAsync = Callable[[Request], Awaitable[Response]]
+
             class BaseAsyncMiddleware:
-                async def __call__(self, request: Request, call_next: SendAsync) -> Response:
+                async def __call__(
+                    self, request: Request, call_next: SendAsync
+                ) -> Response:
                     return await call_next(request)
+
             class BaseMiddleware:
                 def __call__(self, request: Request, call_next: Send) -> Response:
                     return call_next(request)
+
             def custom_init(cls, host: str = None, **kwargs: Any) -> None:
                 cls.host = host
                 cls.middleware: MiddlewareT = BaseMiddleware()
-                if 'vector_name' in kwargs:
-                    kwargs.pop('vector_name')
-                    kwargs.pop('filter')
+                if "vector_name" in kwargs:
+                    kwargs.pop("vector_name")
+                    kwargs.pop("filter")
                 cls._client = Client(**kwargs)
+
             ApiClient.__init__ = custom_init
+
             def a_custom_imit(self, host: str = None, **kwargs: Any) -> None:
                 self.host = host
                 self.middleware: AsyncMiddlewareT = BaseAsyncMiddleware()
-                if 'vector_name' in kwargs:
-                    kwargs.pop('vector_name')
-                    kwargs.pop('filter')
+                if "vector_name" in kwargs:
+                    kwargs.pop("vector_name")
+                    kwargs.pop("filter")
                 self._async_client = AsyncClient(**kwargs)
+
             AsyncApiClient.__init__ = a_custom_imit
 
             # monkey patching langchain for this to work properly with our Qdrant metadata layout (not nested)
             from langchain_community.vectorstores.qdrant import Qdrant
             from langchain_community.docstore.document import Document
-            def custom_document_from_scored_point(cls, scored_point, collection_name, content_payload_key, metadata_payload_key):
+
+            def custom_document_from_scored_point(
+                cls,
+                scored_point,
+                collection_name,
+                content_payload_key,
+                metadata_payload_key,
+            ):
                 metadata = scored_point.payload or {}
                 # Check if metadata is a dictionary and handle it appropriately
                 if isinstance(metadata, dict):
@@ -70,7 +98,11 @@ def custom_document_from_scored_point(cls, scored_point, collection_name, conten
                     page_content=scored_point.payload.get(content_payload_key),
                     metadata=metadata,
                 )
-            Qdrant._document_from_scored_point = classmethod(custom_document_from_scored_point)
+
+            Qdrant._document_from_scored_point = classmethod(
+                custom_document_from_scored_point
+            )
+
             def custom_visit_comparison(self, comparison):
                 try:
                     from qdrant_client.http import models as rest
@@ -93,23 +125,29 @@ def custom_visit_comparison(self, comparison):
                     )
                 else:
                     kwargs = {comparison.comparator.value: comparison.value}
-                    return rest.FieldCondition(key=attribute, range=rest.Range(**kwargs))
+                    return rest.FieldCondition(
+                        key=attribute, range=rest.Range(**kwargs)
+                    )
+
             QdrantTranslator.visit_comparison = custom_visit_comparison
 
             # Create qdrant filters from tool ragFilters
             from tools.retrievers.filters import create_qdrant_filters
+
             my_filters = create_qdrant_filters(tool.ragFilters)
 
             # Monkey patch similarity_search_with_score_by_vector to inject our filters
             from typing import List, Optional, Tuple, Dict, Union
             from qdrant_client.conversions import common_types
+
             DictFilter = Dict[str, Union[str, int, bool, dict, list]]
             MetadataFilter = Union[DictFilter, common_types.Filter]
+
             def similarity_search_with_score_by_vector_with_filter(
                 self,
                 embedding: List[float],
                 k: int = 4,
-                filter: Optional[MetadataFilter] = None, # NOTE: unused
+                filter: Optional[MetadataFilter] = None,  # NOTE: unused
                 search_params: Optional[common_types.SearchParams] = None,
                 offset: int = 0,
                 score_threshold: Optional[float] = None,
@@ -144,25 +182,50 @@ def similarity_search_with_score_by_vector_with_filter(
                     )
                     for result in results
                 ]
-            Qdrant.similarity_search_with_score_by_vector = similarity_search_with_score_by_vector_with_filter
-            print("Using arguments for Qdrant:", url, api_key, collection_name)
-            
+
+            Qdrant.similarity_search_with_score_by_vector = (
+                similarity_search_with_score_by_vector_with_filter
+            )
+            # Safely log Qdrant arguments, handling potential Unicode characters
+            try:
+                safe_url = str(url) if url else "None"
+                safe_api_key = str(api_key) if api_key else "None"
+                safe_collection_name = (
+                    str(collection_name) if collection_name else "None"
+                )
+                print(
+                    f"Using arguments for Qdrant: {safe_url} {safe_api_key} {safe_collection_name}"
+                )
+            except UnicodeEncodeError as e:
+                print(
+                    f"Using arguments for Qdrant: [URL: {'None' if url is None else 'Present'}] [API_KEY: {'None' if api_key is None else 'Present'}] [COLLECTION: {'None' if collection_name is None else 'Present'}] (Unicode encoding error: {e})"
+                )
+
             return Qdrant.from_existing_collection(
                 embedding=embedding_model,
                 path=None,
                 collection_name=collection_name,
                 # vector_name=embedding_model.model,
-                url=url[:-5] if url and url.endswith(':6334') else url if url is not None else QDRANT_HOST,
-                api_key=api_key
+                url=(
+                    url[:-5]
+                    if url and url.endswith(":6334")
+                    else url if url is not None else QDRANT_HOST
+                ),
+                api_key=api_key,
             )
         case VectorDatabase.Pinecone:
 
-            from langchain_community.vectorstores.pinecone import Pinecone, _import_pinecone,_is_pinecone_v3
+            from langchain_community.vectorstores.pinecone import (
+                Pinecone,
+                _import_pinecone,
+                _is_pinecone_v3,
+            )
             from typing import List, Optional, Tuple, Dict, Union
             from langchain_community.docstore.document import Document
             from tools.retrievers.filters import create_pinecone_filters
-            
+
             my_filters = create_pinecone_filters(tool.ragFilters)
+
             def similarity_search_by_vector_with_score_with_filter(
                 self,
                 embedding: List[float],
@@ -188,24 +251,29 @@ def similarity_search_by_vector_with_score_with_filter(
                     if self._text_key in metadata:
                         text = metadata.pop(self._text_key)
                         score = res["score"]
-                        docs.append((Document(page_content=text, metadata=metadata), score))
+                        docs.append(
+                            (Document(page_content=text, metadata=metadata), score)
+                        )
                     else:
                         logger.warning(
                             f"Found document with no `{self._text_key}` key. Skipping."
                         )
                 return docs
 
-            Pinecone.similarity_search_by_vector_with_score = similarity_search_by_vector_with_score_with_filter
+            Pinecone.similarity_search_by_vector_with_score = (
+                similarity_search_by_vector_with_score_with_filter
+            )
 
             if api_key is None:
                 from init.env_variables import HOSTED_PINECONE_API_KEY
-                os.environ['PINECONE_API_KEY'] = HOSTED_PINECONE_API_KEY
+
+                os.environ["PINECONE_API_KEY"] = HOSTED_PINECONE_API_KEY
             else:
-                os.environ['PINECONE_API_KEY'] = api_key
+                os.environ["PINECONE_API_KEY"] = api_key
 
             return Pinecone.from_existing_index(
-                index_name=collection_name, #TODO: make customisable
+                index_name=collection_name,  # TODO: make customisable
                 embedding=embedding_model,
-                text_key="page_content", #TODO: check
+                text_key="page_content",  # TODO: check
                 namespace=namespace,
             )
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -79,8 +79,8 @@ services:
       - AGENT_BACKEND_SOCKET_TOKEN=changeme
       - PROJECT_ID=${PROJECT_ID}
       - FROM_EMAIL_ADDRESS=noreply@agentcloud.dev
-      - WEBAPP_WEBHOOK_HOST=http://webapp_next:3000
-      - URL_APP=http://127.0.0.1:3000
+      - WEBAPP_WEBHOOK_HOST=http://localhost:3000
+      - URL_APP=http://localhost:3000
       - DEBUG=webapp:*
       - STRIPE_WEBHOOK_SECRET=
       - STRIPE_ACCOUNT_SECRET=
@@ -174,7 +174,7 @@ services:
       - LOCAL=True
       - MAX_THREADS=50
       - BASE_PATH=/app
-      - SOCKET_URL=http://webapp_next:3000/
+      - SOCKET_URL=http://host.docker.internal:3000
       - DB_URL=mongodb://docker_mongo:27017/test
       - REDIS_HOST=docker_redis
       - QDRANT_HOST=http://qdrant
@@ -224,7 +224,7 @@ services:
       - QDRANT_PORT=6334
       - REDIS_HOST=docker_redis
       - REDIS_PORT=6379
-      - WEBAPP_HOST=webapp_next
+      - WEBAPP_HOST=localhost
       - THREAD_PERCENTAGE_UTILISATION=0.8
       - USE_GPU=false
       - LOGGING_LEVEL=warn