33from models .vectordatabase import VectorDatabase
44from langchain_core .embeddings import Embeddings
55
6- def vectorstore_factory (embedding_model : Embeddings , collection_name : str , tool , type : str , api_key : str = None , url : str = None , namespace : str = None , byoVectorDb : bool = False ):
6+
7+ def vectorstore_factory (
8+ embedding_model : Embeddings ,
9+ collection_name : str ,
10+ tool ,
11+ type : str ,
12+ api_key : str = None ,
13+ url : str = None ,
14+ namespace : str = None ,
15+ byoVectorDb : bool = False ,
16+ ):
717 if byoVectorDb is False :
818 type = VectorDatabase .Pinecone
919 elif type is None :
10- raise ValueError ("Vector database type must be specified when byoVectorDb is True" )
20+ raise ValueError (
21+ "Vector database type must be specified when byoVectorDb is True"
22+ )
1123
1224 match type :
1325 case VectorDatabase .Qdrant :
@@ -26,35 +38,51 @@ def vectorstore_factory(embedding_model: Embeddings, collection_name: str, tool,
2638 from qdrant_client .http .api_client import ApiClient , AsyncApiClient
2739 from httpx import AsyncClient , Client , Request , Response
2840 from typing import Callable , Awaitable , Any
41+
2942 Send = Callable [[Request ], Response ]
3043 SendAsync = Callable [[Request ], Awaitable [Response ]]
44+
class BaseAsyncMiddleware:
    """Pass-through async middleware.

    Calling an instance awaits ``call_next(request)`` and returns the
    resulting response unchanged.
    """

    async def __call__(self, request: Request, call_next: SendAsync) -> Response:
        response = await call_next(request)
        return response
50+
class BaseMiddleware:
    """Pass-through middleware.

    Calling an instance invokes ``call_next(request)`` and returns the
    resulting response unchanged.
    """

    def __call__(self, request: Request, call_next: Send) -> Response:
        response = call_next(request)
        return response
54+
def custom_init(cls, host: str = None, **kwargs: Any) -> None:
    """Set up a client object with a host, a pass-through middleware and an httpx ``Client``.

    Written to be installed as ``ApiClient.__init__``, so ``cls`` receives the
    instance being initialised (the parameter name is kept for compatibility).

    Args:
        host: Stored on the instance as ``host``.
        **kwargs: Forwarded to ``Client`` once ``vector_name`` and ``filter``
            have been removed.
    """
    cls.host = host
    cls.middleware: MiddlewareT = BaseMiddleware()
    # These two keys are stripped before the remaining kwargs reach Client.
    # A default is supplied so a missing key no longer raises KeyError
    # (previously "filter" was popped without one).
    kwargs.pop("vector_name", None)
    kwargs.pop("filter", None)
    cls._client = Client(**kwargs)
62+
4463 ApiClient .__init__ = custom_init
64+
def a_custom_imit(self, host: str = None, **kwargs: Any) -> None:
    """Set up an async client object with a host, a pass-through middleware and an httpx ``AsyncClient``.

    Written to be installed as ``AsyncApiClient.__init__``. The function name
    (including its spelling) is kept because code outside this block refers to it.

    Args:
        host: Stored on the instance as ``host``.
        **kwargs: Forwarded to ``AsyncClient`` once ``vector_name`` and
            ``filter`` have been removed.
    """
    self.host = host
    self.middleware: AsyncMiddlewareT = BaseAsyncMiddleware()
    # These two keys are stripped before the remaining kwargs reach AsyncClient.
    # A default is supplied so a missing key no longer raises KeyError
    # (previously "filter" was popped without one).
    kwargs.pop("vector_name", None)
    kwargs.pop("filter", None)
    self._async_client = AsyncClient(**kwargs)
72+
5273 AsyncApiClient .__init__ = a_custom_imit
5374
5475 # monkey patching langchain for this to work properly with our Qdrant metadata layout (not nested)
5576 from langchain_community .vectorstores .qdrant import Qdrant
5677 from langchain_community .docstore .document import Document
57- def custom_document_from_scored_point (cls , scored_point , collection_name , content_payload_key , metadata_payload_key ):
78+
79+ def custom_document_from_scored_point (
80+ cls ,
81+ scored_point ,
82+ collection_name ,
83+ content_payload_key ,
84+ metadata_payload_key ,
85+ ):
5886 metadata = scored_point .payload or {}
5987 # Check if metadata is a dictionary and handle it appropriately
6088 if isinstance (metadata , dict ):
@@ -70,7 +98,11 @@ def custom_document_from_scored_point(cls, scored_point, collection_name, conten
7098 page_content = scored_point .payload .get (content_payload_key ),
7199 metadata = metadata ,
72100 )
73- Qdrant ._document_from_scored_point = classmethod (custom_document_from_scored_point )
101+
102+ Qdrant ._document_from_scored_point = classmethod (
103+ custom_document_from_scored_point
104+ )
105+
74106 def custom_visit_comparison (self , comparison ):
75107 try :
76108 from qdrant_client .http import models as rest
@@ -93,23 +125,29 @@ def custom_visit_comparison(self, comparison):
93125 )
94126 else :
95127 kwargs = {comparison .comparator .value : comparison .value }
96- return rest .FieldCondition (key = attribute , range = rest .Range (** kwargs ))
128+ return rest .FieldCondition (
129+ key = attribute , range = rest .Range (** kwargs )
130+ )
131+
97132 QdrantTranslator .visit_comparison = custom_visit_comparison
98133
99134 # Create qdrant filters from tool ragFilters
100135 from tools .retrievers .filters import create_qdrant_filters
136+
101137 my_filters = create_qdrant_filters (tool .ragFilters )
102138
103139 # Monkey patch similarity_search_with_score_by_vector to inject our filters
104140 from typing import List , Optional , Tuple , Dict , Union
105141 from qdrant_client .conversions import common_types
142+
106143 DictFilter = Dict [str , Union [str , int , bool , dict , list ]]
107144 MetadataFilter = Union [DictFilter , common_types .Filter ]
145+
108146 def similarity_search_with_score_by_vector_with_filter (
109147 self ,
110148 embedding : List [float ],
111149 k : int = 4 ,
112- filter : Optional [MetadataFilter ] = None , # NOTE: unused
150+ filter : Optional [MetadataFilter ] = None , # NOTE: unused
113151 search_params : Optional [common_types .SearchParams ] = None ,
114152 offset : int = 0 ,
115153 score_threshold : Optional [float ] = None ,
@@ -144,25 +182,50 @@ def similarity_search_with_score_by_vector_with_filter(
144182 )
145183 for result in results
146184 ]
147- Qdrant .similarity_search_with_score_by_vector = similarity_search_with_score_by_vector_with_filter
148- print ("Using arguments for Qdrant:" , url , api_key , collection_name )
149-
185+
186+ Qdrant .similarity_search_with_score_by_vector = (
187+ similarity_search_with_score_by_vector_with_filter
188+ )
# Log the Qdrant connection arguments. The API key is a secret, so only
# report whether one was supplied instead of printing its value. The `!a`
# conversion escapes non-ASCII characters, so the output is pure ASCII and
# printing cannot fail with UnicodeEncodeError on a limited console encoding.
print(
    "Using arguments for Qdrant: "
    f"url={url!a} "
    f"api_key={'<set>' if api_key else '<not set>'} "
    f"collection_name={collection_name!a}"
)

203+
150204 return Qdrant .from_existing_collection (
151205 embedding = embedding_model ,
152206 path = None ,
153207 collection_name = collection_name ,
154208 # vector_name=embedding_model.model,
155- url = url [:- 5 ] if url and url .endswith (':6334' ) else url if url is not None else QDRANT_HOST ,
156- api_key = api_key
209+ url = (
210+ url [:- 5 ]
211+ if url and url .endswith (":6334" )
212+ else url if url is not None else QDRANT_HOST
213+ ),
214+ api_key = api_key ,
157215 )
158216 case VectorDatabase .Pinecone :
159217
160- from langchain_community .vectorstores .pinecone import Pinecone , _import_pinecone ,_is_pinecone_v3
218+ from langchain_community .vectorstores .pinecone import (
219+ Pinecone ,
220+ _import_pinecone ,
221+ _is_pinecone_v3 ,
222+ )
161223 from typing import List , Optional , Tuple , Dict , Union
162224 from langchain_community .docstore .document import Document
163225 from tools .retrievers .filters import create_pinecone_filters
164-
226+
165227 my_filters = create_pinecone_filters (tool .ragFilters )
228+
166229 def similarity_search_by_vector_with_score_with_filter (
167230 self ,
168231 embedding : List [float ],
@@ -188,24 +251,29 @@ def similarity_search_by_vector_with_score_with_filter(
188251 if self ._text_key in metadata :
189252 text = metadata .pop (self ._text_key )
190253 score = res ["score" ]
191- docs .append ((Document (page_content = text , metadata = metadata ), score ))
254+ docs .append (
255+ (Document (page_content = text , metadata = metadata ), score )
256+ )
192257 else :
193258 logger .warning (
194259 f"Found document with no `{ self ._text_key } ` key. Skipping."
195260 )
196261 return docs
197262
198- Pinecone .similarity_search_by_vector_with_score = similarity_search_by_vector_with_score_with_filter
263+ Pinecone .similarity_search_by_vector_with_score = (
264+ similarity_search_by_vector_with_score_with_filter
265+ )
199266
200267 if api_key is None :
201268 from init .env_variables import HOSTED_PINECONE_API_KEY
202- os .environ ['PINECONE_API_KEY' ] = HOSTED_PINECONE_API_KEY
269+
270+ os .environ ["PINECONE_API_KEY" ] = HOSTED_PINECONE_API_KEY
203271 else :
204- os .environ [' PINECONE_API_KEY' ] = api_key
272+ os .environ [" PINECONE_API_KEY" ] = api_key
205273
206274 return Pinecone .from_existing_index (
207- index_name = collection_name , # TODO: make customisable
275+ index_name = collection_name , # TODO: make customisable
208276 embedding = embedding_model ,
209- text_key = "page_content" , # TODO: check
277+ text_key = "page_content" , # TODO: check
210278 namespace = namespace ,
211279 )
0 commit comments