@@ -182,56 +182,22 @@ async def local_search(
182182 ------
183183 TODO: Document any exceptions to expect.
184184 """
185- #################################### BEGIN PATCH ####################################
186- # TODO: remove the following patch that checks for a vector_store prior to v1 release
187- # TODO: this is a backwards compatibility patch that injects the default vector_store settings into the config if it is not present
188- # Only applicable in situations involving a local vector_store (lancedb). The general idea:
189- # if vector_store not in config:
190- # 1. assume user is running local if vector_store is not in config
191- # 2. insert default vector_store in config
192- # 3 .create lancedb vector_store instance
193- # 4. upload vector embeddings from the input dataframes to the vector_store
194- backwards_compatible = False
195- if not config .embeddings .vector_store :
196- backwards_compatible = True
197- from graphrag .query .input .loaders .dfs import store_entity_semantic_embeddings
198- from graphrag .vector_stores .lancedb import LanceDBVectorStore
199-
200- config .embeddings .vector_store = {
201- "type" : "lancedb" ,
202- "db_uri" : f"{ Path (config .storage .base_dir )} /lancedb" ,
203- "collection_name" : "entity_description_embeddings" ,
204- "overwrite" : True ,
205- }
206- _entities = read_indexer_entities (nodes , entities , community_level )
207- description_embedding_store = LanceDBVectorStore (
208- db_uri = config .embeddings .vector_store ["db_uri" ],
209- collection_name = config .embeddings .vector_store ["collection_name" ],
210- overwrite = config .embeddings .vector_store ["overwrite" ],
211- )
212- description_embedding_store .connect (
213- db_uri = config .embeddings .vector_store ["db_uri" ]
214- )
215- # dump embeddings from the entities list to the description_embedding_store
216- store_entity_semantic_embeddings (
217- entities = _entities , vectorstore = description_embedding_store
218- )
219- #################################### END PATCH ####################################
185+ config = _patch_vector_store (config , nodes , entities , community_level )
220186
221187 # TODO: update filepath of lancedb (if used) until the new config engine has been implemented
222188 # TODO: remove the type ignore annotations below once the new config engine has been refactored
223189 vector_store_type = config .embeddings .vector_store .get ("type" ) # type: ignore
224190 vector_store_args = config .embeddings .vector_store
225- if vector_store_type == VectorStoreType .LanceDB and not backwards_compatible :
191+ if vector_store_type == VectorStoreType .LanceDB :
226192 db_uri = config .embeddings .vector_store ["db_uri" ] # type: ignore
227193 lancedb_dir = Path (config .root_dir ).resolve () / db_uri
228194 vector_store_args ["db_uri" ] = str (lancedb_dir ) # type: ignore
229195
230196 reporter .info (f"Vector Store Args: { redact (vector_store_args )} " ) # type: ignore
231- if not backwards_compatible : # can remove this check and always set the description_embedding_store before v1 release
232- description_embedding_store = _get_embedding_description_store (
233- config_args = vector_store_args , # type: ignore
234- )
197+
198+ description_embedding_store = _get_embedding_description_store (
199+ config_args = vector_store_args , # type: ignore
200+ )
235201
236202 _entities = read_indexer_entities (nodes , entities , community_level )
237203 _covariates = read_indexer_covariates (covariates ) if covariates is not None else []
@@ -289,56 +255,22 @@ async def local_search_streaming(
289255 ------
290256 TODO: Document any exceptions to expect.
291257 """
292- #################################### BEGIN PATCH ####################################
293- # TODO: remove the following patch that checks for a vector_store prior to v1 release
294- # TODO: this is a backwards compatibility patch that injects the default vector_store settings into the config if it is not present
295- # Only applicable in situations involving a local vector_store (lancedb). The general idea:
296- # if vector_store not in config:
297- # 1. assume user is running local if vector_store is not in config
298- # 2. insert default vector_store in config
299- # 3 .create lancedb vector_store instance
300- # 4. upload vector embeddings from the input dataframes to the vector_store
301- backwards_compatible = False
302- if not config .embeddings .vector_store :
303- backwards_compatible = True
304- from graphrag .query .input .loaders .dfs import store_entity_semantic_embeddings
305- from graphrag .vector_stores .lancedb import LanceDBVectorStore
306-
307- config .embeddings .vector_store = {
308- "type" : "lancedb" ,
309- "db_uri" : f"{ Path (config .storage .base_dir )} /lancedb" ,
310- "collection_name" : "entity_description_embeddings" ,
311- "overwrite" : True ,
312- }
313- _entities = read_indexer_entities (nodes , entities , community_level )
314- description_embedding_store = LanceDBVectorStore (
315- db_uri = config .embeddings .vector_store ["db_uri" ],
316- collection_name = config .embeddings .vector_store ["collection_name" ],
317- overwrite = config .embeddings .vector_store ["overwrite" ],
318- )
319- description_embedding_store .connect (
320- db_uri = config .embeddings .vector_store ["db_uri" ]
321- )
322- # dump embeddings from the entities list to the description_embedding_store
323- store_entity_semantic_embeddings (
324- entities = _entities , vectorstore = description_embedding_store
325- )
326- #################################### END PATCH ####################################
258+ config = _patch_vector_store (config , nodes , entities , community_level )
327259
328260 # TODO: must update filepath of lancedb (if used) until the new config engine has been implemented
329261 # TODO: remove the type ignore annotations below once the new config engine has been refactored
330262 vector_store_type = config .embeddings .vector_store .get ("type" ) # type: ignore
331263 vector_store_args = config .embeddings .vector_store
332- if vector_store_type == VectorStoreType .LanceDB and not backwards_compatible :
264+ if vector_store_type == VectorStoreType .LanceDB :
333265 db_uri = config .embeddings .vector_store ["db_uri" ] # type: ignore
334266 lancedb_dir = Path (config .root_dir ).resolve () / db_uri
335267 vector_store_args ["db_uri" ] = str (lancedb_dir ) # type: ignore
336268
337269 reporter .info (f"Vector Store Args: { redact (vector_store_args )} " ) # type: ignore
338- if not backwards_compatible : # can remove this check and always set the description_embedding_store before v1 release
339- description_embedding_store = _get_embedding_description_store (
340- config_args = vector_store_args , # type: ignore
341- )
270+
# The keyword must be `config_args`: that is the only parameter that
# _get_embedding_description_store declares, so any other name raises TypeError.
description_embedding_store = _get_embedding_description_store(
    config_args=vector_store_args,  # type: ignore
)
342274
343275 _entities = read_indexer_entities (nodes , entities , community_level )
344276 _covariates = read_indexer_covariates (covariates ) if covariates is not None else []
@@ -368,13 +300,55 @@ async def local_search_streaming(
368300 yield stream_chunk
369301
370302
def _patch_vector_store(
    config: GraphRagConfig,
    nodes: pd.DataFrame,
    entities: pd.DataFrame,
    community_level: int,
) -> GraphRagConfig:
    """Inject default LanceDB vector store settings when the config has none.

    Backwards-compatibility shim. When ``config.embeddings.vector_store`` is
    falsy, default local LanceDB settings are written into ``config`` (the
    object is mutated in place), a LanceDB store is created from them, and the
    entities read from the input dataframes are uploaded to that store. A
    config that already carries vector store settings is returned untouched.

    Args:
        config: Configuration to inspect; mutated in place when patched.
        nodes: Forwarded to ``read_indexer_entities``.
        entities: Forwarded to ``read_indexer_entities``.
        community_level: Forwarded to ``read_indexer_entities``.

    Returns:
        The same ``config`` object that was passed in.
    """
    # TODO: remove this backwards-compatibility patch prior to the v1 release.
    if config.embeddings.vector_store:
        return config

    from graphrag.query.input.loaders.dfs import store_entity_semantic_embeddings
    from graphrag.vector_stores.lancedb import LanceDBVectorStore

    # Only applicable to a local vector store (lancedb). The general idea:
    #   1. assume the user is running locally if vector_store is not in config
    #   2. insert the default vector_store settings into the config
    #   3. create a lancedb vector_store instance
    #   4. upload vector embeddings from the input dataframes to the vector_store
    vector_store = {
        "type": "lancedb",
        "db_uri": f"{Path(config.storage.base_dir)}/lancedb",
        "container_name": "default",
        "overwrite": True,
    }
    config.embeddings.vector_store = vector_store

    # Derive the collection name from the container name, using the same
    # "<container_name>-entity-description" pattern as
    # _get_embedding_description_store, so the two cannot drift apart.
    collection_name = f"{vector_store['container_name']}-entity-description"

    # NOTE(review): the store is created at the db_uri exactly as written above,
    # while the local_search callers join db_uri onto config.root_dir before
    # reading. Presumably these resolve to the same directory only when the
    # process runs from root_dir or base_dir is absolute -- TODO confirm.
    description_embedding_store = LanceDBVectorStore(
        db_uri=vector_store["db_uri"],
        collection_name=collection_name,
        overwrite=vector_store["overwrite"],
    )
    description_embedding_store.connect(db_uri=vector_store["db_uri"])

    # dump embeddings from the entities list to the description_embedding_store
    _entities = read_indexer_entities(nodes, entities, community_level)
    store_entity_semantic_embeddings(
        entities=_entities, vectorstore=description_embedding_store
    )
    return config
341+
342+
def _get_embedding_description_store(
    config_args: dict,
):
    """Get the embedding description store.

    Args:
        config_args: Vector store settings. ``type`` is required and selects
            the store implementation. ``container_name`` is optional and
            defaults to ``"default"``; it prefixes the collection name. The
            whole mapping is also forwarded as keyword arguments to the
            store's ``connect`` call.

    Returns:
        The vector store built by ``VectorStoreFactory.get_vector_store``,
        after ``connect`` has been called on it.

    Raises:
        KeyError: If ``config_args`` has no ``type`` entry.
    """
    vector_store_type = config_args["type"]
    # Fall back to "default" so configs written before `container_name`
    # existed do not fail with a KeyError.
    container_name = config_args.get("container_name", "default")
    collection_name = f"{container_name}-entity-description"
    description_embedding_store = VectorStoreFactory.get_vector_store(
        vector_store_type=vector_store_type,
        kwargs={**config_args, "collection_name": collection_name},
    )
    description_embedding_store.connect(**config_args)
    return description_embedding_store
0 commit comments