1919def filter_docstore (
2020 loaded_embeddings : VectorStore ,
2121 cli_kwargs : dict ,
22- ) -> Tuple [ VectorStore , bytes ] :
22+ ) -> VectorStore :
2323 if "filter_metadata" in cli_kwargs :
2424 filter_meta = create_metadata_filter (
2525 loaded_embeddings = loaded_embeddings ,
@@ -59,14 +59,15 @@ def filter_cont(cont: str) -> bool:
5959 logger .warning ("Your filter matched all stored documents!" )
6060 assert good , "No documents in the vectorstore match the given filter"
6161
62- # directly remove the filtered documents from the docstore
63- # but first store the docstore before altering it to allow
64- # unfiltering in the prompt
65- start_time = time .time ()
66- unfiltered_docstore_bytes = loaded_embeddings .serialize_to_bytes ()
67- serialize_time = time .time () - start_time
68- logger .debug (f"Serializing unfiltered docstore took { serialize_time :.3f} seconds" )
62+ # commented out because serializing the docstore takes quite long
63+ # # first store the docstore before altering it to allow
64+ # # unfiltering in the prompt
65+ # start_time = time.time()
66+ # unfiltered_docstore_bytes = loaded_embeddings.serialize_to_bytes()
67+ # serialize_time = time.time() - start_time
68+ # logger.debug(f"Serializing unfiltered docstore took {serialize_time:.3f} seconds")
6969
70+ # directly remove the filtered documents from the docstore
7071 start_time = time .time ()
7172 status = loaded_embeddings .delete (ids_to_del )
7273 delete_time = time .time () - start_time
@@ -87,7 +88,7 @@ def filter_cont(cont: str) -> bool:
8788 loaded_embeddings .index_to_docstore_id
8889 ), "Something went wrong when deleting filtered out documents"
8990
90- return loaded_embeddings , unfiltered_docstore_bytes
91+ return loaded_embeddings
9192
9293
9394def create_metadata_filter (
0 commit comments