@@ -459,21 +459,24 @@ def _node_batcher(
459459 yield nodes [i : i + batch_size ]
460460
def _update_docstore(
    self,
    nodes: Sequence[BaseNode],
    effective_strategy: DocstoreStrategy,
    store_doc_text: bool = True,
) -> None:
    """
    Write ``nodes`` to the document store according to ``effective_strategy``.

    The strategy is supplied by the caller instead of being read from
    ``self.docstore_strategy``, so this method acts on whatever strategy the
    caller resolved for the current run.

    Args:
        nodes: Nodes to add to ``self.docstore``.
        effective_strategy: Strategy to apply for this call.
            ``UPSERTS`` / ``UPSERTS_AND_DELETE`` record an ``id_ -> hash``
            mapping for every node before adding the nodes;
            ``DUPLICATES_ONLY`` only adds the nodes.
        store_doc_text: Forwarded to the docstore as ``store_text``.

    Raises:
        AssertionError: If ``self.docstore`` is ``None``.
        ValueError: If ``effective_strategy`` is not one of the three
            strategies handled here. Nothing is written in that case.

    """
    assert self.docstore is not None

    if effective_strategy in (
        DocstoreStrategy.UPSERTS,
        DocstoreStrategy.UPSERTS_AND_DELETE,
    ):
        # Upsert-style strategies also persist each node's hash, keyed by id.
        self.docstore.set_document_hashes({n.id_: n.hash for n in nodes})
    elif effective_strategy != DocstoreStrategy.DUPLICATES_ONLY:
        # Reject unknown strategies before touching the docstore.
        raise ValueError(f"Invalid docstore strategy: {effective_strategy}")

    # Every valid strategy ends by adding the nodes themselves.
    self.docstore.add_documents(nodes, store_text=store_doc_text)
477480
478481 @dispatcher .span
479482 def run (
@@ -511,30 +514,34 @@ def run(
511514 """
512515 input_nodes = self ._prepare_inputs (documents , nodes )
513516
517+ effective_strategy = self .docstore_strategy
518+ if (
519+ self .docstore is not None
520+ and self .vector_store is None
521+ and self .docstore_strategy
522+ in (DocstoreStrategy .UPSERTS , DocstoreStrategy .UPSERTS_AND_DELETE )
523+ ):
524+ warnings .warn (
525+ f"docstore_strategy='{ self .docstore_strategy .value } ' requires a vector store "
526+ "to apply upsert/delete semantics; falling back to 'duplicates_only' for this run. "
527+ "pipeline.docstore_strategy is unchanged." ,
528+ UserWarning ,
529+ stacklevel = 3 ,
530+ )
531+ effective_strategy = DocstoreStrategy .DUPLICATES_ONLY
532+
514533 # check if we need to dedup
515534 if self .docstore is not None and self .vector_store is not None :
516- if self . docstore_strategy in (
535+ if effective_strategy in (
517536 DocstoreStrategy .UPSERTS ,
518537 DocstoreStrategy .UPSERTS_AND_DELETE ,
519538 ):
520539 nodes_to_run = self ._handle_upserts (input_nodes )
521- elif self . docstore_strategy == DocstoreStrategy .DUPLICATES_ONLY :
540+ elif effective_strategy == DocstoreStrategy .DUPLICATES_ONLY :
522541 nodes_to_run = self ._handle_duplicates (input_nodes )
523542 else :
524- raise ValueError (f"Invalid docstore strategy: { self . docstore_strategy } " )
543+ raise ValueError (f"Invalid docstore strategy: { effective_strategy } " )
525544 elif self .docstore is not None and self .vector_store is None :
526- if self .docstore_strategy == DocstoreStrategy .UPSERTS :
527- logger .info (
528- "Docstore strategy set to upserts, but no vector store. "
529- "Switching to duplicates_only strategy."
530- )
531- self .docstore_strategy = DocstoreStrategy .DUPLICATES_ONLY
532- elif self .docstore_strategy == DocstoreStrategy .UPSERTS_AND_DELETE :
533- logger .info (
534- "Docstore strategy set to upserts and delete, but no vector store. "
535- "Switching to duplicates_only strategy."
536- )
537- self .docstore_strategy = DocstoreStrategy .DUPLICATES_ONLY
538545 nodes_to_run = self ._handle_duplicates (input_nodes )
539546 else :
540547 nodes_to_run = input_nodes
@@ -582,27 +589,34 @@ def run(
582589 self .vector_store .add (nodes_with_embeddings )
583590
584591 if self .docstore is not None :
585- self ._update_docstore (nodes_to_run , store_doc_text = store_doc_text )
592+ self ._update_docstore (
593+ nodes_to_run ,
594+ effective_strategy = effective_strategy ,
595+ store_doc_text = store_doc_text ,
596+ )
586597
587598 return nodes
588599
589600 # ------ async methods ------
async def _aupdate_docstore(
    self,
    nodes: Sequence[BaseNode],
    effective_strategy: DocstoreStrategy,
    store_doc_text: bool = True,
) -> None:
    """
    Asynchronously write ``nodes`` to the document store.

    Async counterpart of the synchronous docstore update: the strategy is
    supplied by the caller instead of being read from
    ``self.docstore_strategy``.

    Args:
        nodes: Nodes to add to ``self.docstore``.
        effective_strategy: Strategy to apply for this call.
            ``UPSERTS`` / ``UPSERTS_AND_DELETE`` record an ``id_ -> hash``
            mapping for every node before adding the nodes;
            ``DUPLICATES_ONLY`` only adds the nodes.
        store_doc_text: Forwarded to the docstore as ``store_text``.

    Raises:
        AssertionError: If ``self.docstore`` is ``None``.
        ValueError: If ``effective_strategy`` is not one of the three
            strategies handled here. Nothing is written in that case.

    """
    assert self.docstore is not None

    if effective_strategy in (
        DocstoreStrategy.UPSERTS,
        DocstoreStrategy.UPSERTS_AND_DELETE,
    ):
        # Upsert-style strategies also persist each node's hash, keyed by id.
        await self.docstore.aset_document_hashes({n.id_: n.hash for n in nodes})
    elif effective_strategy != DocstoreStrategy.DUPLICATES_ONLY:
        # Reject unknown strategies before touching the docstore.
        raise ValueError(f"Invalid docstore strategy: {effective_strategy}")

    # Every valid strategy ends by adding the nodes themselves.
    await self.docstore.async_add_documents(nodes, store_text=store_doc_text)
606620
607621 async def _ahandle_duplicates (
608622 self ,
@@ -700,38 +714,41 @@ async def arun(
700714 """
701715 input_nodes = self ._prepare_inputs (documents , nodes )
702716
717+ effective_strategy = self .docstore_strategy
718+ if (
719+ self .docstore is not None
720+ and self .vector_store is None
721+ and self .docstore_strategy
722+ in (DocstoreStrategy .UPSERTS , DocstoreStrategy .UPSERTS_AND_DELETE )
723+ ):
724+ warnings .warn (
725+ f"docstore_strategy='{ self .docstore_strategy .value } ' requires a vector store "
726+ "to apply upsert/delete semantics; falling back to 'duplicates_only' for this run. "
727+ "pipeline.docstore_strategy is unchanged." ,
728+ UserWarning ,
729+ stacklevel = 3 ,
730+ )
731+ effective_strategy = DocstoreStrategy .DUPLICATES_ONLY
732+
703733 # check if we need to dedup
704734 if self .docstore is not None and self .vector_store is not None :
705- if self . docstore_strategy in (
735+ if effective_strategy in (
706736 DocstoreStrategy .UPSERTS ,
707737 DocstoreStrategy .UPSERTS_AND_DELETE ,
708738 ):
709739 nodes_to_run = await self ._ahandle_upserts (
710740 input_nodes , store_doc_text = store_doc_text
711741 )
712- elif self . docstore_strategy == DocstoreStrategy .DUPLICATES_ONLY :
742+ elif effective_strategy == DocstoreStrategy .DUPLICATES_ONLY :
713743 nodes_to_run = await self ._ahandle_duplicates (
714744 input_nodes , store_doc_text = store_doc_text
715745 )
716746 else :
717- raise ValueError (f"Invalid docstore strategy: { self . docstore_strategy } " )
747+ raise ValueError (f"Invalid docstore strategy: { effective_strategy } " )
718748 elif self .docstore is not None and self .vector_store is None :
719- if self .docstore_strategy == DocstoreStrategy .UPSERTS :
720- logger .info (
721- "Docstore strategy set to upserts, but no vector store. "
722- "Switching to duplicates_only strategy."
723- )
724- self .docstore_strategy = DocstoreStrategy .DUPLICATES_ONLY
725- elif self .docstore_strategy == DocstoreStrategy .UPSERTS_AND_DELETE :
726- logger .info (
727- "Docstore strategy set to upserts and delete, but no vector store. "
728- "Switching to duplicates_only strategy."
729- )
730- self .docstore_strategy = DocstoreStrategy .DUPLICATES_ONLY
731749 nodes_to_run = await self ._ahandle_duplicates (
732750 input_nodes , store_doc_text = store_doc_text
733751 )
734-
735752 else :
736753 nodes_to_run = input_nodes
737754
@@ -785,6 +802,10 @@ async def arun(
785802 await self .vector_store .async_add (nodes_with_embeddings )
786803
787804 if self .docstore is not None :
788- await self ._aupdate_docstore (nodes_to_run , store_doc_text = store_doc_text )
805+ await self ._aupdate_docstore (
806+ nodes_to_run ,
807+ effective_strategy = effective_strategy ,
808+ store_doc_text = store_doc_text ,
809+ )
789810
790811 return nodes
0 commit comments