11from collections .abc import Iterable , Sequence
22from pathlib import Path
33from types import ModuleType
4- from typing import Any , ClassVar
4+ from typing import ClassVar , Generic
55
6- from pydantic import BaseModel , Field
6+ from pydantic import BaseModel
77from typing_extensions import Self
88
99from ragbits import document_search
1010from ragbits .core .audit .traces import trace , traceable
1111from ragbits .core .config import CoreConfig
12+ from ragbits .core .options import Options
1213from ragbits .core .sources .base import Source , SourceResolver
14+ from ragbits .core .types import NOT_GIVEN , NotGiven
1315from ragbits .core .utils ._pyproject import get_config_from_yaml
1416from ragbits .core .utils .config_handling import (
17+ ConfigurableComponent ,
1518 NoPreferredConfigError ,
1619 ObjectConstructionConfig ,
17- WithConstructionConfig ,
1820)
19- from ragbits .core .vector_stores import VectorStore
20- from ragbits .core .vector_stores .base import VectorStoreOptions
21+ from ragbits .core .vector_stores .base import VectorStore , VectorStoreOptionsT
2122from ragbits .document_search .documents .document import Document , DocumentMeta
2223from ragbits .document_search .documents .element import Element
2324from ragbits .document_search .ingestion .enrichers .router import ElementEnricherRouter
2425from ragbits .document_search .ingestion .parsers .router import DocumentParserRouter
25- from ragbits .document_search .ingestion .strategies import (
26- IngestStrategy ,
27- SequentialIngestStrategy ,
28- )
26+ from ragbits .document_search .ingestion .strategies import IngestStrategy , SequentialIngestStrategy
2927from ragbits .document_search .ingestion .strategies .base import IngestExecutionError , IngestExecutionResult
3028from ragbits .document_search .retrieval .rephrasers .base import QueryRephraser
3129from ragbits .document_search .retrieval .rephrasers .noop import NoopQueryRephraser
32- from ragbits .document_search .retrieval .rerankers .base import Reranker , RerankerOptions
30+ from ragbits .document_search .retrieval .rerankers .base import Reranker , RerankerOptionsT
3331from ragbits .document_search .retrieval .rerankers .noop import NoopReranker
3432
3533
class DocumentSearchOptions(Options, Generic[VectorStoreOptionsT, RerankerOptionsT]):
    """
    Object representing the options for the document search.

    The class is generic over the option types of the vector store and of the
    reranker it is used with. Both fields default to ``NOT_GIVEN``.

    Attributes:
        vector_store_options: The options for the vector store.
        reranker_options: The options for the reranker.
    """

    # NOTE(review): NOT_GIVEN is presumably a "not set by the caller" sentinel,
    # distinct from an explicit None - confirm against ragbits.core.types.
    vector_store_options: VectorStoreOptionsT | None | NotGiven = NOT_GIVEN
    reranker_options: RerankerOptionsT | None | NotGiven = NOT_GIVEN
4445
4546
4647class DocumentSearchConfig (BaseModel ):
4748 """
48- Schema for the dict taken by DocumentSearch.from_config method .
49+ Schema for the document search config .
4950 """
5051
5152 vector_store : ObjectConstructionConfig
@@ -56,39 +57,49 @@ class DocumentSearchConfig(BaseModel):
5657 enricher_router : dict [str , ObjectConstructionConfig ] = {}
5758
5859
59- class DocumentSearch (WithConstructionConfig ):
60+ class DocumentSearch (ConfigurableComponent [ DocumentSearchOptions [ VectorStoreOptionsT , RerankerOptionsT ]] ):
6061 """
61- A main entrypoint to the DocumentSearch functionality.
62-
63- It provides methods for both ingestion and retrieval.
62+ Main entrypoint to the document search functionality. It provides methods for document retrieval and ingestion.
6463
6564 Retrieval:
66-
6765 1. Uses QueryRephraser to rephrase the query.
68- 2. Uses VectorStore to retrieve the most relevant chunks.
69- 3. Uses Reranker to rerank the chunks.
66+ 2. Uses VectorStore to retrieve the most relevant elements.
67+ 3. Uses Reranker to rerank the elements.
68+
69+ Ingestion:
70+ 1. Uses IngestStrategy to orchestrate ingestion process.
71+ 2. Uses DocumentParserRouter to route the document to the appropriate DocumentParser to parse the content.
72+ 3. Uses ElementEnricherRouter to redirect the element to the appropriate ElementEnricher to enrich the element.
7073 """
7174
75+ options_cls : type [DocumentSearchOptions ] = DocumentSearchOptions
7276 default_module : ClassVar [ModuleType | None ] = document_search
7377 configuration_key : ClassVar [str ] = "document_search"
7478
75- vector_store : VectorStore
76- query_rephraser : QueryRephraser
77- reranker : Reranker
78-
79- ingest_strategy : IngestStrategy
80- parser_router : DocumentParserRouter
81- enricher_router : ElementEnricherRouter
82-
8379 def __init__ (
8480 self ,
85- vector_store : VectorStore ,
81+ vector_store : VectorStore [VectorStoreOptionsT ],
82+ * ,
8683 query_rephraser : QueryRephraser | None = None ,
87- reranker : Reranker | None = None ,
84+ reranker : Reranker [RerankerOptionsT ] | None = None ,
85+ default_options : DocumentSearchOptions [VectorStoreOptionsT , RerankerOptionsT ] | None = None ,
8886 ingest_strategy : IngestStrategy | None = None ,
8987 parser_router : DocumentParserRouter | None = None ,
9088 enricher_router : ElementEnricherRouter | None = None ,
9189 ) -> None :
90+ """
91+ Initialize the DocumentSearch instance.
92+
93+ Args:
94+ vector_store: The vector store to use for retrieval.
95+ query_rephraser: The query rephraser to use for retrieval.
96+ reranker: The reranker to use for retrieval.
97+ default_options: The default options for the search.
98+ ingest_strategy: The ingestion strategy to use for ingestion.
99+ parser_router: The document parser router to use for ingestion.
100+ enricher_router: The element enricher router to use for ingestion.
101+ """
102+ super ().__init__ (default_options = default_options )
92103 self .vector_store = vector_store
93104 self .query_rephraser = query_rephraser or NoopQueryRephraser ()
94105 self .reranker = reranker or NoopReranker ()
@@ -178,39 +189,47 @@ def preferred_subclass(
178189
179190 raise NoPreferredConfigError (f"Could not find preferred factory or configuration for { cls .configuration_key } " )
180191
async def search(
    self,
    query: str,
    options: DocumentSearchOptions[VectorStoreOptionsT, RerankerOptionsT] | None = None,
) -> Sequence[Element]:
    """
    Search for the most relevant elements for a query.

    The query is rephrased, every rephrased query is sent to the vector store,
    and the retrieved elements are handed to the reranker together with the
    original query.

    Args:
        query: The query to search for.
        options: The document search retrieval options. When provided, they are
            combined with the instance's default options using the ``|`` operator;
            otherwise the default options are used as they are.

    Returns:
        The elements returned by the reranker.
    """
    merged_options = (self.default_options | options) if options else self.default_options
    # `or None` turns any falsy value into None before it is passed downstream.
    # NOTE(review): presumably this is how an unset NOT_GIVEN is normalised - confirm.
    vector_store_options = merged_options.vector_store_options or None
    reranker_options = merged_options.reranker_options or None

    with trace(query=query, options=merged_options) as outputs:
        queries = await self.query_rephraser.rephrase(query)
        # One inner list per rephrased query. The loop variable is deliberately
        # not called `query`, so the original query passed to the reranker
        # below can never be confused with (or overwritten by) a rephrased one.
        elements = [
            [
                Element.from_vector_db_entry(result.entry, result.score)
                for result in await self.vector_store.retrieve(rephrased_query, vector_store_options)
            ]
            for rephrased_query in queries
        ]
        outputs.results = await self.reranker.rerank(
            elements=elements,
            query=query,
            options=reranker_options,
        )

    return outputs.results
210227
211228 @traceable
212229 async def ingest (
213- self , documents : str | Iterable [DocumentMeta | Document | Source ], fail_on_error : bool = True
230+ self ,
231+ documents : str | Iterable [DocumentMeta | Document | Source ],
232+ fail_on_error : bool = True ,
214233 ) -> IngestExecutionResult :
215234 """
216235 Ingest documents into the search index.
0 commit comments