@@ -22,11 +22,13 @@ def __init__(self, datasource: DataSource):
         self.datasource = datasource
         self._source_cls = self.datasource.pipeline_obj.source_cls
         self._destination_cls = self.datasource.pipeline_obj.destination_cls
+        logger.debug("Initializing DataIngestionPipeline")
 
         self._destination = None
         self._transformations = self.datasource.pipeline_obj.transformation_objs
         embedding_cls = self.datasource.pipeline_obj.embedding_cls
         if embedding_cls:
+            logger.debug("Initializing DataIngestionPipeline Transformation")
             embedding_additional_kwargs = {
                 **self.datasource.pipeline_obj.embedding.data.get("additional_kwargs", {}),
                 **{"datasource": datasource},
@@ -39,29 +41,29 @@ def __init__(self, datasource: DataSource):
                     }
                 )
             )
+            logger.debug("Finished Initializing DataIngestionPipeline Transformation")
 
         if self._destination_cls:
+            logger.debug("Initializing DataIngestionPipeline Destination")
             self._destination = self._destination_cls(**self.datasource.pipeline_obj.destination_data)
             self._destination.initialize_client(datasource=self.datasource, create_collection=True)
+            logger.debug("Finished Initializing DataIngestionPipeline Destination")
 
     def process(self, document: DataDocument) -> DataDocument:
+        logger.debug(f"Processing document: {document.name}")
         document = self._source_cls.process_document(document)
-        if self.datasource.pipeline_obj.embedding:
-            embedding_data = self.datasource.pipeline_obj.embedding.data
-            embedding_data["additional_kwargs"] = {
-                **embedding_data.get("additional_kwargs", {}),
-                **{"datasource": self.datasource},
-            }
-            embedding_transformer = self.datasource.pipeline_obj.embedding_cls(**embedding_data)
-            self._transformations.append(embedding_transformer)
-
+        logger.debug(f"Creating IngestionPipeline for document: {document.name}")
         ingestion_pipeline = IngestionPipeline(transformations=self._transformations)
         ldoc = LlamaDocumentShim(**document.model_dump())
         ldoc.metadata = {**ldoc.metadata, **document.metadata}
+        logger.debug(f"Running IngestionPipeline for document: {document.name}")
         document.nodes = ingestion_pipeline.run(documents=[ldoc])
+        logger.debug(f"Finished running IngestionPipeline for document: {document.name}")
         document.node_ids = list(map(lambda x: x.id_, document.nodes))
         if self._destination:
+            logger.debug(f"Adding document: {document.name} to destination")
             self._destination.add(document=document)
+            logger.debug(f"Finished adding document: {document.name} to destination")
 
         return document
 
@@ -83,39 +85,50 @@ def __init__(self, datasource: DataSource):
         self._destination_cls = self.datasource.pipeline_obj.destination_cls
         self._destination = None
         self._embedding_generator = None
+        logger.debug("Initializing DataQueryPipeline")
 
         if self._destination_cls:
+            logger.debug("Initializing DataQueryPipeline Destination")
             self._destination = self._destination_cls(**self.datasource.pipeline_obj.destination_data)
             self._destination.initialize_client(datasource=self.datasource, create_collection=False)
+            logger.debug("Finished Initializing DataQueryPipeline Destination")
 
         if self.datasource.pipeline_obj.embedding:
+            logger.debug("Initializing DataQueryPipeline Embedding")
             embedding_data = self.datasource.pipeline_obj.embedding.data
             embedding_data["additional_kwargs"] = {
                 **embedding_data.get("additional_kwargs", {}),
                 **{"datasource": self.datasource},
             }
             self._embedding_generator = self.datasource.pipeline_obj.embedding_cls(**embedding_data)
+            logger.debug("Finished Initializing DataQueryPipeline Embedding")
 
     def search(self, query: str, use_hybrid_search=True, **kwargs) -> List[dict]:
         content_key = self.datasource.destination_text_content_key
         query_embedding = None
 
+        logger.debug(f"Initializing Search for query: {query}")
+
         if kwargs.get("search_filters", None):
             raise NotImplementedError("Search filters are not supported for this data source.")
 
         documents = []
 
         if self._embedding_generator:
+            logger.debug("Generating embedding for query")
             query_embedding = self._embedding_generator.get_embedding(query)
+            logger.debug("Finished generating embedding for query")
 
         if self._destination:
+            logger.debug(f"Searching for query: {query} in destination")
             query_result = self._destination.search(
                 query=query,
                 use_hybrid_search=use_hybrid_search,
                 query_embedding=query_embedding,
                 datasource_uuid=str(self.datasource.uuid),
                 **kwargs,
             )
+            logger.debug(f"Received results for query: {query} from destination")
             documents = list(
                 map(
                     lambda x: Document(page_content_key=content_key, page_content=x.text, metadata=x.metadata),
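Note: the logger.debug calls added in this diff assume a module-level logger is already available in the file; its definition is not shown in this excerpt. A minimal sketch of the usual pattern, assuming Python's standard logging module (the actual name and configuration in the real module may differ):

    import logging

    # Hypothetical module-level logger used by the debug calls above.
    logger = logging.getLogger(__name__)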