@@ -24,11 +24,11 @@ async def parse_file(
     key = file.file_extension().lower()
     processor = file_processors.get(key)
     if processor is None:
-        logger.info("Skipping '%s', no parser found.", file.filename())
+        logger.info("'%s': Skipping, no parser found.", file.content.name)
         return []
-    logger.info("Ingesting '%s'", file.filename())
+    logger.info("'%s': Starting ingestion process", file.content.name)
     pages = [page async for page in processor.parser.parse(content=file.content)]
-    logger.info("Splitting '%s' into sections", file.filename())
+    logger.info("'%s': Splitting into sections", file.content.name)
     if image_embeddings:
         logger.warning("Each page will be split into smaller chunks of text, but images will be of the entire page.")
     sections = [
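Note that this hunk also switches the logged name from `file.filename()` to `file.content.name`. A minimal sketch of the distinction, under the assumption that `File` wraps an open file handle and `filename()` derives a base name from it (the class below is illustrative, not the repository's actual implementation):

```python
import os

class File:
    """Illustrative stand-in (an assumption, not the repo's actual class):
    wraps an open file-like object whose .name is the path it was opened with."""

    def __init__(self, content):
        self.content = content

    def filename(self) -> str:
        # What the old log calls used: just the base name of the path.
        return os.path.basename(self.content.name)

os.makedirs("data", exist_ok=True)
with open("data/report.pdf", "wb") as handle:
    f = File(handle)
    print(f.content.name)  # data/report.pdf -- what the new log lines print
    print(f.filename())    # report.pdf      -- what the old log lines printed
```

Under these assumptions, the new messages would show the path as opened rather than the base name alone.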
@@ -113,9 +113,11 @@ async def process_file_worker(semaphore: asyncio.Semaphore, file: File):
                        blob_image_embeddings: Optional[list[list[float]]] = None
                        if self.image_embeddings and blob_sas_uris:
                            blob_image_embeddings = await self.image_embeddings.create_embeddings(blob_sas_uris)
+                        logger.info("'%s': Computing embeddings and updating search index", file.content.name)
                        await self.search_manager.update_content(sections, blob_image_embeddings, url=file.url)
                finally:
                    if file:
+                        logger.info("'%s': Finished processing file", file.content.name)
                        file.close()
 
         if self.document_action == DocumentAction.Add:
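Taken together, the two hunks standardize every per-file message on a `'<name>': <action>` prefix and add two new checkpoints: one before the search-index update and one after cleanup in the `finally` block. A minimal sketch (the format string is an assumption, not the script's actual configuration) of surfacing these INFO-level messages at runtime:

```python
import logging

# Enable INFO-level output so the per-file progress messages added in
# this commit are emitted rather than filtered out by the default level.
logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(message)s")

logging.getLogger(__name__).info("'%s': Starting ingestion process", "report.pdf")
# prints: INFO:'report.pdf': Starting ingestion process
```

With logging configured this way, a full run would emit the sequence "Starting ingestion process", "Splitting into sections", "Computing embeddings and updating search index", and "Finished processing file" for each ingested file, making per-file progress easy to follow in the console.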