@@ -73,8 +73,8 @@ def query_index(
7373
7474 try :
7575 self .tool .stream_log (
76- f">>> Querying '{ vector_db_instance_id } ' for { doc_id } ..."
77- )
76+ f">>> Querying '{ vector_db_instance_id } ' for { doc_id } ..."
77+ )
7878 try :
7979 doc_id_eq_filter = MetadataFilter .from_dict (
8080 {
@@ -287,6 +287,20 @@ def index(
287287
288288 if doc_id_found and not reindex :
289289 self .tool .stream_log (f"File was indexed already under { doc_id } " )
290+
291+ if not fs .exists (output_file_path ):
292+ # Added this as a workaround to handle extraction
293+ # for documents uploaded twice in different projects.
294+ # to be reconsidered after permanent fixes.
295+ extracted_text = self .extract_text (
296+ x2text_instance_id = x2text_instance_id ,
297+ file_path = file_path ,
298+ output_file_path = output_file_path ,
299+ enable_highlight = enable_highlight ,
300+ usage_kwargs = usage_kwargs ,
301+ process_text = process_text ,
302+ fs = fs ,
303+ )
290304 return doc_id
291305
292306 extracted_text = self .extract_text (
@@ -298,7 +312,6 @@ def index(
298312 process_text = process_text ,
299313 fs = fs ,
300314 )
301-
302315 if not extracted_text :
303316 raise IndexingError ("No text available to index" )
304317
0 commit comments