Skip to content

Commit 622491c

Browse files
committed
Filter out empty pages when using page-wise chunking
1 parent 2b9493c commit 622491c

File tree

1 file changed

+10
-5
lines changed

1 file changed

+10
-5
lines changed

adi_function_app/adi_2_ai_search.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,8 @@ def update_figure_description(
9292
"""
9393

9494
# Define the new string to replace the old content
95-
new_string = f'<!-- FigureId="{figure_id}" FigureContent="{img_description}" -->'
95+
new_string = f"""<!-- FigureId="{figure_id}
96+
" FigureContent="{img_description}" -->"""
9697

9798
# Calculate the end index of the content to be replaced
9899
end_index = offset + length
@@ -537,9 +538,6 @@ async def process_adi_2_ai_search(record: dict, chunk_by_page: bool = False) ->
537538
result, operation_id = await analyse_document(temp_file_path)
538539
except ValueError as inner_e:
539540
logging.error(inner_e)
540-
logging.error(
541-
f"Failed to analyze the document with Azure Document Intelligence: {e}"
542-
)
543541
logging.error(
544542
"Failed to analyse %s with Azure Document Intelligence.", blob
545543
)
@@ -607,7 +605,14 @@ async def process_adi_2_ai_search(record: dict, chunk_by_page: bool = False) ->
607605
)
608606
}
609607
for future in concurrent.futures.as_completed(futures):
610-
cleaned_result.append(future.result())
608+
result = future.result()
609+
if len(result["content"]) == 0:
610+
logging.error(
611+
"No content found in the cleaned result for slide %s.",
612+
result["pageNumber"],
613+
)
614+
else:
615+
cleaned_result.append(result)
611616

612617
else:
613618
markdown_content = result.content

0 commit comments

Comments
 (0)