Skip to content

Commit c5a6f2f

Browse files
Use async instead of threadpool (#46)
1 parent bffa569 commit c5a6f2f

File tree

1 file changed

+23
-20
lines changed

1 file changed

+23
-20
lines changed

adi_function_app/adi_2_ai_search.py

Lines changed: 23 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -14,15 +14,14 @@
1414
import asyncio
1515
import logging
1616
from storage_account import StorageAccountHelper
17-
import concurrent.futures
1817
import json
1918
from openai import AsyncAzureOpenAI
2019
from typing import Union
2120
import openai
2221
from environment import IdentityType, get_identity_type
2322

2423

25-
def build_and_clean_markdown_for_response(
24+
async def build_and_clean_markdown_for_response(
2625
markdown_text: str,
2726
figures: dict,
2827
page_no: int = None,
@@ -591,28 +590,32 @@ async def process_adi_2_ai_search(record: dict, chunk_by_page: bool = False) ->
591590
]
592591
content_with_figures = await asyncio.gather(*content_with_figures_tasks)
593592

594-
with concurrent.futures.ProcessPoolExecutor() as executor:
595-
futures = {
596-
executor.submit(
597-
build_and_clean_markdown_for_response,
593+
build_and_clean_markdown_for_response_tasks = []
594+
595+
for extracted_page_content, page_number in zip(
596+
content_with_figures, page_numbers
597+
):
598+
build_and_clean_markdown_for_response_tasks.append(
599+
build_and_clean_markdown_for_response(
598600
extracted_page_content[0],
599601
extracted_page_content[1],
600602
page_number,
601603
True,
602-
): extracted_page_content
603-
for extracted_page_content, page_number in zip(
604-
content_with_figures, page_numbers
605604
)
606-
}
607-
for future in concurrent.futures.as_completed(futures):
608-
result = future.result()
609-
if len(result["content"]) == 0:
610-
logging.error(
611-
"No content found in the cleaned result for slide %s.",
612-
result["pageNumber"],
613-
)
614-
else:
615-
cleaned_result.append(result)
605+
)
606+
607+
build_and_clean_markdown_for_response_results = await asyncio.gather(
608+
*build_and_clean_markdown_for_response_tasks
609+
)
610+
611+
for result in build_and_clean_markdown_for_response_results:
612+
if len(result["content"]) == 0:
613+
logging.error(
614+
"No content found in the cleaned result for slide %s.",
615+
result["pageNumber"],
616+
)
617+
else:
618+
cleaned_result.append(result)
616619

617620
else:
618621
markdown_content = result.content
@@ -629,7 +632,7 @@ async def process_adi_2_ai_search(record: dict, chunk_by_page: bool = False) ->
629632
page_number=None,
630633
)
631634

632-
cleaned_result = build_and_clean_markdown_for_response(
635+
cleaned_result = await build_and_clean_markdown_for_response(
633636
extracted_content, figures, remove_irrelevant_figures=True
634637
)
635638
except Exception as e:

0 commit comments

Comments
 (0)