Skip to content

Commit 001c86f

Browse files
committed
Prepdocs progress
1 parent 74fdf48 commit 001c86f

File tree

2 files changed

+24
-6
lines changed

2 files changed

+24
-6
lines changed

app/backend/prepdocs.py

Lines changed: 23 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,7 @@ def setup_blob_manager(
8383
storage_container: str,
8484
storage_resource_group: str,
8585
subscription_id: str,
86-
search_images: bool,
86+
store_page_images: bool,
8787
storage_key: Union[str, None] = None,
8888
):
8989
storage_creds: Union[AsyncTokenCredential, str] = azure_credential if storage_key is None else storage_key
@@ -94,7 +94,7 @@ def setup_blob_manager(
9494
credential=storage_creds,
9595
resourceGroup=storage_resource_group,
9696
subscriptionId=subscription_id,
97-
store_page_images=search_images,
97+
store_page_images=store_page_images,
9898
)
9999

100100

@@ -176,7 +176,6 @@ def setup_file_processors(
176176
document_intelligence_key: Union[str, None] = None,
177177
local_pdf_parser: bool = False,
178178
local_html_parser: bool = False,
179-
search_images: bool = False,
180179
use_content_understanding: bool = False,
181180
use_multimodal: bool = False,
182181
openai_client: Union[AsyncOpenAI, None] = None,
@@ -248,6 +247,20 @@ def setup_file_processors(
248247
return file_processors
249248

250249

250+
def setup_image_embeddings_service(
    azure_credential: AsyncTokenCredential, vision_endpoint: Union[str, None], use_multimodal: bool
) -> Union[ImageEmbeddings, None]:
    """Create the image embeddings service when multimodal ingestion is enabled.

    Args:
        azure_credential: Credential handed to ``get_bearer_token_provider`` to obtain tokens
            for the ``https://cognitiveservices.azure.com/.default`` scope.
        vision_endpoint: Endpoint passed to ``ImageEmbeddings``. Must be a non-empty string
            when ``use_multimodal`` is true.
        use_multimodal: When false, no service is created.

    Returns:
        An ``ImageEmbeddings`` instance, or ``None`` when ``use_multimodal`` is false.

    Raises:
        ValueError: If ``use_multimodal`` is true and ``vision_endpoint`` is ``None`` or empty.
    """
    if not use_multimodal:
        return None

    # Reject "" as well as None: the endpoint is read with os.getenv by the caller, and a
    # variable that is set but empty yields an empty string rather than None.
    if not vision_endpoint:
        raise ValueError("A computer vision endpoint is required when GPT-4-vision is enabled.")

    return ImageEmbeddings(
        endpoint=vision_endpoint,
        token_provider=get_bearer_token_provider(azure_credential, "https://cognitiveservices.azure.com/.default"),
    )
262+
263+
251264
async def main(strategy: Strategy, setup_index: bool = True):
252265
if setup_index:
253266
await strategy.setup()
@@ -372,7 +385,7 @@ async def main(strategy: Strategy, setup_index: bool = True):
372385
storage_container=os.environ["AZURE_STORAGE_CONTAINER"],
373386
storage_resource_group=os.environ["AZURE_STORAGE_RESOURCE_GROUP"],
374387
subscription_id=os.environ["AZURE_SUBSCRIPTION_ID"],
375-
search_images=use_gptvision,
388+
store_page_images=use_multimodal,
376389
storage_key=clean_key_if_exists(args.storagekey),
377390
)
378391
list_file_strategy = setup_list_file_strategy(
@@ -436,12 +449,17 @@ async def main(strategy: Strategy, setup_index: bool = True):
436449
document_intelligence_key=clean_key_if_exists(args.documentintelligencekey),
437450
local_pdf_parser=os.getenv("USE_LOCAL_PDF_PARSER") == "true",
438451
local_html_parser=os.getenv("USE_LOCAL_HTML_PARSER") == "true",
439-
search_images=use_gptvision,
440452
use_content_understanding=use_content_understanding,
441453
use_multimodal=use_multimodal,
442454
content_understanding_endpoint=os.getenv("AZURE_CONTENTUNDERSTANDING_ENDPOINT"),
443455
)
444456

457+
image_embeddings_service = setup_image_embeddings_service(
458+
azure_credential=azd_credential,
459+
vision_endpoint=os.getenv("AZURE_VISION_ENDPOINT"),
460+
use_multimodal=use_multimodal,
461+
)
462+
445463
ingestion_strategy = FileStrategy(
446464
search_info=search_info,
447465
list_file_strategy=list_file_strategy,

app/backend/prepdocslib/searchmanager.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -316,7 +316,7 @@ async def create_index(self):
316316
existing_index.vector_search.profiles.append(text_vector_search_profile)
317317
if existing_index.vector_search.algorithms is None:
318318
existing_index.vector_search.algorithms = []
319-
existing_index.vector_search.algorithms.append(text_vector_algorithm)
319+
#existing_index.vector_search.algorithms.append(text_vector_algorithm)
320320
if existing_index.vector_search.compressions is None:
321321
existing_index.vector_search.compressions = []
322322
existing_index.vector_search.compressions.append(text_vector_compression)

0 commit comments

Comments
 (0)