
Commit e0a8843

Rename vision variables, fix mypy
1 parent c4086c2 commit e0a8843

18 files changed: 83 additions & 81 deletions

.azdo/pipelines/azure-dev.yml

Lines changed: 4 additions & 4 deletions
```diff
@@ -89,10 +89,10 @@ steps:
         USE_MULTIMODAL: $(USE_MULTIMODAL)
         AZURE_VISION_ENDPOINT: $(AZURE_VISION_ENDPOINT)
         VISION_SECRET_NAME: $(VISION_SECRET_NAME)
-        AZURE_COMPUTER_VISION_SERVICE: $(AZURE_COMPUTER_VISION_SERVICE)
-        AZURE_COMPUTER_VISION_RESOURCE_GROUP: $(AZURE_COMPUTER_VISION_RESOURCE_GROUP)
-        AZURE_COMPUTER_VISION_LOCATION: $(AZURE_COMPUTER_VISION_LOCATION)
-        AZURE_COMPUTER_VISION_SKU: $(AZURE_COMPUTER_VISION_SKU)
+        AZURE_VISION_SERVICE: $(AZURE_VISION_SERVICE)
+        AZURE_VISION_RESOURCE_GROUP: $(AZURE_VISION_RESOURCE_GROUP)
+        AZURE_VISION_LOCATION: $(AZURE_VISION_LOCATION)
+        AZURE_VISION_SKU: $(AZURE_VISION_SKU)
         ENABLE_LANGUAGE_PICKER: $(ENABLE_LANGUAGE_PICKER)
         USE_SPEECH_INPUT_BROWSER: $(USE_SPEECH_INPUT_BROWSER)
         USE_SPEECH_OUTPUT_BROWSER: $(USE_SPEECH_OUTPUT_BROWSER)
```

.github/workflows/azure-dev.yml

Lines changed: 4 additions & 4 deletions
```diff
@@ -37,10 +37,10 @@ jobs:
       AZURE_DOCUMENTINTELLIGENCE_RESOURCE_GROUP: ${{ vars.AZURE_DOCUMENTINTELLIGENCE_RESOURCE_GROUP }}
       AZURE_DOCUMENTINTELLIGENCE_SKU: ${{ vars.AZURE_DOCUMENTINTELLIGENCE_SKU }}
       AZURE_DOCUMENTINTELLIGENCE_LOCATION: ${{ vars.AZURE_DOCUMENTINTELLIGENCE_LOCATION }}
-      AZURE_COMPUTER_VISION_SERVICE: ${{ vars.AZURE_COMPUTER_VISION_SERVICE }}
-      AZURE_COMPUTER_VISION_RESOURCE_GROUP: ${{ vars.AZURE_COMPUTER_VISION_RESOURCE_GROUP }}
-      AZURE_COMPUTER_VISION_LOCATION: ${{ vars.AZURE_COMPUTER_VISION_LOCATION }}
-      AZURE_COMPUTER_VISION_SKU: ${{ vars.AZURE_COMPUTER_VISION_SKU }}
+      AZURE_VISION_SERVICE: ${{ vars.AZURE_VISION_SERVICE }}
+      AZURE_VISION_RESOURCE_GROUP: ${{ vars.AZURE_VISION_RESOURCE_GROUP }}
+      AZURE_VISION_LOCATION: ${{ vars.AZURE_VISION_LOCATION }}
+      AZURE_VISION_SKU: ${{ vars.AZURE_VISION_SKU }}
       AZURE_SEARCH_INDEX: ${{ vars.AZURE_SEARCH_INDEX }}
       AZURE_SEARCH_SERVICE: ${{ vars.AZURE_SEARCH_SERVICE }}
       AZURE_SEARCH_SERVICE_RESOURCE_GROUP: ${{ vars.AZURE_SEARCH_SERVICE_RESOURCE_GROUP }}
```

.github/workflows/evaluate.yaml

Lines changed: 4 additions & 4 deletions
```diff
@@ -35,10 +35,10 @@ jobs:
       AZURE_DOCUMENTINTELLIGENCE_RESOURCE_GROUP: ${{ vars.AZURE_DOCUMENTINTELLIGENCE_RESOURCE_GROUP }}
       AZURE_DOCUMENTINTELLIGENCE_SKU: ${{ vars.AZURE_DOCUMENTINTELLIGENCE_SKU }}
       AZURE_DOCUMENTINTELLIGENCE_LOCATION: ${{ vars.AZURE_DOCUMENTINTELLIGENCE_LOCATION }}
-      AZURE_COMPUTER_VISION_SERVICE: ${{ vars.AZURE_COMPUTER_VISION_SERVICE }}
-      AZURE_COMPUTER_VISION_RESOURCE_GROUP: ${{ vars.AZURE_COMPUTER_VISION_RESOURCE_GROUP }}
-      AZURE_COMPUTER_VISION_LOCATION: ${{ vars.AZURE_COMPUTER_VISION_LOCATION }}
-      AZURE_COMPUTER_VISION_SKU: ${{ vars.AZURE_COMPUTER_VISION_SKU }}
+      AZURE_VISION_SERVICE: ${{ vars.AZURE_VISION_SERVICE }}
+      AZURE_VISION_RESOURCE_GROUP: ${{ vars.AZURE_VISION_RESOURCE_GROUP }}
+      AZURE_VISION_LOCATION: ${{ vars.AZURE_VISION_LOCATION }}
+      AZURE_VISION_SKU: ${{ vars.AZURE_VISION_SKU }}
       AZURE_SEARCH_INDEX: ${{ vars.AZURE_SEARCH_INDEX }}
       AZURE_SEARCH_SERVICE: ${{ vars.AZURE_SEARCH_SERVICE }}
       AZURE_SEARCH_SERVICE_RESOURCE_GROUP: ${{ vars.AZURE_SEARCH_SERVICE_RESOURCE_GROUP }}
```

app/backend/approaches/approach.py

Lines changed: 8 additions & 2 deletions
```diff
@@ -1,6 +1,6 @@
 from abc import ABC
 from collections.abc import AsyncGenerator, Awaitable
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from enum import Enum
 from typing import Any, Callable, Optional, TypedDict, Union, cast
 from urllib.parse import urljoin
@@ -116,7 +116,7 @@ class DataPoints:
 @dataclass
 class ExtraInfo:
     data_points: DataPoints
-    thoughts: Optional[list[ThoughtStep]] = None
+    thoughts: list[ThoughtStep] = field(default_factory=list)
     followup_questions: Optional[list[Any]] = None
 
 
@@ -395,6 +395,8 @@ def nonewlines(s: str) -> str:
             text_sources.append(f"{citation}: {nonewlines(doc.content or '')}")
 
             if use_image_sources and hasattr(doc, "images") and doc.images:
+                if self.images_blob_container_client is None:
+                    raise ValueError("The images blob container client must be set to use image sources.")
                 for img in doc.images:
                     # Skip if we've already processed this URL
                     if img["url"] in seen_urls:
@@ -440,11 +442,15 @@ class ExtraArgs(TypedDict, total=False):
         return VectorizedQuery(vector=query_vector, k_nearest_neighbors=50, fields=self.embedding_field)
 
     async def compute_image_embedding(self, q: str):
+        if not self.vision_endpoint:
+            raise ValueError("Azure AI Vision endpoint must be set to compute image embedding.")
         endpoint = urljoin(self.vision_endpoint, "computervision/retrieval:vectorizeText")
         headers = {"Content-Type": "application/json"}
         params = {"api-version": "2024-02-01", "model-version": "2023-04-15"}
         data = {"text": q}
 
+        if not self.vision_token_provider:
+            raise ValueError("Azure AI Vision token provider must be set to compute image embedding.")
         headers["Authorization"] = "Bearer " + await self.vision_token_provider()
 
         async with aiohttp.ClientSession() as session:
```
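A note on the `thoughts` change above: moving from `Optional[list[ThoughtStep]] = None` to `field(default_factory=list)` matters because dataclasses reject a bare mutable default like `[]`, and a factory gives each instance its own list while sparing callers the None check. A minimal sketch of the difference (class name illustrative, not from this repo):

```python
from dataclasses import dataclass, field

@dataclass
class Example:
    # items: list[str] = []        # ValueError: mutable default not allowed
    items: list[str] = field(default_factory=list)  # fresh list per instance

a, b = Example(), Example()
a.items.append("x")
print(b.items)  # [] -- instances don't share state, and callers never see None
```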

app/backend/prepdocs.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -331,7 +331,7 @@ def setup_image_embeddings_service(
     image_embeddings_service: Optional[ImageEmbeddings] = None
     if use_multimodal:
         if vision_endpoint is None:
-            raise ValueError("A computer vision endpoint is required when GPT-4-vision is enabled.")
+            raise ValueError("An Azure AI Vision endpoint must be provided to use multimodal features.")
         image_embeddings_service = ImageEmbeddings(
             endpoint=vision_endpoint,
             token_provider=get_bearer_token_provider(azure_credential, "https://cognitiveservices.azure.com/.default"),
```
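The raise-on-`None` guard used here and in approach.py is presumably the "fix mypy" half of the commit title: after an explicit check, mypy narrows the variable from `Optional[str]` to `str`, so later calls type-check. A minimal sketch of the pattern under that assumption (function name and message are illustrative):

```python
from typing import Optional

def build_vectorize_url(vision_endpoint: Optional[str]) -> str:
    if vision_endpoint is None:
        raise ValueError("An Azure AI Vision endpoint must be provided to use multimodal features.")
    # mypy narrows vision_endpoint to str from here on
    return vision_endpoint.rstrip("/") + "/computervision/retrieval:vectorizeText"
```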

app/backend/prepdocslib/mediadescriber.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -19,7 +19,7 @@ async def describe_image(self, image_bytes) -> str:
         raise NotImplementedError  # pragma: no cover
 
 
-class ContentUnderstandingDescriber:
+class ContentUnderstandingDescriber(MediaDescriber):
     CU_API_VERSION = "2024-12-01-preview"
 
     analyzer_schema = {
```
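Adding `MediaDescriber` as the base class is more than cosmetic: a type checker only accepts `ContentUnderstandingDescriber` where a `MediaDescriber` is expected if it actually inherits from the ABC. A minimal sketch of the pattern (the stub body is illustrative):

```python
from abc import ABC

class MediaDescriber(ABC):
    async def describe_image(self, image_bytes: bytes) -> str:
        raise NotImplementedError  # pragma: no cover

class ContentUnderstandingDescriber(MediaDescriber):
    async def describe_image(self, image_bytes: bytes) -> str:
        return "description from Content Understanding"  # placeholder

async def describe(describer: MediaDescriber, data: bytes) -> str:
    # Accepts ContentUnderstandingDescriber because it subclasses MediaDescriber
    return await describer.describe_image(data)
```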

app/backend/prepdocslib/searchmanager.py

Lines changed: 7 additions & 1 deletion
```diff
@@ -150,6 +150,8 @@ async def create_index(self):
         )
 
         if self.search_images:
+            if not self.search_info.azure_vision_endpoint:
+                raise ValueError("Azure AI Vision endpoint must be provided to use image embeddings")
             image_vector_algorithm = HnswAlgorithmConfiguration(
                 name="images_hnsw_config",
                 parameters=HnswParameters(metric="cosine"),
@@ -366,7 +368,11 @@ async def create_index(self):
                 existing_index.vector_search.compressions.append(text_vector_compression)
             await search_index_client.create_or_update_index(existing_index)
 
-            if images_field and not any(field.name == "images" for field in existing_index.fields):
+            if (
+                images_field
+                and images_field.fields
+                and not any(field.name == "images" for field in existing_index.fields)
+            ):
                 logger.info("Adding %s field for image embeddings", images_field.name)
                 images_field.fields[0].stored = True
                 existing_index.fields.append(images_field)
```
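For context on the `HnswAlgorithmConfiguration` being built above: in azure-search-documents, an HNSW algorithm configuration is typically paired with a `VectorSearchProfile` that vector fields reference by name. A minimal sketch of that wiring, assuming a profile name that does not appear in this diff:

```python
from azure.search.documents.indexes.models import (
    HnswAlgorithmConfiguration,
    HnswParameters,
    VectorSearch,
    VectorSearchProfile,
)

# Cosine-metric HNSW graph, matching the configuration in the diff above
image_vector_algorithm = HnswAlgorithmConfiguration(
    name="images_hnsw_config",
    parameters=HnswParameters(metric="cosine"),
)

# A profile ties vector fields to the algorithm by name (profile name assumed)
vector_search = VectorSearch(
    algorithms=[image_vector_algorithm],
    profiles=[
        VectorSearchProfile(
            name="images_hnsw_profile",
            algorithm_configuration_name="images_hnsw_config",
        )
    ],
)
```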

azure.yaml

Lines changed: 4 additions & 4 deletions
```diff
@@ -93,10 +93,10 @@ pipeline:
     - USE_MULTIMODAL
     - AZURE_VISION_ENDPOINT
     - VISION_SECRET_NAME
-    - AZURE_COMPUTER_VISION_SERVICE
-    - AZURE_COMPUTER_VISION_RESOURCE_GROUP
-    - AZURE_COMPUTER_VISION_LOCATION
-    - AZURE_COMPUTER_VISION_SKU
+    - AZURE_VISION_SERVICE
+    - AZURE_VISION_RESOURCE_GROUP
+    - AZURE_VISION_LOCATION
+    - AZURE_VISION_SKU
     - ENABLE_LANGUAGE_PICKER
     - USE_SPEECH_INPUT_BROWSER
    - USE_SPEECH_OUTPUT_BROWSER
```

docs/customization.md

Lines changed: 2 additions & 2 deletions
```diff
@@ -53,7 +53,7 @@ TODO FIX THIS!
 If you followed the instructions in [the multimodal guide](multimodal.md) to enable the vision approach and the "Use GPT vision model" option is selected, then the chat tab will use the `chatreadretrievereadvision.py` approach instead. This approach is similar to the `chatreadretrieveread.py` approach, with a few differences:
 
 1. Step 1 is the same as before, except it uses the GPT-4 Vision model instead of the default GPT-3.5 model.
-2. For this step, it also calculates a vector embedding for the user question using [the Computer Vision vectorize text API](https://learn.microsoft.com/azure/ai-services/computer-vision/how-to/image-retrieval#call-the-vectorize-text-api), and passes that to the Azure AI Search to compare against the `imageEmbeddings` fields in the indexed documents. For each matching document, it downloads the image blob and converts it to a base 64 encoding.
+2. For this step, it also calculates a vector embedding for the user question using [the Azure AI Vision vectorize text API](https://learn.microsoft.com/azure/ai-services/computer-vision/how-to/image-retrieval#call-the-vectorize-text-api), and passes that to the Azure AI Search to compare against the `imageEmbeddings` fields in the indexed documents. For each matching document, it downloads the image blob and converts it to a base 64 encoding.
 3. When it combines the search results and user question, it includes the base 64 encoded images, and sends along both the text and images to the GPT4 Vision model (similar to this [documentation example](https://platform.openai.com/docs/guides/vision/quick-start)). The model generates a response that includes citations to the images, and the UI renders the base64 encoded images when a citation is clicked.
 
 The prompt for step 2 is currently tailored to the sample data since it starts with "You are an intelligent assistant helping analyze the Annual Financial Report of Contoso Ltd.". Modify the [chat_answer_question_vision.prompty](https://github.com/Azure-Samples/azure-search-openai-demo/blob/main/app/backend/approaches/prompts/chat_answer_question_vision.prompty) prompt to match your data.
@@ -72,7 +72,7 @@ The prompt for step 2 is currently tailored to the sample data since it starts w
 TODO FIX THIS!
 If you followed the instructions in [the multimodal guide](multimodal.md) to enable the vision approach and the "Use GPT vision model" option is selected, then the ask tab will use the `retrievethenreadvision.py` approach instead. This approach is similar to the `retrievethenread.py` approach, with a few differences:
 
-1. For this step, it also calculates a vector embedding for the user question using [the Computer Vision vectorize text API](https://learn.microsoft.com/azure/ai-services/computer-vision/how-to/image-retrieval#call-the-vectorize-text-api), and passes that to the Azure AI Search to compare against the `imageEmbeddings` fields in the indexed documents. For each matching document, it downloads the image blob and converts it to a base 64 encoding.
+1. For this step, it also calculates a vector embedding for the user question using [the Azure AI Vision vectorize text API](https://learn.microsoft.com/azure/ai-services/computer-vision/how-to/image-retrieval#call-the-vectorize-text-api), and passes that to the Azure AI Search to compare against the `imageEmbeddings` fields in the indexed documents. For each matching document, it downloads the image blob and converts it to a base 64 encoding.
 2. When it combines the search results and user question, it includes the base 64 encoded images, and sends along both the text and images to the GPT4 Vision model (similar to this [documentation example](https://platform.openai.com/docs/guides/vision/quick-start)). The model generates a response that includes citations to the images, and the UI renders the base64 encoded images when a citation is clicked.
 
 The prompt for step 2 is currently tailored to the sample data since it starts with "You are an intelligent assistant helping analyze the Annual Financial Report of Contoso Ltd". Modify the [ask_answer_question_vision.prompty](https://github.com/Azure-Samples/azure-search-openai-demo/blob/main/app/backend/approaches/prompts/ask_answer_question_vision.prompty) prompt to match your data.
```
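For readers tracing the vectorize-text step described in this doc, it is the same call `compute_image_embedding` makes in the approach.py diff above: a POST to `computervision/retrieval:vectorizeText` whose returned vector is handed to Azure AI Search. A minimal sketch, assuming the caller already holds a bearer token and that the response carries the embedding in a `vector` field:

```python
import aiohttp
from urllib.parse import urljoin

async def vectorize_text(vision_endpoint: str, token: str, text: str) -> list[float]:
    # Endpoint path and API versions taken from the approach.py diff in this commit
    url = urljoin(vision_endpoint, "computervision/retrieval:vectorizeText")
    params = {"api-version": "2024-02-01", "model-version": "2023-04-15"}
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {token}"}
    async with aiohttp.ClientSession() as session:
        async with session.post(url, params=params, headers=headers, json={"text": text}) as resp:
            resp.raise_for_status()
            body = await resp.json()
            return body["vector"]  # assumed response field for the embedding
```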

docs/deploy_existing.md

Lines changed: 6 additions & 6 deletions
```diff
@@ -9,7 +9,7 @@ You should set these values before running `azd up`. Once you've set them, retur
 * [Azure AI Search resource](#azure-ai-search-resource)
 * [Azure App Service Plan and App Service resources](#azure-app-service-plan-and-app-service-resources)
 * [Azure Application Insights and related resources](#azure-application-insights-and-related-resources)
-* [Azure Computer Vision resources](#azure-computer-vision-resources)
+* [Azure AI Vision resources](#azure-ai-vision-resources)
 * [Azure Document Intelligence resource](#azure-document-intelligence-resource)
 * [Azure Speech resource](#azure-speech-resource)
 * [Other Azure resources](#other-azure-resources)
@@ -78,12 +78,12 @@ You can also customize the search service (new or existing) for non-English sear
 1. Run `azd env set AZURE_APPLICATION_INSIGHTS_DASHBOARD {Name of existing Azure App Insights Dashboard}`.
 1. Run `azd env set AZURE_LOG_ANALYTICS {Name of existing Azure Log Analytics Workspace Name}`.
 
-## Azure Computer Vision resources
+## Azure AI Vision resources
 
-1. Run `azd env set AZURE_COMPUTER_VISION_SERVICE {Name of existing Azure Computer Vision Service Name}`
-1. Run `azd env set AZURE_COMPUTER_VISION_RESOURCE_GROUP {Name of existing Azure Computer Vision Resource Group Name}`
-1. Run `azd env set AZURE_COMPUTER_VISION_LOCATION {Name of existing Azure Computer Vision Location}`
-1. Run `azd env set AZURE_COMPUTER_VISION_SKU {SKU of Azure Computer Vision service, defaults to F0}`
+1. Run `azd env set AZURE_VISION_SERVICE {Name of existing Azure AI Vision Service Name}`
+1. Run `azd env set AZURE_VISION_RESOURCE_GROUP {Name of existing Azure AI Vision Resource Group Name}`
+1. Run `azd env set AZURE_VISION_LOCATION {Name of existing Azure AI Vision Location}`
+1. Run `azd env set AZURE_VISION_SKU {SKU of Azure AI Vision service, defaults to F0}`
 
 ## Azure Document Intelligence resource
 
```