Skip to content

Commit 2a6e604

Browse files
committed
Output the endpoint from Bicep
1 parent 5a3040a commit 2a6e604

File tree

6 files changed: 24 additions and 8 deletions.

app/backend/prepdocslib/mediadescriber.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,4 +105,4 @@ async def describe_image(self, image_bytes) -> str:
105105
results = await self.poll_api(session, poll_url, headers)
106106

107107
fields = results["result"]["contents"][0]["fields"]
108-
return fields["DescriptionHTML"]["valueString"]
108+
return fields["Description"]["valueString"]

app/backend/prepdocslib/pdfparser.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,17 +138,19 @@ class ObjectType(Enum):
138138
added_objects = set() # set of object types todo mypy
139139
for idx, mask_char in enumerate(mask_chars):
140140
object_type, object_idx = mask_char
141-
if object_idx is None:
142-
raise ValueError("object_idx should not be None")
143141
if object_type == ObjectType.NONE:
144142
page_text += form_recognizer_results.content[page_offset + idx]
145143
elif object_type == ObjectType.TABLE:
144+
if object_idx is None:
145+
raise ValueError("Expected object_idx to be set")
146146
if mask_char not in added_objects:
147147
page_text += DocumentAnalysisParser.table_to_html(tables_on_page[object_idx])
148148
added_objects.add(mask_char)
149149
elif object_type == ObjectType.FIGURE:
150150
if cu_describer is None:
151151
raise ValueError("cu_describer should not be None, unable to describe figure")
152+
if object_idx is None:
153+
raise ValueError("Expected object_idx to be set")
152154
if mask_char not in added_objects:
153155
figure_html = await DocumentAnalysisParser.figure_to_html(
154156
doc_for_pymupdf, cu_describer, figures_on_page[object_idx]
@@ -176,7 +178,7 @@ async def figure_to_html(
176178
doc: pymupdf.Document, cu_describer: ContentUnderstandingDescriber, figure: DocumentFigure
177179
) -> str:
178180
figure_title = (figure.caption and figure.caption.content) or ""
179-
logger.info("Describing figure '%s' with title", figure.id, figure_title)
181+
logger.info("Describing figure %s with title '%s'", figure.id, figure_title)
180182
if not figure.bounding_regions:
181183
return f"<figure><figcaption>{figure_title}</figcaption></figure>"
182184
for region in figure.bounding_regions:

infra/main.bicep

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,7 @@ var appEnvVariables = {
412412
USE_LOCAL_PDF_PARSER: useLocalPdfParser
413413
USE_LOCAL_HTML_PARSER: useLocalHtmlParser
414414
USE_MEDIA_DESCRIBER_AZURE_CU: useMediaDescriberAzureCU
415+
AZURE_CONTENTUNDERSTANDING_ENDPOINT: useMediaDescriberAzureCU ? contentUnderstanding.outputs.endpoint : ''
415416
RUNNING_IN_PRODUCTION: 'true'
416417
}
417418

@@ -1193,6 +1194,7 @@ output AZURE_SPEECH_SERVICE_ID string = useSpeechOutputAzure ? speech.outputs.re
11931194
output AZURE_SPEECH_SERVICE_LOCATION string = useSpeechOutputAzure ? speech.outputs.location : ''
11941195

11951196
output AZURE_VISION_ENDPOINT string = useGPT4V ? computerVision.outputs.endpoint : ''
1197+
output AZURE_CONTENTUNDERSTANDING_ENDPOINT string = useMediaDescriberAzureCU ? contentUnderstanding.outputs.endpoint : ''
11961198

11971199
output AZURE_DOCUMENTINTELLIGENCE_SERVICE string = documentIntelligence.outputs.name
11981200
output AZURE_DOCUMENTINTELLIGENCE_RESOURCE_GROUP string = documentIntelligenceResourceGroup.name

scripts/prepdocs.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,4 @@ if [ $# -gt 0 ]; then
99
additionalArgs="$@"
1010
fi
1111

12-
./.venv/bin/python ./app/backend/prepdocs.py './data/GPT4V_Examples/Financial Market Analysis Report 2023.pdf' --verbose $additionalArgs
12+
./.venv/bin/python ./app/backend/prepdocs.py './data/*' --verbose $additionalArgs

tests/conftest.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
MockBlobClient,
3535
MockResponse,
3636
mock_computervision_response,
37+
mock_contentunderstanding_response,
3738
mock_speak_text_cancelled,
3839
mock_speak_text_failed,
3940
mock_speak_text_success,
@@ -54,10 +55,12 @@ async def mock_search(self, *args, **kwargs):
5455

5556

5657
@pytest.fixture
57-
def mock_compute_embeddings_call(monkeypatch):
58+
def mock_azurehttp_calls(monkeypatch):
5859
def mock_post(*args, **kwargs):
5960
if kwargs.get("url").endswith("computervision/retrieval:vectorizeText"):
6061
return mock_computervision_response()
62+
elif kwargs.get("url").endswith("/contentunderstanding/analyzers/image_analyzer:analyze"):
63+
return mock_contentunderstanding_response()
6164
else:
6265
raise Exception("Unexpected URL for mock call to ClientSession.post()")
6366

@@ -327,7 +330,7 @@ async def client(
327330
mock_openai_embedding,
328331
mock_acs_search,
329332
mock_blob_container_client,
330-
mock_compute_embeddings_call,
333+
mock_azurehttp_calls,
331334
):
332335
quart_app = app.create_app()
333336

@@ -346,7 +349,7 @@ async def client_with_expiring_token(
346349
mock_openai_embedding,
347350
mock_acs_search,
348351
mock_blob_container_client,
349-
mock_compute_embeddings_call,
352+
mock_azurehttp_calls,
350353
):
351354
quart_app = app.create_app()
352355

tests/mocks.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,15 @@ def mock_computervision_response():
203203
)
204204

205205

206+
def mock_contentunderstanding_response():
207+
return MockResponse(
208+
status=200,
209+
headers={
210+
"Operation-Location": "https://cu-ztmfrxlgtk3nq.cognitiveservices.azure.com/contentunderstanding/analyzers/image_analyzer/results/53e4c016-d2c0-48a9-a9f4-38891f7d45f0?api-version=2024-12-01-preview"
211+
},
212+
)
213+
214+
206215
class MockAudio:
207216
def __init__(self, audio_data):
208217
self.audio_data = audio_data

0 commit comments

Comments
 (0)