Skip to content

Commit da5a077

Browse files
Merge pull request #264537 from laujan/202682-error-in-python-code
202682 error in python code
2 parents f4d0251 + cc692d3 commit da5a077

File tree

1 file changed

+55
-31
lines changed
  • articles/ai-services/document-intelligence/quickstarts/includes

1 file changed

+55
-31
lines changed

articles/ai-services/document-intelligence/quickstarts/includes/python-sdk.md

Lines changed: 55 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ author: laujan
66
manager: nitinme
77
ms.service: azure-ai-document-intelligence
88
ms.topic: include
9-
ms.date: 12/18/2023
9+
ms.date: 01/29/2024
1010
ms.author: lajanuar
1111
---
1212
<!-- markdownlint-disable MD025 -->
@@ -135,11 +135,30 @@ Extract text, selection marks, text styles, table structures, and bounding regio
135135
import os
136136
from azure.core.credentials import AzureKeyCredential
137137
from azure.ai.documentintelligence import DocumentIntelligenceClient
138+
from azure.ai.documentintelligence.models import AnalyzeResult
138139

139140
# set `<your-endpoint>` and `<your-key>` variables with the values from the Azure portal
140141
endpoint = "<your-endpoint>"
141142
key = "<your-key>"
142143

144+
# helper functions
145+
146+
def get_words(page, line):
    """Return the words of ``page`` that belong to ``line``.

    A word belongs to the line when its span lies entirely inside one of
    the line's spans (see ``_in_span``).

    Args:
        page: Object exposing a ``words`` collection; each word has a
            ``span`` with ``offset`` and ``length``.
        line: Object exposing a ``spans`` collection; each span has
            ``offset`` and ``length``.

    Returns:
        A new list of the matching words, in page order. Empty when the
        page has no words or the line has no spans.
    """
    # Guard the collections the same way analyze_layout guards page.lines,
    # page.selection_marks, etc.: a missing (None) or empty collection
    # yields no words instead of raising TypeError on iteration.
    words = page.words
    spans = line.spans
    if not words or not spans:
        return []
    return [word for word in words if _in_span(word, spans)]
152+
153+
154+
def _in_span(word, spans):
    """Tell whether ``word`` is fully contained in at least one span.

    Args:
        word: Object whose ``span`` has ``offset`` and ``length``.
        spans: Iterable of objects with ``offset`` and ``length``.

    Returns:
        True if the word starts at or after a span's offset and ends at or
        before that span's end; False otherwise, including when ``spans``
        is empty.
    """
    return any(
        word.span.offset >= candidate.offset
        and (word.span.offset + word.span.length)
        <= (candidate.offset + candidate.length)
        for candidate in spans
    )
161+
143162

144163
def analyze_layout():
145164
# sample document
@@ -152,9 +171,10 @@ def analyze_layout():
152171
poller = document_intelligence_client.begin_analyze_document_from_url(
153172
"prebuilt-layout", formUrl
154173
)
155-
result = poller.result()
156174

157-
if any([style.is_handwritten for style in result.styles]):
175+
result: AnalyzeResult = poller.result()
176+
177+
if result.styles and any([style.is_handwritten for style in result.styles]):
158178
print("Document contains handwritten content")
159179
else:
160180
print("Document does not contain handwritten content")
@@ -165,49 +185,53 @@ def analyze_layout():
165185
f"Page has width: {page.width} and height: {page.height}, measured with unit: {page.unit}"
166186
)
167187

168-
for line_idx, line in enumerate(page.lines):
169-
words = get_words(page, line)
170-
print(
171-
f"...Line # {line_idx} has word count {len(words)} and text '{line.content}' "
172-
f"within bounding polygon '{line.polygon}'"
173-
)
174-
175-
for word in words:
188+
if page.lines:
189+
for line_idx, line in enumerate(page.lines):
190+
words = get_words(page, line)
176191
print(
177-
f"......Word '{word.content}' has a confidence of {word.confidence}"
192+
f"...Line # {line_idx} has word count {len(words)} and text '{line.content}' "
193+
f"within bounding polygon '{line.polygon}'"
178194
)
179195

180-
for selection_mark in page.selection_marks:
181-
print(
182-
f"Selection mark is '{selection_mark.state}' within bounding polygon "
183-
f"'{selection_mark.polygon}' and has a confidence of {selection_mark.confidence}"
184-
)
196+
for word in words:
197+
print(
198+
f"......Word '{word.content}' has a confidence of {word.confidence}"
199+
)
185200

186-
for table_idx, table in enumerate(result.tables):
187-
print(
188-
f"Table # {table_idx} has {table.row_count} rows and "
189-
f"{table.column_count} columns"
190-
)
191-
for region in table.bounding_regions:
192-
print(
193-
f"Table # {table_idx} location on page: {region.page_number} is {region.polygon}"
194-
)
195-
for cell in table.cells:
201+
if page.selection_marks:
202+
for selection_mark in page.selection_marks:
203+
print(
204+
f"Selection mark is '{selection_mark.state}' within bounding polygon "
205+
f"'{selection_mark.polygon}' and has a confidence of {selection_mark.confidence}"
206+
)
207+
208+
if result.tables:
209+
for table_idx, table in enumerate(result.tables):
196210
print(
197-
f"...Cell[{cell.row_index}][{cell.column_index}] has text '{cell.content}'"
211+
f"Table # {table_idx} has {table.row_count} rows and "
212+
f"{table.column_count} columns"
198213
)
199-
for region in cell.bounding_regions:
214+
if table.bounding_regions:
215+
for region in table.bounding_regions:
216+
print(
217+
f"Table # {table_idx} location on page: {region.page_number} is {region.polygon}"
218+
)
219+
for cell in table.cells:
200220
print(
201-
f"...content on page {region.page_number} is within bounding polygon '{region.polygon}'"
221+
f"...Cell[{cell.row_index}][{cell.column_index}] has text '{cell.content}'"
202222
)
223+
if cell.bounding_regions:
224+
for region in cell.bounding_regions:
225+
print(
226+
f"...content on page {region.page_number} is within bounding polygon '{region.polygon}'"
227+
)
203228

204229
print("----------------------------------------")
205230

206231

207232
if __name__ == "__main__":
208233
analyze_layout()
209234

210-
211235
```
212236

213237
**Run the application**

0 commit comments

Comments
 (0)