Skip to content

Commit 658f2f8

Browse files
authored
Update docs (#17)
1 parent 319be6f commit 658f2f8

File tree

5 files changed

+22
-191
lines changed

5 files changed

+22
-191
lines changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ You can run this repo virtually by using GitHub Codespaces, which will open a we
6161

6262
Navigate to the `notebooks` directory and select the sample notebook you are interested in. Since Codespaces is pre-configured with the necessary environment, you can directly execute each step in the notebook.
6363

64+
## More Samples Using Azure Content Understanding
65+
[Azure Search with Content Understanding](https://github.com/Azure-Samples/azure-ai-search-with-content-understanding-python)
66+
67+
[Azure Content Understanding with OpenAI](https://github.com/Azure-Samples/azure-ai-content-understanding-with-azure-openai-python)
68+
6469
## Notes
6570

6671
* **Trademarks** - This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft trademarks or logos is subject to and must follow [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. Any use of third-party trademarks or logos is subject to those third-party’s policies.

notebooks/analyzer_training.ipynb

Lines changed: 8 additions & 182 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
},
5050
{
5151
"cell_type": "code",
52-
"execution_count": 2,
52+
"execution_count": null,
5353
"metadata": {},
5454
"outputs": [],
5555
"source": [
@@ -93,7 +93,7 @@
9393
" endpoint=os.getenv(\"AZURE_AI_ENDPOINT\"),\n",
9494
" api_version=os.getenv(\"AZURE_AI_API_VERSION\", \"2024-12-01-preview\"),\n",
9595
" token_provider=token_provider,\n",
96-
" x_ms_useragent=\"azure-ai-content-understanding-python/analyzer_training\",\n",
96+
" x_ms_useragent=\"azure-ai-content-understanding-python/analyzer_training\", # This header is used for sample usage telemetry; please comment out this line if you want to opt out.\n",
9797
")"
9898
]
9999
},
@@ -109,73 +109,9 @@
109109
},
110110
{
111111
"cell_type": "code",
112-
"execution_count": 4,
112+
"execution_count": null,
113113
"metadata": {},
114-
"outputs": [
115-
{
116-
"name": "stderr",
117-
"output_type": "stream",
118-
"text": [
119-
"INFO:python.content_understanding_client:Analyzer train-sample-18473b27-6d27-4d51-8906-9e341ad3fb59 create request accepted.\n",
120-
"INFO:python.content_understanding_client:Request 7a0f7689-2b41-4a5e-96bc-c7ef8cb72c5e in progress ...\n",
121-
"INFO:python.content_understanding_client:Request 7a0f7689-2b41-4a5e-96bc-c7ef8cb72c5e in progress ...\n",
122-
"INFO:python.content_understanding_client:Request 7a0f7689-2b41-4a5e-96bc-c7ef8cb72c5e in progress ...\n",
123-
"INFO:python.content_understanding_client:Request 7a0f7689-2b41-4a5e-96bc-c7ef8cb72c5e in progress ...\n",
124-
"INFO:python.content_understanding_client:Request 7a0f7689-2b41-4a5e-96bc-c7ef8cb72c5e in progress ...\n",
125-
"INFO:python.content_understanding_client:Request result is ready after 13.72 seconds.\n",
126-
"INFO:root:Here is the analyzer detail for train-sample-18473b27-6d27-4d51-8906-9e341ad3fb59\n",
127-
"INFO:root:{\n",
128-
" \"id\": \"7a0f7689-2b41-4a5e-96bc-c7ef8cb72c5e\",\n",
129-
" \"status\": \"Succeeded\",\n",
130-
" \"result\": {\n",
131-
" \"analyzerId\": \"train-sample-18473b27-6d27-4d51-8906-9e341ad3fb59\",\n",
132-
" \"description\": \"Extract useful information from purchase order\",\n",
133-
" \"createdAt\": \"2024-12-09T23:58:59Z\",\n",
134-
" \"lastModifiedAt\": \"2024-12-09T23:59:14Z\",\n",
135-
" \"config\": {\n",
136-
" \"returnDetails\": false,\n",
137-
" \"enableOcr\": true,\n",
138-
" \"enableLayout\": true,\n",
139-
" \"enableBarcode\": false,\n",
140-
" \"enableFormula\": false\n",
141-
" },\n",
142-
" \"fieldSchema\": {\n",
143-
" \"fields\": {\n",
144-
" \"PurchaseOrderNumber\": {\n",
145-
" \"type\": \"string\",\n",
146-
" \"method\": \"extract\",\n",
147-
" \"description\": \"\"\n",
148-
" },\n",
149-
" \"PurchaseDate\": {\n",
150-
" \"type\": \"date\",\n",
151-
" \"method\": \"extract\",\n",
152-
" \"description\": \"\"\n",
153-
" },\n",
154-
" \"TotalPayment\": {\n",
155-
" \"type\": \"number\",\n",
156-
" \"method\": \"extract\",\n",
157-
" \"description\": \"\"\n",
158-
" },\n",
159-
" \"ShippedToAddress\": {\n",
160-
" \"type\": \"string\",\n",
161-
" \"method\": \"extract\",\n",
162-
" \"description\": \"\"\n",
163-
" }\n",
164-
" }\n",
165-
" },\n",
166-
" \"trainingData\": {\n",
167-
" \"containerUrl\": \"https://chethodevusw2.blob.core.windows.net/test?sv=2023-01-03&st=2024-12-09T19%3A27%3A04Z&se=2024-12-10T19%3A27%3A04Z&skoid=4de42c4d-0fe6-4b96-93a4-161b1303ea3b&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2024-12-09T19%3A27%3A04Z&ske=2024-12-10T19%3A27%3A04Z&sks=b&skv=2023-01-03&sr=c&sp=rl&sig=sbUVLXK2JfJdUG7Fxyakg1e9lR%2B%2B6oMISoZqfudtvgw%3D\",\n",
168-
" \"kind\": \"blob\",\n",
169-
" \"prefix\": \"train\"\n",
170-
" },\n",
171-
" \"warnings\": [],\n",
172-
" \"status\": \"ready\",\n",
173-
" \"scenario\": \"document\"\n",
174-
" }\n",
175-
"}\n"
176-
]
177-
}
178-
],
114+
"outputs": [],
179115
"source": [
180116
"import uuid\n",
181117
"ANALYZER_ID = \"train-sample-\" + str(uuid.uuid4())\n",
@@ -206,101 +142,9 @@
206142
},
207143
{
208144
"cell_type": "code",
209-
"execution_count": 5,
145+
"execution_count": null,
210146
"metadata": {},
211-
"outputs": [
212-
{
213-
"name": "stderr",
214-
"output_type": "stream",
215-
"text": [
216-
"INFO:python.content_understanding_client:Analyzing file ../data/purchase_order.jpg with analyzer: train-sample-18473b27-6d27-4d51-8906-9e341ad3fb59\n",
217-
"INFO:python.content_understanding_client:Request dced30f5-bb4d-473b-8b7a-13a7e29ed3ac in progress ...\n",
218-
"INFO:python.content_understanding_client:Request dced30f5-bb4d-473b-8b7a-13a7e29ed3ac in progress ...\n",
219-
"INFO:python.content_understanding_client:Request result is ready after 5.52 seconds.\n",
220-
"INFO:root:{\n",
221-
" \"id\": \"dced30f5-bb4d-473b-8b7a-13a7e29ed3ac\",\n",
222-
" \"status\": \"Succeeded\",\n",
223-
" \"result\": {\n",
224-
" \"analyzerId\": \"train-sample-18473b27-6d27-4d51-8906-9e341ad3fb59\",\n",
225-
" \"apiVersion\": \"2024-12-01-preview\",\n",
226-
" \"createdAt\": \"2024-12-09T23:59:16Z\",\n",
227-
" \"warnings\": [],\n",
228-
" \"contents\": [\n",
229-
" {\n",
230-
" \"markdown\": \"Purchase Order\\n\\n\\n# Hero Limited\\n\\nCompany Phone: 555-348-6512\\nWebsite: www.herolimited.com\\nEmail:\\naccounts@herolimited.com\\n\\nPurchase Order\\n\\nDated As: 12/20/2020\\nPurchase Order #: 948284\\n\\nShipped To\\n\\nVendor Name: Hillary Swank\\nCompany Name: Higgly Wiggly Books\\nAddress: 938 NE Burner Road\\nBoulder City, CO 92848\\nPhone: 938-294-2949\\n\\nShipped From\\n\\nName: Bernie Sanders\\nCompany Name: Jupiter Book Supply\\nAddress: 383 N Kinnick Road\\nSeattle, WA 38383\\n\\nPhone: 932-299-0292\\n\\n\\n<table>\\n<tr>\\n<th>Details</th>\\n<th>Quantity</th>\\n<th>Unit Price</th>\\n<th>Total</th>\\n</tr>\\n<tr>\\n<td>Bindings</td>\\n<td>20</td>\\n<td>1.00</td>\\n<td>20.00</td>\\n</tr>\\n<tr>\\n<td>Covers Small</td>\\n<td>20</td>\\n<td>1.00</td>\\n<td>20.00</td>\\n</tr>\\n<tr>\\n<td>Feather Bookmark</td>\\n<td>20</td>\\n<td>5.00</td>\\n<td>100.00</td>\\n</tr>\\n<tr>\\n<td>Copper Swirl Marker</td>\\n<td>20</td>\\n<td>5.00</td>\\n<td>100.00</td>\\n</tr>\\n</table>\\n\\n\\n<table>\\n<tr>\\n<td>SUBTOTAL</td>\\n<td>$140.00</td>\\n</tr>\\n<tr>\\n<td>TAX</td>\\n<td>$4.00</td>\\n</tr>\\n<tr>\\n<td>TOTAL</td>\\n<td>$144.00</td>\\n</tr>\\n</table>\\n\\n\\nBernie Sanders\\n\\nBernie Sanders\\nManager\\n\\nAdditional Notes:\\n\\nDo not Jostle Box. Unpack carefully. Enjoy.\\n\\nJupiter Book Supply will refund you 50% per book if returned within 60 days of reading and\\n\\noffer you 25% off you next total purchase.\\n\",\n",
231-
" \"fields\": {\n",
232-
" \"PurchaseDate\": {\n",
233-
" \"type\": \"date\",\n",
234-
" \"valueDate\": \"2020-12-20\",\n",
235-
" \"spans\": [\n",
236-
" {\n",
237-
" \"offset\": 149,\n",
238-
" \"length\": 10\n",
239-
" }\n",
240-
" ],\n",
241-
" \"confidence\": 0.998,\n",
242-
" \"source\": \"D(1,1168,418,1318,418,1318,450,1168,450)\"\n",
243-
" },\n",
244-
" \"PurchaseOrderNumber\": {\n",
245-
" \"type\": \"string\",\n",
246-
" \"valueString\": \"948284\",\n",
247-
" \"spans\": [\n",
248-
" {\n",
249-
" \"offset\": 178,\n",
250-
" \"length\": 6\n",
251-
" }\n",
252-
" ],\n",
253-
" \"confidence\": 0.998,\n",
254-
" \"source\": \"D(1,1281,459,1376,459,1376,490,1281,490)\"\n",
255-
" },\n",
256-
" \"ShippedToAddress\": {\n",
257-
" \"type\": \"string\",\n",
258-
" \"valueString\": \"938 NE Burner Road Boulder City, CO 92848\",\n",
259-
" \"spans\": [\n",
260-
" {\n",
261-
" \"offset\": 268,\n",
262-
" \"length\": 18\n",
263-
" },\n",
264-
" {\n",
265-
" \"offset\": 287,\n",
266-
" \"length\": 22\n",
267-
" }\n",
268-
" ],\n",
269-
" \"confidence\": 0.998,\n",
270-
" \"source\": \"D(1,278,683,527,683,527,714,278,714);D(1,279,720,564,720,564,752,279,752)\"\n",
271-
" },\n",
272-
" \"TotalPayment\": {\n",
273-
" \"type\": \"number\",\n",
274-
" \"valueNumber\": 144,\n",
275-
" \"spans\": [\n",
276-
" {\n",
277-
" \"offset\": 991,\n",
278-
" \"length\": 7\n",
279-
" }\n",
280-
" ],\n",
281-
" \"confidence\": 0.997,\n",
282-
" \"source\": \"D(1,1428,1669,1531,1669,1531,1699,1428,1699)\"\n",
283-
" }\n",
284-
" },\n",
285-
" \"kind\": \"document\",\n",
286-
" \"startPageNumber\": 1,\n",
287-
" \"endPageNumber\": 1,\n",
288-
" \"unit\": \"pixel\",\n",
289-
" \"pages\": [\n",
290-
" {\n",
291-
" \"pageNumber\": 1,\n",
292-
" \"angle\": 0.05652412,\n",
293-
" \"width\": 1700,\n",
294-
" \"height\": 2200\n",
295-
" }\n",
296-
" ]\n",
297-
" }\n",
298-
" ]\n",
299-
" }\n",
300-
"}\n"
301-
]
302-
}
303-
],
147+
"outputs": [],
304148
"source": [
305149
"response = client.begin_analyze(ANALYZER_ID, file_location='../data/purchase_order.jpg')\n",
306150
"result = client.poll_result(response)\n",
@@ -318,27 +162,9 @@
318162
},
319163
{
320164
"cell_type": "code",
321-
"execution_count": 6,
165+
"execution_count": null,
322166
"metadata": {},
323-
"outputs": [
324-
{
325-
"name": "stderr",
326-
"output_type": "stream",
327-
"text": [
328-
"INFO:python.content_understanding_client:Analyzer train-sample-18473b27-6d27-4d51-8906-9e341ad3fb59 deleted.\n"
329-
]
330-
},
331-
{
332-
"data": {
333-
"text/plain": [
334-
"<Response [204]>"
335-
]
336-
},
337-
"execution_count": 6,
338-
"metadata": {},
339-
"output_type": "execute_result"
340-
}
341-
],
167+
"outputs": [],
342168
"source": [
343169
"client.delete_analyzer(ANALYZER_ID)"
344170
]

notebooks/content_extraction.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@
7474
" endpoint=AZURE_AI_ENDPOINT,\n",
7575
" api_version=AZURE_AI_API_VERSION,\n",
7676
" token_provider=token_provider,\n",
77-
" x_ms_useragent=\"azure-ai-content-understanding-python/content_extraction\",\n",
77+
" x_ms_useragent=\"azure-ai-content-understanding-python/content_extraction\", # This header is used for sample usage telemetry; please comment out this line if you want to opt out.\n",
7878
")"
7979
]
8080
},

notebooks/field_extraction.ipynb

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -50,16 +50,16 @@
5050
},
5151
{
5252
"cell_type": "code",
53-
"execution_count": 8,
53+
"execution_count": null,
5454
"metadata": {},
5555
"outputs": [],
5656
"source": [
5757
"extraction_templates = {\n",
58-
" \"invoice\": ('../analyzer_templates/invoice.json', '../data/invoice.pdf' ),\n",
59-
" \"chart\": ('../analyzer_templates/image_chart.json', '../data/pieChart.jpg' ),\n",
60-
" \"call recording analytics\": ('../analyzer_templates/call_recording_analytics.json', '../data/callCenterRecording.mp3'),\n",
61-
" \"conversational audio analytics\": ('../analyzer_templates/conversational_audio_analytics.json', '../data/callCenterRecording.mp3'),\n",
62-
" \"marketing_video\": ('../analyzer_templates/marketing_video.json', '../data/video.mp4' )\n",
58+
" \"invoice\": ('../analyzer_templates/invoice.json', '../data/invoice.pdf' ),\n",
59+
" \"chart\": ('../analyzer_templates/image_chart.json', '../data/pieChart.jpg' ),\n",
60+
" \"call_recording\": ('../analyzer_templates/call_recording_analytics.json', '../data/callCenterRecording.mp3'),\n",
61+
" \"conversation_audio\": ('../analyzer_templates/conversational_audio_analytics.json', '../data/callCenterRecording.mp3'),\n",
62+
" \"marketing_video\": ('../analyzer_templates/marketing_video.json', '../data/video.mp4' )\n",
6363
"}"
6464
]
6565
},
@@ -125,7 +125,7 @@
125125
" endpoint=AZURE_AI_ENDPOINT,\n",
126126
" api_version=AZURE_AI_API_VERSION,\n",
127127
" token_provider=token_provider,\n",
128-
" x_ms_useragent=\"azure-ai-content-understanding-python/field_extraction\",\n",
128+
" x_ms_useragent=\"azure-ai-content-understanding-python/field_extraction\", # This header is used for sample usage telemetry; please comment out this line if you want to opt out.\n",
129129
")"
130130
]
131131
},

notebooks/management.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@
7373
" endpoint=AZURE_AI_ENDPOINT,\n",
7474
" api_version=AZURE_AI_API_VERSION,\n",
7575
" token_provider=token_provider,\n",
76-
" x_ms_useragent=\"azure-ai-content-understanding-python/field_extraction\",\n",
76+
" x_ms_useragent=\"azure-ai-content-understanding-python/analyzer_management\", # This header is used for sample usage telemetry; please comment out this line if you want to opt out.\n",
7777
")"
7878
]
7979
},

0 commit comments

Comments
 (0)