Skip to content

Commit 319be6f

Browse files
authored
Updating audio in notebooks (#15)
* Updating the audio templates and aligning them with our key scenarios in AI Studio. * Updated content_extraction to reflect our hero scenario for call center analytics, which also contains the most output results from content understanding, to ensure the best presentation of our audio processing capability. * Updating audio in notebooks
1 parent 5752956 commit 319be6f

File tree

2 files changed

+13
-245
lines changed

2 files changed

+13
-245
lines changed

notebooks/content_extraction.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,8 @@
135135
"outputs": [],
136136
"source": [
137137
"ANALYZER_ID = \"content-audio-sample-\" + str(uuid.uuid4())\n",
138-
"ANALYZER_TEMPLATE_FILE = '../analyzer_templates/call_recording_analytics.json'\n",
139-
"ANALYZER_SAMPLE_FILE = '../data/callCenterRecording.mp3'\n",
138+
"ANALYZER_TEMPLATE_FILE = '../analyzer_templates/audio_transcription.json'\n",
139+
"ANALYZER_SAMPLE_FILE = '../data/audio.wav'\n",
140140
"\n",
141141
"# Create analyzer\n",
142142
"response = client.begin_create_analyzer(ANALYZER_ID, analyzer_template_path=ANALYZER_TEMPLATE_FILE)\n",

notebooks/field_extraction.ipynb

Lines changed: 11 additions & 243 deletions
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,15 @@
5050
},
5151
{
5252
"cell_type": "code",
53-
"execution_count": 31,
53+
"execution_count": 8,
5454
"metadata": {},
5555
"outputs": [],
5656
"source": [
5757
"extraction_templates = {\n",
5858
" \"invoice\": ('../analyzer_templates/invoice.json', '../data/invoice.pdf' ),\n",
5959
" \"chart\": ('../analyzer_templates/image_chart.json', '../data/pieChart.jpg' ),\n",
60-
" \"call_transcript\": ('../analyzer_templates/call_transcript.json', '../data/callCenterRecording.mp3'),\n",
60+
" \"call recording analytics\": ('../analyzer_templates/call_recording_analytics.json', '../data/callCenterRecording.mp3'),\n",
61+
" \"conversational audio analytics\": ('../analyzer_templates/conversational_audio_analytics.json', '../data/callCenterRecording.mp3'),\n",
6162
" \"marketing_video\": ('../analyzer_templates/marketing_video.json', '../data/video.mp4' )\n",
6263
"}"
6364
]
@@ -71,7 +72,7 @@
7172
},
7273
{
7374
"cell_type": "code",
74-
"execution_count": 32,
75+
"execution_count": 9,
7576
"metadata": {},
7677
"outputs": [],
7778
"source": [
@@ -137,73 +138,9 @@
137138
},
138139
{
139140
"cell_type": "code",
140-
"execution_count": 34,
141+
"execution_count": null,
141142
"metadata": {},
142-
"outputs": [
143-
{
144-
"name": "stderr",
145-
"output_type": "stream",
146-
"text": [
147-
"INFO:python.content_understanding_client:Analyzer field-extraction-sample-7e48bcbc-c551-4e17-ba09-cea051ba1557 create request accepted.\n",
148-
"INFO:python.content_understanding_client:Request 9a5b5c93-916c-4206-9afe-6d4870f19bf9 in progress ...\n",
149-
"INFO:python.content_understanding_client:Request result is ready after 2.27 seconds.\n"
150-
]
151-
},
152-
{
153-
"name": "stdout",
154-
"output_type": "stream",
155-
"text": [
156-
"{\n",
157-
" \"id\": \"9a5b5c93-916c-4206-9afe-6d4870f19bf9\",\n",
158-
" \"status\": \"Succeeded\",\n",
159-
" \"result\": {\n",
160-
" \"analyzerId\": \"field-extraction-sample-7e48bcbc-c551-4e17-ba09-cea051ba1557\",\n",
161-
" \"description\": \"Sample invoice analyzer\",\n",
162-
" \"createdAt\": \"2024-12-10T23:39:42Z\",\n",
163-
" \"lastModifiedAt\": \"2024-12-10T23:39:44Z\",\n",
164-
" \"config\": {\n",
165-
" \"returnDetails\": false,\n",
166-
" \"enableOcr\": true,\n",
167-
" \"enableLayout\": true,\n",
168-
" \"enableBarcode\": false,\n",
169-
" \"enableFormula\": false\n",
170-
" },\n",
171-
" \"fieldSchema\": {\n",
172-
" \"fields\": {\n",
173-
" \"VendorName\": {\n",
174-
" \"type\": \"string\",\n",
175-
" \"method\": \"extract\",\n",
176-
" \"description\": \"Vendor issuing the invoice\"\n",
177-
" },\n",
178-
" \"Items\": {\n",
179-
" \"type\": \"array\",\n",
180-
" \"method\": \"extract\",\n",
181-
" \"items\": {\n",
182-
" \"type\": \"object\",\n",
183-
" \"properties\": {\n",
184-
" \"Description\": {\n",
185-
" \"type\": \"string\",\n",
186-
" \"method\": \"extract\",\n",
187-
" \"description\": \"Description of the item\"\n",
188-
" },\n",
189-
" \"Amount\": {\n",
190-
" \"type\": \"number\",\n",
191-
" \"method\": \"extract\",\n",
192-
" \"description\": \"Amount of the item\"\n",
193-
" }\n",
194-
" }\n",
195-
" }\n",
196-
" }\n",
197-
" }\n",
198-
" },\n",
199-
" \"warnings\": [],\n",
200-
" \"status\": \"ready\",\n",
201-
" \"scenario\": \"document\"\n",
202-
" }\n",
203-
"}\n"
204-
]
205-
}
206-
],
143+
"outputs": [],
207144
"source": [
208145
"response = client.begin_create_analyzer(ANALYZER_ID, analyzer_template_path=analyzer_template_path)\n",
209146
"result = client.poll_result(response)\n",
@@ -227,160 +164,9 @@
227164
},
228165
{
229166
"cell_type": "code",
230-
"execution_count": 35,
167+
"execution_count": null,
231168
"metadata": {},
232-
"outputs": [
233-
{
234-
"name": "stderr",
235-
"output_type": "stream",
236-
"text": [
237-
"INFO:python.content_understanding_client:Analyzing file ../data/invoice.pdf with analyzer: field-extraction-sample-7e48bcbc-c551-4e17-ba09-cea051ba1557\n",
238-
"INFO:python.content_understanding_client:Request a0a20152-53a2-4608-8567-4be90dc37e39 in progress ...\n",
239-
"INFO:python.content_understanding_client:Request a0a20152-53a2-4608-8567-4be90dc37e39 in progress ...\n",
240-
"INFO:python.content_understanding_client:Request a0a20152-53a2-4608-8567-4be90dc37e39 in progress ...\n",
241-
"INFO:python.content_understanding_client:Request result is ready after 6.62 seconds.\n"
242-
]
243-
},
244-
{
245-
"name": "stdout",
246-
"output_type": "stream",
247-
"text": [
248-
"{\n",
249-
" \"id\": \"a0a20152-53a2-4608-8567-4be90dc37e39\",\n",
250-
" \"status\": \"Succeeded\",\n",
251-
" \"result\": {\n",
252-
" \"analyzerId\": \"field-extraction-sample-7e48bcbc-c551-4e17-ba09-cea051ba1557\",\n",
253-
" \"apiVersion\": \"2024-12-01-preview\",\n",
254-
" \"createdAt\": \"2024-12-10T23:39:45Z\",\n",
255-
" \"warnings\": [],\n",
256-
" \"contents\": [\n",
257-
" {\n",
258-
" \"markdown\": \"CONTOSO LTD.\\n\\n\\n# INVOICE\\n\\nContoso Headquarters\\n123 456th St\\nNew York, NY, 10001\\n\\nINVOICE: INV-100\\n\\nINVOICE DATE: 11/15/2019\\n\\nDUE DATE: 12/15/2019\\n\\nCUSTOMER NAME: MICROSOFT CORPORATION\\n\\nSERVICE PERIOD: 10/14/2019 - 11/14/2019\\n\\nCUSTOMER ID: CID-12345\\n\\nMicrosoft Corp\\n123 Other St,\\nRedmond WA, 98052\\n\\nBILL TO:\\n\\nMicrosoft Finance\\n\\n123 Bill St,\\n\\nRedmond WA, 98052\\n\\nSHIP TO:\\n\\nMicrosoft Delivery\\n\\n123 Ship St,\\n\\nRedmond WA, 98052\\n\\nSERVICE ADDRESS:\\nMicrosoft Services\\n123 Service St,\\nRedmond WA, 98052\\n\\n\\n<table>\\n<tr>\\n<th>SALESPERSON</th>\\n<th>P.O. NUMBER</th>\\n<th>REQUISITIONER</th>\\n<th>SHIPPED VIA</th>\\n<th>F.O.B. POINT</th>\\n<th>TERMS</th>\\n</tr>\\n<tr>\\n<td></td>\\n<td>PO-3333</td>\\n<td></td>\\n<td></td>\\n<td></td>\\n<td></td>\\n</tr>\\n</table>\\n\\n\\n<table>\\n<tr>\\n<th>DATE</th>\\n<th>ITEM CODE</th>\\n<th>DESCRIPTION</th>\\n<th>QTY</th>\\n<th>UM</th>\\n<th>PRICE</th>\\n<th>TAX</th>\\n<th>AMOUNT</th>\\n</tr>\\n<tr>\\n<td>3/4/2021</td>\\n<td>A123</td>\\n<td>Consulting Services</td>\\n<td>2</td>\\n<td>hours</td>\\n<td>$30.00</td>\\n<td>$6.00</td>\\n<td>$60.00</td>\\n</tr>\\n<tr>\\n<td>3/5/2021</td>\\n<td>B456</td>\\n<td>Document Fee</td>\\n<td>3</td>\\n<td></td>\\n<td>$10.00</td>\\n<td>$3.00</td>\\n<td>$30.00</td>\\n</tr>\\n<tr>\\n<td>3/6/2021</td>\\n<td>C789</td>\\n<td>Printing Fee</td>\\n<td>10</td>\\n<td>pages</td>\\n<td>$1.00</td>\\n<td>$1.00</td>\\n<td>$10.00</td>\\n</tr>\\n</table>\\n\\n\\n<table>\\n<tr>\\n<td>SUBTOTAL</td>\\n<td>$100.00</td>\\n</tr>\\n<tr>\\n<td>SALES TAX</td>\\n<td>$10.00</td>\\n</tr>\\n<tr>\\n<td>TOTAL</td>\\n<td>$110.00</td>\\n</tr>\\n<tr>\\n<td>PREVIOUS UNPAID BALANCE</td>\\n<td>$500.00</td>\\n</tr>\\n<tr>\\n<td>AMOUNT DUE</td>\\n<td>$610.00</td>\\n</tr>\\n</table>\\n\\n\\nTHANK YOU FOR YOUR BUSINESS!\\n\\nREMIT TO:\\n\\nContoso Billing\\n\\n123 Remit St\\n\\nNew York, NY, 10001\\n\",\n",
259-
" \"fields\": {\n",
260-
" \"VendorName\": {\n",
261-
" \"type\": \"string\",\n",
262-
" \"valueString\": \"CONTOSO LTD.\",\n",
263-
" \"spans\": [\n",
264-
" {\n",
265-
" \"offset\": 0,\n",
266-
" \"length\": 12\n",
267-
" }\n",
268-
" ],\n",
269-
" \"confidence\": 0.941,\n",
270-
" \"source\": \"D(1,0.5729,0.6582,2.3353,0.6582,2.3353,0.8957,0.5729,0.8957)\"\n",
271-
" },\n",
272-
" \"Items\": {\n",
273-
" \"type\": \"array\",\n",
274-
" \"valueArray\": [\n",
275-
" {\n",
276-
" \"type\": \"object\",\n",
277-
" \"valueObject\": {\n",
278-
" \"Description\": {\n",
279-
" \"type\": \"string\",\n",
280-
" \"valueString\": \"Consulting Services\",\n",
281-
" \"spans\": [\n",
282-
" {\n",
283-
" \"offset\": 909,\n",
284-
" \"length\": 19\n",
285-
" }\n",
286-
" ],\n",
287-
" \"confidence\": 0.971,\n",
288-
" \"source\": \"D(1,2.3264,5.673,3.6413,5.673,3.6413,5.8402,2.3264,5.8402)\"\n",
289-
" },\n",
290-
" \"Amount\": {\n",
291-
" \"type\": \"number\",\n",
292-
" \"valueNumber\": 60,\n",
293-
" \"spans\": [\n",
294-
" {\n",
295-
" \"offset\": 995,\n",
296-
" \"length\": 6\n",
297-
" }\n",
298-
" ],\n",
299-
" \"confidence\": 0.989,\n",
300-
" \"source\": \"D(1,7.4507,5.6684,7.9245,5.6684,7.9245,5.8323,7.4507,5.8323)\"\n",
301-
" }\n",
302-
" }\n",
303-
" },\n",
304-
" {\n",
305-
" \"type\": \"object\",\n",
306-
" \"valueObject\": {\n",
307-
" \"Description\": {\n",
308-
" \"type\": \"string\",\n",
309-
" \"valueString\": \"Document Fee\",\n",
310-
" \"spans\": [\n",
311-
" {\n",
312-
" \"offset\": 1054,\n",
313-
" \"length\": 12\n",
314-
" }\n",
315-
" ],\n",
316-
" \"confidence\": 0.984,\n",
317-
" \"source\": \"D(1,2.3255,5.9758,3.3258,5.9758,3.3258,6.1319,2.3255,6.1319)\"\n",
318-
" },\n",
319-
" \"Amount\": {\n",
320-
" \"type\": \"number\",\n",
321-
" \"valueNumber\": 30,\n",
322-
" \"spans\": [\n",
323-
" {\n",
324-
" \"offset\": 1128,\n",
325-
" \"length\": 6\n",
326-
" }\n",
327-
" ],\n",
328-
" \"confidence\": 0.992,\n",
329-
" \"source\": \"D(1,7.4519,5.9669,7.9245,5.9669,7.9245,6.1322,7.4519,6.1322)\"\n",
330-
" }\n",
331-
" }\n",
332-
" },\n",
333-
" {\n",
334-
" \"type\": \"object\",\n",
335-
" \"valueObject\": {\n",
336-
" \"Description\": {\n",
337-
" \"type\": \"string\",\n",
338-
" \"valueString\": \"Printing Fee\",\n",
339-
" \"spans\": [\n",
340-
" {\n",
341-
" \"offset\": 1187,\n",
342-
" \"length\": 12\n",
343-
" }\n",
344-
" ],\n",
345-
" \"confidence\": 0.978,\n",
346-
" \"source\": \"D(1,2.3234,6.2686,3.1488,6.2686,3.1488,6.4426,2.3234,6.4426)\"\n",
347-
" },\n",
348-
" \"Amount\": {\n",
349-
" \"type\": \"number\",\n",
350-
" \"valueNumber\": 10,\n",
351-
" \"spans\": [\n",
352-
" {\n",
353-
" \"offset\": 1266,\n",
354-
" \"length\": 6\n",
355-
" }\n",
356-
" ],\n",
357-
" \"confidence\": 0.997,\n",
358-
" \"source\": \"D(1,7.4516,6.2612,7.9245,6.2612,7.9245,6.4318,7.4516,6.4318)\"\n",
359-
" }\n",
360-
" }\n",
361-
" }\n",
362-
" ]\n",
363-
" }\n",
364-
" },\n",
365-
" \"kind\": \"document\",\n",
366-
" \"startPageNumber\": 1,\n",
367-
" \"endPageNumber\": 1,\n",
368-
" \"unit\": \"inch\",\n",
369-
" \"pages\": [\n",
370-
" {\n",
371-
" \"pageNumber\": 1,\n",
372-
" \"angle\": -0.0039,\n",
373-
" \"width\": 8.5,\n",
374-
" \"height\": 11\n",
375-
" }\n",
376-
" ]\n",
377-
" }\n",
378-
" ]\n",
379-
" }\n",
380-
"}\n"
381-
]
382-
}
383-
],
169+
"outputs": [],
384170
"source": [
385171
"response = client.begin_analyze(ANALYZER_ID, file_location=analyzer_sample_file_path)\n",
386172
"result = client.poll_result(response)\n",
@@ -398,27 +184,9 @@
398184
},
399185
{
400186
"cell_type": "code",
401-
"execution_count": 36,
187+
"execution_count": null,
402188
"metadata": {},
403-
"outputs": [
404-
{
405-
"name": "stderr",
406-
"output_type": "stream",
407-
"text": [
408-
"INFO:python.content_understanding_client:Analyzer field-extraction-sample-7e48bcbc-c551-4e17-ba09-cea051ba1557 deleted.\n"
409-
]
410-
},
411-
{
412-
"data": {
413-
"text/plain": [
414-
"<Response [204]>"
415-
]
416-
},
417-
"execution_count": 36,
418-
"metadata": {},
419-
"output_type": "execute_result"
420-
}
421-
],
189+
"outputs": [],
422190
"source": [
423191
"client.delete_analyzer(ANALYZER_ID)"
424192
]
@@ -440,7 +208,7 @@
440208
"name": "python",
441209
"nbconvert_exporter": "python",
442210
"pygments_lexer": "ipython3",
443-
"version": "3.11.11"
211+
"version": "3.11.10"
444212
}
445213
},
446214
"nbformat": 4,

0 commit comments

Comments
 (0)