|
50 | 50 | },
|
51 | 51 | {
|
52 | 52 | "cell_type": "code",
|
53 |
| - "execution_count": 31, |
| 53 | + "execution_count": 8, |
54 | 54 | "metadata": {},
|
55 | 55 | "outputs": [],
|
56 | 56 | "source": [
|
57 | 57 | "extraction_templates = {\n",
|
58 | 58 | " \"invoice\": ('../analyzer_templates/invoice.json', '../data/invoice.pdf' ),\n",
|
59 | 59 | " \"chart\": ('../analyzer_templates/image_chart.json', '../data/pieChart.jpg' ),\n",
|
60 |
| - " \"call_transcript\": ('../analyzer_templates/call_transcript.json', '../data/callCenterRecording.mp3'),\n", |
| 60 | + " \"call recording analytics\": ('../analyzer_templates/call_recording_analytics.json', '../data/callCenterRecording.mp3'),\n", |
| 61 | + " \"conversational audio analytics\": ('../analyzer_templates/conversational_audio_analytics.json', '../data/callCenterRecording.mp3'),\n", |
61 | 62 | " \"marketing_video\": ('../analyzer_templates/marketing_video.json', '../data/video.mp4' )\n",
|
62 | 63 | "}"
|
63 | 64 | ]
|
|
71 | 72 | },
|
72 | 73 | {
|
73 | 74 | "cell_type": "code",
|
74 |
| - "execution_count": 32, |
| 75 | + "execution_count": 9, |
75 | 76 | "metadata": {},
|
76 | 77 | "outputs": [],
|
77 | 78 | "source": [
|
|
137 | 138 | },
|
138 | 139 | {
|
139 | 140 | "cell_type": "code",
|
140 |
| - "execution_count": 34, |
| 141 | + "execution_count": null, |
141 | 142 | "metadata": {},
|
142 |
| - "outputs": [ |
143 |
| - { |
144 |
| - "name": "stderr", |
145 |
| - "output_type": "stream", |
146 |
| - "text": [ |
147 |
| - "INFO:python.content_understanding_client:Analyzer field-extraction-sample-7e48bcbc-c551-4e17-ba09-cea051ba1557 create request accepted.\n", |
148 |
| - "INFO:python.content_understanding_client:Request 9a5b5c93-916c-4206-9afe-6d4870f19bf9 in progress ...\n", |
149 |
| - "INFO:python.content_understanding_client:Request result is ready after 2.27 seconds.\n" |
150 |
| - ] |
151 |
| - }, |
152 |
| - { |
153 |
| - "name": "stdout", |
154 |
| - "output_type": "stream", |
155 |
| - "text": [ |
156 |
| - "{\n", |
157 |
| - " \"id\": \"9a5b5c93-916c-4206-9afe-6d4870f19bf9\",\n", |
158 |
| - " \"status\": \"Succeeded\",\n", |
159 |
| - " \"result\": {\n", |
160 |
| - " \"analyzerId\": \"field-extraction-sample-7e48bcbc-c551-4e17-ba09-cea051ba1557\",\n", |
161 |
| - " \"description\": \"Sample invoice analyzer\",\n", |
162 |
| - " \"createdAt\": \"2024-12-10T23:39:42Z\",\n", |
163 |
| - " \"lastModifiedAt\": \"2024-12-10T23:39:44Z\",\n", |
164 |
| - " \"config\": {\n", |
165 |
| - " \"returnDetails\": false,\n", |
166 |
| - " \"enableOcr\": true,\n", |
167 |
| - " \"enableLayout\": true,\n", |
168 |
| - " \"enableBarcode\": false,\n", |
169 |
| - " \"enableFormula\": false\n", |
170 |
| - " },\n", |
171 |
| - " \"fieldSchema\": {\n", |
172 |
| - " \"fields\": {\n", |
173 |
| - " \"VendorName\": {\n", |
174 |
| - " \"type\": \"string\",\n", |
175 |
| - " \"method\": \"extract\",\n", |
176 |
| - " \"description\": \"Vendor issuing the invoice\"\n", |
177 |
| - " },\n", |
178 |
| - " \"Items\": {\n", |
179 |
| - " \"type\": \"array\",\n", |
180 |
| - " \"method\": \"extract\",\n", |
181 |
| - " \"items\": {\n", |
182 |
| - " \"type\": \"object\",\n", |
183 |
| - " \"properties\": {\n", |
184 |
| - " \"Description\": {\n", |
185 |
| - " \"type\": \"string\",\n", |
186 |
| - " \"method\": \"extract\",\n", |
187 |
| - " \"description\": \"Description of the item\"\n", |
188 |
| - " },\n", |
189 |
| - " \"Amount\": {\n", |
190 |
| - " \"type\": \"number\",\n", |
191 |
| - " \"method\": \"extract\",\n", |
192 |
| - " \"description\": \"Amount of the item\"\n", |
193 |
| - " }\n", |
194 |
| - " }\n", |
195 |
| - " }\n", |
196 |
| - " }\n", |
197 |
| - " }\n", |
198 |
| - " },\n", |
199 |
| - " \"warnings\": [],\n", |
200 |
| - " \"status\": \"ready\",\n", |
201 |
| - " \"scenario\": \"document\"\n", |
202 |
| - " }\n", |
203 |
| - "}\n" |
204 |
| - ] |
205 |
| - } |
206 |
| - ], |
| 143 | + "outputs": [], |
207 | 144 | "source": [
|
208 | 145 | "response = client.begin_create_analyzer(ANALYZER_ID, analyzer_template_path=analyzer_template_path)\n",
|
209 | 146 | "result = client.poll_result(response)\n",
|
|
227 | 164 | },
|
228 | 165 | {
|
229 | 166 | "cell_type": "code",
|
230 |
| - "execution_count": 35, |
| 167 | + "execution_count": null, |
231 | 168 | "metadata": {},
|
232 |
| - "outputs": [ |
233 |
| - { |
234 |
| - "name": "stderr", |
235 |
| - "output_type": "stream", |
236 |
| - "text": [ |
237 |
| - "INFO:python.content_understanding_client:Analyzing file ../data/invoice.pdf with analyzer: field-extraction-sample-7e48bcbc-c551-4e17-ba09-cea051ba1557\n", |
238 |
| - "INFO:python.content_understanding_client:Request a0a20152-53a2-4608-8567-4be90dc37e39 in progress ...\n", |
239 |
| - "INFO:python.content_understanding_client:Request a0a20152-53a2-4608-8567-4be90dc37e39 in progress ...\n", |
240 |
| - "INFO:python.content_understanding_client:Request a0a20152-53a2-4608-8567-4be90dc37e39 in progress ...\n", |
241 |
| - "INFO:python.content_understanding_client:Request result is ready after 6.62 seconds.\n" |
242 |
| - ] |
243 |
| - }, |
244 |
| - { |
245 |
| - "name": "stdout", |
246 |
| - "output_type": "stream", |
247 |
| - "text": [ |
248 |
| - "{\n", |
249 |
| - " \"id\": \"a0a20152-53a2-4608-8567-4be90dc37e39\",\n", |
250 |
| - " \"status\": \"Succeeded\",\n", |
251 |
| - " \"result\": {\n", |
252 |
| - " \"analyzerId\": \"field-extraction-sample-7e48bcbc-c551-4e17-ba09-cea051ba1557\",\n", |
253 |
| - " \"apiVersion\": \"2024-12-01-preview\",\n", |
254 |
| - " \"createdAt\": \"2024-12-10T23:39:45Z\",\n", |
255 |
| - " \"warnings\": [],\n", |
256 |
| - " \"contents\": [\n", |
257 |
| - " {\n", |
258 |
| - " \"markdown\": \"CONTOSO LTD.\\n\\n\\n# INVOICE\\n\\nContoso Headquarters\\n123 456th St\\nNew York, NY, 10001\\n\\nINVOICE: INV-100\\n\\nINVOICE DATE: 11/15/2019\\n\\nDUE DATE: 12/15/2019\\n\\nCUSTOMER NAME: MICROSOFT CORPORATION\\n\\nSERVICE PERIOD: 10/14/2019 - 11/14/2019\\n\\nCUSTOMER ID: CID-12345\\n\\nMicrosoft Corp\\n123 Other St,\\nRedmond WA, 98052\\n\\nBILL TO:\\n\\nMicrosoft Finance\\n\\n123 Bill St,\\n\\nRedmond WA, 98052\\n\\nSHIP TO:\\n\\nMicrosoft Delivery\\n\\n123 Ship St,\\n\\nRedmond WA, 98052\\n\\nSERVICE ADDRESS:\\nMicrosoft Services\\n123 Service St,\\nRedmond WA, 98052\\n\\n\\n<table>\\n<tr>\\n<th>SALESPERSON</th>\\n<th>P.O. NUMBER</th>\\n<th>REQUISITIONER</th>\\n<th>SHIPPED VIA</th>\\n<th>F.O.B. POINT</th>\\n<th>TERMS</th>\\n</tr>\\n<tr>\\n<td></td>\\n<td>PO-3333</td>\\n<td></td>\\n<td></td>\\n<td></td>\\n<td></td>\\n</tr>\\n</table>\\n\\n\\n<table>\\n<tr>\\n<th>DATE</th>\\n<th>ITEM CODE</th>\\n<th>DESCRIPTION</th>\\n<th>QTY</th>\\n<th>UM</th>\\n<th>PRICE</th>\\n<th>TAX</th>\\n<th>AMOUNT</th>\\n</tr>\\n<tr>\\n<td>3/4/2021</td>\\n<td>A123</td>\\n<td>Consulting Services</td>\\n<td>2</td>\\n<td>hours</td>\\n<td>$30.00</td>\\n<td>$6.00</td>\\n<td>$60.00</td>\\n</tr>\\n<tr>\\n<td>3/5/2021</td>\\n<td>B456</td>\\n<td>Document Fee</td>\\n<td>3</td>\\n<td></td>\\n<td>$10.00</td>\\n<td>$3.00</td>\\n<td>$30.00</td>\\n</tr>\\n<tr>\\n<td>3/6/2021</td>\\n<td>C789</td>\\n<td>Printing Fee</td>\\n<td>10</td>\\n<td>pages</td>\\n<td>$1.00</td>\\n<td>$1.00</td>\\n<td>$10.00</td>\\n</tr>\\n</table>\\n\\n\\n<table>\\n<tr>\\n<td>SUBTOTAL</td>\\n<td>$100.00</td>\\n</tr>\\n<tr>\\n<td>SALES TAX</td>\\n<td>$10.00</td>\\n</tr>\\n<tr>\\n<td>TOTAL</td>\\n<td>$110.00</td>\\n</tr>\\n<tr>\\n<td>PREVIOUS UNPAID BALANCE</td>\\n<td>$500.00</td>\\n</tr>\\n<tr>\\n<td>AMOUNT DUE</td>\\n<td>$610.00</td>\\n</tr>\\n</table>\\n\\n\\nTHANK YOU FOR YOUR BUSINESS!\\n\\nREMIT TO:\\n\\nContoso Billing\\n\\n123 Remit St\\n\\nNew York, NY, 10001\\n\",\n", |
259 |
| - " \"fields\": {\n", |
260 |
| - " \"VendorName\": {\n", |
261 |
| - " \"type\": \"string\",\n", |
262 |
| - " \"valueString\": \"CONTOSO LTD.\",\n", |
263 |
| - " \"spans\": [\n", |
264 |
| - " {\n", |
265 |
| - " \"offset\": 0,\n", |
266 |
| - " \"length\": 12\n", |
267 |
| - " }\n", |
268 |
| - " ],\n", |
269 |
| - " \"confidence\": 0.941,\n", |
270 |
| - " \"source\": \"D(1,0.5729,0.6582,2.3353,0.6582,2.3353,0.8957,0.5729,0.8957)\"\n", |
271 |
| - " },\n", |
272 |
| - " \"Items\": {\n", |
273 |
| - " \"type\": \"array\",\n", |
274 |
| - " \"valueArray\": [\n", |
275 |
| - " {\n", |
276 |
| - " \"type\": \"object\",\n", |
277 |
| - " \"valueObject\": {\n", |
278 |
| - " \"Description\": {\n", |
279 |
| - " \"type\": \"string\",\n", |
280 |
| - " \"valueString\": \"Consulting Services\",\n", |
281 |
| - " \"spans\": [\n", |
282 |
| - " {\n", |
283 |
| - " \"offset\": 909,\n", |
284 |
| - " \"length\": 19\n", |
285 |
| - " }\n", |
286 |
| - " ],\n", |
287 |
| - " \"confidence\": 0.971,\n", |
288 |
| - " \"source\": \"D(1,2.3264,5.673,3.6413,5.673,3.6413,5.8402,2.3264,5.8402)\"\n", |
289 |
| - " },\n", |
290 |
| - " \"Amount\": {\n", |
291 |
| - " \"type\": \"number\",\n", |
292 |
| - " \"valueNumber\": 60,\n", |
293 |
| - " \"spans\": [\n", |
294 |
| - " {\n", |
295 |
| - " \"offset\": 995,\n", |
296 |
| - " \"length\": 6\n", |
297 |
| - " }\n", |
298 |
| - " ],\n", |
299 |
| - " \"confidence\": 0.989,\n", |
300 |
| - " \"source\": \"D(1,7.4507,5.6684,7.9245,5.6684,7.9245,5.8323,7.4507,5.8323)\"\n", |
301 |
| - " }\n", |
302 |
| - " }\n", |
303 |
| - " },\n", |
304 |
| - " {\n", |
305 |
| - " \"type\": \"object\",\n", |
306 |
| - " \"valueObject\": {\n", |
307 |
| - " \"Description\": {\n", |
308 |
| - " \"type\": \"string\",\n", |
309 |
| - " \"valueString\": \"Document Fee\",\n", |
310 |
| - " \"spans\": [\n", |
311 |
| - " {\n", |
312 |
| - " \"offset\": 1054,\n", |
313 |
| - " \"length\": 12\n", |
314 |
| - " }\n", |
315 |
| - " ],\n", |
316 |
| - " \"confidence\": 0.984,\n", |
317 |
| - " \"source\": \"D(1,2.3255,5.9758,3.3258,5.9758,3.3258,6.1319,2.3255,6.1319)\"\n", |
318 |
| - " },\n", |
319 |
| - " \"Amount\": {\n", |
320 |
| - " \"type\": \"number\",\n", |
321 |
| - " \"valueNumber\": 30,\n", |
322 |
| - " \"spans\": [\n", |
323 |
| - " {\n", |
324 |
| - " \"offset\": 1128,\n", |
325 |
| - " \"length\": 6\n", |
326 |
| - " }\n", |
327 |
| - " ],\n", |
328 |
| - " \"confidence\": 0.992,\n", |
329 |
| - " \"source\": \"D(1,7.4519,5.9669,7.9245,5.9669,7.9245,6.1322,7.4519,6.1322)\"\n", |
330 |
| - " }\n", |
331 |
| - " }\n", |
332 |
| - " },\n", |
333 |
| - " {\n", |
334 |
| - " \"type\": \"object\",\n", |
335 |
| - " \"valueObject\": {\n", |
336 |
| - " \"Description\": {\n", |
337 |
| - " \"type\": \"string\",\n", |
338 |
| - " \"valueString\": \"Printing Fee\",\n", |
339 |
| - " \"spans\": [\n", |
340 |
| - " {\n", |
341 |
| - " \"offset\": 1187,\n", |
342 |
| - " \"length\": 12\n", |
343 |
| - " }\n", |
344 |
| - " ],\n", |
345 |
| - " \"confidence\": 0.978,\n", |
346 |
| - " \"source\": \"D(1,2.3234,6.2686,3.1488,6.2686,3.1488,6.4426,2.3234,6.4426)\"\n", |
347 |
| - " },\n", |
348 |
| - " \"Amount\": {\n", |
349 |
| - " \"type\": \"number\",\n", |
350 |
| - " \"valueNumber\": 10,\n", |
351 |
| - " \"spans\": [\n", |
352 |
| - " {\n", |
353 |
| - " \"offset\": 1266,\n", |
354 |
| - " \"length\": 6\n", |
355 |
| - " }\n", |
356 |
| - " ],\n", |
357 |
| - " \"confidence\": 0.997,\n", |
358 |
| - " \"source\": \"D(1,7.4516,6.2612,7.9245,6.2612,7.9245,6.4318,7.4516,6.4318)\"\n", |
359 |
| - " }\n", |
360 |
| - " }\n", |
361 |
| - " }\n", |
362 |
| - " ]\n", |
363 |
| - " }\n", |
364 |
| - " },\n", |
365 |
| - " \"kind\": \"document\",\n", |
366 |
| - " \"startPageNumber\": 1,\n", |
367 |
| - " \"endPageNumber\": 1,\n", |
368 |
| - " \"unit\": \"inch\",\n", |
369 |
| - " \"pages\": [\n", |
370 |
| - " {\n", |
371 |
| - " \"pageNumber\": 1,\n", |
372 |
| - " \"angle\": -0.0039,\n", |
373 |
| - " \"width\": 8.5,\n", |
374 |
| - " \"height\": 11\n", |
375 |
| - " }\n", |
376 |
| - " ]\n", |
377 |
| - " }\n", |
378 |
| - " ]\n", |
379 |
| - " }\n", |
380 |
| - "}\n" |
381 |
| - ] |
382 |
| - } |
383 |
| - ], |
| 169 | + "outputs": [], |
384 | 170 | "source": [
|
385 | 171 | "response = client.begin_analyze(ANALYZER_ID, file_location=analyzer_sample_file_path)\n",
|
386 | 172 | "result = client.poll_result(response)\n",
|
|
398 | 184 | },
|
399 | 185 | {
|
400 | 186 | "cell_type": "code",
|
401 |
| - "execution_count": 36, |
| 187 | + "execution_count": null, |
402 | 188 | "metadata": {},
|
403 |
| - "outputs": [ |
404 |
| - { |
405 |
| - "name": "stderr", |
406 |
| - "output_type": "stream", |
407 |
| - "text": [ |
408 |
| - "INFO:python.content_understanding_client:Analyzer field-extraction-sample-7e48bcbc-c551-4e17-ba09-cea051ba1557 deleted.\n" |
409 |
| - ] |
410 |
| - }, |
411 |
| - { |
412 |
| - "data": { |
413 |
| - "text/plain": [ |
414 |
| - "<Response [204]>" |
415 |
| - ] |
416 |
| - }, |
417 |
| - "execution_count": 36, |
418 |
| - "metadata": {}, |
419 |
| - "output_type": "execute_result" |
420 |
| - } |
421 |
| - ], |
| 189 | + "outputs": [], |
422 | 190 | "source": [
|
423 | 191 | "client.delete_analyzer(ANALYZER_ID)"
|
424 | 192 | ]
|
|
440 | 208 | "name": "python",
|
441 | 209 | "nbconvert_exporter": "python",
|
442 | 210 | "pygments_lexer": "ipython3",
|
443 |
| - "version": "3.11.11" |
| 211 | + "version": "3.11.10" |
444 | 212 | }
|
445 | 213 | },
|
446 | 214 | "nbformat": 4,
|
|
0 commit comments