Skip to content

Commit a29178b

Browse files
add second sample
1 parent b23f551 commit a29178b

File tree

1 file changed

+76
-1
lines changed

1 file changed

+76
-1
lines changed

notebooks/field_extraction_pro_mode.ipynb

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@
241241
"with open(output_path, \"w\", encoding=\"utf-8\") as file:\n",
242242
" json.dump(result_json, file, indent=2)\n",
243243
"\n",
244-
"logging.info(\"Full analyzer result saved to:\")\n",
244+
"logging.info(f\"Full analyzer result saved to: {output_path}\")\n",
245245
"display(FileLink(output_path))"
246246
]
247247
},
@@ -285,6 +285,81 @@
285285
"source": [
286286
"client.delete_analyzer(CUSTOM_ANALYZER_ID)"
287287
]
288+
},
289+
{
290+
"cell_type": "markdown",
291+
"metadata": {},
292+
"source": [
293+
"## Bonus sample\n",
294+
"We would like to introduce another sample with multiple inputs."
295+
]
296+
},
297+
{
298+
"cell_type": "code",
299+
"execution_count": null,
300+
"metadata": {},
301+
"outputs": [],
302+
"source": [
303+
"# First, we need to set up variables for the second sample\n",
304+
"\n",
305+
"# Define paths for analyzer template, input documents, and reference documents of the second sample\n",
306+
"analyzer_template_2 = \"../analyzer_templates/insurance_claims_review_pro_mode.json\"\n",
307+
"input_docs_2 = \"../data/field_extraction_pro_mode/insurance_claims_review/input_docs\"\n",
308+
"reference_docs_2 = \"../data/field_extraction_pro_mode/insurance_claims_review/reference_docs\"\n",
309+
"\n",
310+
"# Load reference storage configuration from environment\n",
311+
"REFERENCE_DOC_SAS_URL_2 = os.getenv(\"REFERENCE_DOC_SAS_URL\") # Reuse the same blob container\n",
312+
"REFERENCE_DOC_PATH_2 = os.getenv(\"REFERENCE_DOC_PATH\").rstrip(\"/\") + \"_2/\" # NOTE: Use a different path for the second sample\n",
313+
"CUSTOM_ANALYZER_ID_2 = \"pro-mode-sample-\" + str(uuid.uuid4())\n",
314+
"\n",
315+
"# Let's try reference docuemnts with existing OCR results for the second sample\n",
316+
"logging.info(\"Start generating knowledge base for the second sample...\")\n",
317+
"await client.generate_knowledge_base_on_blob(reference_docs_2, REFERENCE_DOC_SAS_URL_2, REFERENCE_DOC_PATH_2, skip_analyze=True)\n",
318+
"\n",
319+
"# We can reuse previous AzureContentUnderstandingClient\n",
320+
"logging.info(\"Start creating analyzer for the second sample...\")\n",
321+
"response = client.begin_create_analyzer(\n",
322+
" CUSTOM_ANALYZER_ID_2,\n",
323+
" analyzer_template_path=analyzer_template_2,\n",
324+
" pro_mode_reference_docs_storage_container_sas_url=REFERENCE_DOC_SAS_URL_2,\n",
325+
" pro_mode_reference_docs_storage_container_path_prefix=REFERENCE_DOC_PATH_2,\n",
326+
")\n",
327+
"result = client.poll_result(response)\n",
328+
"if result is not None and \"status\" in result and result[\"status\"] == \"Succeeded\":\n",
329+
" logging.info(f\"Analyzer details for {result['result']['analyzerId']}\")\n",
330+
" logging.info(json.dumps(result, indent=2))\n",
331+
"else:\n",
332+
" logging.warning(\n",
333+
" \"An issue was encountered when trying to create the analyzer. \"\n",
334+
" \"Please double-check your deployment and configurations for potential problems.\"\n",
335+
" )\n",
336+
"\n",
337+
"# Analyze the multiple input documents with the second analyzer\n",
338+
"logging.info(\"Start analyzing input documents for the second sample...\")\n",
339+
"response = client.begin_analyze(CUSTOM_ANALYZER_ID_2, file_location=input_docs_2)\n",
340+
"result_json = client.poll_result(response, timeout_seconds=600) # set a longer timeout for pro mode\n",
341+
"\n",
342+
"# Save the result to a JSON file\n",
343+
"# Create the output directory if it doesn't exist\n",
344+
"output_dir = \"output\"\n",
345+
"os.makedirs(output_dir, exist_ok=True)\n",
346+
"output_path = os.path.join(output_dir, f\"{CUSTOM_ANALYZER_ID_2}_result.json\")\n",
347+
"with open(output_path, \"w\", encoding=\"utf-8\") as file:\n",
348+
" json.dump(result_json, file, indent=2)\n",
349+
"\n",
350+
"logging.info(f\"Full analyzer result saved to: {output_path}\")\n",
351+
"display(FileLink(output_path))\n"
352+
]
353+
},
354+
{
355+
"cell_type": "code",
356+
"execution_count": null,
357+
"metadata": {},
358+
"outputs": [],
359+
"source": [
360+
"# [Optional] Delete the analyzer for second sample after use\n",
361+
"client.delete_analyzer(CUSTOM_ANALYZER_ID_2)"
362+
]
288363
}
289364
],
290365
"metadata": {

0 commit comments

Comments
 (0)