|
241 | 241 | "with open(output_path, \"w\", encoding=\"utf-8\") as file:\n",
|
242 | 242 | " json.dump(result_json, file, indent=2)\n",
|
243 | 243 | "\n",
|
244 |
| - "logging.info(\"Full analyzer result saved to:\")\n", |
| 244 | + "logging.info(f\"Full analyzer result saved to: {output_path}\")\n", |
245 | 245 | "display(FileLink(output_path))"
|
246 | 246 | ]
|
247 | 247 | },
|
|
285 | 285 | "source": [
|
286 | 286 | "client.delete_analyzer(CUSTOM_ANALYZER_ID)"
|
287 | 287 | ]
|
| 288 | + }, |
| 289 | + { |
| 290 | + "cell_type": "markdown", |
| 291 | + "metadata": {}, |
| 292 | + "source": [ |
| 293 | + "## Bonus sample\n", |
| 294 | + "This section introduces a second sample that analyzes multiple input documents." |
| 295 | + ] |
| 296 | + }, |
| 297 | + { |
| 298 | + "cell_type": "code", |
| 299 | + "execution_count": null, |
| 300 | + "metadata": {}, |
| 301 | + "outputs": [], |
| 302 | + "source": [ |
| 303 | + "# First, we need to set up variables for the second sample\n", |
| 304 | + "\n", |
| 305 | + "# Define paths for analyzer template, input documents, and reference documents of the second sample\n", |
| 306 | + "analyzer_template_2 = \"../analyzer_templates/insurance_claims_review_pro_mode.json\"\n", |
| 307 | + "input_docs_2 = \"../data/field_extraction_pro_mode/insurance_claims_review/input_docs\"\n", |
| 308 | + "reference_docs_2 = \"../data/field_extraction_pro_mode/insurance_claims_review/reference_docs\"\n", |
| 309 | + "\n", |
| 310 | + "# Load reference storage configuration from environment\n", |
| 311 | + "REFERENCE_DOC_SAS_URL_2 = os.getenv(\"REFERENCE_DOC_SAS_URL\") # Reuse the same blob container\n", |
| 312 | + "REFERENCE_DOC_PATH_2 = os.getenv(\"REFERENCE_DOC_PATH\").rstrip(\"/\") + \"_2/\" # NOTE: Use a different path for the second sample\n", |
| 313 | + "CUSTOM_ANALYZER_ID_2 = \"pro-mode-sample-\" + str(uuid.uuid4())\n", |
| 314 | + "\n", |
| 315 | + "# Let's try reference documents with existing OCR results for the second sample\n", |
| 316 | + "logging.info(\"Start generating knowledge base for the second sample...\")\n", |
| 317 | + "await client.generate_knowledge_base_on_blob(reference_docs_2, REFERENCE_DOC_SAS_URL_2, REFERENCE_DOC_PATH_2, skip_analyze=True)\n", |
| 318 | + "\n", |
| 319 | + "# We can reuse the previous AzureContentUnderstandingClient instance\n", |
| 320 | + "logging.info(\"Start creating analyzer for the second sample...\")\n", |
| 321 | + "response = client.begin_create_analyzer(\n", |
| 322 | + " CUSTOM_ANALYZER_ID_2,\n", |
| 323 | + " analyzer_template_path=analyzer_template_2,\n", |
| 324 | + " pro_mode_reference_docs_storage_container_sas_url=REFERENCE_DOC_SAS_URL_2,\n", |
| 325 | + " pro_mode_reference_docs_storage_container_path_prefix=REFERENCE_DOC_PATH_2,\n", |
| 326 | + ")\n", |
| 327 | + "result = client.poll_result(response)\n", |
| 328 | + "if result is not None and \"status\" in result and result[\"status\"] == \"Succeeded\":\n", |
| 329 | + " logging.info(f\"Analyzer details for {result['result']['analyzerId']}\")\n", |
| 330 | + " logging.info(json.dumps(result, indent=2))\n", |
| 331 | + "else:\n", |
| 332 | + " logging.warning(\n", |
| 333 | + " \"An issue was encountered when trying to create the analyzer. \"\n", |
| 334 | + " \"Please double-check your deployment and configurations for potential problems.\"\n", |
| 335 | + " )\n", |
| 336 | + "\n", |
| 337 | + "# Analyze the multiple input documents with the second analyzer\n", |
| 338 | + "logging.info(\"Start analyzing input documents for the second sample...\")\n", |
| 339 | + "response = client.begin_analyze(CUSTOM_ANALYZER_ID_2, file_location=input_docs_2)\n", |
| 340 | + "result_json = client.poll_result(response, timeout_seconds=600) # set a longer timeout for pro mode\n", |
| 341 | + "\n", |
| 342 | + "# Save the result to a JSON file\n", |
| 343 | + "# Create the output directory if it doesn't exist\n", |
| 344 | + "output_dir = \"output\"\n", |
| 345 | + "os.makedirs(output_dir, exist_ok=True)\n", |
| 346 | + "output_path = os.path.join(output_dir, f\"{CUSTOM_ANALYZER_ID_2}_result.json\")\n", |
| 347 | + "with open(output_path, \"w\", encoding=\"utf-8\") as file:\n", |
| 348 | + " json.dump(result_json, file, indent=2)\n", |
| 349 | + "\n", |
| 350 | + "logging.info(f\"Full analyzer result saved to: {output_path}\")\n", |
| 351 | + "display(FileLink(output_path))\n" |
| 352 | + ] |
| 353 | + }, |
| 354 | + { |
| 355 | + "cell_type": "code", |
| 356 | + "execution_count": null, |
| 357 | + "metadata": {}, |
| 358 | + "outputs": [], |
| 359 | + "source": [ |
| 360 | + "# [Optional] Delete the analyzer for the second sample after use\n", |
| 361 | + "client.delete_analyzer(CUSTOM_ANALYZER_ID_2)" |
| 362 | + ] |
288 | 363 | }
|
289 | 364 | ],
|
290 | 365 | "metadata": {
|
|
0 commit comments