add second sample

chienyuanchang · chienyuanchang · commit a29178b8a11f · 2025-07-18T22:01:33.000Z
diff --git a/notebooks/field_extraction_pro_mode.ipynb b/notebooks/field_extraction_pro_mode.ipynb
@@ -241,7 +241,7 @@
     "with open(output_path, \"w\", encoding=\"utf-8\") as file:\n",
     "    json.dump(result_json, file, indent=2)\n",
     "\n",
-    "logging.info(\"Full analyzer result saved to:\")\n",
+    "logging.info(f\"Full analyzer result saved to: {output_path}\")\n",
     "display(FileLink(output_path))"
    ]
   },
@@ -285,6 +285,81 @@
    "source": [
     "client.delete_analyzer(CUSTOM_ANALYZER_ID)"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Bonus sample\n",
+    "We would like to introduce another sample with multiple inputs."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# First, we need to set up variables for the second sample\n",
+    "\n",
+    "# Define paths for analyzer template, input documents, and reference documents of the second sample\n",
+    "analyzer_template_2 = \"../analyzer_templates/insurance_claims_review_pro_mode.json\"\n",
+    "input_docs_2 = \"../data/field_extraction_pro_mode/insurance_claims_review/input_docs\"\n",
+    "reference_docs_2 = \"../data/field_extraction_pro_mode/insurance_claims_review/reference_docs\"\n",
+    "\n",
+    "# Load reference storage configuration from environment\n",
+    "REFERENCE_DOC_SAS_URL_2 = os.getenv(\"REFERENCE_DOC_SAS_URL\")  # Reuse the same blob container\n",
+    "REFERENCE_DOC_PATH_2 = os.getenv(\"REFERENCE_DOC_PATH\").rstrip(\"/\") + \"_2/\"  # NOTE: Use a different path for the second sample\n",
+    "CUSTOM_ANALYZER_ID_2 = \"pro-mode-sample-\" + str(uuid.uuid4())\n",
+    "\n",
+    "# Let's try reference documents with existing OCR results for the second sample\n",
+    "logging.info(\"Start generating knowledge base for the second sample...\")\n",
+    "await client.generate_knowledge_base_on_blob(reference_docs_2, REFERENCE_DOC_SAS_URL_2, REFERENCE_DOC_PATH_2, skip_analyze=True)\n",
+    "\n",
+    "# We can reuse the previous AzureContentUnderstandingClient\n",
+    "logging.info(\"Start creating analyzer for the second sample...\")\n",
+    "response = client.begin_create_analyzer(\n",
+    "    CUSTOM_ANALYZER_ID_2,\n",
+    "    analyzer_template_path=analyzer_template_2,\n",
+    "    pro_mode_reference_docs_storage_container_sas_url=REFERENCE_DOC_SAS_URL_2,\n",
+    "    pro_mode_reference_docs_storage_container_path_prefix=REFERENCE_DOC_PATH_2,\n",
+    ")\n",
+    "result = client.poll_result(response)\n",
+    "if result is not None and \"status\" in result and result[\"status\"] == \"Succeeded\":\n",
+    "    logging.info(f\"Analyzer details for {result['result']['analyzerId']}\")\n",
+    "    logging.info(json.dumps(result, indent=2))\n",
+    "else:\n",
+    "    logging.warning(\n",
+    "        \"An issue was encountered when trying to create the analyzer. \"\n",
+    "        \"Please double-check your deployment and configurations for potential problems.\"\n",
+    "    )\n",
+    "\n",
+    "# Analyze the multiple input documents with the second analyzer\n",
+    "logging.info(\"Start analyzing input documents for the second sample...\")\n",
+    "response = client.begin_analyze(CUSTOM_ANALYZER_ID_2, file_location=input_docs_2)\n",
+    "result_json = client.poll_result(response, timeout_seconds=600)  # set a longer timeout for pro mode\n",
+    "\n",
+    "# Save the result to a JSON file\n",
+    "# Create the output directory if it doesn't exist\n",
+    "output_dir = \"output\"\n",
+    "os.makedirs(output_dir, exist_ok=True)\n",
+    "output_path = os.path.join(output_dir, f\"{CUSTOM_ANALYZER_ID_2}_result.json\")\n",
+    "with open(output_path, \"w\", encoding=\"utf-8\") as file:\n",
+    "    json.dump(result_json, file, indent=2)\n",
+    "\n",
+    "logging.info(f\"Full analyzer result saved to: {output_path}\")\n",
+    "display(FileLink(output_path))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# [Optional] Delete the analyzer for the second sample after use\n",
+    "client.delete_analyzer(CUSTOM_ANALYZER_ID_2)"
+   ]
   }
  ],
  "metadata": {