Azure-Samples
diff --git a/‎data/MS_Annual_Report_2024.pdf
-132 Bytes b/‎data/MS_Annual_Report_2024.pdf
-132 Bytes
diff --git a/‎data/mixed_financial_docs.pdf
131 Bytes b/‎data/mixed_financial_docs.pdf
131 Bytes
diff --git a/‎notebooks/classifier.ipynb
Lines changed: 40 additions & 36 deletions b/‎notebooks/classifier.ipynb
Lines changed: 40 additions & 36 deletions
@@ -108,7 +108,7 @@
     "# IMPORTANT: Replace with your actual subscription key or set up in \".env\" file if not using token auth\n",
     "AZURE_AI_API_KEY = os.getenv(\"AZURE_AI_API_KEY\")\n",
     "AZURE_AI_API_VERSION = os.getenv(\"AZURE_AI_API_VERSION\", \"2025-05-01-preview\")\n",
-    "ANALYZER_SAMPLE_FILE = \"../data/MS_Annual_Report_2024.pdf\"  # Update this path to your PDF file\n",
+    "ANALYZER_SAMPLE_FILE = \"../data/mixed_financial_docs.pdf\"  # Update this path to your PDF file\n",
     "\n",
     "# Authentication - Using DefaultAzureCredential for token-based auth\n",
     "credential = DefaultAzureCredential()\n",
@@ -151,18 +151,15 @@
     "# Define document categories and their descriptions\n",
     "classifier_schema = {\n",
     "    \"categories\": {\n",
-    "        \"Executive Summary & Strategy\": {\n",
-    "            \"description\": \"Leadership messages, strategic vision, and future outlook.\"\n",
+    "        \"Loan application\": {\n",
+    "            \"description\": \"Documents submitted by individuals or businesses to request funding, typically including personal or business details, financial history, loan amount, purpose, and supporting documentation.\"\n",
     "        },\n",
-    "        \"Financial Performance & Segment Reporting\": {\n",
-    "            \"description\": \"Overall financial results and detailed performance by business units.\"\n",
+    "        \"Invoice\": {\n",
+    "            \"description\": \"Billing documents issued by sellers or service providers to request payment for goods or services, detailing items, prices, taxes, totals, and payment terms.\"\n",
     "        },\n",
-    "        \"Operations & Corporate Governance\": {\n",
-    "            \"description\": \"Business operations, governance structure, and risk management.\"\n",
+    "        \"Bank_Statement\": {\n",
+    "            \"description\": \"Official statements issued by banks that summarize account activity over a period, including deposits, withdrawals, fees, and balances.\"\n",
     "        },\n",
-    "        \"Shareholder Information & Relations\": {\n",
-    "            \"description\": \"Annual meeting details, stock information, and shareholder services.\"\n",
-    "        }\n",
     "    },\n",
     "    \"splitMode\": \"auto\"  # IMPORTANT: Automatically detect document boundaries. Can change mode for your needs.\n",
     "}\n",
@@ -331,7 +328,7 @@
     "\n",
     "Now let's create a custom analyzer that can extract specific fields from documents.\n",
     "This analyzer will:\n",
-    "- Extract visit dates from medical documents\n",
+    "- Extract common fields from loan application documents\n",
     "- Generate document excerpts"
    ]
   },
@@ -343,7 +340,7 @@
    "source": [
     "# Define analyzer schema with custom fields\n",
     "analyzer_schema = {\n",
-    "    \"description\": \"Medical encounter analyzer - extracts key information from medical records\",\n",
+    "    \"description\": \"Loan application analyzer - extracts key information from loan application\",\n",
     "    \"baseAnalyzerId\": \"prebuilt-documentAnalyzer\",  # Built on top of the general document analyzer\n",
     "    \"config\": {\n",
     "        \"returnDetails\": True,\n",
@@ -355,37 +352,42 @@
     "    },\n",
     "    \"fieldSchema\": {\n",
     "        \"fields\": {\n",
-    "            \"ReportDate\": {\n",
+    "            \"ApplicationDate\": {\n",
     "                \"type\": \"date\",\n",
     "                \"method\": \"generate\",\n",
-    "                \"description\": \"The publication or filing date of the annual report.\"\n",
+    "                \"description\": \"The date when the loan application was submitted.\"\n",
     "            },\n",
-    "            \"CompanyName\": {\n",
+    "            \"ApplicantName\": {\n",
     "                \"type\": \"string\",\n",
     "                \"method\": \"generate\",\n",
-    "                \"description\": \"The name of the company issuing the report.\"\n",
+    "                \"description\": \"The full name of the loan applicant or company.\"\n",
+    "            },\n",
+    "            \"LoanAmountRequested\": {\n",
+    "                \"type\": \"number\",\n",
+    "                \"method\": \"generate\",\n",
+    "                \"description\": \"The total amount of loan money requested by the applicant.\"\n",
     "            },\n",
-    "            \"FiscalYear\": {\n",
+    "            \"LoanPurpose\": {\n",
     "                \"type\": \"string\",\n",
     "                \"method\": \"generate\",\n",
-    "                \"description\": \"The fiscal year the report covers.\"\n",
+    "                \"description\": \"The stated purpose or reason for the loan.\"\n",
     "            },\n",
-    "            \"NetIncome\": {\n",
+    "            \"CreditScore\": {\n",
     "                \"type\": \"number\",\n",
     "                \"method\": \"generate\",\n",
-    "                \"description\": \"Net income or profit reported for the fiscal year.\"\n",
+    "                \"description\": \"The credit score of the applicant, if available.\"\n",
     "            },\n",
     "            \"Summary\": {\n",
     "                \"type\": \"string\",\n",
     "                \"method\": \"generate\",\n",
-    "                \"description\": \"Brief summary of the annual report\"\n",
+    "                \"description\": \"A brief overview of the loan application details.\"\n",
     "            }\n",
     "        }\n",
     "    }\n",
     "}\n",
     "\n",
     "# Generate unique analyzer ID\n",
-    "analyzer_id = \"analyzer-medical-\" + str(uuid.uuid4())\n",
+    "analyzer_id = \"analyzer-loan-application-\" + str(uuid.uuid4())\n",
     "\n",
     "# Create the analyzer\n",
     "try:\n",
@@ -411,7 +413,7 @@
    "source": [
     "## 10. Create an Enhanced Classifier with Custom Analyzer\n",
     "\n",
-    "Now we'll create a new classifier that uses our custom analyzer for medical documents.\n",
+    "Now we'll create a new classifier that uses the prebuilt invoice analyzer for invoices and our custom analyzer for loan application documents.\n",
     "This combines classification with field extraction in one operation."
    ]
   },
@@ -427,16 +429,17 @@
     "# Create enhanced classifier schema\n",
     "enhanced_classifier_schema = {\n",
     "    \"categories\": {\n",
-    "        \"Legal\": {\n",
-    "            \"description\": \"Legal documents including subpoenas, declarations, contracts, and other legal paperwork.\"\n",
-    "            # No analyzer specified - uses default processing\n",
+    "        \"Loan application\": {\n",
+    "            \"description\": \"Documents submitted by individuals or businesses to request funding, typically including personal or business details, financial history, loan amount, purpose, and supporting documentation.\",\n",
+    "            \"analyzerId\": analyzer_id  # IMPORTANT: Use the custom analyzer created in the previous step for loan applications\n",
     "        },\n",
-    "        \"Annual Report\": {\n",
-    "            \"description\": \"Each document must ends with 'end of encounter'. Dont rely on page numbers\",\n",
-    "            \"analyzerId\": analyzer_id  # IMPORTANT: Use created custom analyzer in previous step for annual reports\n",
+    "        \"Invoice\": {\n",
+    "            \"description\": \"Billing documents issued by sellers or service providers to request payment for goods or services, detailing items, prices, taxes, totals, and payment terms.\",\n",
+    "            \"analyzerId\": \"prebuilt-invoice\"  # IMPORTANT: Use the prebuilt invoice analyzer for invoices\n",
     "        },\n",
-    "        \"Declaration_of_custodian\": {\n",
-    "            \"description\": \"Declarations of custodian documents, often used in legal contexts.\"\n",
+    "        \"Bank_Statement\": {\n",
+    "            \"description\": \"Official statements issued by banks that summarize account activity over a period, including deposits, withdrawals, fees, and balances.\"\n",
+    "            # No analyzer specified - uses default processing\n",
     "        }\n",
     "    },\n",
     "    \"splitMode\": \"auto\"\n",
@@ -447,8 +450,9 @@
     "    try:\n",
     "        print(f\"🔨 Creating enhanced classifier: {enhanced_classifier_id}\")\n",
     "        print(\"\\n📋 Configuration:\")\n",
-    "        print(\"   • Legal documents → Standard processing\")\n",
-    "        print(\"   • Medical documents → Custom analyzer with field extraction\")\n",
+    "        print(\"   • Loan application documents → Custom analyzer with field extraction\")\n",
+    "        print(\"   • Invoice documents → Prebuilt invoice analyzer\")\n",
+    "        print(\"   • Bank_Statement documents → Standard processing\")\n",
     "        \n",
     "        response = content_understanding_client.begin_create_classifier(enhanced_classifier_id, enhanced_classifier_schema)\n",
     "        result = content_understanding_client.poll_result(response)\n",
@@ -468,7 +472,7 @@
     "## 11. Process Document with Enhanced Classifier\n",
     "\n",
     "Let's process the document again using our enhanced classifier.\n",
-    "Medical documents will now have additional fields extracted."
+    "Invoices and loan application documents will now have additional fields extracted."
    ]
   },
   {
@@ -505,7 +509,7 @@
    "source": [
     "## 12. View Enhanced Results with Extracted Fields\n",
     "\n",
-    "Let's see the classification results along with the extracted fields from medical documents."
+    "Let's see the classification results along with the fields extracted from invoices and loan application documents."
    ]
   },
   {
@@ -533,7 +537,7 @@
     "        print(f\"\\n📁 Category: {category}\")\n",
     "        print(f\"📄 Pages: {content.get('startPageNumber', '?')} - {content.get('endPageNumber', '?')}\")\n",
     "        \n",
-    "        # Show extracted fields for medical documents\n",
+    "        # Show any fields that were extracted for this classified section\n",
     "        fields = content.get('fields', {})\n",
     "        if fields:\n",
     "            print(\"\\n🔍 Extracted Information:\")\n",