diff --git a/README.md b/README.md
index 58ac866..210e09c 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Demo: PDF Layout Extraction with Doc Intelligence
Supporting Multiple Document Versions with Visual Selection Cues (full-code approach)
+# Demo: PDF Layout Extraction with Doc Intelligence (full-code approach)
`Azure Storage + Document Intelligence + Function App + Cosmos DB`
@@ -8,16 +8,9 @@ Costa Rica
[](https://github.com/)
[brown9804](https://github.com/brown9804)
-Last updated: 2025-07-21
+Last updated: 2025-07-16
------------------------------
-
-> This solution is designed to be flexible and robust, supporting multiple versions of PDF documents with varying layouts—including those that use visual selection cues such as gray fills, hand-drawn Xs, checkmarks, or circles. By building on the [PDFs-Layouts-Processing-Fapp-DocIntelligence](https://github.com/MicrosoftCloudEssentials-LearningHub/PDFs-Layouts-Processing-Fapp-DocIntelligence) repository, we ensure that:
-
-- Table structure and text are extracted using Azure Document Intelligence (Layout model).
-- Visual selection cues are detected using Azure AI Vision or image preprocessing.
-- Visual indicators are mapped to structured data, returning only the selected values in a clean JSON format.
-- The logic is abstracted to support multiple layout variations, so the system adapts easily to new document formats and selection styles.
+----------
> [!IMPORTANT]
> This example is based on a `public network site and is intended for demonstration purposes only`. It showcases how several Azure resources can work together to achieve the desired result. Consider the section below about [Important Considerations for Production Environment](#important-considerations-for-production-environment). Please note that `these demos are intended as a guide and are based on my personal experiences. For official guidance, support, or more detailed information, please refer to Microsoft's official documentation or contact Microsoft directly`: [Microsoft Sales and Support](https://support.microsoft.com/contactus?ContactUsExperienceEntryPointAssetId=S.HP.SMC-HOME)
diff --git a/docs/automated-PDFLayoutprocessing-FunctionApp-DocIntellig-AI-Vision.drawio b/docs/automatedPDFLayoutprocessingFunctionAppDocIntellig.drawio
similarity index 81%
rename from docs/automated-PDFLayoutprocessing-FunctionApp-DocIntellig-AI-Vision.drawio
rename to docs/automatedPDFLayoutprocessingFunctionAppDocIntellig.drawio
index b05d1f0..8a5e0fc 100644
--- a/docs/automated-PDFLayoutprocessing-FunctionApp-DocIntellig-AI-Vision.drawio
+++ b/docs/automatedPDFLayoutprocessingFunctionAppDocIntellig.drawio
@@ -1,105 +1,95 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/function_app.py b/src/function_app.py
index 2efed7e..370c713 100644
--- a/src/function_app.py
+++ b/src/function_app.py
@@ -8,12 +8,6 @@
import uuid
import json
-# For image conversion and vision API
-from typing import List
-from io import BytesIO
-import requests # For REST API to Vision
-from pdf2image import convert_from_bytes # For PDF to image conversion
-
app = func.FunctionApp(http_auth_level=func.AuthLevel.FUNCTION)
## DEFINITIONS
@@ -41,14 +35,13 @@ def analyze_pdf(form_recognizer_client, pdf_bytes):
logging.info(f"Document has {len(result.pages)} page(s), {len(result.tables)} table(s), and {len(result.styles)} style(s).")
return result
-def extract_layout_data(result, visual_cues: List[dict] = None):
+def extract_layout_data(result):
logging.info("Extracting layout data from analysis result.")
layout_data = {
"id": str(uuid.uuid4()),
"pages": []
}
- visual_cues = visual_cues or [] # List of dicts with visual cue info per cell
# Log styles
for idx, style in enumerate(result.styles):
@@ -95,16 +88,12 @@ def extract_layout_data(result, visual_cues: List[dict] = None):
for cell in table.cells:
content = cell.content.strip()
- # Find matching visual cue for this cell (if any)
- cue = next((vc for vc in visual_cues if vc.get("page_number") == page.page_number and vc.get("row_index") == cell.row_index and vc.get("column_index") == cell.column_index), None)
- cell_info = {
+ table_data["cells"].append({
"row_index": cell.row_index,
"column_index": cell.column_index,
- "content": content,
- "visual_cue": cue["cue_type"] if cue else None
- }
- table_data["cells"].append(cell_info)
- logging.info(f"Cell[{cell.row_index}][{cell.column_index}]: '{content}', visual_cue: {cell_info['visual_cue']}")
+ "content": content
+ })
+ logging.info(f"Cell[{cell.row_index}][{cell.column_index}]: '{content}'")
page_data["tables"].append(table_data)
@@ -167,31 +156,6 @@ def save_layout_data_to_cosmos(layout_data):
## MAIN
@app.blob_trigger(arg_name="myblob", path="pdfinvoices/{name}",
connection="invoicecontosostorage_STORAGE")
-def call_vision_api(image_bytes, subscription_key, endpoint):
- vision_url = endpoint + "/vision/v3.2/analyze"
- headers = {
- 'Ocp-Apim-Subscription-Key': subscription_key,
- 'Content-Type': 'application/octet-stream'
- }
- params = {
- 'visualFeatures': 'Objects,Color', # Add more features if needed
- }
- response = requests.post(vision_url, headers=headers, params=params, data=image_bytes)
- response.raise_for_status()
- return response.json()
-
-def extract_visual_cues_from_vision(vision_result, page_number):
- # Example: Detect gray fills, checkmarks, hand-drawn marks
- cues = []
- # This is a placeholder. You need to parse vision_result for your cues.
- # For example, if vision_result['objects'] contains a 'checkmark' or color info for gray fill
- # cues.append({"page_number": page_number, "row_index": ..., "column_index": ..., "cue_type": "gray_fill"})
- return cues
-
-def convert_pdf_to_images(pdf_bytes):
- images = convert_from_bytes(pdf_bytes)
- return images
-
def BlobTriggerContosoPDFLayoutsDocIntelligence(myblob: func.InputStream):
logging.info(f"Python blob trigger function processed blob\n"
f"Name: {myblob.name}\n"
@@ -212,26 +176,9 @@ def BlobTriggerContosoPDFLayoutsDocIntelligence(myblob: func.InputStream):
logging.error(f"Error analyzing PDF: {e}")
return
- # --- Step: Convert PDF to image and call Azure AI Vision ---
- visual_cues = []
- try:
- images = convert_pdf_to_images(pdf_bytes)
- vision_key = os.getenv("VISION_API_KEY")
- vision_endpoint = os.getenv("VISION_API_ENDPOINT")
- for page_num, image in enumerate(images, start=1):
- img_bytes_io = BytesIO()
- image.save(img_bytes_io, format='JPEG')
- img_bytes = img_bytes_io.getvalue()
- vision_result = call_vision_api(img_bytes, vision_key, vision_endpoint)
- cues = extract_visual_cues_from_vision(vision_result, page_num)
- visual_cues.extend(cues)
- logging.info(f"Visual cues extracted: {visual_cues}")
- except Exception as e:
- logging.error(f"Error processing visual cues with AI Vision: {e}")
-
try:
- layout_data = extract_layout_data(result, visual_cues)
- logging.info("Successfully extracted and merged layout data.")
+ layout_data = extract_layout_data(result)
+ logging.info("Successfully extracted layout data.")
except Exception as e:
logging.error(f"Error extracting layout data: {e}")
return
diff --git a/terraform-infrastructure/main.tf b/terraform-infrastructure/main.tf
index e476224..956b5cb 100644
--- a/terraform-infrastructure/main.tf
+++ b/terraform-infrastructure/main.tf
@@ -289,7 +289,6 @@ resource "azurerm_role_assignment" "contributor" {
]
}
-
# Azure Form Recognizer (Document Intelligence)
resource "azurerm_cognitive_account" "form_recognizer" {
name = var.form_recognizer_name
@@ -300,27 +299,12 @@ resource "azurerm_cognitive_account" "form_recognizer" {
depends_on = [azurerm_resource_group.rg]
+ # Echo the Form Recognizer name on the machine running Terraform (local-exec provisioner, not a Terraform output value)
provisioner "local-exec" {
command = "echo Form Recognizer: ${self.name}"
}
}
-# Azure AI Vision (Cognitive Services)
-resource "azurerm_cognitive_account" "ai_vision" {
- name = var.ai_vision_name
- location = azurerm_resource_group.rg.location
- resource_group_name = azurerm_resource_group.rg.name
- kind = "CognitiveServices"
- sku_name = var.ai_vision_sku
- tags = var.ai_vision_tags
-
- depends_on = [azurerm_resource_group.rg]
-
- provisioner "local-exec" {
- command = "echo AI Vision: ${self.name}"
- }
-}
-
# We need to assign custom or built-in Cosmos DB SQL roles
# (like Cosmos DB Built-in Data Reader, etc.) at the data plane level,
# which is not currently supported directly in Terraform as of now.
@@ -389,10 +373,6 @@ resource "azurerm_linux_function_app" "function_app" {
"APPINSIGHTS_INSTRUMENTATIONKEY" = azurerm_application_insights.appinsights.instrumentation_key
"APPLICATIONINSIGHTS_CONNECTION_STRING" = azurerm_application_insights.appinsights.connection_string
-
- # Azure AI Vision settings
- "VISION_API_ENDPOINT" = azurerm_cognitive_account.ai_vision.endpoint
- "VISION_API_KEY" = azurerm_cognitive_account.ai_vision.primary_access_key
}
depends_on = [
diff --git a/terraform-infrastructure/terraform.tfvars b/terraform-infrastructure/terraform.tfvars
index 2f978b5..12a8bea 100644
--- a/terraform-infrastructure/terraform.tfvars
+++ b/terraform-infrastructure/terraform.tfvars
@@ -1,30 +1,21 @@
# Sample values
-subscription_id = "407f4106-0fd3-42e0-9348-3686dd1e7347" # "your-subscription_id"
-resource_group_name = "RG-PDFLayout-Processing-DocIntelligence" # "your-resource-group-name"
-location = "West US" # "your-location"
+subscription_id = "" # "your-subscription_id"
+resource_group_name = "RG-PDFLayout-Processing-DocIntelligence" # "your-resource-group-name"
+location = "West US" # "your-location"
# Storage Account
-storage_account_name = "storageaccbrownpdfix2" # "your-storage-account-name"
+storage_account_name = "storageaccbrownpdfix2" # "your-storage-account-name"
storage_account_name_runtime = "runtimestorebrownix2" # "your-runtime-storage-account-name"
# Function App
-function_app_name = "fapdfbrownix2" # "your-function-app-name"
+function_app_name = "fapdfbrownix2" # "your-function-app-name"
# App Service Plan
app_service_plan_name = "asppdfbrownix2" # "your-app-service-plan-name"
# Application Insights
-app_insights_name = "apppdfbrownix2" # "your-app-insights-name"
+app_insights_name = "apppdfbrownix2" # "your-app-insights-name"
# Log Analytics Workspace
log_analytics_workspace_name = "logwspdfbrownix2" # "your-log-analytics-workspace-name"
# Key Vault
-key_vault_name = "kvpdfrbrownrix2" # "your-key-vault-name"
+key_vault_name = "kvpdfrbrownix2" # "your-key-vault-name"
# CosmosDB
cosmosdb_account_name = "cosmospdfbrownix2" # "your-cosmosdb-account-name"
# Form Recognizer -> Document Intelligence
-form_recognizer_name = "docintelligt01ix2" # "your-document-intelligence-name"
-
-# AI Vision Service
-ai_vision_name = "aivisionpdfrbrownix2" # "your-ai-vision-name"
-ai_vision_sku = "S0"
-ai_vision_tags = {
- Environment = "Development"
- Project = "PDF Processing"
- Service = "AI Vision"
-}
+form_recognizer_name = "docintelligt01ix2" # "your-document-intelligence-name"
diff --git a/terraform-infrastructure/variables.tf b/terraform-infrastructure/variables.tf
index 33a04dd..a9ea20a 100644
--- a/terraform-infrastructure/variables.tf
+++ b/terraform-infrastructure/variables.tf
@@ -48,26 +48,6 @@ variable "key_vault_name" {
description = "The name of the Key Vault"
type = string
}
-
-variable "ai_vision_name" {
- description = "The name of the AI Vision Cognitive Services account"
- type = string
-}
-
-variable "ai_vision_sku" {
- description = "The SKU of the AI Vision Cognitive Services account"
- type = string
- default = "S0"
-}
-
-variable "ai_vision_tags" {
- description = "Tags to be applied to the AI Vision resource"
- type = map(string)
- default = {
- Environment = "Development"
- Service = "AI Vision"
- }
-}
variable "cosmosdb_account_name" {
description = "The name of the CosmosDB account."
type = string