Skip to content

Commit 51cfed3

Browse files
authored
original state
1 parent 2a2b52f commit 51cfed3

File tree

5 files changed

+19
-121
lines changed

5 files changed

+19
-121
lines changed

src/function_app.py

Lines changed: 7 additions & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -8,12 +8,6 @@
88
import uuid
99
import json
1010

11-
# For image conversion and vision API
12-
from typing import List
13-
from io import BytesIO
14-
import requests # For REST API to Vision
15-
from pdf2image import convert_from_bytes # For PDF to image conversion
16-
1711
app = func.FunctionApp(http_auth_level=func.AuthLevel.FUNCTION)
1812

1913
## DEFINITIONS
@@ -41,14 +35,13 @@ def analyze_pdf(form_recognizer_client, pdf_bytes):
4135
logging.info(f"Document has {len(result.pages)} page(s), {len(result.tables)} table(s), and {len(result.styles)} style(s).")
4236
return result
4337

44-
def extract_layout_data(result, visual_cues: List[dict] = None):
38+
def extract_layout_data(result):
4539
logging.info("Extracting layout data from analysis result.")
4640

4741
layout_data = {
4842
"id": str(uuid.uuid4()),
4943
"pages": []
5044
}
51-
visual_cues = visual_cues or [] # List of dicts with visual cue info per cell
5245

5346
# Log styles
5447
for idx, style in enumerate(result.styles):
@@ -95,16 +88,12 @@ def extract_layout_data(result, visual_cues: List[dict] = None):
9588

9689
for cell in table.cells:
9790
content = cell.content.strip()
98-
# Find matching visual cue for this cell (if any)
99-
cue = next((vc for vc in visual_cues if vc.get("page_number") == page.page_number and vc.get("row_index") == cell.row_index and vc.get("column_index") == cell.column_index), None)
100-
cell_info = {
91+
table_data["cells"].append({
10192
"row_index": cell.row_index,
10293
"column_index": cell.column_index,
103-
"content": content,
104-
"visual_cue": cue["cue_type"] if cue else None
105-
}
106-
table_data["cells"].append(cell_info)
107-
logging.info(f"Cell[{cell.row_index}][{cell.column_index}]: '{content}', visual_cue: {cell_info['visual_cue']}")
94+
"content": content
95+
})
96+
logging.info(f"Cell[{cell.row_index}][{cell.column_index}]: '{content}'")
10897

10998
page_data["tables"].append(table_data)
11099

@@ -167,31 +156,6 @@ def save_layout_data_to_cosmos(layout_data):
167156
## MAIN
168157
@app.blob_trigger(arg_name="myblob", path="pdfinvoices/{name}",
169158
connection="invoicecontosostorage_STORAGE")
170-
def call_vision_api(image_bytes, subscription_key, endpoint):
171-
vision_url = endpoint + "/vision/v3.2/analyze"
172-
headers = {
173-
'Ocp-Apim-Subscription-Key': subscription_key,
174-
'Content-Type': 'application/octet-stream'
175-
}
176-
params = {
177-
'visualFeatures': 'Objects,Color', # Add more features if needed
178-
}
179-
response = requests.post(vision_url, headers=headers, params=params, data=image_bytes)
180-
response.raise_for_status()
181-
return response.json()
182-
183-
def extract_visual_cues_from_vision(vision_result, page_number):
184-
# Example: Detect gray fills, checkmarks, hand-drawn marks
185-
cues = []
186-
# This is a placeholder. You need to parse vision_result for your cues.
187-
# For example, if vision_result['objects'] contains a 'checkmark' or color info for gray fill
188-
# cues.append({"page_number": page_number, "row_index": ..., "column_index": ..., "cue_type": "gray_fill"})
189-
return cues
190-
191-
def convert_pdf_to_images(pdf_bytes):
192-
images = convert_from_bytes(pdf_bytes)
193-
return images
194-
195159
def BlobTriggerContosoPDFLayoutsDocIntelligence(myblob: func.InputStream):
196160
logging.info(f"Python blob trigger function processed blob\n"
197161
f"Name: {myblob.name}\n"
@@ -212,26 +176,9 @@ def BlobTriggerContosoPDFLayoutsDocIntelligence(myblob: func.InputStream):
212176
logging.error(f"Error analyzing PDF: {e}")
213177
return
214178

215-
# --- Step: Convert PDF to image and call Azure AI Vision ---
216-
visual_cues = []
217-
try:
218-
images = convert_pdf_to_images(pdf_bytes)
219-
vision_key = os.getenv("VISION_API_KEY")
220-
vision_endpoint = os.getenv("VISION_API_ENDPOINT")
221-
for page_num, image in enumerate(images, start=1):
222-
img_bytes_io = BytesIO()
223-
image.save(img_bytes_io, format='JPEG')
224-
img_bytes = img_bytes_io.getvalue()
225-
vision_result = call_vision_api(img_bytes, vision_key, vision_endpoint)
226-
cues = extract_visual_cues_from_vision(vision_result, page_num)
227-
visual_cues.extend(cues)
228-
logging.info(f"Visual cues extracted: {visual_cues}")
229-
except Exception as e:
230-
logging.error(f"Error processing visual cues with AI Vision: {e}")
231-
232179
try:
233-
layout_data = extract_layout_data(result, visual_cues)
234-
logging.info("Successfully extracted and merged layout data.")
180+
layout_data = extract_layout_data(result)
181+
logging.info("Successfully extracted layout data.")
235182
except Exception as e:
236183
logging.error(f"Error extracting layout data: {e}")
237184
return

terraform-infrastructure/README.md

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ Costa Rica
55
[![GitHub](https://img.shields.io/badge/--181717?logo=github&logoColor=ffffff)](https://github.com/)
66
[brown9804](https://github.com/brown9804)
77

8-
Last updated: 2025-07-21
8+
Last updated: 2025-07-16
99

1010
----------
1111

@@ -109,7 +109,7 @@ graph TD;
109109

110110
<!-- START BADGE -->
111111
<div align="center">
112-
<img src="https://img.shields.io/badge/Total%20views-180-limegreen" alt="Total views">
113-
<p>Refresh Date: 2025-07-21</p>
112+
<img src="https://img.shields.io/badge/Total%20views-55-limegreen" alt="Total views">
113+
<p>Refresh Date: 2025-07-16</p>
114114
</div>
115115
<!-- END BADGE -->

terraform-infrastructure/main.tf

Lines changed: 1 addition & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -289,7 +289,6 @@ resource "azurerm_role_assignment" "contributor" {
289289
]
290290
}
291291

292-
293292
# Azure Form Recognizer (Document Intelligence)
294293
resource "azurerm_cognitive_account" "form_recognizer" {
295294
name = var.form_recognizer_name
@@ -300,27 +299,12 @@ resource "azurerm_cognitive_account" "form_recognizer" {
300299

301300
depends_on = [azurerm_resource_group.rg]
302301

302+
# Output the Form Recognizer name
303303
provisioner "local-exec" {
304304
command = "echo Form Recognizer: ${self.name}"
305305
}
306306
}
307307

308-
# Azure AI Vision (Cognitive Services)
309-
resource "azurerm_cognitive_account" "ai_vision" {
310-
name = var.ai_vision_name
311-
location = azurerm_resource_group.rg.location
312-
resource_group_name = azurerm_resource_group.rg.name
313-
kind = "CognitiveServices"
314-
sku_name = var.ai_vision_sku
315-
tags = var.ai_vision_tags
316-
317-
depends_on = [azurerm_resource_group.rg]
318-
319-
provisioner "local-exec" {
320-
command = "echo AI Vision: ${self.name}"
321-
}
322-
}
323-
324308
# We need to assign custom or built-in Cosmos DB SQL roles
325309
# (like Cosmos DB Built-in Data Reader, etc.) at the data plane level,
326310
# which is not currently supported directly in Terraform as of now.
@@ -389,10 +373,6 @@ resource "azurerm_linux_function_app" "function_app" {
389373

390374
"APPINSIGHTS_INSTRUMENTATIONKEY" = azurerm_application_insights.appinsights.instrumentation_key
391375
"APPLICATIONINSIGHTS_CONNECTION_STRING" = azurerm_application_insights.appinsights.connection_string
392-
393-
# Azure AI Vision settings
394-
"VISION_API_ENDPOINT" = azurerm_cognitive_account.ai_vision.endpoint
395-
"VISION_API_KEY" = azurerm_cognitive_account.ai_vision.primary_access_key
396376
}
397377

398378
depends_on = [
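(file name missing from this capture — the contents are Terraform variable values, presumably terraform-infrastructure/terraform.tfvars; confirm against the commit)

Lines changed: 8 additions & 17 deletions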
Original file line number | Diff line number | Diff line change
@@ -1,30 +1,21 @@
11
# Sample values
2-
subscription_id = "407f4106-0fd3-42e0-9348-3686dd1e7347" # "your-subscription_id"
3-
resource_group_name = "RG-PDFLayout-Processing-DocIntelligence" # "your-resource-group-name"
4-
location = "West US" # "your-location"
2+
subscription_id = "" # "your-subscription_id"
3+
resource_group_name = "RG-PDFLayout-Processing-DocIntelligence" # "your-resource-group-name"
4+
location = "West US" # "your-location"
55
# Storage Account
6-
storage_account_name = "storageaccbrownpdfix2" # "your-storage-account-name"
6+
storage_account_name = "storageaccbrownpdfix2" # "your-storage-account-name"
77
storage_account_name_runtime = "runtimestorebrownix2" # "your-runtime-storage-account-name"
88
# Function App
9-
function_app_name = "fapdfbrownix2" # "your-function-app-name"
9+
function_app_name = "fapdfbrownix2" # "your-function-app-name"
1010
# App Service Plan
1111
app_service_plan_name = "asppdfbrownix2" # "your-app-service-plan-name"
1212
# Application Insights
13-
app_insights_name = "apppdfbrownix2" # "your-app-insights-name"
13+
app_insights_name = "apppdfbrownix2" # "your-app-insights-name"
1414
# Log Analytics Workspace
1515
log_analytics_workspace_name = "logwspdfbrownix2" # "your-log-analytics-workspace-name"
1616
# Key Vault
17-
key_vault_name = "kvpdfrbrownrix2" # "your-key-vault-name"
17+
key_vault_name = "kvpdfrbrownix2" # "your-key-vault-name"
1818
# CosmosDB
1919
cosmosdb_account_name = "cosmospdfbrownix2" # "your-cosmosdb-account-name"
2020
# Form Recognizer -> Document Intelligence
21-
form_recognizer_name = "docintelligt01ix2" # "your-document-intelligence-name"
22-
23-
# AI Vision Service
24-
ai_vision_name = "aivisionpdfrbrownix2" # "your-ai-vision-name"
25-
ai_vision_sku = "S0"
26-
ai_vision_tags = {
27-
Environment = "Development"
28-
Project = "PDF Processing"
29-
Service = "AI Vision"
30-
}
21+
form_recognizer_name = "docintelligt01ix2" # "your-document-intelligence-name"

terraform-infrastructure/variables.tf

Lines changed: 0 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -48,26 +48,6 @@ variable "key_vault_name" {
4848
description = "The name of the Key Vault"
4949
type = string
5050
}
51-
52-
variable "ai_vision_name" {
53-
description = "The name of the AI Vision Cognitive Services account"
54-
type = string
55-
}
56-
57-
variable "ai_vision_sku" {
58-
description = "The SKU of the AI Vision Cognitive Services account"
59-
type = string
60-
default = "S0"
61-
}
62-
63-
variable "ai_vision_tags" {
64-
description = "Tags to be applied to the AI Vision resource"
65-
type = map(string)
66-
default = {
67-
Environment = "Development"
68-
Service = "AI Vision"
69-
}
70-
}
7151
variable "cosmosdb_account_name" {
7252
description = "The name of the CosmosDB account."
7353
type = string

0 commit comments

Comments
 (0)