Skip to content

Commit 25ce42d

Browse files
authored
Merge pull request #3 from MicrosoftCloudEssentials-LearningHub/functionApp-code
function app - for reuse
2 parents 23ff9ff + ab2d507 commit 25ce42d

File tree

9 files changed

+383
-0
lines changed

9 files changed

+383
-0
lines changed

src/.funcignore

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
.git*
2+
.vscode
3+
__azurite_db*__.json
4+
__blobstorage__
5+
__queuestorage__
6+
local.settings.json
7+
test
8+
.venv

src/.gitignore

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
pip-wheel-metadata/
24+
share/python-wheels/
25+
*.egg-info/
26+
.installed.cfg
27+
*.egg
28+
MANIFEST
29+
30+
# PyInstaller
31+
# Usually these files are written by a python script from a template
32+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
33+
*.manifest
34+
*.spec
35+
36+
# Installer logs
37+
pip-log.txt
38+
pip-delete-this-directory.txt
39+
40+
# Unit test / coverage reports
41+
htmlcov/
42+
.tox/
43+
.nox/
44+
.coverage
45+
.coverage.*
46+
.cache
47+
nosetests.xml
48+
coverage.xml
49+
*.cover
50+
.hypothesis/
51+
.pytest_cache/
52+
53+
# Translations
54+
*.mo
55+
*.pot
56+
57+
# Django stuff:
58+
*.log
59+
local_settings.py
60+
db.sqlite3
61+
62+
# Flask stuff:
63+
instance/
64+
.webassets-cache
65+
66+
# Scrapy stuff:
67+
.scrapy
68+
69+
# Sphinx documentation
70+
docs/_build/
71+
72+
# PyBuilder
73+
target/
74+
75+
# Jupyter Notebook
76+
.ipynb_checkpoints
77+
78+
# IPython
79+
profile_default/
80+
ipython_config.py
81+
82+
# pyenv
83+
.python-version
84+
85+
# pipenv
86+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
87+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
88+
# having no cross-platform support, pipenv may install dependencies that don’t work, or not
89+
# install all needed dependencies.
90+
#Pipfile.lock
91+
92+
# celery beat schedule file
93+
celerybeat-schedule
94+
95+
# SageMath parsed files
96+
*.sage.py
97+
98+
# Environments
99+
.env
100+
.venv
101+
env/
102+
venv/
103+
ENV/
104+
env.bak/
105+
venv.bak/
106+
107+
# Spyder project settings
108+
.spyderproject
109+
.spyproject
110+
111+
# Rope project settings
112+
.ropeproject
113+
114+
# mkdocs documentation
115+
/site
116+
117+
# mypy
118+
.mypy_cache/
119+
.dmypy.json
120+
dmypy.json
121+
122+
# Pyre type checker
123+
.pyre/
124+
125+
# Azure Functions artifacts
126+
bin
127+
obj
128+
appsettings.json
129+
local.settings.json
130+
131+
# Azurite artifacts
132+
__blobstorage__
133+
__queuestorage__
134+
__azurite_db*__.json
135+
.python_packages

src/.vscode/extensions.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"recommendations": [
3+
"ms-azuretools.vscode-azurefunctions",
4+
"ms-python.python"
5+
]
6+
}

src/.vscode/launch.json

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"version": "0.2.0",
3+
"configurations": [
4+
{
5+
"name": "Attach to Python Functions",
6+
"type": "debugpy",
7+
"request": "attach",
8+
"connect": {
9+
"host": "localhost",
10+
"port": 9091
11+
},
12+
"preLaunchTask": "func: host start"
13+
}
14+
]
15+
}

src/.vscode/settings.json

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"azureFunctions.deploySubpath": ".",
3+
"azureFunctions.scmDoBuildDuringDeployment": true,
4+
"azureFunctions.pythonVenv": ".venv",
5+
"azureFunctions.projectLanguage": "Python",
6+
"azureFunctions.projectRuntime": "~4",
7+
"debug.internalConsoleOptions": "neverOpen",
8+
"azureFunctions.projectLanguageModel": 2
9+
}

src/.vscode/tasks.json

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
{
2+
"version": "2.0.0",
3+
"tasks": [
4+
{
5+
"type": "func",
6+
"label": "func: host start",
7+
"command": "host start",
8+
"problemMatcher": "$func-python-watch",
9+
"isBackground": true,
10+
"dependsOn": "pip install (functions)"
11+
},
12+
{
13+
"label": "pip install (functions)",
14+
"type": "shell",
15+
"osx": {
16+
"command": "${config:azureFunctions.pythonVenv}/bin/python -m pip install -r requirements.txt"
17+
},
18+
"windows": {
19+
"command": "${config:azureFunctions.pythonVenv}\\Scripts\\python -m pip install -r requirements.txt"
20+
},
21+
"linux": {
22+
"command": "${config:azureFunctions.pythonVenv}/bin/python -m pip install -r requirements.txt"
23+
},
24+
"problemMatcher": []
25+
}
26+
]
27+
}

src/function_app.py

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
import logging
2+
import azure.functions as func
3+
from azure.ai.formrecognizer import DocumentAnalysisClient
4+
from azure.core.credentials import AzureKeyCredential
5+
from azure.cosmos import CosmosClient, PartitionKey, exceptions
6+
from azure.identity import DefaultAzureCredential
7+
import os
8+
import uuid
9+
10+
# Function app instance that the trigger decorator further down registers
# against. http_auth_level is set to AuthLevel.FUNCTION for HTTP functions.
app = func.FunctionApp(http_auth_level=func.AuthLevel.FUNCTION)
11+
12+
## DEFINITIONS
13+
def initialize_form_recognizer_client():
    """Build a Document Intelligence (Form Recognizer) client from app settings.

    Reads ``FORM_RECOGNIZER_ENDPOINT`` and ``FORM_RECOGNIZER_KEY`` from the
    environment and validates both before constructing the client.

    Returns:
        A ``DocumentAnalysisClient`` created with the configured endpoint and
        an ``AzureKeyCredential`` wrapping the configured key.

    Raises:
        ValueError: If ``FORM_RECOGNIZER_KEY`` is not set, or if
            ``FORM_RECOGNIZER_ENDPOINT`` is missing or empty.
    """
    endpoint = os.getenv("FORM_RECOGNIZER_ENDPOINT")
    key = os.getenv("FORM_RECOGNIZER_KEY")
    if not isinstance(key, str):
        raise ValueError("FORM_RECOGNIZER_KEY must be a string")
    # Fail fast with a clear message instead of logging "None" and handing a
    # missing endpoint to the SDK.
    if not endpoint:
        raise ValueError("FORM_RECOGNIZER_ENDPOINT must be set")
    logging.info(f"Form Recognizer endpoint: {endpoint}")
    return DocumentAnalysisClient(endpoint=endpoint, credential=AzureKeyCredential(key))
20+
21+
def read_pdf_content(myblob):
    """Read and return the content of the triggering blob.

    Args:
        myblob: Blob input object exposing a ``name`` attribute and a
            ``read()`` method.

    Returns:
        Whatever ``myblob.read()`` returns.
    """
    logging.info(f"Reading PDF content from blob: {myblob.name}")
    content = myblob.read()
    return content
24+
25+
def analyze_pdf(form_recognizer_client, pdf_bytes, model_id="prebuilt-invoice"):
    """Submit a document for analysis and return the finished result.

    Args:
        form_recognizer_client: Client exposing ``begin_analyze_document``.
        pdf_bytes: Document content, passed as the ``document`` argument.
        model_id: Identifier of the model to analyze with. Defaults to
            ``"prebuilt-invoice"``, the value that used to be hard-coded, so
            existing callers are unaffected.

    Returns:
        The object returned by the poller's ``result()`` call.
    """
    logging.info("Starting PDF analysis.")
    poller = form_recognizer_client.begin_analyze_document(
        model_id=model_id,
        document=pdf_bytes,
    )
    logging.info("PDF analysis in progress.")
    return poller.result()
33+
34+
def extract_invoice_data(result):
    """Flatten an invoice analysis result into a dict of strings.

    Every extracted value is converted with ``str`` so the returned structure
    only contains strings, lists and dicts. Missing fields, and fields whose
    value is ``None``, become the empty string.

    Args:
        result: Analysis result exposing ``documents``; each document exposes
            a ``fields`` mapping whose entries have a ``value`` attribute.

    Returns:
        A dict with a freshly generated UUID under ``"id"``, the customer and
        company header fields, and a ``"rentals"`` list with one entry per
        line item. When ``result`` holds several documents, header fields come
        from the last document while rentals accumulate across all of them.
    """
    logging.info("Extracting invoice data from analysis result.")
    invoice_data = {
        "id": str(uuid.uuid4()),
        "customer_name": "",
        "customer_email": "",
        "customer_address": "",
        "company_name": "",
        "company_phone": "",
        "company_address": "",
        "rentals": []
    }

    def serialize_field(field):
        # A field can be present while carrying no value; str(None) would
        # otherwise store the literal text "None".
        if not field or field.value is None:
            return ""
        return str(field.value)  # Convert to string

    for document in result.documents:
        fields = document.fields
        invoice_data["customer_name"] = serialize_field(fields.get("CustomerName"))
        invoice_data["customer_email"] = serialize_field(fields.get("CustomerEmail"))
        invoice_data["customer_address"] = serialize_field(fields.get("CustomerAddress"))
        invoice_data["company_name"] = serialize_field(fields.get("VendorName"))
        invoice_data["company_phone"] = serialize_field(fields.get("VendorPhoneNumber"))
        invoice_data["company_address"] = serialize_field(fields.get("VendorAddress"))

        # Look the field up once, and treat a present-but-empty Items field
        # the same as a missing one instead of iterating over None.
        items_field = fields.get("Items")
        items = items_field.value if items_field and items_field.value else []
        for item in items:
            item_value = item.value if item.value else {}
            rental = {
                "rental_date": serialize_field(item_value.get("Date")),
                # Title and description are both sourced from "Description".
                "title": serialize_field(item_value.get("Description")),
                "description": serialize_field(item_value.get("Description")),
                "quantity": serialize_field(item_value.get("Quantity")),
                "total_price": serialize_field(item_value.get("TotalPrice"))
            }
            invoice_data["rentals"].append(rental)

    logging.info(f"Successfully extracted invoice data: {invoice_data}")
    return invoice_data
75+
76+
def save_invoice_data_to_cosmos(invoice_data):
    """Upsert one extracted invoice into the Cosmos DB ``Invoices`` container.

    Connects to the account at ``COSMOS_DB_ENDPOINT`` using
    ``DefaultAzureCredential``, makes sure the ``ContosoDBDocIntellig``
    database and the ``Invoices`` container exist, then upserts the item.
    ``COSMOS_DB_KEY`` is not read: authentication is AAD-based, so no account
    key is needed.

    Args:
        invoice_data: Item to upsert, e.g. the dict built by
            ``extract_invoice_data``.

    Raises:
        Exception: Any error raised while connecting, ensuring the database or
            container, or upserting is propagated (connection and upsert
            errors are logged first) so the caller never reports a successful
            save after a failure.
    """
    endpoint = os.getenv("COSMOS_DB_ENDPOINT")
    try:
        client = CosmosClient(
            endpoint,
            credential=DefaultAzureCredential(),
            consistency_level='Session',
        )
    except Exception as e:
        logging.error(f"Error connecting to Cosmos DB: {e}")
        raise
    logging.info("Successfully connected to Cosmos DB using AAD default credential")

    database_name = "ContosoDBDocIntellig"
    container_name = "Invoices"

    # The *_if_not_exists helpers return a proxy whether or not the resource
    # already existed, so no CosmosResourceExistsError handling is needed and
    # the log lines no longer claim a creation that may not have happened.
    database = client.create_database_if_not_exists(database_name)
    logging.info(f"Database '{database_name}' is available.")

    # NOTE(review): extract_invoice_data does not set a "transactionId"
    # property, so items built in this file carry no explicit value for the
    # partition key path below — confirm that is intended.
    container = database.create_container_if_not_exists(
        id=container_name,
        partition_key=PartitionKey(path="/transactionId"),
        offer_throughput=400,
    )
    logging.info(f"Container '{container_name}' is available.")

    try:
        response = container.upsert_item(invoice_data)
    except Exception as e:
        logging.error(f"Error inserting item into Cosmos DB: {e}")
        raise
    logging.info(f"Saved processed invoice data to Cosmos DB: {response}")
124+
125+
## MAIN
126+
@app.blob_trigger(
    arg_name="myblob",
    path="pdfinvoices/{name}",
    connection="invoicecontosostorage_STORAGE",
)
def BlobTriggerContosoPDFInvoicesDocIntelligence(myblob: func.InputStream):
    """Process a blob from the ``pdfinvoices`` path end to end.

    Runs four stages in order: read the blob, analyze it, extract the invoice
    fields, and save them to Cosmos DB. A failure in any stage is logged and
    stops processing; the exception is not propagated.

    Args:
        myblob: Input stream for the blob that fired the trigger.
    """
    logging.info(f"Python blob trigger function processed blob\n"
                 f"Name: {myblob.name}\n"
                 f"Blob Size: {myblob.length} bytes")

    # Stage 1: build the analysis client and load the blob content.
    try:
        client = initialize_form_recognizer_client()
        content = read_pdf_content(myblob)
    except Exception as e:
        logging.error(f"Error reading PDF: {e}")
        return
    else:
        logging.info("Successfully read PDF content from blob.")

    # Stage 2: run the document analysis.
    try:
        analysis = analyze_pdf(client, content)
    except Exception as e:
        logging.error(f"Error analyzing PDF: {e}")
        return
    else:
        logging.info("Successfully analyzed PDF using Document Intelligence.")

    # Stage 3: flatten the analysis result into the item to store.
    try:
        invoice = extract_invoice_data(analysis)
    except Exception as e:
        logging.error(f"Error extracting invoice data: {e}")
        return
    else:
        logging.info(f"Extracted invoice data: {invoice}")

    # Stage 4: persist the item.
    try:
        save_invoice_data_to_cosmos(invoice)
    except Exception as e:
        logging.error(f"Error saving invoice data to Cosmos DB: {e}")
    else:
        logging.info("Successfully saved invoice data to Cosmos DB.")

src/host.json

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"version": "2.0",
3+
"logging": {
4+
"applicationInsights": {
5+
"samplingSettings": {
6+
"isEnabled": true,
7+
"excludedTypes": "Request"
8+
}
9+
}
10+
},
11+
"extensionBundle": {
12+
"id": "Microsoft.Azure.Functions.ExtensionBundle",
13+
"version": "[4.*, 5.0.0)"
14+
}
15+
}

src/requirements.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# DO NOT include azure-functions-worker in this file
2+
# The Python Worker is managed by Azure Functions platform
3+
# Manually managing azure-functions-worker may cause unexpected issues
4+
5+
azure-functions
6+
azure-ai-formrecognizer
7+
azure-core
8+
azure-cosmos==4.3.0
9+
azure-identity==1.7.0

0 commit comments

Comments
 (0)