1+ import logging
2+ import azure .functions as func
3+ from azure .ai .formrecognizer import DocumentAnalysisClient
4+ from azure .core .credentials import AzureKeyCredential
5+ from azure .cosmos import CosmosClient , PartitionKey , exceptions
6+ from azure .identity import DefaultAzureCredential
7+ import os
8+ import uuid
9+
# Function app instance that the blob trigger in this module registers on via
# its decorator; the app-wide HTTP auth level is set to FUNCTION.
app = func.FunctionApp(http_auth_level=func.AuthLevel.FUNCTION)
11+
12+ ## DEFINITIONS
def initialize_form_recognizer_client():
    """Build a Document Intelligence client from environment settings.

    Reads ``FORM_RECOGNIZER_ENDPOINT`` and ``FORM_RECOGNIZER_KEY`` from the
    process environment and authenticates with the key.

    Returns:
        A ``DocumentAnalysisClient`` bound to the configured endpoint.

    Raises:
        ValueError: If either environment variable is missing or empty.
    """
    endpoint = os.getenv("FORM_RECOGNIZER_ENDPOINT")
    key = os.getenv("FORM_RECOGNIZER_KEY")

    # os.getenv yields either a str or None, so "unset or empty" is the only
    # failure mode worth reporting; fail here with a clear message instead of
    # letting the SDK reject a None/empty value later on.
    if not key:
        raise ValueError("FORM_RECOGNIZER_KEY must be set")
    if not endpoint:
        raise ValueError("FORM_RECOGNIZER_ENDPOINT must be set")

    logging.info(f"Form Recognizer endpoint: {endpoint} ")
    return DocumentAnalysisClient(endpoint=endpoint, credential=AzureKeyCredential(key))
20+
def read_pdf_content(myblob):
    """Return the content of the triggering blob.

    Args:
        myblob: Blob input object; must expose ``name`` and ``read()``.

    Returns:
        Whatever ``myblob.read()`` produces.
    """
    blob_name = myblob.name
    logging.info(f"Reading PDF content from blob: {blob_name} ")
    pdf_content = myblob.read()
    return pdf_content
24+
def analyze_pdf(form_recognizer_client, pdf_bytes):
    """Submit a PDF to the ``prebuilt-invoice`` model and return the outcome.

    Args:
        form_recognizer_client: Client exposing ``begin_analyze_document``.
        pdf_bytes: Document content handed to the service.

    Returns:
        The value returned by the poller's ``result()`` call.
    """
    logging.info("Starting PDF analysis.")
    request_kwargs = {"model_id": "prebuilt-invoice", "document": pdf_bytes}
    analysis_poller = form_recognizer_client.begin_analyze_document(**request_kwargs)
    logging.info("PDF analysis in progress.")
    analysis_result = analysis_poller.result()
    return analysis_result
33+
def extract_invoice_data(result):
    """Flatten an invoice analysis result into a plain dictionary.

    Args:
        result: Analysis result exposing ``documents``. Each document exposes
            a ``fields`` mapping whose entries expose ``value``.

    Returns:
        A dict with a fresh UUID4 string ``id``, six customer/company fields
        as strings ("" when the field or its value is absent), and
        ``rentals``, a list with one dict per entry of the ``Items`` field.
        When several documents are present, the customer/company fields of
        the last document win while rentals accumulate across all documents.
    """
    logging.info("Extracting invoice data from analysis result.")

    # Output key -> source field name for the top-level invoice fields.
    header_fields = {
        "customer_name": "CustomerName",
        "customer_email": "CustomerEmail",
        "customer_address": "CustomerAddress",
        "company_name": "VendorName",
        "company_phone": "VendorPhoneNumber",
        "company_address": "VendorAddress",
    }

    invoice_data = {"id": str(uuid.uuid4())}
    invoice_data.update(dict.fromkeys(header_fields, ""))
    invoice_data["rentals"] = []

    def serialize_field(field):
        # A field can be present yet carry no value; store "" in that case
        # rather than the literal string "None".
        if not field or field.value is None:
            return ""
        return str(field.value)

    for document in result.documents or []:
        fields = document.fields or {}
        for target_key, source_name in header_fields.items():
            invoice_data[target_key] = serialize_field(fields.get(source_name))

        items_field = fields.get("Items")
        # Guard against a present-but-empty Items field (value is None).
        items = (items_field.value or []) if items_field else []
        for item in items:
            item_value = item.value or {}
            invoice_data["rentals"].append({
                "rental_date": serialize_field(item_value.get("Date")),
                # Title and description are both taken from "Description".
                "title": serialize_field(item_value.get("Description")),
                "description": serialize_field(item_value.get("Description")),
                "quantity": serialize_field(item_value.get("Quantity")),
                "total_price": serialize_field(item_value.get("TotalPrice")),
            })

    logging.info(f"Successfully extracted invoice data: {invoice_data} ")
    return invoice_data
75+
def save_invoice_data_to_cosmos(invoice_data):
    """Upsert one extracted invoice into the Cosmos DB ``Invoices`` container.

    Connects to the account named by ``COSMOS_DB_ENDPOINT`` using
    ``DefaultAzureCredential``, makes sure the ``ContosoDBDocIntellig``
    database and ``Invoices`` container are available, then upserts the item.

    Args:
        invoice_data: Item to store; passed unchanged to ``upsert_item``.

    Raises:
        ValueError: If ``COSMOS_DB_ENDPOINT`` is missing or empty.
        Exception: Connection, provisioning, and upsert errors are logged and
            re-raised so the caller never reports a failed save as a success.
    """
    endpoint = os.getenv("COSMOS_DB_ENDPOINT")
    if not endpoint:
        raise ValueError("COSMOS_DB_ENDPOINT must be set")

    try:
        aad_credentials = DefaultAzureCredential()
        client = CosmosClient(endpoint, credential=aad_credentials, consistency_level='Session')
        logging.info("Successfully connected to Cosmos DB using AAD default credential")
    except Exception as e:
        logging.error(f"Error connecting to Cosmos DB: {e} ")
        raise

    database_name = "ContosoDBDocIntellig"
    container_name = "Invoices"

    # The *_if_not_exists helpers look the resource up first and only create
    # it when it is missing, so no CosmosResourceExistsError handling and no
    # separate read() round-trip is needed afterwards.
    database = client.create_database_if_not_exists(database_name)
    logging.info(f"Database '{database_name}' is available.")

    # NOTE(review): extract_invoice_data does not set a "transactionId"
    # property, so stored items presumably have no partition key value --
    # confirm the intended partition key before relying on this layout.
    container = database.create_container_if_not_exists(
        id=container_name,
        partition_key=PartitionKey(path="/transactionId"),
        offer_throughput=400,
    )
    logging.info(f"Container '{container_name}' is available.")

    try:
        response = container.upsert_item(invoice_data)
        logging.info(f"Saved processed invoice data to Cosmos DB: {response} ")
    except Exception as e:
        logging.error(f"Error inserting item into Cosmos DB: {e} ")
        raise
124+
125+ ## MAIN
@app.blob_trigger(arg_name="myblob", path="pdfinvoices/{name}",
                  connection="invoicecontosostorage_STORAGE")
def BlobTriggerContosoPDFInvoicesDocIntelligence(myblob: func.InputStream):
    """Process a PDF invoice uploaded under ``pdfinvoices/``.

    Runs four stages in order: create the analysis client and read the blob,
    analyze the PDF, extract the invoice fields, and save them to Cosmos DB.
    A failure in any stage is logged with its traceback and ends processing;
    no exception propagates out of this function.

    Args:
        myblob: The blob that fired the trigger.
    """
    logging.info(f"Python blob trigger function processed blob\n "
                 f"Name: {myblob.name} \n "
                 f"Blob Size: {myblob.length} bytes")

    # Client creation gets its own handler so a configuration problem is not
    # reported as a PDF read failure.
    try:
        form_recognizer_client = initialize_form_recognizer_client()
    except Exception as e:
        logging.exception(f"Error initializing Form Recognizer client: {e}")
        return

    try:
        pdf_bytes = read_pdf_content(myblob)
        logging.info("Successfully read PDF content from blob.")
    except Exception as e:
        logging.exception(f"Error reading PDF: {e} ")
        return

    try:
        result = analyze_pdf(form_recognizer_client, pdf_bytes)
        logging.info("Successfully analyzed PDF using Document Intelligence.")
    except Exception as e:
        logging.exception(f"Error analyzing PDF: {e} ")
        return

    try:
        invoice_data = extract_invoice_data(result)
        logging.info(f"Extracted invoice data: {invoice_data} ")
    except Exception as e:
        logging.exception(f"Error extracting invoice data: {e} ")
        return

    try:
        save_invoice_data_to_cosmos(invoice_data)
        logging.info("Successfully saved invoice data to Cosmos DB.")
    except Exception as e:
        logging.exception(f"Error saving invoice data to Cosmos DB: {e} ")
0 commit comments