88import uuid
99import json
1010
11- # For image conversion and vision API
12- from typing import List
13- from io import BytesIO
14- import requests # For REST API to Vision
15- from pdf2image import convert_from_bytes # For PDF to image conversion
16-
1711app = func .FunctionApp (http_auth_level = func .AuthLevel .FUNCTION )
1812
1913## DEFINITIONS
@@ -41,14 +35,13 @@ def analyze_pdf(form_recognizer_client, pdf_bytes):
4135 logging .info (f"Document has { len (result .pages )} page(s), { len (result .tables )} table(s), and { len (result .styles )} style(s)." )
4236 return result
4337
44- def extract_layout_data (result , visual_cues : List [ dict ] = None ):
38+ def extract_layout_data (result ):
4539 logging .info ("Extracting layout data from analysis result." )
4640
4741 layout_data = {
4842 "id" : str (uuid .uuid4 ()),
4943 "pages" : []
5044 }
51- visual_cues = visual_cues or [] # List of dicts with visual cue info per cell
5245
5346 # Log styles
5447 for idx , style in enumerate (result .styles ):
@@ -95,16 +88,12 @@ def extract_layout_data(result, visual_cues: List[dict] = None):
9588
9689 for cell in table .cells :
9790 content = cell .content .strip ()
98- # Find matching visual cue for this cell (if any)
99- cue = next ((vc for vc in visual_cues if vc .get ("page_number" ) == page .page_number and vc .get ("row_index" ) == cell .row_index and vc .get ("column_index" ) == cell .column_index ), None )
100- cell_info = {
91+ table_data ["cells" ].append ({
10192 "row_index" : cell .row_index ,
10293 "column_index" : cell .column_index ,
103- "content" : content ,
104- "visual_cue" : cue ["cue_type" ] if cue else None
105- }
106- table_data ["cells" ].append (cell_info )
107- logging .info (f"Cell[{ cell .row_index } ][{ cell .column_index } ]: '{ content } ', visual_cue: { cell_info ['visual_cue' ]} " )
94+ "content" : content
95+ })
96+ logging .info (f"Cell[{ cell .row_index } ][{ cell .column_index } ]: '{ content } '" )
10897
10998 page_data ["tables" ].append (table_data )
11099
@@ -167,31 +156,6 @@ def save_layout_data_to_cosmos(layout_data):
167156## MAIN
168157@app .blob_trigger (arg_name = "myblob" , path = "pdfinvoices/{name}" ,
169158 connection = "invoicecontosostorage_STORAGE" )
def call_vision_api(image_bytes, subscription_key, endpoint, timeout=30):
    """Send one image to the Azure AI Vision v3.2 ``analyze`` REST operation.

    Args:
        image_bytes: Encoded image sent as the raw request body
            (``application/octet-stream``).
        subscription_key: Value for the ``Ocp-Apim-Subscription-Key`` header.
        endpoint: Base URL of the Vision resource. A trailing slash is
            tolerated.
        timeout: Seconds to wait for the service before giving up. Without
            a timeout ``requests`` waits indefinitely, which would stall the
            function invocation on a hung connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        ValueError: If ``subscription_key`` or ``endpoint`` is missing
            (for example, an unset environment variable).
        requests.HTTPError: If the service answers with an error status.
        requests.Timeout: If the service does not answer within ``timeout``.
    """
    # Fail early with a clear message instead of a TypeError from
    # ``None + str`` or an opaque 401 from the service.
    if not subscription_key or not endpoint:
        raise ValueError("Vision API subscription key and endpoint are required.")

    # Strip a trailing slash so the joined URL never contains "//vision/...".
    vision_url = endpoint.rstrip("/") + "/vision/v3.2/analyze"
    headers = {
        'Ocp-Apim-Subscription-Key': subscription_key,
        'Content-Type': 'application/octet-stream',
    }
    params = {
        'visualFeatures': 'Objects,Color',  # Add more features if needed
    }
    response = requests.post(
        vision_url,
        headers=headers,
        params=params,
        data=image_bytes,
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()
182-
def extract_visual_cues_from_vision(vision_result, page_number):
    """Turn one page's Vision analysis result into a list of visual cues.

    Placeholder implementation: no parsing is done yet, so neither argument
    is read and the returned list is always empty.

    Args:
        vision_result: Decoded JSON returned by the Vision analyze call.
        page_number: Page number the analysed image was rendered from.

    Returns:
        A new, empty list on every call.
    """
    # TODO: parse vision_result for the cues of interest (gray fills,
    # checkmarks, hand-drawn marks) -- e.g. a 'checkmark' entry in
    # vision_result['objects'] or colour info indicating a gray fill -- and
    # emit one dict per cue, shaped like:
    #   {"page_number": page_number, "row_index": ..., "column_index": ...,
    #    "cue_type": "gray_fill"}
    return []
190-
def convert_pdf_to_images(pdf_bytes):
    """Render a PDF, given as raw bytes, to images.

    Thin wrapper around pdf2image's ``convert_from_bytes``; its result is
    returned unchanged.
    """
    # NOTE(review): presumably one image per page, in page order -- the
    # caller enumerates the result starting at page 1; confirm against
    # the pdf2image documentation.
    return convert_from_bytes(pdf_bytes)
194-
195159def BlobTriggerContosoPDFLayoutsDocIntelligence (myblob : func .InputStream ):
196160 logging .info (f"Python blob trigger function processed blob\n "
197161 f"Name: { myblob .name } \n "
@@ -212,26 +176,9 @@ def BlobTriggerContosoPDFLayoutsDocIntelligence(myblob: func.InputStream):
212176 logging .error (f"Error analyzing PDF: { e } " )
213177 return
214178
215- # --- Step: Convert PDF to image and call Azure AI Vision ---
216- visual_cues = []
217- try :
218- images = convert_pdf_to_images (pdf_bytes )
219- vision_key = os .getenv ("VISION_API_KEY" )
220- vision_endpoint = os .getenv ("VISION_API_ENDPOINT" )
221- for page_num , image in enumerate (images , start = 1 ):
222- img_bytes_io = BytesIO ()
223- image .save (img_bytes_io , format = 'JPEG' )
224- img_bytes = img_bytes_io .getvalue ()
225- vision_result = call_vision_api (img_bytes , vision_key , vision_endpoint )
226- cues = extract_visual_cues_from_vision (vision_result , page_num )
227- visual_cues .extend (cues )
228- logging .info (f"Visual cues extracted: { visual_cues } " )
229- except Exception as e :
230- logging .error (f"Error processing visual cues with AI Vision: { e } " )
231-
232179 try :
233- layout_data = extract_layout_data (result , visual_cues )
234- logging .info ("Successfully extracted and merged layout data." )
180+ layout_data = extract_layout_data (result )
181+ logging .info ("Successfully extracted layout data." )
235182 except Exception as e :
236183 logging .error (f"Error extracting layout data: { e } " )
237184 return
0 commit comments