Skip to content

Commit c481596

Browse files
authored
Merge pull request #6 from MicrosoftCloudEssentials-LearningHub/returning-state
Returning state
2 parents 9831cd3 + ccc4dd7 commit c481596

File tree

6 files changed

+114
-233
lines changed

6 files changed

+114
-233
lines changed

README.md

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Demo: PDF Layout Extraction with Doc Intelligence <br/> Supporting Multiple Document Versions with Visual Selection Cues (full-code approach)
1+
# Demo: PDF Layout Extraction with Doc Intelligence (full-code approach)
22

33
`Azure Storage + Document Intelligence + Function App + Cosmos DB`
44

@@ -8,16 +8,9 @@ Costa Rica
88
[![GitHub](https://img.shields.io/badge/--181717?logo=github&logoColor=ffffff)](https://github.com/)
99
[brown9804](https://github.com/brown9804)
1010

11-
Last updated: 2025-07-21
11+
Last updated: 2025-07-16
1212

13-
-----------------------------
14-
15-
> This solution is designed to be flexible and robust, supporting multiple versions of PDF documents with varying layouts—including those that use visual selection cues such as gray fills, hand-drawn Xs, checkmarks, or circles. By building on the [PDFs-Layouts-Processing-Fapp-DocIntelligence](https://github.com/MicrosoftCloudEssentials-LearningHub/PDFs-Layouts-Processing-Fapp-DocIntelligence) repository, we ensure that:
16-
17-
- Table structure and text are extracted using Azure Document Intelligence (Layout model).
18-
- Visual selection cues are detected using Azure AI Vision or image preprocessing.
19-
- Visual indicators are mapped to structured data, returning only the selected values in a clean JSON format.
20-
- The logic is abstracted to support multiple layout variations, so the system adapts easily to new document formats and selection styles.
13+
----------
2114

2215
> [!IMPORTANT]
2316
> This example is based on a `public network site and is intended for demonstration purposes only`. It showcases how several Azure resources can work together to achieve the desired result. Consider the section below about [Important Considerations for Production Environment](#important-considerations-for-production-environment). Please note that `these demos are intended as a guide and are based on my personal experiences. For official guidance, support, or more detailed information, please refer to Microsoft's official documentation or contact Microsoft directly`: [Microsoft Sales and Support](https://support.microsoft.com/contactus?ContactUsExperienceEntryPointAssetId=S.HP.SMC-HOME)
Original file line numberDiff line numberDiff line change
@@ -1,105 +1,95 @@
1-
<mxfile host="Electron" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/28.0.4 Chrome/138.0.7204.97 Electron/37.2.1 Safari/537.36" version="28.0.4">
2-
<diagram name="Page-1" id="_ZzkEdzZPlF0T37kGrCl">
3-
<mxGraphModel dx="732" dy="1532" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="850" pageHeight="1100" math="0" shadow="0">
4-
<root>
5-
<mxCell id="0" />
6-
<mxCell id="1" parent="0" />
7-
<mxCell id="SBEox3NDaokPfLYJbtWu-15" value="" style="rounded=0;whiteSpace=wrap;html=1;" parent="1" vertex="1">
8-
<mxGeometry x="20" y="-90" width="920" height="710" as="geometry" />
9-
</mxCell>
10-
<mxCell id="SBEox3NDaokPfLYJbtWu-2" value="Storage Account" style="image;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/azure2/storage/Storage_Accounts.svg;" parent="1" vertex="1">
11-
<mxGeometry x="240" y="136" width="75" height="60" as="geometry" />
12-
</mxCell>
13-
<mxCell id="SBEox3NDaokPfLYJbtWu-5" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.278;entryDx=0;entryDy=0;entryPerimeter=0;" parent="1" source="SBEox3NDaokPfLYJbtWu-1" target="SBEox3NDaokPfLYJbtWu-2" edge="1">
14-
<mxGeometry relative="1" as="geometry" />
15-
</mxCell>
16-
<mxCell id="SBEox3NDaokPfLYJbtWu-11" value="Upload" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="SBEox3NDaokPfLYJbtWu-5" vertex="1" connectable="0">
17-
<mxGeometry x="-0.2575" y="-1" relative="1" as="geometry">
18-
<mxPoint as="offset" />
19-
</mxGeometry>
20-
</mxCell>
21-
<mxCell id="SBEox3NDaokPfLYJbtWu-10" value="" style="group" parent="1" vertex="1" connectable="0">
22-
<mxGeometry x="50" y="350" width="86" height="90" as="geometry" />
23-
</mxCell>
24-
<mxCell id="SBEox3NDaokPfLYJbtWu-1" value="PDF Layouts" style="image;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/azure2/general/File.svg;" parent="SBEox3NDaokPfLYJbtWu-10" vertex="1">
25-
<mxGeometry x="40" width="46" height="56.68" as="geometry" />
26-
</mxCell>
27-
<mxCell id="SBEox3NDaokPfLYJbtWu-3" value="Employee" style="shape=umlActor;verticalLabelPosition=bottom;verticalAlign=top;html=1;outlineConnect=0;" parent="SBEox3NDaokPfLYJbtWu-10" vertex="1">
28-
<mxGeometry y="30" width="30" height="60" as="geometry" />
29-
</mxCell>
30-
<mxCell id="_wiV1sLz3M6k8l1JJ68s-4" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.126;entryY=0.408;entryDx=0;entryDy=0;entryPerimeter=0;" parent="1" source="SBEox3NDaokPfLYJbtWu-12" target="qB0o09IW0mbKmVrXtbLM-1" edge="1">
31-
<mxGeometry relative="1" as="geometry">
32-
<Array as="points">
33-
<mxPoint x="510" y="18" />
34-
</Array>
35-
</mxGeometry>
36-
</mxCell>
37-
<mxCell id="_wiV1sLz3M6k8l1JJ68s-5" value="Call API&amp;nbsp;" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="_wiV1sLz3M6k8l1JJ68s-4" vertex="1" connectable="0">
38-
<mxGeometry x="-0.2392" y="2" relative="1" as="geometry">
39-
<mxPoint y="1" as="offset" />
40-
</mxGeometry>
41-
</mxCell>
42-
<mxCell id="SBEox3NDaokPfLYJbtWu-12" value="Function App&lt;div&gt;&lt;br&gt;&lt;/div&gt;" style="image;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/azure2/compute/Function_Apps.svg;" parent="1" vertex="1">
43-
<mxGeometry x="510" y="300" width="68" height="60" as="geometry" />
44-
</mxCell>
45-
<mxCell id="SBEox3NDaokPfLYJbtWu-13" value="Resource Group" style="image;sketch=0;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/mscae/ResourceGroup.svg;" parent="1" vertex="1">
46-
<mxGeometry x="20" y="-90" width="50" height="40" as="geometry" />
47-
</mxCell>
48-
<mxCell id="SBEox3NDaokPfLYJbtWu-14" value="Subscription" style="image;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/azure2/general/Subscriptions.svg;" parent="1" vertex="1">
49-
<mxGeometry x="890" y="-90" width="44" height="71" as="geometry" />
50-
</mxCell>
51-
<mxCell id="SBEox3NDaokPfLYJbtWu-16" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=1.004;entryY=0.433;entryDx=0;entryDy=0;entryPerimeter=0;" parent="1" source="SBEox3NDaokPfLYJbtWu-12" target="SBEox3NDaokPfLYJbtWu-2" edge="1">
52-
<mxGeometry relative="1" as="geometry" />
53-
</mxCell>
54-
<mxCell id="SBEox3NDaokPfLYJbtWu-17" value="Blob Trigger" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="SBEox3NDaokPfLYJbtWu-16" vertex="1" connectable="0">
55-
<mxGeometry x="0.029" y="1" relative="1" as="geometry">
56-
<mxPoint as="offset" />
57-
</mxGeometry>
58-
</mxCell>
59-
<mxCell id="SBEox3NDaokPfLYJbtWu-20" value="Cosmos DB" style="image;sketch=0;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/mscae/CosmosDB.svg;" parent="1" vertex="1">
60-
<mxGeometry x="840" y="520" width="50" height="50" as="geometry" />
61-
</mxCell>
62-
<mxCell id="SBEox3NDaokPfLYJbtWu-21" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.359;exitY=1.043;exitDx=0;exitDy=0;exitPerimeter=0;" parent="1" source="SBEox3NDaokPfLYJbtWu-12" edge="1">
63-
<mxGeometry relative="1" as="geometry">
64-
<mxPoint x="730" y="390" as="sourcePoint" />
65-
<mxPoint x="850" y="569" as="targetPoint" />
66-
<Array as="points">
67-
<mxPoint x="540" y="363" />
68-
<mxPoint x="540" y="569" />
69-
</Array>
70-
</mxGeometry>
71-
</mxCell>
72-
<mxCell id="SBEox3NDaokPfLYJbtWu-22" value="Store parsed information" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="SBEox3NDaokPfLYJbtWu-21" vertex="1" connectable="0">
73-
<mxGeometry x="-0.0694" y="3" relative="1" as="geometry">
74-
<mxPoint as="offset" />
75-
</mxGeometry>
76-
</mxCell>
77-
<mxCell id="_wiV1sLz3M6k8l1JJ68s-1" value="Document Intelligence" style="image;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/azure2/ai_machine_learning/Form_Recognizers.svg;" parent="1" vertex="1">
78-
<mxGeometry x="680" y="90" width="63.2" height="68" as="geometry" />
79-
</mxCell>
80-
<mxCell id="_wiV1sLz3M6k8l1JJ68s-6" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.989;entryY=0.598;entryDx=0;entryDy=0;entryPerimeter=0;" parent="1" source="_wiV1sLz3M6k8l1JJ68s-1" target="SBEox3NDaokPfLYJbtWu-12" edge="1">
81-
<mxGeometry relative="1" as="geometry">
82-
<Array as="points">
83-
<mxPoint x="720" y="336" />
84-
</Array>
85-
</mxGeometry>
86-
</mxCell>
87-
<mxCell id="_wiV1sLz3M6k8l1JJ68s-7" value="Analyze/Extract the information" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="_wiV1sLz3M6k8l1JJ68s-6" vertex="1" connectable="0">
88-
<mxGeometry x="-0.3952" y="-1" relative="1" as="geometry">
89-
<mxPoint as="offset" />
90-
</mxGeometry>
91-
</mxCell>
92-
<mxCell id="qB0o09IW0mbKmVrXtbLM-1" value="Azure &lt;br&gt;AI Vision&amp;nbsp;" style="image;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/azure2/ai_machine_learning/Computer_Vision.svg;" vertex="1" parent="1">
93-
<mxGeometry x="550" y="-10" width="68" height="68" as="geometry" />
94-
</mxCell>
95-
<mxCell id="qB0o09IW0mbKmVrXtbLM-3" style="rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=1;entryY=0.609;entryDx=0;entryDy=0;entryPerimeter=0;edgeStyle=orthogonalEdgeStyle;elbow=vertical;shape=link;" edge="1" parent="1" source="_wiV1sLz3M6k8l1JJ68s-1" target="qB0o09IW0mbKmVrXtbLM-1">
96-
<mxGeometry relative="1" as="geometry">
97-
<Array as="points">
98-
<mxPoint x="710" y="31" />
99-
</Array>
100-
</mxGeometry>
101-
</mxCell>
102-
</root>
103-
</mxGraphModel>
104-
</diagram>
105-
</mxfile>
1+
<mxfile host="Electron" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/27.0.9 Chrome/134.0.6998.205 Electron/35.4.0 Safari/537.36" version="27.0.9">
2+
<diagram name="Page-1" id="_ZzkEdzZPlF0T37kGrCl">
3+
<mxGraphModel dx="1281" dy="1822" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="850" pageHeight="1100" math="0" shadow="0">
4+
<root>
5+
<mxCell id="0" />
6+
<mxCell id="1" parent="0" />
7+
<mxCell id="SBEox3NDaokPfLYJbtWu-15" value="" style="rounded=0;whiteSpace=wrap;html=1;" parent="1" vertex="1">
8+
<mxGeometry x="20" width="920" height="620" as="geometry" />
9+
</mxCell>
10+
<mxCell id="SBEox3NDaokPfLYJbtWu-2" value="Storage Account" style="image;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/azure2/storage/Storage_Accounts.svg;" parent="1" vertex="1">
11+
<mxGeometry x="240" y="136" width="75" height="60" as="geometry" />
12+
</mxCell>
13+
<mxCell id="SBEox3NDaokPfLYJbtWu-5" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.278;entryDx=0;entryDy=0;entryPerimeter=0;" parent="1" source="SBEox3NDaokPfLYJbtWu-1" target="SBEox3NDaokPfLYJbtWu-2" edge="1">
14+
<mxGeometry relative="1" as="geometry" />
15+
</mxCell>
16+
<mxCell id="SBEox3NDaokPfLYJbtWu-11" value="Upload" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="SBEox3NDaokPfLYJbtWu-5" vertex="1" connectable="0">
17+
<mxGeometry x="-0.2575" y="-1" relative="1" as="geometry">
18+
<mxPoint as="offset" />
19+
</mxGeometry>
20+
</mxCell>
21+
<mxCell id="SBEox3NDaokPfLYJbtWu-10" value="" style="group" parent="1" vertex="1" connectable="0">
22+
<mxGeometry x="50" y="350" width="86" height="90" as="geometry" />
23+
</mxCell>
24+
<mxCell id="SBEox3NDaokPfLYJbtWu-1" value="PDF Layouts" style="image;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/azure2/general/File.svg;" parent="SBEox3NDaokPfLYJbtWu-10" vertex="1">
25+
<mxGeometry x="40" width="46" height="56.68" as="geometry" />
26+
</mxCell>
27+
<mxCell id="SBEox3NDaokPfLYJbtWu-3" value="Employee" style="shape=umlActor;verticalLabelPosition=bottom;verticalAlign=top;html=1;outlineConnect=0;" parent="SBEox3NDaokPfLYJbtWu-10" vertex="1">
28+
<mxGeometry y="30" width="30" height="60" as="geometry" />
29+
</mxCell>
30+
<mxCell id="_wiV1sLz3M6k8l1JJ68s-4" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.017;entryY=0.605;entryDx=0;entryDy=0;entryPerimeter=0;" parent="1" source="SBEox3NDaokPfLYJbtWu-12" target="_wiV1sLz3M6k8l1JJ68s-1" edge="1">
31+
<mxGeometry relative="1" as="geometry">
32+
<Array as="points">
33+
<mxPoint x="540" y="131" />
34+
</Array>
35+
</mxGeometry>
36+
</mxCell>
37+
<mxCell id="_wiV1sLz3M6k8l1JJ68s-5" value="Call API&amp;nbsp;" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="_wiV1sLz3M6k8l1JJ68s-4" vertex="1" connectable="0">
38+
<mxGeometry x="-0.2392" y="2" relative="1" as="geometry">
39+
<mxPoint y="1" as="offset" />
40+
</mxGeometry>
41+
</mxCell>
42+
<mxCell id="SBEox3NDaokPfLYJbtWu-12" value="Function App&lt;div&gt;&lt;br&gt;&lt;/div&gt;" style="image;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/azure2/compute/Function_Apps.svg;" parent="1" vertex="1">
43+
<mxGeometry x="510" y="300" width="68" height="60" as="geometry" />
44+
</mxCell>
45+
<mxCell id="SBEox3NDaokPfLYJbtWu-13" value="Resource Group" style="image;sketch=0;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/mscae/ResourceGroup.svg;" parent="1" vertex="1">
46+
<mxGeometry x="20" width="50" height="40" as="geometry" />
47+
</mxCell>
48+
<mxCell id="SBEox3NDaokPfLYJbtWu-14" value="Subscription" style="image;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/azure2/general/Subscriptions.svg;" parent="1" vertex="1">
49+
<mxGeometry x="890" y="-20" width="44" height="71" as="geometry" />
50+
</mxCell>
51+
<mxCell id="SBEox3NDaokPfLYJbtWu-16" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=1.004;entryY=0.433;entryDx=0;entryDy=0;entryPerimeter=0;" parent="1" source="SBEox3NDaokPfLYJbtWu-12" target="SBEox3NDaokPfLYJbtWu-2" edge="1">
52+
<mxGeometry relative="1" as="geometry" />
53+
</mxCell>
54+
<mxCell id="SBEox3NDaokPfLYJbtWu-17" value="Blob Trigger" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="SBEox3NDaokPfLYJbtWu-16" vertex="1" connectable="0">
55+
<mxGeometry x="0.029" y="1" relative="1" as="geometry">
56+
<mxPoint as="offset" />
57+
</mxGeometry>
58+
</mxCell>
59+
<mxCell id="SBEox3NDaokPfLYJbtWu-20" value="Cosmos DB" style="image;sketch=0;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/mscae/CosmosDB.svg;" parent="1" vertex="1">
60+
<mxGeometry x="840" y="520" width="50" height="50" as="geometry" />
61+
</mxCell>
62+
<mxCell id="SBEox3NDaokPfLYJbtWu-21" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.359;exitY=1.043;exitDx=0;exitDy=0;exitPerimeter=0;" parent="1" source="SBEox3NDaokPfLYJbtWu-12" edge="1">
63+
<mxGeometry relative="1" as="geometry">
64+
<mxPoint x="730" y="390" as="sourcePoint" />
65+
<mxPoint x="850" y="569" as="targetPoint" />
66+
<Array as="points">
67+
<mxPoint x="540" y="363" />
68+
<mxPoint x="540" y="569" />
69+
</Array>
70+
</mxGeometry>
71+
</mxCell>
72+
<mxCell id="SBEox3NDaokPfLYJbtWu-22" value="Store parsed information" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="SBEox3NDaokPfLYJbtWu-21" vertex="1" connectable="0">
73+
<mxGeometry x="-0.0694" y="3" relative="1" as="geometry">
74+
<mxPoint as="offset" />
75+
</mxGeometry>
76+
</mxCell>
77+
<mxCell id="_wiV1sLz3M6k8l1JJ68s-1" value="Document Intelligence" style="image;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/azure2/ai_machine_learning/Form_Recognizers.svg;" parent="1" vertex="1">
78+
<mxGeometry x="680" y="90" width="63.2" height="68" as="geometry" />
79+
</mxCell>
80+
<mxCell id="_wiV1sLz3M6k8l1JJ68s-6" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.989;entryY=0.598;entryDx=0;entryDy=0;entryPerimeter=0;" parent="1" source="_wiV1sLz3M6k8l1JJ68s-1" target="SBEox3NDaokPfLYJbtWu-12" edge="1">
81+
<mxGeometry relative="1" as="geometry">
82+
<Array as="points">
83+
<mxPoint x="720" y="336" />
84+
</Array>
85+
</mxGeometry>
86+
</mxCell>
87+
<mxCell id="_wiV1sLz3M6k8l1JJ68s-7" value="Analyze/Extract the information" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="_wiV1sLz3M6k8l1JJ68s-6" vertex="1" connectable="0">
88+
<mxGeometry x="-0.3952" y="-1" relative="1" as="geometry">
89+
<mxPoint as="offset" />
90+
</mxGeometry>
91+
</mxCell>
92+
</root>
93+
</mxGraphModel>
94+
</diagram>
95+
</mxfile>

0 commit comments

Comments
 (0)