Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 3 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Demo: PDF Layout Extraction with Doc Intelligence <br/> Supporting Multiple Document Versions with Visual Selection Cues (full-code approach)
# Demo: PDF Layout Extraction with Doc Intelligence (full-code approach)

`Azure Storage + Document Intelligence + Function App + Cosmos DB`

Expand All @@ -8,16 +8,9 @@ Costa Rica
[![GitHub](https://img.shields.io/badge/--181717?logo=github&logoColor=ffffff)](https://github.com/)
[brown9804](https://github.com/brown9804)

Last updated: 2025-07-21
Last updated: 2025-07-16

-----------------------------

> This solution is designed to be flexible and robust, supporting multiple versions of PDF documents with varying layouts—including those that use visual selection cues such as gray fills, hand-drawn Xs, checkmarks, or circles. By building on the [PDFs-Layouts-Processing-Fapp-DocIntelligence](https://github.com/MicrosoftCloudEssentials-LearningHub/PDFs-Layouts-Processing-Fapp-DocIntelligence) repository, we ensure that:

- Table structure and text are extracted using Azure Document Intelligence (Layout model).
- Visual selection cues are detected using Azure AI Vision or image preprocessing.
- Visual indicators are mapped to structured data, returning only the selected values in a clean JSON format.
- The logic is abstracted to support multiple layout variations, so the system adapts easily to new document formats and selection styles.
----------

> [!IMPORTANT]
> This example is based on a `public network site and is intended for demonstration purposes only`. It showcases how several Azure resources can work together to achieve the desired result. See the section below on [Important Considerations for Production Environment](#important-considerations-for-production-environment). Please note that `these demos are intended as a guide and are based on my personal experience. For official guidance, support, or more detailed information, please refer to Microsoft's official documentation or contact Microsoft directly`: [Microsoft Sales and Support](https://support.microsoft.com/contactus?ContactUsExperienceEntryPointAssetId=S.HP.SMC-HOME)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,105 +1,95 @@
<mxfile host="Electron" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/28.0.4 Chrome/138.0.7204.97 Electron/37.2.1 Safari/537.36" version="28.0.4">
  <diagram name="Page-1" id="_ZzkEdzZPlF0T37kGrCl">
    <mxGraphModel dx="732" dy="1532" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="850" pageHeight="1100" math="0" shadow="0">
      <root>
        <!-- Structural cells: cell 0 has no parent; cell 1 is its child and is the parent of the top-level shapes and edges below. -->
        <mxCell id="0"/>
        <mxCell id="1" parent="0"/>

        <!-- Unlabelled 920x710 rectangle at (20,-90); the Resource Group icon further down shares its top-left corner. -->
        <mxCell id="SBEox3NDaokPfLYJbtWu-15" value="" style="rounded=0;whiteSpace=wrap;html=1;" parent="1" vertex="1">
          <mxGeometry x="20" y="-90" width="920" height="710" as="geometry"/>
        </mxCell>

        <!-- Image node labelled "Storage Account". -->
        <mxCell id="SBEox3NDaokPfLYJbtWu-2" value="Storage Account" style="image;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/azure2/storage/Storage_Accounts.svg;" parent="1" vertex="1">
          <mxGeometry x="240" y="136" width="75" height="60" as="geometry"/>
        </mxCell>

        <!-- Edge from "PDF Layouts" to "Storage Account"; its child cell carries the label "Upload". -->
        <mxCell id="SBEox3NDaokPfLYJbtWu-5" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.278;entryDx=0;entryDy=0;entryPerimeter=0;" parent="1" source="SBEox3NDaokPfLYJbtWu-1" target="SBEox3NDaokPfLYJbtWu-2" edge="1">
          <mxGeometry relative="1" as="geometry"/>
        </mxCell>
        <mxCell id="SBEox3NDaokPfLYJbtWu-11" value="Upload" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="SBEox3NDaokPfLYJbtWu-5" vertex="1" connectable="0">
          <mxGeometry x="-0.2575" y="-1" relative="1" as="geometry">
            <mxPoint as="offset"/>
          </mxGeometry>
        </mxCell>

        <!-- Group cell that parents the "PDF Layouts" image and the "Employee" actor; child coordinates are given without the group's x/y offset. -->
        <mxCell id="SBEox3NDaokPfLYJbtWu-10" value="" style="group" parent="1" vertex="1" connectable="0">
          <mxGeometry x="50" y="350" width="86" height="90" as="geometry"/>
        </mxCell>
        <mxCell id="SBEox3NDaokPfLYJbtWu-1" value="PDF Layouts" style="image;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/azure2/general/File.svg;" parent="SBEox3NDaokPfLYJbtWu-10" vertex="1">
          <mxGeometry x="40" width="46" height="56.68" as="geometry"/>
        </mxCell>
        <mxCell id="SBEox3NDaokPfLYJbtWu-3" value="Employee" style="shape=umlActor;verticalLabelPosition=bottom;verticalAlign=top;html=1;outlineConnect=0;" parent="SBEox3NDaokPfLYJbtWu-10" vertex="1">
          <mxGeometry y="30" width="30" height="60" as="geometry"/>
        </mxCell>

        <!-- Edge from "Function App" to the Azure AI Vision node, routed through (510,18); its child cell carries the label "Call API". -->
        <mxCell id="_wiV1sLz3M6k8l1JJ68s-4" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.126;entryY=0.408;entryDx=0;entryDy=0;entryPerimeter=0;" parent="1" source="SBEox3NDaokPfLYJbtWu-12" target="qB0o09IW0mbKmVrXtbLM-1" edge="1">
          <mxGeometry relative="1" as="geometry">
            <Array as="points">
              <mxPoint x="510" y="18"/>
            </Array>
          </mxGeometry>
        </mxCell>
        <mxCell id="_wiV1sLz3M6k8l1JJ68s-5" value="Call API&amp;nbsp;" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="_wiV1sLz3M6k8l1JJ68s-4" vertex="1" connectable="0">
          <mxGeometry x="-0.2392" y="2" relative="1" as="geometry">
            <mxPoint y="1" as="offset"/>
          </mxGeometry>
        </mxCell>

        <!-- Image node labelled "Function App" (the label value also contains an escaped empty div/br). -->
        <mxCell id="SBEox3NDaokPfLYJbtWu-12" value="Function App&lt;div&gt;&lt;br&gt;&lt;/div&gt;" style="image;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/azure2/compute/Function_Apps.svg;" parent="1" vertex="1">
          <mxGeometry x="510" y="300" width="68" height="60" as="geometry"/>
        </mxCell>

        <!-- "Resource Group" and "Subscription" icons, both positioned at y=-90. -->
        <mxCell id="SBEox3NDaokPfLYJbtWu-13" value="Resource Group" style="image;sketch=0;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/mscae/ResourceGroup.svg;" parent="1" vertex="1">
          <mxGeometry x="20" y="-90" width="50" height="40" as="geometry"/>
        </mxCell>
        <mxCell id="SBEox3NDaokPfLYJbtWu-14" value="Subscription" style="image;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/azure2/general/Subscriptions.svg;" parent="1" vertex="1">
          <mxGeometry x="890" y="-90" width="44" height="71" as="geometry"/>
        </mxCell>

        <!-- Edge whose source is "Function App" and whose target is "Storage Account"; its child cell carries the label "Blob Trigger". -->
        <mxCell id="SBEox3NDaokPfLYJbtWu-16" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=1.004;entryY=0.433;entryDx=0;entryDy=0;entryPerimeter=0;" parent="1" source="SBEox3NDaokPfLYJbtWu-12" target="SBEox3NDaokPfLYJbtWu-2" edge="1">
          <mxGeometry relative="1" as="geometry"/>
        </mxCell>
        <mxCell id="SBEox3NDaokPfLYJbtWu-17" value="Blob Trigger" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="SBEox3NDaokPfLYJbtWu-16" vertex="1" connectable="0">
          <mxGeometry x="0.029" y="1" relative="1" as="geometry">
            <mxPoint as="offset"/>
          </mxGeometry>
        </mxCell>

        <!-- Image node labelled "Cosmos DB", occupying x 840 to 890 and y 520 to 570. -->
        <mxCell id="SBEox3NDaokPfLYJbtWu-20" value="Cosmos DB" style="image;sketch=0;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/mscae/CosmosDB.svg;" parent="1" vertex="1">
          <mxGeometry x="840" y="520" width="50" height="50" as="geometry"/>
        </mxCell>

        <!-- Edge leaving "Function App" with no target cell: it ends at the fixed point (850,569) after waypoints (540,363) and (540,569). Label: "Store parsed information". -->
        <!-- NOTE(review): (850,569) lies inside the "Cosmos DB" bounds, so this edge presumably is meant to attach to that node; confirm before adding a target attribute. -->
        <mxCell id="SBEox3NDaokPfLYJbtWu-21" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.359;exitY=1.043;exitDx=0;exitDy=0;exitPerimeter=0;" parent="1" source="SBEox3NDaokPfLYJbtWu-12" edge="1">
          <mxGeometry relative="1" as="geometry">
            <mxPoint x="730" y="390" as="sourcePoint"/>
            <mxPoint x="850" y="569" as="targetPoint"/>
            <Array as="points">
              <mxPoint x="540" y="363"/>
              <mxPoint x="540" y="569"/>
            </Array>
          </mxGeometry>
        </mxCell>
        <mxCell id="SBEox3NDaokPfLYJbtWu-22" value="Store parsed information" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="SBEox3NDaokPfLYJbtWu-21" vertex="1" connectable="0">
          <mxGeometry x="-0.0694" y="3" relative="1" as="geometry">
            <mxPoint as="offset"/>
          </mxGeometry>
        </mxCell>

        <!-- Image node labelled "Document Intelligence". -->
        <mxCell id="_wiV1sLz3M6k8l1JJ68s-1" value="Document Intelligence" style="image;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/azure2/ai_machine_learning/Form_Recognizers.svg;" parent="1" vertex="1">
          <mxGeometry x="680" y="90" width="63.2" height="68" as="geometry"/>
        </mxCell>

        <!-- Edge from "Document Intelligence" to "Function App", routed through (720,336); its child cell carries the label "Analyze/Extract the information". -->
        <mxCell id="_wiV1sLz3M6k8l1JJ68s-6" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.989;entryY=0.598;entryDx=0;entryDy=0;entryPerimeter=0;" parent="1" source="_wiV1sLz3M6k8l1JJ68s-1" target="SBEox3NDaokPfLYJbtWu-12" edge="1">
          <mxGeometry relative="1" as="geometry">
            <Array as="points">
              <mxPoint x="720" y="336"/>
            </Array>
          </mxGeometry>
        </mxCell>
        <mxCell id="_wiV1sLz3M6k8l1JJ68s-7" value="Analyze/Extract the information" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="_wiV1sLz3M6k8l1JJ68s-6" vertex="1" connectable="0">
          <mxGeometry x="-0.3952" y="-1" relative="1" as="geometry">
            <mxPoint as="offset"/>
          </mxGeometry>
        </mxCell>

        <!-- Image node whose label reads "Azure", a line break, then "AI Vision". -->
        <mxCell id="qB0o09IW0mbKmVrXtbLM-1" value="Azure &lt;br&gt;AI Vision&amp;nbsp;" style="image;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/azure2/ai_machine_learning/Computer_Vision.svg;" parent="1" vertex="1">
          <mxGeometry x="550" y="-10" width="68" height="68" as="geometry"/>
        </mxCell>

        <!-- Unlabelled edge drawn with shape=link from "Document Intelligence" to the Azure AI Vision node, routed through (710,31). -->
        <mxCell id="qB0o09IW0mbKmVrXtbLM-3" style="rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=1;entryY=0.609;entryDx=0;entryDy=0;entryPerimeter=0;edgeStyle=orthogonalEdgeStyle;elbow=vertical;shape=link;" parent="1" source="_wiV1sLz3M6k8l1JJ68s-1" target="qB0o09IW0mbKmVrXtbLM-1" edge="1">
          <mxGeometry relative="1" as="geometry">
            <Array as="points">
              <mxPoint x="710" y="31"/>
            </Array>
          </mxGeometry>
        </mxCell>
      </root>
    </mxGraphModel>
  </diagram>
</mxfile>
<mxfile host="Electron" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/27.0.9 Chrome/134.0.6998.205 Electron/35.4.0 Safari/537.36" version="27.0.9">
  <diagram name="Page-1" id="_ZzkEdzZPlF0T37kGrCl">
    <mxGraphModel dx="1281" dy="1822" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="850" pageHeight="1100" math="0" shadow="0">
      <root>
        <!-- Structural cells: cell 0 has no parent; cell 1 is its child and is the parent of the top-level shapes and edges below. -->
        <mxCell id="0"/>
        <mxCell id="1" parent="0"/>

        <!-- Unlabelled 920x620 rectangle at x=20 (no y attribute is given); the Resource Group icon further down uses the same x and also omits y. -->
        <mxCell id="SBEox3NDaokPfLYJbtWu-15" value="" style="rounded=0;whiteSpace=wrap;html=1;" parent="1" vertex="1">
          <mxGeometry x="20" width="920" height="620" as="geometry"/>
        </mxCell>

        <!-- Image node labelled "Storage Account". -->
        <mxCell id="SBEox3NDaokPfLYJbtWu-2" value="Storage Account" style="image;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/azure2/storage/Storage_Accounts.svg;" parent="1" vertex="1">
          <mxGeometry x="240" y="136" width="75" height="60" as="geometry"/>
        </mxCell>

        <!-- Edge from "PDF Layouts" to "Storage Account"; its child cell carries the label "Upload". -->
        <mxCell id="SBEox3NDaokPfLYJbtWu-5" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.278;entryDx=0;entryDy=0;entryPerimeter=0;" parent="1" source="SBEox3NDaokPfLYJbtWu-1" target="SBEox3NDaokPfLYJbtWu-2" edge="1">
          <mxGeometry relative="1" as="geometry"/>
        </mxCell>
        <mxCell id="SBEox3NDaokPfLYJbtWu-11" value="Upload" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="SBEox3NDaokPfLYJbtWu-5" vertex="1" connectable="0">
          <mxGeometry x="-0.2575" y="-1" relative="1" as="geometry">
            <mxPoint as="offset"/>
          </mxGeometry>
        </mxCell>

        <!-- Group cell that parents the "PDF Layouts" image and the "Employee" actor; child coordinates are given without the group's x/y offset. -->
        <mxCell id="SBEox3NDaokPfLYJbtWu-10" value="" style="group" parent="1" vertex="1" connectable="0">
          <mxGeometry x="50" y="350" width="86" height="90" as="geometry"/>
        </mxCell>
        <mxCell id="SBEox3NDaokPfLYJbtWu-1" value="PDF Layouts" style="image;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/azure2/general/File.svg;" parent="SBEox3NDaokPfLYJbtWu-10" vertex="1">
          <mxGeometry x="40" width="46" height="56.68" as="geometry"/>
        </mxCell>
        <mxCell id="SBEox3NDaokPfLYJbtWu-3" value="Employee" style="shape=umlActor;verticalLabelPosition=bottom;verticalAlign=top;html=1;outlineConnect=0;" parent="SBEox3NDaokPfLYJbtWu-10" vertex="1">
          <mxGeometry y="30" width="30" height="60" as="geometry"/>
        </mxCell>

        <!-- Edge from "Function App" to "Document Intelligence", routed through (540,131); its child cell carries the label "Call API". -->
        <mxCell id="_wiV1sLz3M6k8l1JJ68s-4" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.017;entryY=0.605;entryDx=0;entryDy=0;entryPerimeter=0;" parent="1" source="SBEox3NDaokPfLYJbtWu-12" target="_wiV1sLz3M6k8l1JJ68s-1" edge="1">
          <mxGeometry relative="1" as="geometry">
            <Array as="points">
              <mxPoint x="540" y="131"/>
            </Array>
          </mxGeometry>
        </mxCell>
        <mxCell id="_wiV1sLz3M6k8l1JJ68s-5" value="Call API&amp;nbsp;" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="_wiV1sLz3M6k8l1JJ68s-4" vertex="1" connectable="0">
          <mxGeometry x="-0.2392" y="2" relative="1" as="geometry">
            <mxPoint y="1" as="offset"/>
          </mxGeometry>
        </mxCell>

        <!-- Image node labelled "Function App" (the label value also contains an escaped empty div/br). -->
        <mxCell id="SBEox3NDaokPfLYJbtWu-12" value="Function App&lt;div&gt;&lt;br&gt;&lt;/div&gt;" style="image;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/azure2/compute/Function_Apps.svg;" parent="1" vertex="1">
          <mxGeometry x="510" y="300" width="68" height="60" as="geometry"/>
        </mxCell>

        <!-- "Resource Group" icon at x=20 (no y attribute) and "Subscription" icon at (890,-20). -->
        <mxCell id="SBEox3NDaokPfLYJbtWu-13" value="Resource Group" style="image;sketch=0;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/mscae/ResourceGroup.svg;" parent="1" vertex="1">
          <mxGeometry x="20" width="50" height="40" as="geometry"/>
        </mxCell>
        <mxCell id="SBEox3NDaokPfLYJbtWu-14" value="Subscription" style="image;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/azure2/general/Subscriptions.svg;" parent="1" vertex="1">
          <mxGeometry x="890" y="-20" width="44" height="71" as="geometry"/>
        </mxCell>

        <!-- Edge whose source is "Function App" and whose target is "Storage Account"; its child cell carries the label "Blob Trigger". -->
        <mxCell id="SBEox3NDaokPfLYJbtWu-16" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=1.004;entryY=0.433;entryDx=0;entryDy=0;entryPerimeter=0;" parent="1" source="SBEox3NDaokPfLYJbtWu-12" target="SBEox3NDaokPfLYJbtWu-2" edge="1">
          <mxGeometry relative="1" as="geometry"/>
        </mxCell>
        <mxCell id="SBEox3NDaokPfLYJbtWu-17" value="Blob Trigger" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="SBEox3NDaokPfLYJbtWu-16" vertex="1" connectable="0">
          <mxGeometry x="0.029" y="1" relative="1" as="geometry">
            <mxPoint as="offset"/>
          </mxGeometry>
        </mxCell>

        <!-- Image node labelled "Cosmos DB", occupying x 840 to 890 and y 520 to 570. -->
        <mxCell id="SBEox3NDaokPfLYJbtWu-20" value="Cosmos DB" style="image;sketch=0;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/mscae/CosmosDB.svg;" parent="1" vertex="1">
          <mxGeometry x="840" y="520" width="50" height="50" as="geometry"/>
        </mxCell>

        <!-- Edge leaving "Function App" with no target cell: it ends at the fixed point (850,569) after waypoints (540,363) and (540,569). Label: "Store parsed information". -->
        <!-- NOTE(review): (850,569) lies inside the "Cosmos DB" bounds, so this edge presumably is meant to attach to that node; confirm before adding a target attribute. -->
        <mxCell id="SBEox3NDaokPfLYJbtWu-21" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.359;exitY=1.043;exitDx=0;exitDy=0;exitPerimeter=0;" parent="1" source="SBEox3NDaokPfLYJbtWu-12" edge="1">
          <mxGeometry relative="1" as="geometry">
            <mxPoint x="730" y="390" as="sourcePoint"/>
            <mxPoint x="850" y="569" as="targetPoint"/>
            <Array as="points">
              <mxPoint x="540" y="363"/>
              <mxPoint x="540" y="569"/>
            </Array>
          </mxGeometry>
        </mxCell>
        <mxCell id="SBEox3NDaokPfLYJbtWu-22" value="Store parsed information" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="SBEox3NDaokPfLYJbtWu-21" vertex="1" connectable="0">
          <mxGeometry x="-0.0694" y="3" relative="1" as="geometry">
            <mxPoint as="offset"/>
          </mxGeometry>
        </mxCell>

        <!-- Image node labelled "Document Intelligence". -->
        <mxCell id="_wiV1sLz3M6k8l1JJ68s-1" value="Document Intelligence" style="image;aspect=fixed;html=1;points=[];align=center;fontSize=12;image=img/lib/azure2/ai_machine_learning/Form_Recognizers.svg;" parent="1" vertex="1">
          <mxGeometry x="680" y="90" width="63.2" height="68" as="geometry"/>
        </mxCell>

        <!-- Edge from "Document Intelligence" to "Function App", routed through (720,336); its child cell carries the label "Analyze/Extract the information". -->
        <mxCell id="_wiV1sLz3M6k8l1JJ68s-6" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.989;entryY=0.598;entryDx=0;entryDy=0;entryPerimeter=0;" parent="1" source="_wiV1sLz3M6k8l1JJ68s-1" target="SBEox3NDaokPfLYJbtWu-12" edge="1">
          <mxGeometry relative="1" as="geometry">
            <Array as="points">
              <mxPoint x="720" y="336"/>
            </Array>
          </mxGeometry>
        </mxCell>
        <mxCell id="_wiV1sLz3M6k8l1JJ68s-7" value="Analyze/Extract the information" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="_wiV1sLz3M6k8l1JJ68s-6" vertex="1" connectable="0">
          <mxGeometry x="-0.3952" y="-1" relative="1" as="geometry">
            <mxPoint as="offset"/>
          </mxGeometry>
        </mxCell>
      </root>
    </mxGraphModel>
  </diagram>
</mxfile>
Loading