
Commit c0b09aa

Author: Bob Strahan (committed)
Merge branch 'develop' v0.3.13
2 parents 9379cf9 + 359eef3 commit c0b09aa

File tree

114 files changed: 13238 additions, 3272 deletions


.gitignore

Lines changed: 3 additions & 0 deletions
@@ -4,6 +4,8 @@ build.toml
 model.tar.gz
 .checksum
 .checksums/
+.build_checksum
+.lib_checksum
 .vscode/
 .DS_Store
 dist/
@@ -20,3 +22,4 @@ rvl_cdip_*
 notebooks/examples/data
 .idea/
 .dsr/
+*tmp-dev-assets*

.gitlab-ci.yml

Lines changed: 2 additions & 0 deletions
@@ -28,6 +28,8 @@ developer_tests:
 - apt-get update -y
 - apt-get install make -y
 - pip install ruff
+# Install dependencies needed by publish.py for test imports
+- pip install typer rich boto3
 # Install test dependencies
 - cd lib/idp_common_pkg && pip install -e ".[test]" && cd ../..
 
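
The new CI step installs typer, rich, and boto3 because publish.py imports them. As a rough illustration of the pattern those dependencies imply (a Rich progress display driving multi-threaded S3 uploads), here is a minimal sketch; the function names and bucket handling are hypothetical, not the actual publish.py API:

# Minimal sketch only -- not the real publish.py. Assumes artifacts are already built.
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path

import boto3
from rich.progress import Progress

s3 = boto3.client("s3")

def upload_artifact(path: Path, bucket: str, prefix: str) -> str:
    # Upload a single build artifact and return its S3 key.
    key = f"{prefix}/{path.name}"
    s3.upload_file(str(path), bucket, key)
    return key

def upload_all(paths: list[Path], bucket: str, prefix: str) -> None:
    # Drive the uploads from a thread pool while Rich renders overall progress.
    with Progress() as progress:
        task = progress.add_task("Uploading artifacts...", total=len(paths))
        with ThreadPoolExecutor(max_workers=8) as pool:
            futures = [pool.submit(upload_artifact, p, bucket, prefix) for p in paths]
            for future in as_completed(futures):
                future.result()  # re-raise any upload error
                progress.advance(task)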

CHANGELOG.md

Lines changed: 46 additions & 0 deletions
@@ -5,8 +5,54 @@ SPDX-License-Identifier: MIT-0
 
 ## [Unreleased]
 
+## [0.3.13]
+
 ### Added
 
+- **External MCP Agent Integration for Custom Tool Extension**
+  - Added External MCP (Model Context Protocol) Agent support that enables integration with custom MCP servers to extend IDP capabilities
+  - **Cross-Account Integration**: Host MCP servers in separate AWS accounts or external infrastructure with secure OAuth authentication using AWS Cognito
+  - **Dynamic Tool Discovery**: Automatically discovers and integrates available tools from MCP servers through the IDP web interface
+  - **Secure Authentication Flow**: Uses AWS Cognito User Pools for OAuth bearer token authentication with proper token validation
+  - **Configuration Management**: JSON array configuration in AWS Secrets Manager supporting multiple MCP server connections with optional custom agent names and descriptions
+  - **Real-time Integration**: Tools become immediately available through the IDP web interface after configuration
+
+- **AWS GovCloud Support with Automated Template Generation**
+  - Added GovCloud compatibility through the `scripts/generate_govcloud_template.py` script
+  - **ARN Partition Compatibility**: All templates updated to use `arn:${AWS::Partition}:` for both commercial and GovCloud regions
+  - **Headless Operation**: Automatically removes UI-related resources (CloudFront, AppSync, Cognito, WAF) for GovCloud deployment
+  - **Core Functionality Preserved**: All 3 processing patterns and the complete 6-step pipeline (OCR, Classification, Extraction, Assessment, Summarization, Evaluation) remain fully functional
+  - **Automated Workflow**: A single script orchestrates build, GovCloud template generation, and S3 upload with deployment URLs
+  - **Enterprise Ready**: Enables headless document processing for government and enterprise environments requiring GovCloud compliance
+  - **Documentation**: New `docs/govcloud-deployment.md` with a deployment guide, architecture differences, and access methods
+
+- **Pattern-2 and Pattern-3 Assessment now generate geometry (bounding boxes) for visualization in the UI 'Visual Editor' (parity with Pattern-1)**
+  - Added comprehensive spatial localization capabilities to both the regular and granular assessment services
+  - **Automatic Processing**: When the LLM provides bbox coordinates, they are automatically converted to the UI-compatible (Visual Edit) geometry format without any configuration
+  - **Universal Support**: Works with all attribute types - simple attributes, nested group attributes (e.g., CompanyAddress.State), and list attributes
+  - **Enhanced Prompts**: Updated assessment task prompts with spatial-localization-guidelines requesting bbox coordinates on a normalized 0-1000 scale
+  - **Demo Notebooks**: Assessment notebooks now showcase automatic bounding box processing
+
+- **New Python-Based Publishing System**
+  - Replaced the `publish.sh` bash script with a new `publish.py` Python script
+  - Rich console interface with progress bars, spinners, and colored output using the Rich library
+  - Multi-threaded artifact building and uploading for significantly improved performance
+  - Native support for Linux, macOS, and Windows environments
+
+- **Windows Development Environment Setup Guide and Helper Script**
+  - New `scripts/dev_setup.bat` (570 lines) for complete Windows development environment configuration
+
+- **OCR Service Default Image Sizing for Resource Optimization**
+  - Implemented automatic default image size limits (951×1268) when no image sizing configuration is provided
+  - **Key Benefits**: Reduces vision model token consumption, prevents OutOfMemory errors during concurrent processing, improves processing speed, and reduces bandwidth usage
+
+### Changed
+
+- **Reverted to the python3.12 runtime to resolve build package dependency problems**
+
+### Fixed
+- **Improved Visual Edit bounding box positioning when using image zoom or pan**
+
 
 
 ## [0.3.12]
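
The "Configuration Management" bullet above describes the MCP integration as a JSON array stored in AWS Secrets Manager. A minimal sketch of what registering such a configuration could look like follows; the secret name and field names are hypothetical, since the actual schema is not shown in this diff:

# Hypothetical sketch -- the field names and secret name are illustrative only.
import json

import boto3

mcp_servers = [
    {
        "url": "https://mcp.example.com/mcp",           # MCP server endpoint (illustrative)
        "agent_name": "inventory-tools",                # optional custom agent name
        "description": "Custom inventory lookup tools", # optional description
        # OAuth bearer tokens issued by a Cognito User Pool, per the entry above
        "cognito_user_pool_id": "us-east-1_EXAMPLE",
        "cognito_client_id": "example-client-id",
    }
]

secretsmanager = boto3.client("secretsmanager")
secretsmanager.put_secret_value(
    SecretId="idp/mcp-agent-config",                    # hypothetical secret name
    SecretString=json.dumps(mcp_servers),
)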
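
The "OCR Service Default Image Sizing" entry above describes a default 951×1268 limit applied when no image sizing is configured. The OCR service code is not part of this diff, but the behavior described amounts to an aspect-preserving downscale, roughly:

# Rough sketch of the described default-sizing behavior; not the actual OCR service code.
from PIL import Image

DEFAULT_MAX_SIZE = (951, 1268)  # (width, height) applied when no sizing config is provided

def apply_default_sizing(image: Image.Image) -> Image.Image:
    # thumbnail() preserves aspect ratio and never upscales.
    resized = image.copy()
    resized.thumbnail(DEFAULT_MAX_SIZE, Image.Resampling.LANCZOS)
    return resized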

Makefile

Lines changed: 29 additions & 6 deletions
@@ -14,7 +14,7 @@ test:
 $(MAKE) -C lib/idp_common_pkg test
 
 # Run both linting and formatting in one command
-lint: ruff-lint format
+lint: ruff-lint format check-arn-partitions
 
 # Run linting checks and fix issues automatically
 ruff-lint:
@@ -29,16 +29,39 @@ format:
 lint-cicd:
 @echo "Running code quality checks..."
 @if ! ruff check; then \
-echo "$(RED)ERROR: Ruff linting failed!$(NC)"; \
-echo "$(YELLOW)Please run 'make ruff-lint' locally to fix these issues.$(NC)"; \
+echo -e "$(RED)ERROR: Ruff linting failed!$(NC)"; \
+echo -e "$(YELLOW)Please run 'make ruff-lint' locally to fix these issues.$(NC)"; \
 exit 1; \
 fi
 @if ! ruff format --check; then \
-echo "$(RED)ERROR: Code formatting check failed!$(NC)"; \
-echo "$(YELLOW)Please run 'make format' locally to fix these issues.$(NC)"; \
+echo -e "$(RED)ERROR: Code formatting check failed!$(NC)"; \
+echo -e "$(YELLOW)Please run 'make format' locally to fix these issues.$(NC)"; \
+exit 1; \
+fi
+@echo -e "$(GREEN)All code quality checks passed!$(NC)"
+
+# Check CloudFormation templates for hardcoded AWS partition ARNs
+check-arn-partitions:
+@echo "Checking CloudFormation templates for hardcoded ARN partitions..."
+@FOUND_ISSUES=0; \
+for template in template.yaml patterns/*/template.yaml patterns/*/sagemaker_classifier_endpoint.yaml options/*/template.yaml; do \
+if [ -f "$$template" ]; then \
+echo "Checking $$template..."; \
+MATCHES=$$(grep -n "arn:aws:" "$$template" | grep -v "arn:\$${AWS::Partition}:" || true); \
+if [ -n "$$MATCHES" ]; then \
+echo -e "$(RED)ERROR: Found hardcoded 'arn:aws:' references in $$template:$(NC)"; \
+echo "$$MATCHES" | sed 's/^/ /'; \
+echo -e "$(YELLOW) These should use 'arn:\$${AWS::Partition}:' instead for GovCloud compatibility$(NC)"; \
+FOUND_ISSUES=1; \
+fi; \
+fi; \
+done; \
+if [ $$FOUND_ISSUES -eq 0 ]; then \
+echo -e "$(GREEN)✅ No hardcoded ARN partition references found!$(NC)"; \
+else \
+echo -e "$(RED)❌ Found hardcoded ARN partition references that need to be fixed$(NC)"; \
 exit 1; \
 fi
-@echo "$(GREEN)All code quality checks passed!$(NC)"
 
 # A convenience Makefile target that runs
 commit: lint test
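
The shell escaping in the new check-arn-partitions recipe makes the intent a little hard to read: it greps each template for literal "arn:aws:" and ignores lines that already use "arn:${AWS::Partition}:". The same check, expressed as a small standalone Python script for illustration only (not part of the repo):

# Illustration of the check-arn-partitions logic in plain Python; not a repo script.
import glob
import sys

TEMPLATE_GLOBS = [
    "template.yaml",
    "patterns/*/template.yaml",
    "patterns/*/sagemaker_classifier_endpoint.yaml",
    "options/*/template.yaml",
]

found = False
for pattern in TEMPLATE_GLOBS:
    for template in glob.glob(pattern):
        with open(template, encoding="utf-8") as f:
            for lineno, line in enumerate(f, start=1):
                # Flag hardcoded partitions that are not already partition-aware.
                if "arn:aws:" in line and "arn:${AWS::Partition}:" not in line:
                    print(f"{template}:{lineno}: {line.rstrip()}")
                    found = True

sys.exit(1 if found else 0)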

README.md

Lines changed: 1 addition & 0 deletions
@@ -124,6 +124,7 @@ For detailed deployment and testing instructions, see the [Deployment Guide](./d
 - [Deployment](./docs/deployment.md) - Build, publish, deploy, and test instructions
 - [Web UI](./docs/web-ui.md) - Web interface features and usage
 - [Agent Analysis](./docs/agent-analysis.md) - Natural language analytics and data visualization feature
+- [Custom MCP Agent](./docs/custom-MCP-agent.md) - Integrating external MCP servers for custom tools and capabilities
 - [Configuration](./docs/configuration.md) - Configuration and customization options
 - [Classification](./docs/classification.md) - Customizing document classification
 - [Extraction](./docs/extraction.md) - Customizing information extraction

VERSION

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-0.3.12
+0.3.13

config_library/pattern-2/bank-statement-sample/config.yaml

Lines changed: 67 additions & 50 deletions
@@ -1,7 +1,7 @@
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: MIT-0
 
-notes: Default settings
+notes: Default settings for bank statement sample configuration
 ocr:
 backend: "textract" # Default to Textract for backward compatibility
 model_id: "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
@@ -368,6 +368,7 @@ summarization:
 model: us.anthropic.claude-3-7-sonnet-20250219-v1:0
 system_prompt: >-
 You are a document summarization expert who can analyze and summarize documents from various domains including medical, financial, legal, and general business documents. Your task is to create a summary that captures the key information, main points, and important details from the document. Your output must be in valid JSON format. \nSummarization Style: Balanced\\nCreate a balanced summary that provides a moderate level of detail. Include the main points and key supporting information, while maintaining the document's overall structure. Aim for a comprehensive yet concise summary.\n Your output MUST be in valid JSON format with markdown content. You MUST strictly adhere to the output format specified in the instructions.
+
 assessment:
 enabled: true
 image:
@@ -383,130 +384,146 @@ assessment:
 max_tokens: '10000'
 top_k: '5'
 temperature: '0.0'
-model: us.amazon.nova-pro-v1:0
+model: us.amazon.nova-lite-v1:0
 system_prompt: >-
-You are a document analysis assessment expert. Your task is to evaluate the confidence of extraction results by analyzing the source document evidence. Respond only with JSON containing confidence scores for each extracted attribute.
+You are a document analysis assessment expert. Your role is to evaluate the confidence and accuracy of data extraction results by analyzing them against source documents.
+
+Provide accurate confidence scores for each assessment.
+When bounding boxes are requested, provide precise coordinate locations where information appears in the document.
 task_prompt: >-
 <background>
-
-You are an expert document analysis assessment system. Your task is to evaluate the confidence of extraction results for a document of class {DOCUMENT_CLASS}.
-
+You are an expert document analysis assessment system. Your task is to evaluate the confidence of extraction results for a document of class {DOCUMENT_CLASS} and provide precise spatial localization for each field.
 </background>
 
-
 <task>
-
-Analyze the extraction results against the source document and provide confidence assessments for each extracted attribute. Consider factors such as:
-
-1. Text clarity and OCR quality in the source regions
-2. Alignment between extracted values and document content
-3. Presence of clear evidence supporting the extraction
-4. Potential ambiguity or uncertainty in the source material
+Analyze the extraction results against the source document and provide confidence assessments AND bounding box coordinates for each extracted attribute. Consider factors such as:
+1. Text clarity and OCR quality in the source regions
+2. Alignment between extracted values and document content
+3. Presence of clear evidence supporting the extraction
+4. Potential ambiguity or uncertainty in the source material
 5. Completeness and accuracy of the extracted information
-
+6. Precise spatial location of each field in the document
 </task>
 
-
 <assessment-guidelines>
-
-For each attribute, provide:
-A confidence score between 0.0 and 1.0 where:
+For each attribute, provide:
+- A confidence score between 0.0 and 1.0 where:
 - 1.0 = Very high confidence, clear and unambiguous evidence
 - 0.8-0.9 = High confidence, strong evidence with minor uncertainty
 - 0.6-0.7 = Medium confidence, reasonable evidence but some ambiguity
 - 0.4-0.5 = Low confidence, weak or unclear evidence
 - 0.0-0.3 = Very low confidence, little to no supporting evidence
-
-Guidelines:
-- Base assessments on actual document content and OCR quality
-- Consider both text-based evidence and visual/layout clues
-- Account for OCR confidence scores when provided
-- Be objective and specific in reasoning
+- A clear explanation of the confidence reasoning
+- Precise spatial coordinates where the field appears in the document
+
+Guidelines:
+- Base assessments on actual document content and OCR quality
+- Consider both text-based evidence and visual/layout clues
+- Account for OCR confidence scores when provided
+- Be objective and specific in reasoning
 - If an extraction appears incorrect, score accordingly with explanation
-
+- Provide tight, accurate bounding boxes around the actual text
 </assessment-guidelines>
 
-<final-instructions>
+<spatial-localization-guidelines>
+For each field, provide bounding box coordinates:
+- bbox: [x1, y1, x2, y2] coordinates in normalized 0-1000 scale
+- page: Page number where the field appears (starting from 1)
+
+Coordinate system:
+- Use normalized scale 0-1000 for both x and y axes
+- x1, y1 = top-left corner of bounding box
+- x2, y2 = bottom-right corner of bounding box
+- Ensure x2 > x1 and y2 > y1
+- Make bounding boxes tight around the actual text content
+- If a field spans multiple lines, create a bounding box that encompasses all relevant text
+</spatial-localization-guidelines>
 
-Analyze the extraction results against the source document and provide confidence assessments. Return a JSON object with the following structure based on the attribute type:
+<final-instructions>
+Analyze the extraction results against the source document and provide confidence assessments with spatial localization. Return a JSON object with the following structure based on the attribute type:
 
-For SIMPLE attributes:
+For SIMPLE attributes:
 {
 "simple_attribute_name": {
 "confidence": 0.85,
+"bbox": [100, 200, 300, 250],
+"page": 1
 }
 }
 
-For GROUP attributes (nested object structure):
+For GROUP attributes (nested object structure):
 {
 "group_attribute_name": {
 "sub_attribute_1": {
 "confidence": 0.90,
+"bbox": [150, 300, 250, 320],
+"page": 1
 },
 "sub_attribute_2": {
 "confidence": 0.75,
+"bbox": [150, 325, 280, 345],
+"page": 1
 }
 }
 }
 
-For LIST attributes (array of assessed items):
+For LIST attributes (array of assessed items):
 {
 "list_attribute_name": [
 {
 "item_attribute_1": {
 "confidence": 0.95,
+"bbox": [100, 400, 200, 420],
+"page": 1
 },
 "item_attribute_2": {
 "confidence": 0.88,
+"bbox": [250, 400, 350, 420],
+"page": 1
 }
 },
 {
 "item_attribute_1": {
 "confidence": 0.92,
+"bbox": [100, 425, 200, 445],
+"page": 1
 },
 "item_attribute_2": {
 "confidence": 0.70,
+"bbox": [250, 425, 350, 445],
+"page": 1
 }
 }
 ]
 }
 
-IMPORTANT:
-- For LIST attributes like "Transactions", assess EACH individual item in the list separately
-- Each transaction should be assessed as a separate object in the array
-- Do NOT provide aggregate assessments for list items - assess each one individually
-- Include assessments for ALL attributes present in the extraction results
+IMPORTANT:
+- For LIST attributes like "Transactions", assess EACH individual item in the list separately with individual bounding boxes
+- Each transaction should be assessed as a separate object in the array with its own spatial coordinates
+- Do NOT provide aggregate assessments for list items - assess each one individually with precise locations
+- Include assessments AND bounding boxes for ALL attributes present in the extraction results
 - Match the exact structure of the extracted data
-
+- Provide page numbers for all bounding boxes (starting from 1)
 </final-instructions>
 
-<attributes-definitions>
-
-{ATTRIBUTE_NAMES_AND_DESCRIPTIONS}
-
-</attributes-definitions>
-
 <<CACHEPOINT>>
 
 <document-image>
-
 {DOCUMENT_IMAGE}
-
 </document-image>
 
-
 <ocr-text-confidence-results>
-
 {OCR_TEXT_CONFIDENCE}
-
 </ocr-text-confidence-results>
 
 <<CACHEPOINT>>
 
-<extraction-results>
+<attributes-definitions>
+{ATTRIBUTE_NAMES_AND_DESCRIPTIONS}
+</attributes-definitions>
 
+<extraction-results>
 {EXTRACTION_RESULTS}
-
 </extraction-results>
 
 
 evaluation: