aws-solutions-library-samples
diff --git a/‎CHANGELOG.md‎
Lines changed: 8 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎config_library/pattern-2/lending-package-sample/config.yaml‎
Lines changed: 26 additions & 29 deletions b/‎config_library/pattern-2/lending-package-sample/config.yaml‎
Lines changed: 26 additions & 29 deletions
@@ -5,9 +5,17 @@ SPDX-License-Identifier: MIT-0
 
 ## [Unreleased]
 
+## [0.3.12]
 
 ### Added
 
+- **Refactored Document Classification Service for Enhanced Boundary Detection**
+  - Consolidated `multimodalPageLevelClassification` and the experimental `multimodalPageBoundaryClassification` (from v0.3.11) into a single enhanced `multimodalPageLevelClassification` method
+  - Implemented BIO-like sequence segmentation with document boundary indicators: "start" (new document) and "continue" (same document)
+  - Automatically segments multi-document packets, even when they contain multiple documents of the same type
+  - **Benefits**: Simplified codebase with a single multimodal classification method, improved handling of complex document packets, and preserved backward compatibility
+  - **No Breaking Changes**: Existing configurations work unchanged; no configuration updates are required
+
 - **Enhanced A2I Template and Workflow Management**
   - Enhanced A2I template with improved user interface and clearer instructions for reviewers
   - Added comprehensive instructions for reviewers in A2I template to guide the review process
 
@@ -1,3 +1,9 @@
+# SPDX-License-Identifier: MIT-0
+
+notes: Boundary-aware classification example for pattern-2
+
+
+
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: MIT-0
 
@@ -914,15 +920,26 @@ classes:
         evaluation_method: LLM
         attributeType: group
 classification:
+  classificationMethod: multimodalPageLevelClassification
   image:
     target_height: ''
     target_width: ''
+  model: us.amazon.nova-pro-v1:0
+  temperature: '0.0'
   top_p: '0.1'
   max_tokens: '4096'
   top_k: '5'
+  system_prompt: >-
+    You are a multimodal document classification expert that analyzes business documents using both visual layout and textual content. Your task is to classify single-page documents into predefined categories based on their structural patterns, visual features, and text content. Your output must be valid JSON according to the requested format.
+
+    <variables>
+    <document-ocr-data>: OCR-extracted text content from the document page that provides textual information for classification
+    <document-image>: Visual representation of the document page that provides layout, formatting, and visual structure information
+    <document-types>: List of valid document types with their descriptions that the document must be classified into
+    </variables>
   task_prompt: >-
     <task-description>
-    Analyze the provided document using both its visual layout and textual content to determine its document type. You must classify it into exactly one of the predefined categories.
+    Analyze the provided document using both its visual layout and textual content to determine its document type and whether this page begins a new document or continues the previous one.
     </task-description>
 
     <document-types>
@@ -934,24 +951,16 @@ classification:
     1. Examine the visual layout: headers, logos, formatting, structure, and visual organization
     2. Analyze the textual content: key phrases, terminology, purpose, and information type
     3. Identify distinctive features that match the document type descriptions
-    4. Consider both visual and textual evidence together to determine the best match
-    5. CRITICAL: Only use document types explicitly listed in the <document-types> section
+    4. Decide if this page starts a new document (output "start") or continues the previous document (output "continue")
+    5. Consider both visual and textual evidence together to determine the best match
+    6. CRITICAL: Only use document types explicitly listed in the <document-types> section
     </classification-instructions>
 
-    <reasoning-guidelines>
-    When determining the document type:
-    - First identify the document's primary purpose and function
-    - Note specific visual elements (letterhead, forms, tables, signatures)
-    - Identify key textual indicators (terminology, phrases, structure)
-    - Consider the document's intended audience and use case
-    - Provide specific evidence from both visual and textual analysis
-    </reasoning-guidelines>
-
     <output-format>
-    Return your classification as valid JSON following this exact structure:
     {
       "classification_reason": "Detailed reasoning including specific visual and textual evidence that led to this classification",
-      "class": "exact_document_type_from_list"
+      "class": "exact_document_type_from_list",
+      "document_boundary": "start or continue"
     }
     </output-format>
 
@@ -968,22 +977,10 @@ classification:
     <final-instructions>
     Analyze the document above by:
     1. Applying the <classification-instructions> to examine both visual and textual features
-    2. Following the <reasoning-guidelines> to build your classification rationale
-    3. Selecting ONLY from document types in <document-types>
-    4. Providing clear reasoning with specific evidence before the classification
-    5. Outputting in the exact JSON format specified in <output-format>
+    2. Selecting ONLY from document types in <document-types>
+    3. Providing clear reasoning with specific evidence
+    4. Outputting in the exact JSON format specified in <output-format>
     </final-instructions>
-  temperature: '0.0'
-  model: us.amazon.nova-pro-v1:0
-  system_prompt: >-
-    You are a multimodal document classification expert that analyzes business documents using both visual layout and textual content. Your task is to classify single-page documents into predefined categories based on their structural patterns, visual features, and text content. Your output must be valid JSON according to the requested format.
-
-    <variables>
-    DOCUMENT_TEXT: OCR-extracted text content from the document page that provides textual information for classification
-    DOCUMENT_IMAGE: Visual representation of the document page that provides layout, formatting, and visual structure information
-    CLASS_NAMES_AND_DESCRIPTIONS: List of valid document types with their descriptions that the document must be classified into
-    </variables>
-  classificationMethod: multimodalPageLevelClassification
 extraction:
   image:
     target_width: ''