File tree (expand / collapse): 1 file changed, +6 -4 lines changed
lib/idp_common_pkg/idp_common/classification (expand / collapse file tree): 1 file changed, +6 -4 lines changed
Original file line number | Diff line number | Diff line change
@@ -147,7 +147,9 @@ def __init__(
147147 f"Invalid classification method '{ self .classification_method } ', falling back to '{ self .MULTIMODAL_PAGE_LEVEL } '"
148148 )
149149 self .classification_method = self .MULTIMODAL_PAGE_LEVEL
150- logger .info ("Using multimodal page-level classification method with document boundary detection" )
150+ logger .info (
151+ "Using multimodal page-level classification method with document boundary detection"
152+ )
151153
152154 def _load_document_types (self ) -> List [DocumentType ]:
153155 """Load document types from configuration."""
@@ -1566,15 +1568,15 @@ def _group_consecutive_pages(
15661568 ) -> List [DocumentSection ]:
15671569 """
15681570 Group consecutive pages into sections using sequence segmentation.
1569-
1571+
15701572 This method implements the BIO-like tagging approach by examining both:
15711573 1. Document type (classification)
15721574 2. Document boundary indicator ("start" or "continue")
1573-
1575+
15741576 A new section is created when:
15751577 - The document type changes from one page to the next
15761578 - A page has boundary="start", indicating a new document begins
1577-
1579+
15781580 This enables accurate segmentation of multi-document packets where multiple
15791581 documents of the same type may appear consecutively.
15801582
You can’t perform that action at this time.
0 commit comments