@@ -81,7 +81,7 @@ def _add_child_elements(
8181 for child in element .cluster .children :
8282 c_label = child .label
8383 c_bbox = child .bbox .to_bottom_left_origin (
84- doc .pages [element .page_no + 1 ].size .height
84+ doc .pages [element .page_no ].size .height
8585 )
8686 c_text = " " .join (
8787 [
@@ -92,7 +92,7 @@ def _add_child_elements(
9292 )
9393
9494 c_prov = ProvenanceItem (
95- page_no = element .page_no + 1 , charspan = (0 , len (c_text )), bbox = c_bbox
95+ page_no = element .page_no , charspan = (0 , len (c_text )), bbox = c_bbox
9696 )
9797 if c_label == DocItemLabel .LIST_ITEM :
9898 # TODO: Infer if this is a numbered or a bullet list item
@@ -142,7 +142,7 @@ def _readingorder_elements_to_docling_doc(
142142 out_doc : DoclingDocument = DoclingDocument (name = doc_name , origin = origin )
143143
144144 for page in conv_res .pages :
145- page_no = page .page_no + 1
145+ page_no = page .page_no
146146 size = page .size
147147
148148 assert size is not None , "Page size is not initialized."
@@ -174,7 +174,7 @@ def _readingorder_elements_to_docling_doc(
174174 if element .label == DocItemLabel .CODE :
175175 cap_text = element .text
176176 prov = ProvenanceItem (
177- page_no = element .page_no + 1 ,
177+ page_no = element .page_no ,
178178 charspan = (0 , len (cap_text )),
179179 bbox = element .cluster .bbox .to_bottom_left_origin (page_height ),
180180 )
@@ -230,7 +230,7 @@ def _readingorder_elements_to_docling_doc(
230230 )
231231
232232 prov = ProvenanceItem (
233- page_no = element .page_no + 1 ,
233+ page_no = element .page_no ,
234234 charspan = (0 , 0 ),
235235 bbox = element .cluster .bbox .to_bottom_left_origin (page_height ),
236236 )
@@ -286,7 +286,7 @@ def _readingorder_elements_to_docling_doc(
286286 elif isinstance (element , FigureElement ):
287287 cap_text = ""
288288 prov = ProvenanceItem (
289- page_no = element .page_no + 1 ,
289+ page_no = element .page_no ,
290290 charspan = (0 , len (cap_text )),
291291 bbox = element .cluster .bbox .to_bottom_left_origin (page_height ),
292292 )
@@ -330,7 +330,7 @@ def _add_caption_or_footnote(self, elem, out_doc, parent, page_height):
330330 assert isinstance (elem , TextElement )
331331 text = elem .text
332332 prov = ProvenanceItem (
333- page_no = elem .page_no + 1 ,
333+ page_no = elem .page_no ,
334334 charspan = (0 , len (text )),
335335 bbox = elem .cluster .bbox .to_bottom_left_origin (page_height ),
336336 )
@@ -343,7 +343,7 @@ def _handle_text_element(self, element, out_doc, current_list, page_height):
343343 cap_text = element .text
344344
345345 prov = ProvenanceItem (
346- page_no = element .page_no + 1 ,
346+ page_no = element .page_no ,
347347 charspan = (0 , len (cap_text )),
348348 bbox = element .cluster .bbox .to_bottom_left_origin (page_height ),
349349 )
@@ -391,7 +391,7 @@ def _merge_elements(self, element, merged_elem, new_item, page_height):
391391 "Labels of merged elements must match."
392392 )
393393 prov = ProvenanceItem (
394- page_no = merged_elem .page_no + 1 ,
394+ page_no = merged_elem .page_no ,
395395 charspan = (
396396 len (new_item .text ) + 1 ,
397397 len (new_item .text ) + 1 + len (merged_elem .text ),
0 commit comments