@@ -69,7 +69,9 @@ def _iterate_items(
6969 node : Optional [NodeItem ] = None ,
7070 traverse_pictures : bool = False ,
7171 add_page_breaks : bool = False ,
72+ visited : Optional [set [str ]] = None ,
7273):
74+ my_visited : set [str ] = visited if visited is not None else set ()
7375 prev_page_nr : Optional [int ] = None
7476 page_break_i = 0
7577 for item , _ in doc .iterate_items (
@@ -78,10 +80,33 @@ def _iterate_items(
7880 included_content_layers = layers ,
7981 traverse_pictures = traverse_pictures ,
8082 ):
81- if isinstance (item , DocItem ):
82- if item .prov :
83+ if add_page_breaks :
84+ if (
85+ isinstance (item , (UnorderedList , OrderedList , InlineGroup ))
86+ and item .self_ref not in my_visited
87+ ):
88+ # if group starts with new page, yield page break before group node
89+ my_visited .add (item .self_ref )
90+ for it in _iterate_items (
91+ doc = doc ,
92+ layers = layers ,
93+ node = item ,
94+ traverse_pictures = traverse_pictures ,
95+ add_page_breaks = add_page_breaks ,
96+ visited = my_visited ,
97+ ):
98+ if isinstance (it , DocItem ) and it .prov :
99+ page_no = it .prov [0 ].page_no
100+ if prev_page_nr is not None and page_no > prev_page_nr :
101+ yield _PageBreakNode (
102+ self_ref = f"#/pb/{ page_break_i } " ,
103+ prev_page = prev_page_nr ,
104+ next_page = page_no ,
105+ )
106+ break
107+ elif isinstance (item , DocItem ) and item .prov :
83108 page_no = item .prov [0 ].page_no
84- if add_page_breaks and ( prev_page_nr is None or page_no > prev_page_nr ) :
109+ if prev_page_nr is None or page_no > prev_page_nr :
85110 if prev_page_nr is not None : # close previous range
86111 yield _PageBreakNode (
87112 self_ref = f"#/pb/{ page_break_i } " ,
0 commit comments