@@ -1819,6 +1819,18 @@ def _append_item(self, *, item: NodeItem, parent_ref: RefItem) -> RefItem:
18191819 item .parent = parent_ref
18201820
18211821 self .form_items .append (item )
1822+
1823+ elif isinstance (item , (UnorderedList , OrderedList , InlineGroup )):
1824+ item_label = "groups"
1825+ item_index = len (self .groups )
1826+
1827+ cref = f"#/{ item_label } /{ item_index } "
1828+
1829+ item .self_ref = cref
1830+ item .parent = parent_ref
1831+
1832+ self .groups .append (item )
1833+
18221834 else :
18231835 raise ValueError (f"Item { item } is not supported for insertion" )
18241836
@@ -2154,8 +2166,8 @@ def add_list_item(
21542166 :param parent: Optional[NodeItem]: (Default value = None)
21552167
21562168 """
2157- if not parent :
2158- parent = self . body
2169+ if not isinstance ( parent , ( OrderedList , UnorderedList )) :
2170+ raise ValueError ( "ListItem's parent must be a list group" )
21592171
21602172 if not orig :
21612173 orig = text
@@ -4197,3 +4209,58 @@ def validate_document(cls, d: "DoclingDocument"):
41974209 raise ValueError ("Document hierachy is inconsistent." )
41984210
41994211 return d
4212+
@model_validator(mode="after")
def validate_misplaced_list_items(self):
    """Wrap list items that are not children of a list group into new groups.

    Walks the document in iteration order and collects every ``ListItem``
    whose parent is missing or does not resolve to an ``OrderedList`` /
    ``UnorderedList``. Directly successive misplaced items are gathered into
    one run. For each run, a new list group is inserted before the run's
    first item, the run's items are deleted from the document, and
    equivalent list items are re-added under the new group.

    Returns:
        The validated document instance (``self``).
    """
    list_group_types = (OrderedList, UnorderedList)

    # Find list items without a list parent, keeping successive ones together.
    misplaced_runs: list[list[ListItem]] = []
    prev_was_misplaced = False
    for item, _ in self.iterate_items(
        traverse_pictures=True,
        included_content_layers=set(ContentLayer),
        with_groups=True,  # so that we can distinguish neighboring lists
    ):
        is_misplaced = isinstance(item, ListItem) and (
            item.parent is None
            or not isinstance(item.parent.resolve(doc=self), list_group_types)
        )
        if is_misplaced:
            # Only extend the current run when the node right before this one
            # was itself a misplaced list item. A preceding list item that
            # already sits in a proper list group must not count: there may be
            # no run to extend yet, or the last run may belong to an unrelated
            # earlier part of the document.
            if prev_was_misplaced:
                misplaced_runs[-1].append(item)
            else:
                misplaced_runs.append([item])
        prev_was_misplaced = is_misplaced

    # NOTE(review): runs are processed last-to-first, presumably so that
    # edits made for later runs do not disturb earlier ones -- confirm.
    for run in reversed(misplaced_runs):
        first_item = run[0]

        # The group kind follows the first item of the run. The "#" self_ref
        # is a placeholder; the insertion is expected to assign the real
        # reference -- TODO confirm against insert_item_before_sibling.
        if first_item.enumerated:
            new_group = OrderedList(self_ref="#")
        else:
            new_group = UnorderedList(self_ref="#")
        self.insert_item_before_sibling(
            new_item=new_group,
            sibling=first_item,
        )

        # Delete list items from document (should not be affected by the
        # group addition).
        self.delete_items(node_items=run)

        # Re-create the list items as children of the new group.
        for li in run:
            self.add_list_item(
                text=li.text,
                enumerated=li.enumerated,
                marker=li.marker,
                orig=li.orig,
                # NOTE: only the first provenance entry is carried over.
                prov=li.prov[0] if li.prov else None,
                parent=new_group,
                content_layer=li.content_layer,
                formatting=li.formatting,
                hyperlink=li.hyperlink,
            )
    return self
0 commit comments