1 file changed: +7 -1 lines changed
Original file line number | Diff line number | Diff line change
@@ -3606,7 +3606,9 @@ def parse_key_value_item(
36063606 rf"{ DocumentToken .UNORDERED_LIST .value } |"
36073607 rf"{ DocItemLabel .KEY_VALUE_REGION } |"
36083608 rf"{ DocumentToken .CHART .value } |"
3609- rf"{ DocumentToken .OTSL .value } )>.*?</(?P=tag)>"
3609+ rf"{ DocumentToken .OTSL .value } )>"
3610+ rf"(?P<content>.*?)"
3611+ rf"(?:(?P<closed></(?P=tag)>)|(?P<eof>$))"
36103612 )
36113613 pattern = re .compile (tag_pattern , re .DOTALL )
36123614
@@ -3616,6 +3618,10 @@ def parse_key_value_item(
36163618 tag_name = match .group ("tag" )
36173619
36183620 bbox = extract_bounding_box (full_chunk ) # Extracts first bbox
3621+ if not match .group ("closed" ):
3622+ # no closing tag; only the existence of the item is recovered
3623+ full_chunk = f"<{ tag_name } ></{ tag_name } >"
3624+
36193625 doc_label = tag_to_doclabel .get (tag_name , DocItemLabel .PARAGRAPH )
36203626
36213627 if tag_name == DocumentToken .OTSL .value :
You can’t perform that action at this time.
0 commit comments