@@ -3237,6 +3237,11 @@ def load_from_doctags( # noqa: C901
32373237 "document_index" : DocItemLabel .DOCUMENT_INDEX ,
32383238 "otsl" : DocItemLabel .TABLE ,
32393239 "section_header_level_1" : DocItemLabel .SECTION_HEADER ,
3240+ "section_header_level_2" : DocItemLabel .SECTION_HEADER ,
3241+ "section_header_level_3" : DocItemLabel .SECTION_HEADER ,
3242+ "section_header_level_4" : DocItemLabel .SECTION_HEADER ,
3243+ "section_header_level_5" : DocItemLabel .SECTION_HEADER ,
3244+ "section_header_level_6" : DocItemLabel .SECTION_HEADER ,
32403245 "checkbox_selected" : DocItemLabel .CHECKBOX_SELECTED ,
32413246 "checkbox_unselected" : DocItemLabel .CHECKBOX_UNSELECTED ,
32423247 "text" : DocItemLabel .TEXT ,
@@ -3622,7 +3627,7 @@ def parse_key_value_item(
36223627 rf"{ DocItemLabel .PAGE_FOOTER } |{ DocItemLabel .FORMULA } |"
36233628 rf"{ DocItemLabel .CAPTION } |{ DocItemLabel .PICTURE } |"
36243629 rf"{ DocItemLabel .FOOTNOTE } |{ DocItemLabel .CODE } |"
3625- rf"{ DocItemLabel .SECTION_HEADER } _level_1 |"
3630+ rf"{ DocItemLabel .SECTION_HEADER } _level_[1-6] |"
36263631 rf"{ DocumentToken .ORDERED_LIST .value } |"
36273632 rf"{ DocumentToken .UNORDERED_LIST .value } |"
36283633 rf"{ DocItemLabel .KEY_VALUE_REGION } |"
@@ -3830,12 +3835,23 @@ def parse_key_value_item(
38303835 if tag_name in [DocItemLabel .PAGE_HEADER , DocItemLabel .PAGE_FOOTER ]:
38313836 content_layer = ContentLayer .FURNITURE
38323837
3833- doc .add_text (
3834- label = doc_label ,
3835- text = text_content ,
3836- prov = element_prov ,
3837- content_layer = content_layer ,
3838- )
3838+ if doc_label == DocItemLabel .SECTION_HEADER :
3839+ # Extract level from tag_name (e.g. "section_header_level_1" -> 1)
3840+ level = int (tag_name .split ("_" )[- 1 ])
3841+ doc .add_heading (
3842+ text = text_content ,
3843+ level = level ,
3844+ prov = element_prov ,
3845+ content_layer = content_layer ,
3846+ )
3847+ else :
3848+ doc .add_text (
3849+ label = doc_label ,
3850+ text = text_content ,
3851+ prov = element_prov ,
3852+ content_layer = content_layer ,
3853+ )
3854+
38393855 return doc
38403856
38413857 @deprecated ("Use save_as_doctags instead." )
0 commit comments