@@ -106,36 +106,60 @@ def serialize(
106106 doc_serializer : BaseDocSerializer ,
107107 doc : DoclingDocument ,
108108 is_inline_scope : bool = False ,
109+ visited : Optional [set [str ]] = None , # refs of visited items
109110 ** kwargs : Any ,
110111 ) -> SerializationResult :
111112 """Serializes the passed item."""
113+ my_visited = visited if visited is not None else set ()
112114 params = MarkdownParams (** kwargs )
113115 res_parts : list [SerializationResult ] = []
116+ text = item .text
114117 escape_html = True
115118 escape_underscores = True
116- if isinstance (item , TitleItem ):
117- text_part = f"# { item .text } "
118- elif isinstance (item , SectionHeaderItem ):
119- text_part = f"{ (item .level + 1 ) * '#' } { item .text } "
119+ processing_pending = True
120+ if isinstance (item , (TitleItem , SectionHeaderItem )):
121+ # case where processing/formatting should be applied first (in inner scope)
122+ processing_pending = False
123+ if (
124+ text == ""
125+ and len (item .children ) == 1
126+ and isinstance (
127+ (child_group := item .children [0 ].resolve (doc )), InlineGroup
128+ )
129+ ):
130+ # case of heading with inline
131+ ser_res = doc_serializer .serialize (item = child_group )
132+ text = ser_res .text
133+ for span in ser_res .spans :
134+ my_visited .add (span .item .self_ref )
135+ else :
136+ text = doc_serializer .post_process (
137+ text = text ,
138+ escape_html = escape_html ,
139+ escape_underscores = escape_underscores ,
140+ formatting = item .formatting ,
141+ hyperlink = item .hyperlink ,
142+ )
143+ num_hashes = 1 if isinstance (item , TitleItem ) else item .level + 1
144+ text_part = f"{ num_hashes * '#' } { text } "
120145 elif isinstance (item , CodeItem ):
121- text_part = (
122- f"`{ item .text } `" if is_inline_scope else f"```\n { item .text } \n ```"
123- )
146+ text_part = f"`{ text } `" if is_inline_scope else f"```\n { text } \n ```"
124147 escape_html = False
125148 escape_underscores = False
126149 elif isinstance (item , FormulaItem ):
127- if item . text :
128- text_part = f"${ item . text } $" if is_inline_scope else f"$${ item . text } $$"
150+ if text :
151+ text_part = f"${ text } $" if is_inline_scope else f"$${ text } $$"
129152 elif item .orig :
130153 text_part = "<!-- formula-not-decoded -->"
131154 else :
132155 text_part = ""
133156 escape_html = False
134157 escape_underscores = False
135158 elif params .wrap_width :
136- text_part = textwrap .fill (item .text , width = params .wrap_width )
159+ # NOTE: the wrap width is not guaranteed, since post-processing runs afterwards and may change the text
160+ text_part = textwrap .fill (text , width = params .wrap_width )
137161 else :
138- text_part = item . text
162+ text_part = text
139163
140164 if text_part :
141165 text_res = create_ser_result (text = text_part , span_source = item )
@@ -147,13 +171,14 @@ def serialize(
147171 res_parts .append (cap_res )
148172
149173 text = (" " if is_inline_scope else "\n \n " ).join ([r .text for r in res_parts ])
150- text = doc_serializer .post_process (
151- text = text ,
152- escape_html = escape_html ,
153- escape_underscores = escape_underscores ,
154- formatting = item .formatting ,
155- hyperlink = item .hyperlink ,
156- )
174+ if processing_pending :
175+ text = doc_serializer .post_process (
176+ text = text ,
177+ escape_html = escape_html ,
178+ escape_underscores = escape_underscores ,
179+ formatting = item .formatting ,
180+ hyperlink = item .hyperlink ,
181+ )
157182 return create_ser_result (text = text , span_source = res_parts )
158183
159184
0 commit comments