@@ -163,6 +163,36 @@ def handle_endtag(self, tag):
163163 # If we only have one newline before block element, add another
164164 if not item .endswith ('\n \n ' ) and item .endswith ('\n ' ):
165165 self .cleandoc .append ('\n ' )
166+
167+ # Flatten the HTML structure of "markdown" blocks such that when they
168+ # get parsed, content will be parsed similar inside the blocks as it
169+ # does outside the block. Having real HTML elements in the tree before
170+ # the adjacent content is processed can cause unpredictable
171+ # issues for extensions.
172+ current = element
173+ last = []
174+ while current is not None :
175+ for child in list (current ):
176+ current .remove (child )
177+ text = current .text if current .text is not None else ''
178+ tail = child .tail if child .tail is not None else ''
179+ child .tail = None
180+ state = child .attrib .get ('markdown' , 'off' )
181+
182+ # If the tail is just a new line, omit it.
183+ if tail == '\n ' :
184+ tail = ''
185+
186+ # Process the block nested under the span appropriately
187+ if state in ('span' , 'block' ):
188+ current .text = text + '\n ' + self .md .htmlStash .store (child ) + '\n ' + tail
189+ last .append (child )
190+ else :
191+ child .attrib .pop ('markdown' )
192+ [c .attrib .pop ('markdown' , None ) for c in child .iter ()]
193+ current .text = text + '\n ' + self .md .htmlStash .store (child ) + '\n ' + tail
194+ current = last .pop (0 ) if last else None
195+
166196 self .cleandoc .append (self .md .htmlStash .store (element ))
167197 self .cleandoc .append ('\n \n ' )
168198 self .state = []
@@ -270,53 +300,53 @@ def parse_element_content(self, element: etree.Element) -> None:
270300 md_attr = element .attrib .pop ('markdown' , 'off' )
271301
272302 if md_attr == 'block' :
273- # Parse content as block level
274- # The order in which the different parts are parsed (text, children, tails) is important here as the
275- # order of elements needs to be preserved. We can't be inserting items at a later point in the current
276- # iteration as we don't want to do raw processing on elements created from parsing Markdown text (for
277- # example). Therefore, the order of operations is children, tails, text.
278-
279- # Recursively parse existing children from raw HTML
280- for child in list (element ):
281- self .parse_element_content (child )
282-
283- # Parse Markdown text in tail of children. Do this separate to avoid raw HTML parsing.
284- # Save the position of each item to be inserted later in reverse.
285- tails = []
286- for pos , child in enumerate (element ):
287- if child .tail :
288- block = child .tail .rstrip ('\n ' )
289- child .tail = ''
290- # Use a dummy placeholder element.
291- dummy = etree .Element ('div' )
292- self .parser .parseBlocks (dummy , block .split ('\n \n ' ))
293- children = list (dummy )
294- children .reverse ()
295- tails .append ((pos + 1 , children ))
296-
297- # Insert the elements created from the tails in reverse.
298- tails .reverse ()
299- for pos , tail in tails :
300- for item in tail :
301- element .insert (pos , item )
302-
303- # Parse Markdown text content. Do this last to avoid raw HTML parsing.
303+ # Parse the block elements content as Markdown
304304 if element .text :
305305 block = element .text .rstrip ('\n ' )
306306 element .text = ''
307- # Use a dummy placeholder element as the content needs to get inserted before existing children.
308- dummy = etree .Element ('div' )
309- self .parser .parseBlocks (dummy , block .split ('\n \n ' ))
310- children = list (dummy )
311- children .reverse ()
312- for child in children :
313- element .insert (0 , child )
307+ self .parser .parseBlocks (element , block .split ('\n \n ' ))
314308
315309 elif md_attr == 'span' :
316- # Span level parsing will be handled by inline processors.
317- # Walk children here to remove any `markdown` attributes.
318- for child in list (element ):
319- self .parse_element_content (child )
310+ # Span elements need to be recursively processed for block elements and raw HTML
311+ # as their content is not normally accessed by block processors, so expand stashed
312+ # HTML under the span. Span content itself will not be parsed here, but will await
313+ # the inline parser.
314+ block = element .text
315+ element .text = ''
316+ child = None
317+ start = 0
318+
319+ # Search the content for HTML placeholders and process the elements
320+ for m in util .HTML_PLACEHOLDER_RE .finditer (block ):
321+ index = int (m .group (1 ))
322+ el = self .parser .md .htmlStash .rawHtmlBlocks [index ]
323+ end = m .start ()
324+
325+ # Cut out the placeholder and insert the processed element back in.
326+ if isinstance (el , etree .Element ):
327+ if child is None :
328+ element .text = block [start :end ]
329+ else :
330+ child .tail = (child .tail if child .tail is not None else '' ) + block [start :end ]
331+ element .append (el )
332+ self .parse_element_content (el )
333+ child = el
334+ self .parser .md .htmlStash .rawHtmlBlocks .pop (index )
335+ self .parser .md .htmlStash .rawHtmlBlocks .insert (index , '' )
336+
337+ else :
338+ # Not an element object, so insert content back into the element
339+ if child is None :
340+ element .text = block [start :end ]
341+ else :
342+ child .tail = (child .tail if child .tail is not None else '' )+ block [start :end ]
343+ start = end
344+
345+ # Insert anything left after last element
346+ if child is None :
347+ element .text = block [start :]
348+ else :
349+ child .tail = (child .tail if child .tail is not None else '' ) + block [start :]
320350
321351 else :
322352 # Disable inline parsing for everything else
@@ -336,8 +366,8 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool:
336366 if isinstance (element , etree .Element ):
337367 # We have a matched element. Process it.
338368 blocks .pop (0 )
339- self .parse_element_content (element )
340369 parent .append (element )
370+ self .parse_element_content (element )
341371 # Cleanup stash. Replace element with empty string to avoid confusing postprocessor.
342372 self .parser .md .htmlStash .rawHtmlBlocks .pop (index )
343373 self .parser .md .htmlStash .rawHtmlBlocks .insert (index , '' )
0 commit comments