@@ -71,8 +71,14 @@ proc advance(lex: var MarkdownLexer) =
7171 inc lex.line
7272 lex.col = 0
7373 lex.wsno = 0
74- elif lex.current in {' ' , '\t ' , '\r ' }:
75- inc lex.wsno
74+ elif lex.current in {' ' , '\t ' }:
75+ # Only count indentation (wsno) when whitespace is at start of line.
76+ if lex.col == 0 :
77+ inc lex.wsno
78+ inc lex.col
79+ elif lex.current == '\r ' :
80+ # Treat CR similarly to other non-leading whitespace; do not
81+ # increment wsno for it.
7682 inc lex.col
7783 else :
7884 lex.wsno = 0
@@ -138,8 +144,8 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
138144 # Remove local wsno, use lex.wsno
139145 # Skip whitespace and newlines before token
140146 while true :
141- while lex.current in { ' ' , ' \t ' , ' \r ' }:
142- lex. advance ()
147+ # Only normalize line endings and consume newlines; leave spaces/tabs
148+ # so that they can be emitted as text tokens (preserve inline spaces).
143149 if lex.current == '\n ' :
144150 lex.col = 0
145151 lex.advance ()
@@ -432,36 +438,79 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
432438 return newTokenTuple (lex, mtkEmphasis, wsno= lex.wsno)
433439 of ' ' :
434440 # Line break (two or more spaces at end of line)
441+ # Tabs are not matched by this branch; the separate '\t' branch below emits them as text tokens.
435442 if lex.peek () == ' ' and (lex.peek (2 ) == '\n ' or lex.peek (2 ) == '\r ' ):
436443 lex.advance (); lex.advance ();
437444 if lex.current in {'\n ' , '\r ' }:
438445 lex.advance ()
439- return newTokenTuple (lex, mtkLineBreak, wsno= lex.wsno)
446+ return newTokenTuple (lex, mtkLineBreak, wsno= lex.wsno)
440447 else :
441448 var text = " "
442449 lex.advance ()
443450 return newTokenTuple (lex, mtkText, text, wsno= lex.wsno)
451+ of '\t ' :
452+ # Treat tabs as text tokens similar to spaces.
453+ var text = " \t "
454+ lex.advance ()
455+ return newTokenTuple (lex, mtkText, text, wsno= lex.wsno)
444456 of '<' :
445- # Raw HTML
457+ # Raw HTML block: consume until matching closing tag (handles nesting)
446458 lex.strbuf.setLen (0 )
447459 var tag: string
448- var stopTag = false
460+ var stopTagName = false
461+ # Parse opening tag and get tag name
462+ let tagStart = lex.pos
449463 while true :
450464 case lex.current
451465 of '>' , '\0 ' : break
452466 of ' ' :
453- stopTag = true
467+ stopTagName = true
454468 lex.strbuf.add (lex.current)
455469 of 'a' .. 'z' , 'A' .. 'Z' , '0' .. '9' , '_' , '-' :
456470 lex.strbuf.add (lex.current)
457- if not stopTag: tag.add (lex.current)
471+ if not stopTagName:
472+ tag.add (lex.current)
458473 else :
459474 lex.strbuf.add (lex.current)
460475 lex.advance ()
461476 if lex.current == '>' :
462477 lex.strbuf.add (lex.current)
463478 lex.advance ()
464- return newTokenTuple (lex, mtkHtml, lex.strbuf, wsno= lex.wsno, attrs= some (@ [tag]))
479+ # now consume until outermost closing tag
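480+ # TODO: handle self-closing/void tags (e.g. <br/>, <img>): they have no closing tag, so the loop below runs on to the next matching </tag> or to end of input.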
481+ var htmlContent = lex.strbuf
482+ var depth = 1
483+ while depth > 0 and lex.current != '\0 ' :
484+ if lex.current == '<' :
485+ if lex.peek () == '/' :
486+ # Possible closing tag
487+ var closeTag = " "
488+ var tempPos = lex.pos + 2
489+ while tempPos < lex.input.len and lex.input[tempPos] in {'a' .. 'z' , 'A' .. 'Z' , '0' .. '9' , '_' , '-' }:
490+ closeTag.add (lex.input[tempPos])
491+ inc tempPos
492+ if closeTag == tag:
493+ depth -= 1
494+ # Add chars to htmlContent until '>'
495+ while lex.current != '>' and lex.current != '\0 ' :
496+ htmlContent.add (lex.current)
497+ lex.advance ()
498+ if lex.current == '>' :
499+ htmlContent.add (lex.current)
500+ lex.advance ()
501+ continue
502+ else :
503+ # Possible nested opening tag
504+ var openTag = " "
505+ var tempPos = lex.pos + 1
506+ while tempPos < lex.input.len and lex.input[tempPos] in {'a' .. 'z' , 'A' .. 'Z' , '0' .. '9' , '_' , '-' }:
507+ openTag.add (lex.input[tempPos])
508+ inc tempPos
509+ if openTag == tag:
510+ depth += 1
511+ htmlContent.add (lex.current)
512+ lex.advance ()
513+ return newTokenTuple (lex, mtkHtml, htmlContent, wsno= lex.wsno, attrs= some (@ [tag]))
465514 of '|' :
466515 # Table row
467516 lex.strbuf.setLen (0 )
0 commit comments