@@ -139,27 +139,30 @@ proc scanTextWithLinks(lex: var MarkdownLexer, wsno: int): seq[MarkdownTokenTupl
139139 tokens.add (newTokenTuple (lex, mtkText, buf, wsno= wsno))
140140 return tokens
141141
142+ const newSpace = " "  ## Shared single-space payload for space `mtkText` tokens (avoids allocating a fresh string per call; see the `' '` branch of `nextToken`).
142143proc nextToken * (lex: var MarkdownLexer ): MarkdownTokenTuple =
143144 # # Lex the next token from the input
144145 # Remove local wsno, use lex.wsno
145146 # Skip whitespace and newlines before token
146- while true :
147- # Only normalize line endings and consume newlines; leave spaces/tabs
148- # so that they can be emitted as text tokens (preserve inline spaces).
149- if lex.current == '\n ' :
150- lex.col = 0
151- lex.advance ()
152- continue
153- elif lex.current == '\r ' :
154- if lex.peek () == '\n ' :
155- lex.advance ()
156- inc lex.line
147+ var newlineCount = 0
148+ while lex.current == '\n ' or lex.current == '\r ' :
149+ # CRLF -> consume both as a single newline
150+ if lex.current == '\r ' and lex.peek () == '\n ' :
151+ lex.advance () # consume '\r', now at '\n'
152+ # consume the newline character
153+ if lex.current == '\n ' or lex.current == '\r ' :
154+ inc newlineCount
157155 lex.col = 0
158156 lex.advance ()
159157 continue
160158 break
161- # End of input
159+
160+ if newlineCount > 2 :
161+ # More than 2 newlines -> paragraph break
162+ return newTokenTuple (lex, mtkParagraph, wsno= lex.wsno)
163+
162164 if lex.current == '\0 ' :
165+ # End of input
163166 return newTokenTuple (lex, mtkEOF, wsno= lex.wsno)
164167
165168 # Return buffered tokens if present
@@ -445,9 +448,8 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
445448 lex.advance ()
446449 return newTokenTuple (lex, mtkLineBreak, wsno= lex.wsno)
447450 else :
448- var text = " "
449451 lex.advance ()
450- return newTokenTuple (lex, mtkText, text , wsno= lex.wsno)
452+ return newTokenTuple (lex, mtkText, newSpace , wsno= lex.wsno)
451453 of '\t ' :
452454 # Treat tabs as text tokens similar to spaces.
453455 var text = " \t "
@@ -512,16 +514,21 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
512514 lex.advance ()
513515 return newTokenTuple (lex, mtkHtml, htmlContent, wsno= lex.wsno, attrs= some (@ [tag]))
514516 of '|' :
515- # Table row
516- lex.strbuf.setLen (0 )
517- while lex.current notin {'\n ' , '\r ' , '\0 ' }:
518- lex.strbuf.add (lex.current)
517+ if lex.col == 0 or lex.wsno > 0 :
518+ # table row
519+ lex.strbuf.setLen (0 )
520+ while lex.current notin {'\n ' , '\r ' , '\0 ' }:
521+ lex.strbuf.add (lex.current)
522+ lex.advance ()
523+ return newTokenTuple (lex, mtkTable, lex.strbuf, wsno= lex.wsno)
524+ else :
525+ # treat as text
519526 lex.advance ()
520- return newTokenTuple (lex, mtkTable, lex.strbuf , wsno= lex.wsno)
527+ return newTokenTuple (lex, mtkText, " | " , wsno= lex.wsno)
521528 else :
522529 # Paragraph or plain text
523530 # Scan for auto links anywhere in the text
524- let tokens = lex.scanTextWithLinks (lex.wsno)
531+ let tokens = lex.scanTextWithLinks (lex.wsno) # This should be optional, no?
525532 if tokens.len > 0 :
526533 if tokens.len > 1 :
527534 lex.pendingTokens = tokens[1 ..^ 1 ]
0 commit comments