4848 pos* , line* , col* : int
4949 strbuf* : string
5050 pendingTokens: seq [MarkdownTokenTuple ] # Buffer for tokens split from text
51+ wsno: int # Track whitespace before current token
5152
5253#
5354# Markdown Lexer
@@ -58,6 +59,7 @@ proc initLexer*(input: sink string): MarkdownLexer =
5859 result .line = 1
5960 result .col = 1
6061 result .strbuf = " "
62+ result .wsno = 0
6163 if input.len > 0 :
6264 result .current = input[0 ]
6365 else :
@@ -68,7 +70,12 @@ proc advance(lex: var MarkdownLexer) =
6870 if lex.current == '\n ' :
6971 inc lex.line
7072 lex.col = 0
73+ lex.wsno = 0
74+ elif lex.current in {' ' , '\t ' , '\r ' }:
75+ inc lex.wsno
76+ inc lex.col
7177 else :
78+ lex.wsno = 0
7279 inc lex.col
7380 inc lex.pos
7481 if lex.pos < lex.input.len:
@@ -124,32 +131,26 @@ proc scanTextWithLinks(lex: var MarkdownLexer, wsno: int): seq[MarkdownTokenTupl
124131
125132proc nextToken * (lex: var MarkdownLexer ): MarkdownTokenTuple =
126133 # # Lex the next token from the input
127- var wsno = 0
134+ # Remove local wsno, use lex.wsno
128135 # Skip whitespace and newlines before token
129136 while true :
130137 while lex.current in {' ' , '\t ' , '\r ' }:
131- inc wsno
132138 lex.advance ()
133139 if lex.current == '\n ' :
134- # inc lex.line
135140 lex.col = 0
136141 lex.advance ()
137- wsno = 0
138142 continue
139143 elif lex.current == '\r ' :
140144 if lex.peek () == '\n ' :
141145 lex.advance ()
142146 inc lex.line
143147 lex.col = 0
144148 lex.advance ()
145- wsno = 0
146149 continue
147150 break
148151 # End of input
149152 if lex.current == '\0 ' :
150- return newTokenTuple (lex, mtkEOF, wsno= wsno)
151-
152- # let startCol = wsno # not needed anymore
153+ return newTokenTuple (lex, mtkEOF, wsno= lex.wsno)
153154
154155 # Return buffered tokens if present
155156 if lex.pendingTokens.len > 0 :
@@ -170,24 +171,32 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
170171 while lex.current notin {'\n ' , '\r ' , '\0 ' }:
171172 lex.strbuf.add (lex.current)
172173 lex.advance ()
173- return newTokenTuple (lex, mtkHeading, lex.strbuf.strip (), wsno= wsno, attrs= some (@ [$ level]))
174+ return newTokenTuple (lex, mtkHeading, lex.strbuf.strip (), wsno= lex. wsno, attrs= some (@ [$ level]))
174175 else :
175- return newTokenTuple (lex, mtkText, repeat ('#' , level), wsno= wsno)
176+ return newTokenTuple (lex, mtkText, repeat ('#' , level), wsno= lex. wsno)
176177 of '-' , #[ '*',]# '_' :
177178 # Horizontal rule or unordered list or emphasis/strong
179+
178180 let ch = lex.current
179181 var count = 0
180182 while lex.current == ch:
181183 inc count
182184 lex.advance ()
185+
183186 if count >= 3 and (lex.current == '\n ' or lex.current == '\0 ' ):
184- # it's a horizontal rule!
185- return newTokenTuple (lex, mtkHorizontalRule, repeat (ch, count), wsno= wsno)
186- elif (ch in {'-' , '*' , '+' }) and (lex.current == ' ' or lex.current == '\t ' ):
187+ # Horizontal rule
188+ return newTokenTuple (lex, mtkHorizontalRule, repeat (ch, count), wsno= lex.wsno)
189+
190+ if (ch in {'-' , '*' , '+' }) and (lex.current == ' ' or lex.current == '\t ' ):
191+ # Unordered list item
187192 lex.advance ()
193+ while lex.current == ' ' or lex.current == '\t ' :
194+ lex.advance ()
195+
188196 # Check for checkbox pattern
189197 while lex.current == ' ' or lex.current == '\t ' :
190198 lex.advance ()
199+
191200 if lex.current == '[' and (lex.peek () == 'x' or lex.peek () == ' ' ):
192201 lex.advance () # skip '['
193202 let cbChar = lex.current
@@ -206,23 +215,25 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
206215 if cbChar == 'x' : " checked"
207216 else : " unchecked"
208217 return newTokenTuple (lex, mtkListItemCheckbox,
209- lex.strbuf.strip (), wsno= wsno, attrs= some (@ [" checkbox" , checkState]))
218+ lex.strbuf.strip (), wsno= lex.wsno, attrs= some (@ [" checkbox" , checkState]))
219+
210220 # Otherwise, normal list item
211221 lex.strbuf.setLen (0 )
212222 while lex.current notin {'\n ' , '\r ' , '\0 ' }:
213223 lex.strbuf.add (lex.current)
214224 lex.advance ()
215- return newTokenTuple (lex, mtkListItem, lex.strbuf.strip (), wsno= wsno)
216- elif ch in {'*' , '_' }:
225+ return newTokenTuple (lex, mtkListItem, lex.strbuf.strip (), wsno= lex.wsno)
226+
227+ if ch in {'*' , '_' }:
217228 # Emphasis or strong
218229 if lex.peek () == ch:
219230 lex.advance (); lex.advance () # skip both delimiters
220- return newTokenTuple (lex, mtkStrong, wsno= wsno)
231+ return newTokenTuple (lex, mtkStrong, wsno= lex. wsno)
221232 else :
222- lex.advance ();
223- return newTokenTuple (lex, mtkEmphasis, wsno= wsno)
233+ # lex.advance(); not needed, already advanced
234+ return newTokenTuple (lex, mtkEmphasis, wsno= lex. wsno)
224235 else :
225- return newTokenTuple (lex, mtkText, repeat (ch, count), wsno= wsno)
236+ return newTokenTuple (lex, mtkText, repeat (ch, count), wsno= lex. wsno)
226237 of '>' :
227238 # Blockquote
228239 lex.advance ()
@@ -232,7 +243,7 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
232243 while lex.current notin {'\n ' , '\r ' , '\0 ' }:
233244 lex.strbuf.add (lex.current)
234245 lex.advance ()
235- return newTokenTuple (lex, mtkBlockquote, lex.strbuf.strip (), wsno= wsno)
246+ return newTokenTuple (lex, mtkBlockquote, lex.strbuf.strip (), wsno= lex. wsno)
236247 of '0' .. '9' :
237248 # Ordered list item
238249 lex.strbuf.setLen (0 )
@@ -248,9 +259,9 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
248259 while lex.current notin {'\n ' , '\r ' , '\0 ' }:
249260 lex.strbuf.add (lex.current)
250261 lex.advance ()
251- return newTokenTuple (lex, mtkOListItem, lex.strbuf.strip (), wsno= wsno)
262+ return newTokenTuple (lex, mtkOListItem, lex.strbuf.strip (), wsno= lex. wsno)
252263 else :
253- return newTokenTuple (lex, mtkText, lex.strbuf, wsno= wsno)
264+ return newTokenTuple (lex, mtkText, lex.strbuf, wsno= lex. wsno)
254265 of '`' , '~' :
255266 # Fenced code block (``` or ~~~)
256267 if lex.peek () == lex.current and lex.peek (2 ) == lex.current:
@@ -273,7 +284,7 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
273284 lex.advance (); lex.advance (); lex.advance ()
274285 if lex.current in {'\n ' , '\r ' }:
275286 lex.advance ()
276- return newTokenTuple (lex, mtkCodeBlock, lex.strbuf, wsno= wsno, attrs= some (@ [lang]))
287+ return newTokenTuple (lex, mtkCodeBlock, lex.strbuf, wsno= lex. wsno, attrs= some (@ [lang]))
277288 elif lex.current == '`' :
278289 # Inline code
279290 lex.advance ()
@@ -283,13 +294,13 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
283294 lex.advance ()
284295 if lex.current == '`' :
285296 lex.advance ()
286- return newTokenTuple (lex, mtkInlineCode, lex.strbuf, wsno= wsno)
297+ return newTokenTuple (lex, mtkInlineCode, lex.strbuf, wsno= lex. wsno)
287298 else :
288299 # treat as text
289300 lex.strbuf.setLen (0 )
290301 lex.strbuf.add (lex.current)
291302 lex.advance ()
292- return newTokenTuple (lex, mtkText, lex.strbuf, wsno= wsno)
303+ return newTokenTuple (lex, mtkText, lex.strbuf, wsno= lex. wsno)
293304 of '!' :
294305 # Image
295306 if lex.peek () == '[' :
@@ -327,13 +338,13 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
327338 if lex.current == ')' :
328339 lex.advance ()
329340 if title.len > 0 :
330- return newTokenTuple (lex, mtkImage, wsno= wsno, attrs= some (@ [alt, src, title]))
341+ return newTokenTuple (lex, mtkImage, wsno= lex. wsno, attrs= some (@ [alt, src, title]))
331342 else :
332- return newTokenTuple (lex, mtkImage, wsno= wsno, attrs= some (@ [alt, src]))
343+ return newTokenTuple (lex, mtkImage, wsno= lex. wsno, attrs= some (@ [alt, src]))
333344 else :
334345 var text = " !"
335346 lex.advance ()
336- return newTokenTuple (lex, mtkText, text, wsno= wsno)
347+ return newTokenTuple (lex, mtkText, text, wsno= lex. wsno)
337348 of '[' :
338349 # Link, Checkbox, or Footnote
339350 if lex.peek () == '^' :
@@ -357,11 +368,11 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
357368 lex.strbuf.add (lex.current)
358369 lex.advance ()
359370 return newTokenTuple (lex, mtkFootnoteDef,
360- lex.strbuf.strip (), wsno= wsno, attrs= some (@ [footId]))
371+ lex.strbuf.strip (), wsno= lex. wsno, attrs= some (@ [footId]))
361372 else :
362373 # Footnote reference: [^id]
363374 return newTokenTuple (lex, mtkFootnoteRef, " " ,
364- wsno= wsno, attrs= some (@ [footId]))
375+ wsno= lex. wsno, attrs= some (@ [footId]))
365376 # Regular link or checkbox
366377 lex.advance ()
367378 lex.strbuf.setLen (0 )
@@ -398,9 +409,9 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
398409 if lex.current == ')' :
399410 lex.advance ()
400411 if title.len > 0 :
401- return newTokenTuple (lex, mtkLink, wsno= wsno, attrs= some (@ [text, href, title]))
412+ return newTokenTuple (lex, mtkLink, wsno= lex. wsno, attrs= some (@ [text, href, title]))
402413 else :
403- return newTokenTuple (lex, mtkLink, wsno= wsno, attrs= some (@ [text, href]))
414+ return newTokenTuple (lex, mtkLink, wsno= lex. wsno, attrs= some (@ [text, href]))
404415 # elif text == "x":
405416 # # Special case for [x] checkbox
406417 # return newTokenTuple(lex, mtkListItemCheckbox,
@@ -409,26 +420,26 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
409420 # # Special case for [ ] checkbox
410421 # return newTokenTuple(lex, mtkListItemCheckbox,
411422 # wsno=wsno, attrs=some(@["checkbox", "unchecked"]))
412- return newTokenTuple (lex, mtkText, text, wsno= wsno)
423+ return newTokenTuple (lex, mtkText, text, wsno= lex. wsno)
413424 of '*' :
414425 # Emphasis or strong
415426 if lex.peek () == '*' :
416427 lex.advance (); lex.advance ()
417- return newTokenTuple (lex, mtkStrong, wsno= wsno)
428+ return newTokenTuple (lex, mtkStrong, wsno= lex. wsno)
418429 else :
419430 lex.advance ();
420- return newTokenTuple (lex, mtkEmphasis, wsno= wsno)
431+ return newTokenTuple (lex, mtkEmphasis, wsno= lex. wsno)
421432 of ' ' :
422433 # Line break (two or more spaces at end of line)
423434 if lex.peek () == ' ' and (lex.peek (2 ) == '\n ' or lex.peek (2 ) == '\r ' ):
424435 lex.advance (); lex.advance ();
425436 if lex.current in {'\n ' , '\r ' }:
426437 lex.advance ()
427- return newTokenTuple (lex, mtkLineBreak, wsno= wsno)
438+ return newTokenTuple (lex, mtkLineBreak, wsno= lex. wsno)
428439 else :
429440 var text = " "
430441 lex.advance ()
431- return newTokenTuple (lex, mtkText, text, wsno= wsno)
442+ return newTokenTuple (lex, mtkText, text, wsno= lex. wsno)
432443 of '<' :
433444 # Raw HTML
434445 lex.strbuf.setLen (0 )
@@ -449,20 +460,20 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
449460 if lex.current == '>' :
450461 lex.strbuf.add (lex.current)
451462 lex.advance ()
452- return newTokenTuple (lex, mtkHtml, lex.strbuf, wsno= wsno, attrs= some (@ [tag]))
463+ return newTokenTuple (lex, mtkHtml, lex.strbuf, wsno= lex. wsno, attrs= some (@ [tag]))
453464 of '|' :
454465 # Table row
455466 lex.strbuf.setLen (0 )
456467 while lex.current notin {'\n ' , '\r ' , '\0 ' }:
457468 lex.strbuf.add (lex.current)
458469 lex.advance ()
459- return newTokenTuple (lex, mtkTable, lex.strbuf, wsno= wsno)
470+ return newTokenTuple (lex, mtkTable, lex.strbuf, wsno= lex. wsno)
460471 else :
461472 # Paragraph or plain text
462473 # Scan for auto links anywhere in the text
463- let tokens = lex.scanTextWithLinks (wsno)
474+ let tokens = lex.scanTextWithLinks (lex. wsno)
464475 if tokens.len > 0 :
465476 if tokens.len > 1 :
466477 lex.pendingTokens = tokens[1 ..^ 1 ]
467478 return tokens[0 ]
468- return newTokenTuple (lex, mtkUnknown, wsno= wsno)
479+ return newTokenTuple (lex, mtkUnknown, wsno= lex. wsno)
0 commit comments