Skip to content

Commit 8b218d5

Browse files
committed
update
Signed-off-by: George Lemon <georgelemon@protonmail.com>
1 parent 8df25db commit 8b218d5

File tree

1 file changed

+45
-44
lines changed

1 file changed

+45
-44
lines changed

src/marvdown/lexer.nim

Lines changed: 45 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -92,19 +92,23 @@ proc initToken(lex: var MarkdownLexer, kind: static MarkdownTokenKind, wsno: int
9292
(kind, "", lex.line, lex.pos, lex.col, wsno, none(seq[string]))
9393

9494
# For tokens that need a value (identifiers, numbers, strings, etc.)
95-
proc initToken(lex: var MarkdownLexer, kind: MarkdownTokenKind, value: sink string, wsno: int): MarkdownTokenTuple =
95+
proc initToken(lex: var MarkdownLexer, kind: MarkdownTokenKind,
96+
value: sink string, wsno: int): MarkdownTokenTuple =
9697
(kind, value, lex.line, lex.pos, lex.col, wsno, none(seq[string]))
9798

98-
proc newTokenTuple(lex: MarkdownLexer, kind: MarkdownTokenKind, token: string = "", attrs: Option[seq[string]] = none(seq[string])): MarkdownTokenTuple =
99-
(kind, token, lex.line, lex.col - token.len, lex.pos, lex.wsno, attrs)
99+
proc newTokenTuple(lex: MarkdownLexer, kind: MarkdownTokenKind,
100+
token: string = "", wsno: int = 0,
101+
attrs: Option[seq[string]] = none(seq[string])
102+
): MarkdownTokenTuple =
103+
(kind, token, lex.line, lex.col - token.len, lex.pos, wsno, attrs)
100104

101105
proc handleAutoLink(lex: var MarkdownLexer, wsno: int): MarkdownTokenTuple =
102106
var tempStrBuf = ""
103107
let startPos = lex.pos
104108
while lex.current notin {' ', '\t', '\n', '\r', '\0'}:
105109
tempStrBuf.add(lex.current)
106110
lex.advance()
107-
return newTokenTuple(lex, mtkLink, attrs=some(@[tempStrBuf, tempStrBuf]))
111+
return newTokenTuple(lex, mtkLink, wsno=wsno, attrs=some(@[tempStrBuf, tempStrBuf]))
108112

109113
proc scanTextWithLinks(lex: var MarkdownLexer, wsno: int): seq[MarkdownTokenTuple] =
110114
## Scan plain text and emit mtkText and mtkLink tokens for URLs found anywhere
@@ -118,15 +122,15 @@ proc scanTextWithLinks(lex: var MarkdownLexer, wsno: int): seq[MarkdownTokenTupl
118122
if isHttp or isHttps:
119123
# Flush buffer as text token
120124
if buf.len > 0:
121-
tokens.add(newTokenTuple(lex, mtkText, buf))
125+
tokens.add(newTokenTuple(lex, mtkText, buf, wsno=wsno))
122126
buf.setLen(0)
123127
# Handle link
124128
tokens.add(lex.handleAutoLink(wsno))
125129
continue
126130
buf.add(lex.current)
127131
lex.advance()
128132
if buf.len > 0:
129-
tokens.add(newTokenTuple(lex, mtkText, buf))
133+
tokens.add(newTokenTuple(lex, mtkText, buf, wsno=wsno))
130134
return tokens
131135

132136
proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
@@ -150,7 +154,7 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
150154
break
151155
# End of input
152156
if lex.current == '\0':
153-
return newTokenTuple(lex, mtkEOF)
157+
return newTokenTuple(lex, mtkEOF, wsno=lex.wsno)
154158

155159
# Return buffered tokens if present
156160
if lex.pendingTokens.len > 0:
@@ -171,9 +175,9 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
171175
while lex.current notin {'\n', '\r', '\0'}:
172176
lex.strbuf.add(lex.current)
173177
lex.advance()
174-
return newTokenTuple(lex, mtkHeading, lex.strbuf.strip(), attrs=some(@[$level]))
178+
return newTokenTuple(lex, mtkHeading, lex.strbuf.strip(), wsno=lex.wsno, attrs=some(@[$level]))
175179
else:
176-
return newTokenTuple(lex, mtkText, repeat('#', level))
180+
return newTokenTuple(lex, mtkText, repeat('#', level), wsno=lex.wsno)
177181
of '-', #['*',]# '_':
178182
# Horizontal rule or unordered list or emphasis/strong
179183

@@ -185,7 +189,7 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
185189

186190
if count >= 3 and (lex.current == '\n' or lex.current == '\0'):
187191
# Horizontal rule
188-
return newTokenTuple(lex, mtkHorizontalRule, repeat(ch, count))
192+
return newTokenTuple(lex, mtkHorizontalRule, repeat(ch, count), wsno=lex.wsno)
189193

190194
if (ch in {'-', '*', '+'}) and (lex.current == ' ' or lex.current == '\t'):
191195
# Unordered list item
@@ -215,25 +219,25 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
215219
if cbChar == 'x': "checked"
216220
else: "unchecked"
217221
return newTokenTuple(lex, mtkListItemCheckbox,
218-
lex.strbuf.strip(), attrs=some(@["checkbox", checkState]))
222+
lex.strbuf.strip(), wsno=lex.wsno, attrs=some(@["checkbox", checkState]))
219223

220224
# Otherwise, normal list item
221225
lex.strbuf.setLen(0)
222226
while lex.current notin {'\n', '\r', '\0'}:
223227
lex.strbuf.add(lex.current)
224228
lex.advance()
225-
return newTokenTuple(lex, mtkListItem, lex.strbuf.strip())
229+
return newTokenTuple(lex, mtkListItem, lex.strbuf.strip(), wsno=lex.wsno)
226230

227231
if ch in {'*', '_'}:
228232
# Emphasis or strong
229233
if lex.peek() == ch:
230234
lex.advance(); lex.advance() # skip both delimiters
231-
return newTokenTuple(lex, mtkStrong)
235+
return newTokenTuple(lex, mtkStrong, wsno=lex.wsno)
232236
else:
233237
# lex.advance(); not needed, already advanced
234-
return newTokenTuple(lex, mtkEmphasis)
238+
return newTokenTuple(lex, mtkEmphasis, wsno=lex.wsno)
235239
else:
236-
return newTokenTuple(lex, mtkText, repeat(ch, count))
240+
return newTokenTuple(lex, mtkText, repeat(ch, count), wsno=lex.wsno)
237241
of '>':
238242
# Blockquote
239243
lex.advance()
@@ -243,7 +247,7 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
243247
while lex.current notin {'\n', '\r', '\0'}:
244248
lex.strbuf.add(lex.current)
245249
lex.advance()
246-
return newTokenTuple(lex, mtkBlockquote, lex.strbuf.strip())
250+
return newTokenTuple(lex, mtkBlockquote, lex.strbuf.strip(), wsno=lex.wsno)
247251
of '0'..'9':
248252
# Ordered list item
249253
lex.strbuf.setLen(0)
@@ -259,9 +263,9 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
259263
while lex.current notin {'\n', '\r', '\0'}:
260264
lex.strbuf.add(lex.current)
261265
lex.advance()
262-
return newTokenTuple(lex, mtkOListItem, lex.strbuf.strip())
266+
return newTokenTuple(lex, mtkOListItem, lex.strbuf.strip(), wsno=lex.wsno)
263267
else:
264-
return newTokenTuple(lex, mtkText, lex.strbuf)
268+
return newTokenTuple(lex, mtkText, lex.strbuf, wsno=lex.wsno)
265269
of '`', '~':
266270
# Fenced code block (``` or ~~~)
267271
if lex.peek() == lex.current and lex.peek(2) == lex.current:
@@ -284,7 +288,7 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
284288
lex.advance(); lex.advance(); lex.advance()
285289
if lex.current in {'\n', '\r'}:
286290
lex.advance()
287-
return newTokenTuple(lex, mtkCodeBlock, lex.strbuf, attrs=some(@[lang]))
291+
return newTokenTuple(lex, mtkCodeBlock, lex.strbuf, wsno=lex.wsno, attrs=some(@[lang]))
288292
elif lex.current == '`':
289293
# Inline code
290294
lex.advance()
@@ -294,13 +298,13 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
294298
lex.advance()
295299
if lex.current == '`':
296300
lex.advance()
297-
return newTokenTuple(lex, mtkInlineCode, lex.strbuf)
301+
return newTokenTuple(lex, mtkInlineCode, lex.strbuf, wsno=lex.wsno)
298302
else:
299303
# treat as text
300304
lex.strbuf.setLen(0)
301305
lex.strbuf.add(lex.current)
302306
lex.advance()
303-
return newTokenTuple(lex, mtkText, lex.strbuf)
307+
return newTokenTuple(lex, mtkText, lex.strbuf, wsno=lex.wsno)
304308
of '!':
305309
# Image
306310
if lex.peek() == '[':
@@ -338,13 +342,13 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
338342
if lex.current == ')':
339343
lex.advance()
340344
if title.len > 0:
341-
return newTokenTuple(lex, mtkImage, attrs=some(@[alt, src, title]))
345+
return newTokenTuple(lex, mtkImage, wsno=lex.wsno, attrs=some(@[alt, src, title]))
342346
else:
343-
return newTokenTuple(lex, mtkImage, attrs=some(@[alt, src]))
347+
return newTokenTuple(lex, mtkImage, wsno=lex.wsno, attrs=some(@[alt, src]))
344348
else:
345349
var text = "!"
346350
lex.advance()
347-
return newTokenTuple(lex, mtkText, text)
351+
return newTokenTuple(lex, mtkText, text, wsno=lex.wsno)
348352
of '[':
349353
# Link, Checkbox, or Footnote
350354
if lex.peek() == '^':
@@ -368,20 +372,18 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
368372
lex.strbuf.add(lex.current)
369373
lex.advance()
370374
return newTokenTuple(lex, mtkFootnoteDef,
371-
lex.strbuf.strip(), attrs=some(@[footId]))
375+
lex.strbuf.strip(), wsno=lex.wsno, attrs=some(@[footId]))
372376
else:
373377
# Footnote reference: [^id]
374378
return newTokenTuple(lex, mtkFootnoteRef, "",
375-
attrs=some(@[footId]))
379+
wsno=lex.wsno, attrs=some(@[footId]))
376380
# Regular link or checkbox
377381
lex.advance()
378382
lex.strbuf.setLen(0)
379-
380383
while lex.current != ']' and lex.current != '\0':
381384
lex.strbuf.add(lex.current)
382385
lex.advance()
383386
let text = lex.strbuf
384-
385387
if lex.current == ']':
386388
lex.advance()
387389
if lex.current == '(':
@@ -411,35 +413,34 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
411413
if lex.current == ')':
412414
lex.advance()
413415
if title.len > 0:
414-
return newTokenTuple(lex, mtkLink, attrs=some(@[text, href, title]))
416+
return newTokenTuple(lex, mtkLink, wsno=lex.wsno, attrs=some(@[text, href, title]))
415417
else:
416-
return newTokenTuple(lex, mtkLink, attrs=some(@[text, href]))
417-
# handle checkboxes
418-
let checkState =
419-
if text == "x": "checked"
420-
else: "unchecked"
421-
return newTokenTuple(lex, mtkListItemCheckbox,
422-
attrs=some(@["checkbox", checkState]))
423-
return newTokenTuple(lex, mtkText, text)
418+
return newTokenTuple(lex, mtkLink, wsno=lex.wsno, attrs=some(@[text, href]))
419+
let checkState =
420+
if text == "x": "checked"
421+
else: "unchecked"
422+
return newTokenTuple(lex, mtkListItemCheckbox,
423+
wsno=lex.wsno, attrs=some(@["checkbox", checkState]))
424+
return newTokenTuple(lex, mtkText, text, wsno=lex.wsno)
424425
of '*':
425426
# Emphasis or strong
426427
if lex.peek() == '*':
427428
lex.advance(); lex.advance()
428-
return newTokenTuple(lex, mtkStrong)
429+
return newTokenTuple(lex, mtkStrong, wsno=lex.wsno)
429430
else:
430431
lex.advance();
431-
return newTokenTuple(lex, mtkEmphasis)
432+
return newTokenTuple(lex, mtkEmphasis, wsno=lex.wsno)
432433
of ' ':
433434
# Line break (two or more spaces at end of line)
434435
if lex.peek() == ' ' and (lex.peek(2) == '\n' or lex.peek(2) == '\r'):
435436
lex.advance(); lex.advance();
436437
if lex.current in {'\n', '\r'}:
437438
lex.advance()
438-
return newTokenTuple(lex, mtkLineBreak)
439+
return newTokenTuple(lex, mtkLineBreak, wsno=lex.wsno)
439440
else:
440441
var text = " "
441442
lex.advance()
442-
return newTokenTuple(lex, mtkText, text)
443+
return newTokenTuple(lex, mtkText, text, wsno=lex.wsno)
443444
of '<':
444445
# Raw HTML
445446
lex.strbuf.setLen(0)
@@ -460,14 +461,14 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
460461
if lex.current == '>':
461462
lex.strbuf.add(lex.current)
462463
lex.advance()
463-
return newTokenTuple(lex, mtkHtml, lex.strbuf, attrs=some(@[tag]))
464+
return newTokenTuple(lex, mtkHtml, lex.strbuf, wsno=lex.wsno, attrs=some(@[tag]))
464465
of '|':
465466
# Table row
466467
lex.strbuf.setLen(0)
467468
while lex.current notin {'\n', '\r', '\0'}:
468469
lex.strbuf.add(lex.current)
469470
lex.advance()
470-
return newTokenTuple(lex, mtkTable, lex.strbuf)
471+
return newTokenTuple(lex, mtkTable, lex.strbuf, wsno=lex.wsno)
471472
else:
472473
# Paragraph or plain text
473474
# Scan for auto links anywhere in the text
@@ -476,4 +477,4 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
476477
if tokens.len > 1:
477478
lex.pendingTokens = tokens[1..^1]
478479
return tokens[0]
479-
return newTokenTuple(lex, mtkUnknown)
480+
return newTokenTuple(lex, mtkUnknown, wsno=lex.wsno)

0 commit comments

Comments
 (0)