Skip to content

Commit 12516ce

Browse files
committed
update
Signed-off-by: George Lemon <georgelemon@protonmail.com>
1 parent ca249d2 commit 12516ce

File tree

3 files changed

+61
-45
lines changed

3 files changed

+61
-45
lines changed

src/marvdown/ast.nim

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
# Made by Humans from OpenPeeps
55
# https://github.com/openpeeps/marvdown
66

7-
import std/options
7+
import std/[options, json]
8+
import pkg/jsony
89

910
from std/htmlparser import HtmlTag
1011

@@ -96,4 +97,7 @@ type
9697
line*: int
9798
## Line number in the source markdown
9899
wsno*: int
99-
## Whitespace count before the token (for indentation)
100+
## Whitespace count before the token (for indentation)
101+
102+
proc debugEcho*(n: MarkdownNode) =
103+
echo toJson(n)

src/marvdown/lexer.nim

Lines changed: 53 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ type
4848
pos*, line*, col*: int
4949
strbuf*: string
5050
pendingTokens: seq[MarkdownTokenTuple] # Buffer for tokens split from text
51+
wsno: int # Track whitespace before current token
5152

5253
#
5354
# Markdown Lexer
@@ -58,6 +59,7 @@ proc initLexer*(input: sink string): MarkdownLexer =
5859
result.line = 1
5960
result.col = 1
6061
result.strbuf = ""
62+
result.wsno = 0
6163
if input.len > 0:
6264
result.current = input[0]
6365
else:
@@ -68,7 +70,12 @@ proc advance(lex: var MarkdownLexer) =
6870
if lex.current == '\n':
6971
inc lex.line
7072
lex.col = 0
73+
lex.wsno = 0
74+
elif lex.current in {' ', '\t', '\r'}:
75+
inc lex.wsno
76+
inc lex.col
7177
else:
78+
lex.wsno = 0
7279
inc lex.col
7380
inc lex.pos
7481
if lex.pos < lex.input.len:
@@ -124,32 +131,26 @@ proc scanTextWithLinks(lex: var MarkdownLexer, wsno: int): seq[MarkdownTokenTupl
124131

125132
proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
126133
## Lex the next token from the input
127-
var wsno = 0
134+
# No local whitespace counter here: lex.wsno is maintained by advance()
128135
# Skip whitespace and newlines before token
129136
while true:
130137
while lex.current in {' ', '\t', '\r'}:
131-
inc wsno
132138
lex.advance()
133139
if lex.current == '\n':
134-
# inc lex.line
135140
lex.col = 0
136141
lex.advance()
137-
wsno = 0
138142
continue
139143
elif lex.current == '\r':
140144
if lex.peek() == '\n':
141145
lex.advance()
142146
inc lex.line
143147
lex.col = 0
144148
lex.advance()
145-
wsno = 0
146149
continue
147150
break
148151
# End of input
149152
if lex.current == '\0':
150-
return newTokenTuple(lex, mtkEOF, wsno=wsno)
151-
152-
# let startCol = wsno # not needed anymore
153+
return newTokenTuple(lex, mtkEOF, wsno=lex.wsno)
153154

154155
# Return buffered tokens if present
155156
if lex.pendingTokens.len > 0:
@@ -170,24 +171,32 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
170171
while lex.current notin {'\n', '\r', '\0'}:
171172
lex.strbuf.add(lex.current)
172173
lex.advance()
173-
return newTokenTuple(lex, mtkHeading, lex.strbuf.strip(), wsno=wsno, attrs=some(@[$level]))
174+
return newTokenTuple(lex, mtkHeading, lex.strbuf.strip(), wsno=lex.wsno, attrs=some(@[$level]))
174175
else:
175-
return newTokenTuple(lex, mtkText, repeat('#', level), wsno=wsno)
176+
return newTokenTuple(lex, mtkText, repeat('#', level), wsno=lex.wsno)
176177
of '-', #['*',]# '_':
177178
# Horizontal rule or unordered list or emphasis/strong
179+
178180
let ch = lex.current
179181
var count = 0
180182
while lex.current == ch:
181183
inc count
182184
lex.advance()
185+
183186
if count >= 3 and (lex.current == '\n' or lex.current == '\0'):
184-
# it's a horizontal rule!
185-
return newTokenTuple(lex, mtkHorizontalRule, repeat(ch, count), wsno=wsno)
186-
elif (ch in {'-', '*', '+'}) and (lex.current == ' ' or lex.current == '\t'):
187+
# Horizontal rule
188+
return newTokenTuple(lex, mtkHorizontalRule, repeat(ch, count), wsno=lex.wsno)
189+
190+
if (ch in {'-', '*', '+'}) and (lex.current == ' ' or lex.current == '\t'):
191+
# Unordered list item
187192
lex.advance()
193+
while lex.current == ' ' or lex.current == '\t':
194+
lex.advance()
195+
188196
# Check for checkbox pattern
189197
while lex.current == ' ' or lex.current == '\t':
190198
lex.advance()
199+
191200
if lex.current == '[' and (lex.peek() == 'x' or lex.peek() == ' '):
192201
lex.advance() # skip '['
193202
let cbChar = lex.current
@@ -206,23 +215,25 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
206215
if cbChar == 'x': "checked"
207216
else: "unchecked"
208217
return newTokenTuple(lex, mtkListItemCheckbox,
209-
lex.strbuf.strip(), wsno=wsno, attrs=some(@["checkbox", checkState]))
218+
lex.strbuf.strip(), wsno=lex.wsno, attrs=some(@["checkbox", checkState]))
219+
210220
# Otherwise, normal list item
211221
lex.strbuf.setLen(0)
212222
while lex.current notin {'\n', '\r', '\0'}:
213223
lex.strbuf.add(lex.current)
214224
lex.advance()
215-
return newTokenTuple(lex, mtkListItem, lex.strbuf.strip(), wsno=wsno)
216-
elif ch in {'*', '_'}:
225+
return newTokenTuple(lex, mtkListItem, lex.strbuf.strip(), wsno=lex.wsno)
226+
227+
if ch in {'*', '_'}:
217228
# Emphasis or strong
218229
if lex.peek() == ch:
219230
lex.advance(); lex.advance() # skip both delimiters
220-
return newTokenTuple(lex, mtkStrong, wsno=wsno)
231+
return newTokenTuple(lex, mtkStrong, wsno=lex.wsno)
221232
else:
222-
lex.advance();
223-
return newTokenTuple(lex, mtkEmphasis, wsno=wsno)
233+
# No extra advance needed: the while loop above already consumed the delimiter run
234+
return newTokenTuple(lex, mtkEmphasis, wsno=lex.wsno)
224235
else:
225-
return newTokenTuple(lex, mtkText, repeat(ch, count), wsno=wsno)
236+
return newTokenTuple(lex, mtkText, repeat(ch, count), wsno=lex.wsno)
226237
of '>':
227238
# Blockquote
228239
lex.advance()
@@ -232,7 +243,7 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
232243
while lex.current notin {'\n', '\r', '\0'}:
233244
lex.strbuf.add(lex.current)
234245
lex.advance()
235-
return newTokenTuple(lex, mtkBlockquote, lex.strbuf.strip(), wsno=wsno)
246+
return newTokenTuple(lex, mtkBlockquote, lex.strbuf.strip(), wsno=lex.wsno)
236247
of '0'..'9':
237248
# Ordered list item
238249
lex.strbuf.setLen(0)
@@ -248,9 +259,9 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
248259
while lex.current notin {'\n', '\r', '\0'}:
249260
lex.strbuf.add(lex.current)
250261
lex.advance()
251-
return newTokenTuple(lex, mtkOListItem, lex.strbuf.strip(), wsno=wsno)
262+
return newTokenTuple(lex, mtkOListItem, lex.strbuf.strip(), wsno=lex.wsno)
252263
else:
253-
return newTokenTuple(lex, mtkText, lex.strbuf, wsno=wsno)
264+
return newTokenTuple(lex, mtkText, lex.strbuf, wsno=lex.wsno)
254265
of '`', '~':
255266
# Fenced code block (``` or ~~~)
256267
if lex.peek() == lex.current and lex.peek(2) == lex.current:
@@ -273,7 +284,7 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
273284
lex.advance(); lex.advance(); lex.advance()
274285
if lex.current in {'\n', '\r'}:
275286
lex.advance()
276-
return newTokenTuple(lex, mtkCodeBlock, lex.strbuf, wsno=wsno, attrs=some(@[lang]))
287+
return newTokenTuple(lex, mtkCodeBlock, lex.strbuf, wsno=lex.wsno, attrs=some(@[lang]))
277288
elif lex.current == '`':
278289
# Inline code
279290
lex.advance()
@@ -283,13 +294,13 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
283294
lex.advance()
284295
if lex.current == '`':
285296
lex.advance()
286-
return newTokenTuple(lex, mtkInlineCode, lex.strbuf, wsno=wsno)
297+
return newTokenTuple(lex, mtkInlineCode, lex.strbuf, wsno=lex.wsno)
287298
else:
288299
# treat as text
289300
lex.strbuf.setLen(0)
290301
lex.strbuf.add(lex.current)
291302
lex.advance()
292-
return newTokenTuple(lex, mtkText, lex.strbuf, wsno=wsno)
303+
return newTokenTuple(lex, mtkText, lex.strbuf, wsno=lex.wsno)
293304
of '!':
294305
# Image
295306
if lex.peek() == '[':
@@ -327,13 +338,13 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
327338
if lex.current == ')':
328339
lex.advance()
329340
if title.len > 0:
330-
return newTokenTuple(lex, mtkImage, wsno=wsno, attrs=some(@[alt, src, title]))
341+
return newTokenTuple(lex, mtkImage, wsno=lex.wsno, attrs=some(@[alt, src, title]))
331342
else:
332-
return newTokenTuple(lex, mtkImage, wsno=wsno, attrs=some(@[alt, src]))
343+
return newTokenTuple(lex, mtkImage, wsno=lex.wsno, attrs=some(@[alt, src]))
333344
else:
334345
var text = "!"
335346
lex.advance()
336-
return newTokenTuple(lex, mtkText, text, wsno=wsno)
347+
return newTokenTuple(lex, mtkText, text, wsno=lex.wsno)
337348
of '[':
338349
# Link, Checkbox, or Footnote
339350
if lex.peek() == '^':
@@ -357,11 +368,11 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
357368
lex.strbuf.add(lex.current)
358369
lex.advance()
359370
return newTokenTuple(lex, mtkFootnoteDef,
360-
lex.strbuf.strip(), wsno=wsno, attrs=some(@[footId]))
371+
lex.strbuf.strip(), wsno=lex.wsno, attrs=some(@[footId]))
361372
else:
362373
# Footnote reference: [^id]
363374
return newTokenTuple(lex, mtkFootnoteRef, "",
364-
wsno=wsno, attrs=some(@[footId]))
375+
wsno=lex.wsno, attrs=some(@[footId]))
365376
# Regular link or checkbox
366377
lex.advance()
367378
lex.strbuf.setLen(0)
@@ -398,9 +409,9 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
398409
if lex.current == ')':
399410
lex.advance()
400411
if title.len > 0:
401-
return newTokenTuple(lex, mtkLink, wsno=wsno, attrs=some(@[text, href, title]))
412+
return newTokenTuple(lex, mtkLink, wsno=lex.wsno, attrs=some(@[text, href, title]))
402413
else:
403-
return newTokenTuple(lex, mtkLink, wsno=wsno, attrs=some(@[text, href]))
414+
return newTokenTuple(lex, mtkLink, wsno=lex.wsno, attrs=some(@[text, href]))
404415
# elif text == "x":
405416
# # Special case for [x] checkbox
406417
# return newTokenTuple(lex, mtkListItemCheckbox,
@@ -409,26 +420,26 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
409420
# # Special case for [ ] checkbox
410421
# return newTokenTuple(lex, mtkListItemCheckbox,
411422
# wsno=wsno, attrs=some(@["checkbox", "unchecked"]))
412-
return newTokenTuple(lex, mtkText, text, wsno=wsno)
423+
return newTokenTuple(lex, mtkText, text, wsno=lex.wsno)
413424
of '*':
414425
# Emphasis or strong
415426
if lex.peek() == '*':
416427
lex.advance(); lex.advance()
417-
return newTokenTuple(lex, mtkStrong, wsno=wsno)
428+
return newTokenTuple(lex, mtkStrong, wsno=lex.wsno)
418429
else:
419430
lex.advance();
420-
return newTokenTuple(lex, mtkEmphasis, wsno=wsno)
431+
return newTokenTuple(lex, mtkEmphasis, wsno=lex.wsno)
421432
of ' ':
422433
# Line break (two or more spaces at end of line)
423434
if lex.peek() == ' ' and (lex.peek(2) == '\n' or lex.peek(2) == '\r'):
424435
lex.advance(); lex.advance();
425436
if lex.current in {'\n', '\r'}:
426437
lex.advance()
427-
return newTokenTuple(lex, mtkLineBreak, wsno=wsno)
438+
return newTokenTuple(lex, mtkLineBreak, wsno=lex.wsno)
428439
else:
429440
var text = " "
430441
lex.advance()
431-
return newTokenTuple(lex, mtkText, text, wsno=wsno)
442+
return newTokenTuple(lex, mtkText, text, wsno=lex.wsno)
432443
of '<':
433444
# Raw HTML
434445
lex.strbuf.setLen(0)
@@ -449,20 +460,20 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
449460
if lex.current == '>':
450461
lex.strbuf.add(lex.current)
451462
lex.advance()
452-
return newTokenTuple(lex, mtkHtml, lex.strbuf, wsno=wsno, attrs=some(@[tag]))
463+
return newTokenTuple(lex, mtkHtml, lex.strbuf, wsno=lex.wsno, attrs=some(@[tag]))
453464
of '|':
454465
# Table row
455466
lex.strbuf.setLen(0)
456467
while lex.current notin {'\n', '\r', '\0'}:
457468
lex.strbuf.add(lex.current)
458469
lex.advance()
459-
return newTokenTuple(lex, mtkTable, lex.strbuf, wsno=wsno)
470+
return newTokenTuple(lex, mtkTable, lex.strbuf, wsno=lex.wsno)
460471
else:
461472
# Paragraph or plain text
462473
# Scan for auto links anywhere in the text
463-
let tokens = lex.scanTextWithLinks(wsno)
474+
let tokens = lex.scanTextWithLinks(lex.wsno)
464475
if tokens.len > 0:
465476
if tokens.len > 1:
466477
lex.pendingTokens = tokens[1..^1]
467478
return tokens[0]
468-
return newTokenTuple(lex, mtkUnknown, wsno=wsno)
479+
return newTokenTuple(lex, mtkUnknown, wsno=lex.wsno)

tests/test1.nim

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ let opts = MarkdownOptions(
1111
allowTagsByType: tagNone,
1212
allowInlineStyle: false,
1313
allowHtmlAttributes: false,
14-
enableAnchors: true
14+
enableAnchors: true,
15+
anchorIcon: "🔗"
1516
)
1617
test "headings with anchors":
1718
let sample = """

0 commit comments

Comments
 (0)