Skip to content

Commit 041a2ff

Browse files
committed
fix wsno | update tests
Signed-off-by: George Lemon <georgelemon@protonmail.com>
1 parent 7ed06c4 commit 041a2ff

File tree

6 files changed

+272
-73
lines changed

6 files changed

+272
-73
lines changed

.github/workflows/docs.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ on:
33
push:
44
tags:
55
- '*.*.*'
6-
branches:
7-
- main
6+
# branches:
7+
# - main
88
env:
99
nim-version: 'stable'
1010
nim-src: src/${{ github.event.repository.name }}.nim

src/marvdown/ast.nim

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,27 +38,39 @@ type
3838
case kind*: MarkdownNodeKind
3939
of mdkText:
4040
text*: string
41+
## Plain text content
4142
of mdkElement:
4243
tag*: HtmlTag
44+
## HTML tag information
4345
attrs*: seq[(string, string)]
46+
## HTML attributes as (name, value) pairs
4447
of mdkCodeBlock:
4548
code*: string
49+
## Code block content
4650
codeLang*: string
51+
## Language identifier (if any)
4752
of mdkHeading:
48-
level*: int
53+
level*: range[1..6]
54+
## Heading level (1-6)
4955
textHeading*: string
56+
## Heading text content
5057
textAnchor*: Option[string]
5158
## Anchor for the heading (for linking)
5259
## Generated if `enableAnchors` is true in `MarkdownOptions`
5360
of mdkList:
5461
listOrdered*: bool
5562
of mdkLink:
5663
linkHref*: string
64+
## URL for the link
5765
linkTitle*: string
66+
## Title text for the link
5867
of mdkImage:
5968
imageSrc*: string
69+
## Image source URL
6070
imageAlt*: string
71+
## Alt text for the image
6172
imageTitle*: string
73+
## Title text for the image
6274
of mdkInlineCode:
6375
inlineCode*: string
6476
## Inline code content
@@ -67,9 +79,15 @@ type
6779
## Raw HTML content
6880
of mdkTable:
6981
headers*: seq[string]
82+
## Table headers
7083
rows*: seq[seq[string]]
84+
## Table rows
7185
of mdkUnknown:
7286
info*: string # For unknown or unsupported nodes
7387
else: discard
7488
children*: MarkdownNodeList
75-
## Child nodes (for container nodes)
89+
## Child nodes (for container nodes)
90+
line*: int
91+
## Line number in the source markdown
92+
wsno*: int
93+
## Whitespace count before the token (for indentation)

src/marvdown/lexer.nim

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ proc advance(lex: var MarkdownLexer) =
6363
if lex.pos < lex.input.len:
6464
if lex.current == '\n':
6565
inc lex.line
66-
lex.col = 1
66+
lex.col = 0
6767
else:
6868
inc lex.col
6969
inc lex.pos
@@ -85,7 +85,7 @@ proc initToken(lex: var MarkdownLexer, kind: MarkdownTokenKind, value: sink stri
8585
(kind, value, lex.line, lex.pos, lex.col, wsno, none(seq[string]))
8686

8787
proc newTokenTuple(lex: MarkdownLexer, kind: MarkdownTokenKind, token: string = "", wsno: int = 0, attrs: Option[seq[string]] = none(seq[string])): MarkdownTokenTuple =
88-
(kind, token, lex.line, lex.col, lex.pos, wsno, attrs)
88+
(kind, token, lex.line, lex.col - token.len, lex.pos, wsno, attrs)
8989

9090
proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
9191
## Lex the next token from the input
@@ -96,7 +96,7 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
9696
inc wsno
9797
lex.advance()
9898
if lex.current == '\n':
99-
inc lex.line
99+
# inc lex.line
100100
lex.col = 0
101101
lex.advance()
102102
wsno = 0
@@ -114,6 +114,8 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
114114
if lex.current == '\0':
115115
return newTokenTuple(lex, mtkEOF, wsno=wsno)
116116

117+
# let startCol = wsno # not needed anymore
118+
117119
case lex.current
118120
of '#':
119121
# Headings (e.g., ## Heading 2)
@@ -127,9 +129,9 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
127129
while lex.current notin {'\n', '\r', '\0'}:
128130
lex.strbuf.add(lex.current)
129131
lex.advance()
130-
return newTokenTuple(lex, mtkHeading, lex.strbuf.strip(), wsno, some(@[$level]))
132+
return newTokenTuple(lex, mtkHeading, lex.strbuf.strip(), wsno=wsno, attrs=some(@[$level]))
131133
else:
132-
return newTokenTuple(lex, mtkText, repeat('#', level), wsno)
134+
return newTokenTuple(lex, mtkText, repeat('#', level), wsno=wsno)
133135
of '-', #['*',]# '_':
134136
# Horizontal rule or unordered list or emphasis/strong
135137
let ch = lex.current
@@ -139,14 +141,14 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
139141
lex.advance()
140142
if count >= 3 and (lex.current == '\n' or lex.current == '\0'):
141143
# it's a horizontal rule!
142-
return newTokenTuple(lex, mtkHorizontalRule, repeat(ch, count), wsno)
144+
return newTokenTuple(lex, mtkHorizontalRule, repeat(ch, count), wsno=wsno)
143145
elif (ch in {'-', '*', '+'}) and (lex.current == ' ' or lex.current == '\t'):
144146
lex.advance()
145147
lex.strbuf.setLen(0)
146148
while lex.current notin {'\n', '\r', '\0'}:
147149
lex.strbuf.add(lex.current)
148150
lex.advance()
149-
return newTokenTuple(lex, mtkListItem, lex.strbuf.strip(), wsno)
151+
return newTokenTuple(lex, mtkListItem, lex.strbuf.strip(), wsno=wsno)
150152
elif ch in {'*', '_'}:
151153
# Emphasis or strong
152154
if lex.peek() == ch:
@@ -155,7 +157,7 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
155157
else:
156158
return newTokenTuple(lex, mtkEmphasis, wsno=wsno)
157159
else:
158-
return newTokenTuple(lex, mtkText, repeat(ch, count), wsno)
160+
return newTokenTuple(lex, mtkText, repeat(ch, count), wsno=wsno)
159161
of '>':
160162
# Blockquote
161163
lex.advance()
@@ -165,7 +167,7 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
165167
while lex.current notin {'\n', '\r', '\0'}:
166168
lex.strbuf.add(lex.current)
167169
lex.advance()
168-
return newTokenTuple(lex, mtkBlockquote, lex.strbuf.strip(), wsno)
170+
return newTokenTuple(lex, mtkBlockquote, lex.strbuf.strip(), wsno=wsno)
169171
of '0'..'9':
170172
# Ordered list item
171173
lex.strbuf.setLen(0)
@@ -181,9 +183,9 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
181183
while lex.current notin {'\n', '\r', '\0'}:
182184
lex.strbuf.add(lex.current)
183185
lex.advance()
184-
return newTokenTuple(lex, mtkOListItem, lex.strbuf.strip(), wsno)
186+
return newTokenTuple(lex, mtkOListItem, lex.strbuf.strip(), wsno=wsno)
185187
else:
186-
return newTokenTuple(lex, mtkText, lex.strbuf, wsno)
188+
return newTokenTuple(lex, mtkText, lex.strbuf, wsno=wsno)
187189
of '`', '~':
188190
# Fenced code block (``` or ~~~)
189191
if lex.peek() == lex.current and lex.peek(2) == lex.current:
@@ -206,7 +208,7 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
206208
lex.advance(); lex.advance(); lex.advance()
207209
if lex.current in {'\n', '\r'}:
208210
lex.advance()
209-
return newTokenTuple(lex, mtkCodeBlock, lex.strbuf, wsno, some(@[lang]))
211+
return newTokenTuple(lex, mtkCodeBlock, lex.strbuf, wsno=wsno, attrs=some(@[lang]))
210212
elif lex.current == '`':
211213
# Inline code
212214
lex.advance()
@@ -216,13 +218,13 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
216218
lex.advance()
217219
if lex.current == '`':
218220
lex.advance()
219-
return newTokenTuple(lex, mtkInlineCode, lex.strbuf, wsno)
221+
return newTokenTuple(lex, mtkInlineCode, lex.strbuf, wsno=wsno)
220222
else:
221223
# treat as text
222224
lex.strbuf.setLen(0)
223225
lex.strbuf.add(lex.current)
224226
lex.advance()
225-
return newTokenTuple(lex, mtkText, lex.strbuf, wsno)
227+
return newTokenTuple(lex, mtkText, lex.strbuf, wsno=wsno)
226228
of '!':
227229
# Image
228230
if lex.peek() == '[':
@@ -247,7 +249,7 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
247249
else:
248250
var text = "!"
249251
lex.advance()
250-
return newTokenTuple(lex, mtkText, text, wsno)
252+
return newTokenTuple(lex, mtkText, text, wsno=wsno)
251253
of '[':
252254
# Link
253255
lex.advance()
@@ -268,7 +270,7 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
268270
if lex.current == ')':
269271
lex.advance()
270272
return newTokenTuple(lex, mtkLink, wsno=wsno, attrs=some(@[text, href]))
271-
return newTokenTuple(lex, mtkText, text, wsno)
273+
return newTokenTuple(lex, mtkText, text, wsno=wsno)
272274
of '*':
273275
# Emphasis or strong
274276
if lex.peek() == '*':
@@ -286,7 +288,7 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
286288
else:
287289
var text = " "
288290
lex.advance()
289-
return newTokenTuple(lex, mtkText, text, wsno)
291+
return newTokenTuple(lex, mtkText, text, wsno=wsno)
290292
of '<':
291293
# Raw HTML
292294
lex.strbuf.setLen(0)
@@ -307,14 +309,14 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
307309
if lex.current == '>':
308310
lex.strbuf.add(lex.current)
309311
lex.advance()
310-
return newTokenTuple(lex, mtkHtml, lex.strbuf, wsno, some(@[tag]))
312+
return newTokenTuple(lex, mtkHtml, lex.strbuf, wsno=wsno, attrs=some(@[tag]))
311313
of '|':
312314
# Table row
313315
lex.strbuf.setLen(0)
314316
while lex.current notin {'\n', '\r', '\0'}:
315317
lex.strbuf.add(lex.current)
316318
lex.advance()
317-
return newTokenTuple(lex, mtkTable, lex.strbuf, wsno)
319+
return newTokenTuple(lex, mtkTable, lex.strbuf, wsno=wsno)
318320
else:
319321
# Paragraph or plain text
320322
lex.strbuf.setLen(0)
@@ -323,5 +325,5 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
323325
lex.strbuf.add(lex.current)
324326
lex.advance()
325327
if lex.strbuf.len > 0:
326-
return newTokenTuple(lex, mtkText, lex.strbuf, wsno)
328+
return newTokenTuple(lex, mtkText, lex.strbuf, wsno=wsno)
327329
return newTokenTuple(lex, mtkUnknown, wsno=wsno)

0 commit comments

Comments
 (0)