Skip to content

Commit 593153b

Browse files
committed
Implement paragraph lexing
1 parent da422ea commit 593153b

File tree

3 files changed

+109
-39
lines changed

3 files changed

+109
-39
lines changed

README.org

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ TODO
5959
| Affiliated Keywords | | - | | | | |
6060
| LaTeX Environment | | X | | | | |
6161
| NodeProperty | | X | | | | |
62-
| Paragraph | | | | | | |
62+
| Paragraph | | X | | | | |
6363
| TableRow | | X | | | | |
6464
| TableHRule | | X | | | | |
6565
| BlankLine | | | | | | |

src/lexer.jl

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,15 +71,21 @@ function lexnext(state::LexerState, bytes::DenseVector{UInt8}, start::UInt32)
7171
linestart, newlines = @inline skipnewlines(bytes, start)
7272
skipws = skipspaces(bytes, linestart)
7373
pos = skipws.stop
74+
if state.lastelement == K""
75+
elseif state.lastelement ∈ K"<footnote_definition"
76+
return Token(K"<paragraph", pos, pos), pos
77+
elseif state.lastelement ∈ K"item" && !islineend(bytes, start)
78+
return Token(K"<paragraph", pos, pos), pos
79+
end
7480
chr = bytes[pos]
75-
next = if newlines > 0 && K"clock" ∈ state.ctx
76-
Token(K">clock", start - 0x01, start - 0x01), start
81+
next = if newlines > 1 && K"paragraph" ∈ state.ctx
82+
Token(K">paragraph", start - 0x1, start - 0x1), start
7783
elseif newlines > 2 && K"footnote_definition" ∈ state.ctx
7884
Token(K">footnote_definition", start - 0x1, start - 0x1), start
7985
elseif newlines > 2 && K"item" ∈ state.ctx
8086
Token(settag(K">item", tag(state.ctx)), start - 0x1, start - 0x1), start
8187
elseif newlines != 0
82-
if K"table" ∈ state.ctx
88+
nextelem = if K"table" ∈ state.ctx
8389
if chr == UInt8('|')
8490
if ischarat(bytes, pos + 0x1, '-')
8591
lend = lineend(bytes, pos)
@@ -132,6 +138,11 @@ function lexnext(state::LexerState, bytes::DenseVector{UInt8}, start::UInt32)
132138
NONE_TOKEN
133139
end
134140
end
141+
if nextelem == NONE_TOKEN && K"paragraph" ∉ state.ctx && K"paragraph" ∈ state.restriction && linestart < length(bytes)
142+
Token(K"<paragraph", linestart, linestart), linestart
143+
else
144+
nextelem
145+
end
135146
else # No newlines
136147
if K"table" ∈ state.ctx && islineend(bytes, pos + 0x1)
137148
if K"table_cell" ∈ state.ctx
@@ -157,7 +168,11 @@ function lexnext(state::LexerState, bytes::DenseVector{UInt8}, start::UInt32)
157168
end
158169
end
159170
if next != NONE_TOKEN
160-
next
171+
if K"paragraph" ∈ state.ctx
172+
Token(K">paragraph", start - 0x1, start - 0x1), start
173+
else
174+
next
175+
end
161176
else
162177
npos = @inline skipplain(bytes, pos)
163178
if pos == npos && pos < length(bytes)

test/runtests.jl

Lines changed: 89 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -42,24 +42,30 @@ end
4242
#+end_block
4343
""")) ==
4444
[Token(K"<block[30]", 1, 13),
45-
Token(K">block[30]", 23, 33)]
45+
Token(K"<paragraph", 15, 15),
46+
Token(K">paragraph", 21, 21),
47+
Token(K">block[30]", 23, 33)]
4648
@test collect(Lexer("""
4749
#+BEGIN_BLOCK parameters
4850
content
4951
#+END_BLOCK
5052
""")) ==
5153
[Token(K"<block[30]", 1, 24),
52-
Token(K">block[30]", 34, 44)]
54+
Token(K"<paragraph", 26, 26),
55+
Token(K">paragraph", 32, 32),
56+
Token(K">block[30]", 34, 44)]
5357
@test collect(Lexer("""
5458
#+begin_block
5559
#+begin_other
5660
content
5761
#+end_other
5862
#+end_block
5963
""")) ==
60-
[Token(K"<block[30]", 1, 13)
61-
Token(K"<block[41]", 15, 27)
62-
Token(K">block[41]", 37, 47)
64+
[Token(K"<block[30]", 1, 13),
65+
Token(K"<block[41]", 15, 27),
66+
Token(K"<paragraph", 29, 29),
67+
Token(K">paragraph", 35, 35),
68+
Token(K">block[41]", 37, 47),
6369
Token(K">block[30]", 49, 59)]
6470
end
6571
@testset "Dynamic blocks" begin
@@ -69,6 +75,8 @@ end
6975
#+end:
7076
""")) ==
7177
[Token(K"<dynamic_block", 1, 13),
78+
Token(K"<paragraph", 15, 15),
79+
Token(K">paragraph", 21, 21),
7280
Token(K">dynamic_block", 23, 28)]
7381
end
7482
@testset "Lesser blocks" begin
@@ -87,7 +95,8 @@ end
8795
#+end_src extra
8896
""")) ==
8997
[Token(K"<source_block", 1, 17),
90-
Token(K">source_block", 37, 51)]
98+
Token(K">source_block", 37, 51),
99+
Token(K"<paragraph", 53, 53)]
91100
@test collect(Lexer("""
92101
#+begin_export html
93102
<b>content</b>
@@ -105,6 +114,8 @@ end
105114
:end:
106115
""")) ==
107116
[Token(K"<drawer", 1, 8),
117+
Token(K"<paragraph", 10, 10),
118+
Token(K">paragraph", 16, 16),
108119
Token(K">drawer", 18, 22)]
109120
@test collect(Lexer("""
110121
:drawer:
@@ -114,6 +125,10 @@ end
114125
:end:
115126
""")) ==
116127
[Token(K"<drawer", 1, 8),
128+
Token(K"<paragraph", 10, 10),
129+
Token(K">paragraph", 16, 16),
130+
Token(K"<paragraph", 18, 18),
131+
Token(K">paragraph", 34, 34),
117132
Token(K">drawer", 36, 40)]
118133
end
119134
@testset "Property drawers" begin
@@ -143,51 +158,84 @@ end
143158
end
144159
@testset "Footnote defs" begin
145160
@test collect(Lexer("[fn:1] stuff")) ==
146-
[Token(K"<footnote_definition", 1, 6)]
161+
[Token(K"<footnote_definition", 1, 6),
162+
Token(K"<paragraph", 8, 8)]
147163
@test collect(Lexer("[fn:1] stuff\n[fn:2] more")) ==
148164
[Token(K"<footnote_definition", 1, 6),
165+
Token(K"<paragraph", 8, 8),
166+
Token(K">paragraph", 12, 12),
149167
Token(K">footnote_definition", 12, 12),
150-
Token(K"<footnote_definition", 14, 19)]
168+
Token(K"<footnote_definition", 14, 19),
169+
Token(K"<paragraph", 21, 21)]
151170
@test collect(Lexer("[fn:1] stuff\n\n\nmore")) ==
152171
[Token(K"<footnote_definition", 1, 6),
153-
Token(K">footnote_definition", 12, 12)]
172+
Token(K"<paragraph", 8, 8),
173+
Token(K">paragraph", 12, 12),
174+
Token(K">footnote_definition", 12, 12),
175+
Token(K"<paragraph", 16, 16)]
154176
end
155177
@testset "Items" begin
156178
@test collect(Lexer("+ item")) ==
157-
[Token(K"<item[1]", 1, 1)]
179+
[Token(K"<item[1]", 1, 1),
180+
Token(K"<paragraph", 3, 3)]
158181
@test collect(Lexer(" + item")) ==
159-
[Token(K"<item[3]", 3, 3)]
182+
[Token(K"<item[3]", 3, 3),
183+
Token(K"<paragraph", 5, 5)]
160184
@test collect(Lexer("- item")) ==
161-
[Token(K"<item[1]", 1, 1)]
185+
[Token(K"<item[1]", 1, 1),
186+
Token(K"<paragraph", 3, 3)]
162187
@test collect(Lexer(" * item")) ==
163-
[Token(K"item[2]", 2, 2)]
188+
[Token(K"<item[2]", 2, 2),
189+
Token(K"<paragraph", 4, 4)]
164190
@test collect(Lexer("+ item\nmore")) ==
165191
[Token(K"<item[1]", 1, 1),
166-
Token(K">item[1]", 6, 6)]
192+
Token(K"<paragraph", 3, 3),
193+
Token(K">paragraph", 6, 6),
194+
Token(K">item[1]", 6, 6),
195+
Token(K"<paragraph", 8, 8)]
167196
@test collect(Lexer("+ item\n more")) ==
168-
[Token(K"<item[1]", 1, 1)]
197+
[Token(K"<item[1]", 1, 1),
198+
Token(K"<paragraph", 3, 3)]
169199
@test collect(Lexer("+ item\n more")) ==
170-
[Token(K"<item[1]", 1, 1)]
200+
[Token(K"<item[1]", 1, 1),
201+
Token(K"<paragraph", 3, 3)]
171202
@test collect(Lexer("+ item\n \n more")) ==
172-
[Token(K"<item[1]", 1, 1)]
203+
[Token(K"<item[1]", 1, 1),
204+
Token(K"<paragraph", 3, 3),
205+
Token(K">paragraph", 6, 6),
206+
Token(K"<paragraph", 11, 11)]
173207
@test collect(Lexer("+ item\n\n more")) ==
174-
[Token(K"<item[1]", 1, 1)]
208+
[Token(K"<item[1]", 1, 1),
209+
Token(K"<paragraph", 3, 3),
210+
Token(K">paragraph", 6, 6),
211+
Token(K"<paragraph", 9, 9)]
175212
@test collect(Lexer("+ item\n\n\n more")) ==
176213
[Token(K"<item[1]", 1, 1),
177-
Token(K">item[1]", 6, 6)]
214+
Token(K"<paragraph", 3, 3),
215+
Token(K">paragraph", 6, 6),
216+
Token(K">item[1]", 6, 6),
217+
Token(K"<paragraph", 10, 10)]
178218
@test collect(Lexer(" + item\n more")) ==
179219
[Token(K"<item[2]", 2, 2),
180-
Token(K">item[2]", 7, 7)]
220+
Token(K"<paragraph", 4, 4),
221+
Token(K">paragraph", 7, 7),
222+
Token(K">item[2]", 7, 7),
223+
Token(K"<paragraph", 9, 9)]
181224
@test collect(Lexer(" + item\n more")) ==
182-
[Token(K"<item[2]", 2, 2)]
225+
[Token(K"<item[2]", 2, 2),
226+
Token(K"<paragraph", 4, 4)]
183227
@test collect(Lexer("1. item")) ==
184-
[Token(K"<item[1]", 1, 2)]
228+
[Token(K"<item[1]", 1, 2),
229+
Token(K"<paragraph", 4, 4)]
185230
@test collect(Lexer("12) item")) ==
186-
[Token(K"<item[1]", 1, 3)]
231+
[Token(K"<item[1]", 1, 3),
232+
Token(K"<paragraph", 5, 5)]
187233
@test collect(Lexer("a. item")) ==
188-
[Token(K"<item[1]", 1, 2)]
234+
[Token(K"<item[1]", 1, 2),
235+
Token(K"<paragraph", 4, 4)]
189236
@test collect(Lexer("ab) item")) ==
190-
[Token(K"<item[1]", 1, 3)]
237+
[Token(K"<item[1]", 1, 3),
238+
Token(K"<paragraph", 5, 5)]
191239
end
192240
@testset "Tables" begin
193241
@test collect(Lexer("|")) ==
@@ -205,7 +253,8 @@ end
205253
Token(K"<table_cell", 3, 3),
206254
Token(K">table_cell", 6, 6),
207255
Token(K">table_row", 6, 6),
208-
Token(K">table", 6, 6)]
256+
Token(K">table", 6, 6),
257+
Token(K"<paragraph", 8, 8)]
209258
@test collect(Lexer("| cell | two | three")) ==
210259
[Token(K"<table", 1, 1),
211260
Token(K"<table_row", 1, 1),
@@ -246,10 +295,10 @@ end
246295
[Token(K"<clock", 1, 1)]
247296
@test collect(Lexer("clock: [2019-03-25 Mon 10:49]--[2019-03-25 Mon 11:31] => 0:42")) ==
248297
[Token(K"<clock", 1, 1)]
249-
@test collect(Lexer("clock: 12:30")) !=
250-
[Token(K"<clock", 1, 1)]
251-
@test collect(Lexer("clock: [2024-10-12]--")) !=
252-
[Token(K"<clock", 1, 1)]
298+
@test collect(Lexer("clock: 12:30")) ==
299+
[Token(K"<paragraph", 1, 1)]
300+
@test collect(Lexer("clock: [2024-10-12]--")) ==
301+
[Token(K"<paragraph", 1, 1)]
253302
end
254303
@testset "Diary sexp" begin
255304
@test collect(Lexer("%%(org-calendar-holiday)")) ==
@@ -298,8 +347,12 @@ end
298347
[Token(K"fixedwidth", 1, 18)]
299348
end
300349
@testset "Horizontal rule" begin
301-
@test collect(Lexer("----")) == Token[]
302-
@test collect(Lexer("-- ---")) == Token[]
350+
@test collect(Lexer("----")) ==
351+
[Token(K"<paragraph", 1, 1)]
352+
@test collect(Lexer("-- ---")) ==
353+
[Token(K"<paragraph", 1, 1)]
354+
@test collect(Lexer("----- -----")) ==
355+
[Token(K"<paragraph", 1, 1)]
303356
@test collect(Lexer("-----")) ==
304357
[Token(K"hrule", 1, 5)]
305358
@test collect(Lexer("------")) ==
@@ -319,7 +372,7 @@ end
319372
stuff
320373
\\end{env}fluff
321374
""")) ==
322-
Token[]
375+
[Token(K"<paragraph", 1, 1)]
323376
@test collect(Lexer("""
324377
\\begin{equation*}
325378
\\begin{align}
@@ -334,7 +387,9 @@ end
334387
x^2 + y^2 = z^2
335388
\\end{equation*}
336389
""")) ==
337-
[Token(K"latex_environment", 1, 76)
390+
[Token(K"latex_environment", 1, 76),
391+
Token(K"<paragraph", 79, 79),
392+
Token(K">paragraph", 85, 85),
338393
Token(K"latex_environment", 88, 136)]
339394
end
340395
@testset "Type inference" begin

0 commit comments

Comments
 (0)