Skip to content

Commit 2d8dccb

Browse files
committed
Implement LaTeX environment lexing
1 parent 8dcaa9d commit 2d8dccb

File tree

3 files changed

+64
-3
lines changed

3 files changed

+64
-3
lines changed

README.org

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ TODO
4747
| PropertyDrawer | | X | | | | |
4848
| Table | | X | | | | |
4949
|---------------------+------+-----+-------+-----+------+------|
50-
| BabelCall | | | | | | |
50+
| BabelCall | | - | | | | |
5151
| Block | | X | | | | |
5252
| Clock | | X | | | | |
5353
| DiarySexp | | X | | | | |
@@ -57,7 +57,7 @@ TODO
5757
| HorizontalRule | | X | | | | |
5858
| Keyword | | X | | | | |
5959
| Affiliated Keywords | | - | | | | |
60-
| LaTeX Environment | | | | | | |
60+
| LaTeX Environment | | X | | | | |
6161
| NodeProperty | | X | | | | |
6262
| Paragraph | | | | | | |
6363
| TableRow | | X | | | | |

src/lexer.jl

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,8 @@ function lexnext(state::LexerState, bytes::DenseVector{UInt8}, start::UInt32)::T
117117
lex_comment(state, bytes, pos)
118118
elseif chr == UInt8('-') && ischarat(bytes, pos + 0x1, '-')
119119
lex_hrule(state, bytes, pos)
120+
elseif chr == UInt8('\\') && hasprefix(bytes, pos + 0x1, "begin{")
121+
lex_latexenv(state, bytes, pos)
120122
elseif K"heading" state.lastelement
121123
lex_planning(state, bytes, pos)
122124
else
@@ -458,7 +460,33 @@ function lex_hrule(::LexerState, bytes::DenseVector{UInt8}, pos::UInt32)
458460
Token(K"hrule", pos, rend - 0x1), lend
459461
end
460462

461-
# TODO: LaTeX environments
463+
# Lex a LaTeX environment whose opening `\begin{` starts at byte index `start`.
#
# The opening line must start with `\begin{NAME}`, where NAME is a non-empty run of
# ASCII letters, digits, or `*`. The following lines are then scanned for one that,
# after leading spaces, starts with `\end{NAME}` for the very same NAME. Lines that do
# not start with `\end{`, or that close a differently named environment, are skipped.
#
# On success this returns the `K"latex_environment"` token spanning `start` through
# the closing brace of `\end{NAME}`, paired with `lineend(bytes, pos)` for the closing
# line. In every other case (malformed opener, no matching closer, or non-blank text
# after the closer on its line) it returns `NONE_TOKEN`.
function lex_latexenv(::LexerState, bytes::DenseVector{UInt8}, start::UInt32)
    hasprefix(bytes, start, "\\begin{") || return NONE_TOKEN
    namestart = start + ncodeunits("\\begin{") % UInt32
    # `nameend` is used below as the index of the first byte after the name.
    nameend = skipcharsets(bytes, namestart, ('a':'z', 'A':'Z', '0':'9', '*'))
    # An empty name (`\begin{}`) is not an environment. Rejecting it here also keeps
    # `namelen - 0x1` in the comparison loop from wrapping around in unsigned
    # arithmetic, which would otherwise walk far outside the name.
    nameend > namestart || return NONE_TOKEN
    nameend < length(bytes) && bytes[nameend] == UInt8('}') || return NONE_TOKEN
    namelen = nameend - namestart
    pos = start
    while pos <= length(bytes)
        # Advance to the first non-space byte of the next line.
        pos = lineend(bytes, pos) + 0x1
        pos = skipspaces(bytes, pos).stop
        hasprefix(bytes, pos, "\\end{") || continue
        pos += ncodeunits("\\end{") % UInt32
        # Index where the closing `}` must sit. It may be the very last byte of the
        # input when the file has no trailing newline, hence `<=` rather than `<`.
        closebrace = pos + namelen
        closebrace <= length(bytes) || return NONE_TOKEN
        namematch = true
        for offset in 0:namelen-0x1
            if bytes[namestart + offset] != bytes[pos + offset]
                namematch = false
                break
            end
        end
        # A different (or merely longer) name: keep looking on later lines.
        namematch && bytes[closebrace] == UInt8('}') || continue
        # Only trailing spaces may follow the closer; end of input counts as end of line.
        closebrace == length(bytes) ||
            islineend(bytes, skipspaces(bytes, closebrace + 0x1).stop) ||
            return NONE_TOKEN
        return Token(K"latex_environment", start, closebrace), lineend(bytes, pos)
    end
    return NONE_TOKEN
end
462490

463491
# TODO: Paragraphs
464492

test/runtests.jl

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,36 @@ end
288288
@test collect(Lexer("----- ")) ==
289289
[Token(K"hrule", 1, 5)]
290290
end
291+
@testset "LaTeX envs" begin
    # Build a document from its lines; every line, including the last, ends in '\n'.
    doc(lines...) = join(lines, '\n') * '\n'

    # A well-formed environment becomes one token ending at the closing brace.
    simple = doc("\\begin{env}", "stuff", "\\end{env}")
    @test collect(Lexer(simple)) == [Token(K"latex_environment", 1, 27)]

    # Text after `\end{env}` on the same line means no environment is produced.
    trailing = doc("\\begin{env}", "stuff", "\\end{env}fluff")
    @test collect(Lexer(trailing)) == Token[]

    # A differently named inner environment is skipped over, and two separate
    # environments in one document yield two tokens.
    multiple = doc(
        "\\begin{equation*}",
        "\\begin{align}",
        "a &= b \\\\",
        "c &= d",
        "\\end{align}",
        "\\end{equation*}",
        "",
        "foo bar",
        "",
        "\\begin{equation*}",
        "x^2 + y^2 = z^2",
        "\\end{equation*}",
    )
    @test collect(Lexer(multiple)) == [
        Token(K"latex_environment", 1, 76),
        Token(K"latex_environment", 88, 136),
    ]
end
291321
@testset "Type inference" begin
292322
@testset "Utilities" begin
293323
bytes, pos = codeunits("abc"), UInt32(1)
@@ -326,6 +356,7 @@ end
326356
@inferred Tuple{Token, UInt32} Org.lex_comment(lstate, bytes, pos)
327357
@inferred Tuple{Token, UInt32} Org.lex_fixedwidth(lstate, bytes, pos)
328358
@inferred Tuple{Token, UInt32} Org.lex_hrule(lstate, bytes, pos)
359+
@inferred Tuple{Token, UInt32} Org.lex_latexenv(lstate, bytes, pos)
329360
end
330361
end
331362
@testset "Unhandled errors" begin
@@ -366,6 +397,7 @@ end
366397
@test_call Org.lex_comment(lstate, bytes, pos)
367398
@test_call Org.lex_fixedwidth(lstate, bytes, pos)
368399
@test_call Org.lex_hrule(lstate, bytes, pos)
400+
@test_call Org.lex_latexenv(lstate, bytes, pos)
369401
end
370402
@testset "Iteration" begin
371403
@test_call iterate(Lexer("abc"), LexerState())
@@ -409,6 +441,7 @@ end
409441
@test_opt Org.lex_comment(lstate, bytes, pos)
410442
@test_opt Org.lex_fixedwidth(lstate, bytes, pos)
411443
@test_opt Org.lex_hrule(lstate, bytes, pos)
444+
@test_opt Org.lex_latexenv(lstate, bytes, pos)
412445
end
413446
@testset "Iteration" begin
414447
@test_opt iterate(Lexer("abc"), LexerState())

0 commit comments

Comments
 (0)