Skip to content

Commit 55e84b7

Browse files
authored
Fix for tokenization of consecutive nested multiline comments (#171)
1 parent 56aa403 commit 55e84b7

File tree

2 files changed

+25
-20
lines changed

2 files changed

+25
-20
lines changed

src/tokenize.jl

Lines changed: 18 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -542,33 +542,38 @@ function lex_whitespace(l::Lexer, c)
542542
return emit(l, k)
543543
end
544544

545-
function lex_comment(l::Lexer, doemit=true)
545+
function lex_comment(l::Lexer)
546546
if peekchar(l) != '='
547547
while true
548548
pc = peekchar(l)
549549
if pc == '\n' || pc == EOF_CHAR
550-
return doemit ? emit(l, K"Comment") : EMPTY_TOKEN
550+
return emit(l, K"Comment")
551551
end
552552
readchar(l)
553553
end
554554
else
555-
pc = '#'
556555
c = readchar(l) # consume the '='
557-
n_start, n_end = 1, 0
556+
skip = true # true => c was part of the prev comment marker pair
557+
nesting = 1
558558
while true
559559
if c == EOF_CHAR
560-
return doemit ? emit_error(l, K"ErrorEofMultiComment") : EMPTY_TOKEN
560+
return emit_error(l, K"ErrorEofMultiComment")
561561
end
562562
nc = readchar(l)
563-
if c == '#' && nc == '='
564-
n_start += 1
565-
elseif c == '=' && nc == '#' && pc != '#'
566-
n_end += 1
567-
end
568-
if n_start == n_end
569-
return doemit ? emit(l, K"Comment") : EMPTY_TOKEN
563+
if skip
564+
skip = false
565+
else
566+
if c == '#' && nc == '='
567+
nesting += 1
568+
skip = true
569+
elseif c == '=' && nc == '#'
570+
nesting -= 1
571+
skip = true
572+
if nesting == 0
573+
return emit(l, K"Comment")
574+
end
575+
end
570576
end
571-
pc = c
572577
c = nc
573578
end
574579
end

test/tokenize.jl

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -215,7 +215,7 @@ end
215215
end
216216

217217
@testset "comments" begin
218-
toks = collect(tokenize("""
218+
ts = collect(tokenize("""
219219
#
220220
\"\"\"
221221
f
@@ -227,7 +227,12 @@ end
227227
K"\"\"\"", K"String", K"String", K"\"\"\"", K"NewlineWs",
228228
K"Integer", K"NewlineWs",
229229
K"EndMarker"]
230-
@test kind.(toks) == kinds
230+
@test kind.(ts) == kinds
231+
232+
@test toks("#=# text=#") == ["#=# text=#"=>K"Comment"]
233+
234+
@test toks("#=#==#=#") == ["#=#==#=#"=>K"Comment"]
235+
@test toks("#=#==#=") == ["#=#==#="=>K"ErrorEofMultiComment"]
231236
end
232237

233238

@@ -791,11 +796,6 @@ end
791796
@test tok("1.?").kind == K"error"
792797
end
793798

794-
@testset "comments" begin
795-
s = "#=# text=#"
796-
@test length(collect(tokenize(s))) == 2
797-
end
798-
799799
@testset "invalid hexadecimal" begin
800800
s = "0x."
801801
tok(s, 1).kind === K"error"

0 commit comments

Comments
 (0)