Skip to content

Commit bb1265e

Browse files
authored
Merge pull request #181 from JuliaLang/c42f/token-error-fixes
Fixes for General registry testing
2 parents 45d8431 + 37ec22b commit bb1265e

File tree

5 files changed

+91
-85
lines changed

5 files changed

+91
-85
lines changed

src/kinds.jl

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,12 @@ const _kind_names =
1616
# Tokenization errors
1717
"ErrorEofMultiComment"
1818
"ErrorInvalidNumericConstant"
19+
"ErrorAmbiguousNumericConstant"
1920
"ErrorInvalidInterpolationTerminator"
2021
"ErrorNumericOverflow"
2122
"ErrorInvalidEscapeSequence"
2223
"ErrorOverLongCharacter"
24+
"ErrorUnknownCharacter"
2325
# Generic error
2426
"error"
2527
"END_ERRORS"
@@ -1014,10 +1016,12 @@ const _nonunique_kind_names = Set([
10141016

10151017
K"ErrorEofMultiComment"
10161018
K"ErrorInvalidNumericConstant"
1019+
K"ErrorAmbiguousNumericConstant"
10171020
K"ErrorInvalidInterpolationTerminator"
10181021
K"ErrorNumericOverflow"
10191022
K"ErrorInvalidEscapeSequence"
10201023
K"ErrorOverLongCharacter"
1024+
K"ErrorUnknownCharacter"
10211025
K"ErrorInvalidOperator"
10221026

10231027
K"Integer"
@@ -1053,6 +1057,20 @@ function untokenize(k::Kind; unique=true)
10531057
end
10541058
end
10551059

1060+
# Error kind => description
1061+
_token_error_descriptions = Dict{Kind, String}(
1062+
K"ErrorEofMultiComment" => "unterminated multi-line comment #= ... =#",
1063+
K"ErrorInvalidNumericConstant" => "invalid numeric constant",
1064+
K"ErrorAmbiguousNumericConstant" => "ambiguous `.` syntax; add whitespace to clarify (eg `1.+2` might be `1.0+2` or `1 .+ 2`)",
1065+
K"ErrorInvalidInterpolationTerminator" => "interpolated variable ends with invalid character; use `\$(...)` instead",
1066+
K"ErrorNumericOverflow"=>"overflow in numeric literal",
1067+
K"ErrorInvalidEscapeSequence"=>"invalid string escape sequence",
1068+
K"ErrorOverLongCharacter"=>"character literal contains multiple characters",
1069+
K"ErrorUnknownCharacter"=>"unknown unicode character",
1070+
K"ErrorInvalidOperator" => "invalid operator",
1071+
K"Error**" => "use `x^y` instead of `x**y` for exponentiation, and `x...` instead of `**x` for splatting",
1072+
K"error" => "unknown error token",
1073+
)
10561074

10571075
#-------------------------------------------------------------------------------
10581076
# Predicates

src/parse_stream.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -913,7 +913,7 @@ function validate_tokens(stream::ParseStream)
913913
elseif is_error(k) && k != K"error"
914914
# Emit messages for non-generic token errors
915915
emit_diagnostic(stream, fbyte, lbyte,
916-
error=Tokenize.TOKEN_ERROR_DESCRIPTION[k])
916+
error=_token_error_descriptions[k])
917917
end
918918
if error_kind != K"None"
919919
toks[i] = SyntaxToken(SyntaxHead(error_kind, EMPTY_FLAGS),

src/tokenize.jl

Lines changed: 6 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -12,19 +12,6 @@ include("tokenize_utils.jl")
1212
#-------------------------------------------------------------------------------
1313
# Tokens
1414

15-
# Error kind => description
16-
TOKEN_ERROR_DESCRIPTION = Dict{Kind, String}(
17-
K"ErrorEofMultiComment" => "unterminated multi-line comment #= ... =#",
18-
K"ErrorInvalidNumericConstant" => "invalid numeric constant",
19-
K"ErrorInvalidInterpolationTerminator" => "interpolated variable ends with invalid character; use `\$(...)` instead",
20-
K"ErrorNumericOverflow"=>"overflow in numeric literal",
21-
K"ErrorInvalidEscapeSequence"=>"invalid string escape sequence",
22-
K"ErrorOverLongCharacter"=>"character literal contains multiple characters",
23-
K"ErrorInvalidOperator" => "invalid operator",
24-
K"Error**" => "use `x^y` instead of `x**y` for exponentiation, and `x...` instead of `**x` for splatting",
25-
K"error" => "unknown error token",
26-
)
27-
2815
struct Token
2916
kind::Kind
3017
# Offsets into a string or buffer
@@ -283,11 +270,11 @@ function emit(l::Lexer, kind::Kind, maybe_op=true)
283270
end
284271

285272
"""
286-
emit_error(l::Lexer, err::Kind=K"error")
273+
emit_error(l::Lexer, err::Kind)
287274
288275
Returns an error token of kind `err` and starts a new `Token`.
289276
"""
290-
function emit_error(l::Lexer, err::Kind = K"error")
277+
function emit_error(l::Lexer, err::Kind)
291278
@assert is_error(err)
292279
return emit(l, err)
293280
end
@@ -387,7 +374,7 @@ function _next_token(l::Lexer, c)
387374
elseif (k = get(UNICODE_OPS, c, K"error")) != K"error"
388375
return emit(l, k)
389376
else
390-
emit_error(l)
377+
emit_error(l, K"ErrorUnknownCharacter")
391378
end
392379
end
393380

@@ -785,7 +772,7 @@ function lex_digit(l::Lexer, kind)
785772
return emit_error(l, K"ErrorInvalidNumericConstant")
786773
elseif is_operator_start_char(ppc) && ppc !== ':'
787774
readchar(l)
788-
return emit_error(l)
775+
return emit_error(l, K"ErrorAmbiguousNumericConstant")
789776
elseif (!(isdigit(ppc) ||
790777
iswhitespace(ppc) ||
791778
is_identifier_start_char(ppc)
@@ -824,7 +811,7 @@ function lex_digit(l::Lexer, kind)
824811
return emit_error(l, K"ErrorInvalidNumericConstant")
825812
end
826813
else
827-
return emit_error(l)
814+
return emit_error(l, K"ErrorInvalidNumericConstant")
828815
end
829816
elseif pc == '.' && (is_identifier_start_char(ppc) || ppc == EOF_CHAR)
830817
readchar(l)
@@ -842,7 +829,7 @@ function lex_digit(l::Lexer, kind)
842829
return emit_error(l, K"ErrorInvalidNumericConstant")
843830
end
844831
else
845-
return emit_error(l)
832+
return emit_error(l, K"ErrorInvalidNumericConstant")
846833
end
847834
elseif position(l) - startpos(l) == 1 && l.chars[1] == '0'
848835
kind == K"Integer"

test/tokenize.jl

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -769,10 +769,21 @@ end
769769
test_error(tok("0op",1), K"ErrorInvalidNumericConstant")
770770
test_error(tok("--",1), K"ErrorInvalidOperator")
771771

772+
@test toks("1e+") == ["1e+"=>K"ErrorInvalidNumericConstant"]
773+
@test toks("1.0e+") == ["1.0e+"=>K"ErrorInvalidNumericConstant"]
774+
@test toks("0x.") == ["0x."=>K"ErrorInvalidNumericConstant"]
775+
772776
@test toks("1**2") == ["1"=>K"Integer", "**"=>K"Error**", "2"=>K"Integer"]
773777
@test toks("a<---b") == ["a"=>K"Identifier", "<---"=>K"ErrorInvalidOperator", "b"=>K"Identifier"]
774778
@test toks("a..+b") == ["a"=>K"Identifier", "..+"=>K"ErrorInvalidOperator", "b"=>K"Identifier"]
775779
@test toks("a..−b") == ["a"=>K"Identifier", "..−"=>K"ErrorInvalidOperator", "b"=>K"Identifier"]
780+
781+
@test toks("1.+2") == ["1."=>K"ErrorAmbiguousNumericConstant", "+"=>K"+", "2"=>K"Integer"]
782+
@test toks("1.+ ") == ["1."=>K"ErrorAmbiguousNumericConstant", "+"=>K"+", " "=>K"Whitespace"]
783+
@test toks("1.⤋") == ["1."=>K"ErrorAmbiguousNumericConstant", ""=>K""]
784+
@test toks("1.?") == ["1."=>K"ErrorAmbiguousNumericConstant", "?"=>K"?"]
785+
786+
@test toks("\x00") == ["\x00"=>K"ErrorUnknownCharacter"]
776787
end
777788

778789
@testset "hat suffix" begin
@@ -786,21 +797,6 @@ end
786797
@test untokenize(collect(tokenize(s))[1], s) == s
787798
end
788799

789-
@testset "invalid float juxt" begin
790-
s = "1.+2"
791-
@test tok(s, 1).kind == K"error"
792-
@test is_operator(tok(s, 2).kind)
793-
test_roundtrip("1234.+1", K"error", "1234.")
794-
@test tok("1.+ ").kind == K"error"
795-
@test tok("1.⤋").kind == K"error"
796-
@test tok("1.?").kind == K"error"
797-
end
798-
799-
@testset "invalid hexadecimal" begin
800-
s = "0x."
801-
tok(s, 1).kind === K"error"
802-
end
803-
804800
@testset "circ arrow right op" begin
805801
s = ""
806802
@test collect(tokenize(s))[1].kind == K""

tools/check_all_packages.jl

Lines changed: 55 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -12,68 +12,73 @@ logger = Logging.ConsoleLogger(logio)
1212

1313
pkgspath = joinpath(@__DIR__, "pkgs")
1414

15+
exception_count = 0
16+
mismatch_count = 0
17+
file_count = 0
18+
t0 = time()
1519
exceptions = []
20+
1621
Logging.with_logger(logger) do
17-
t = time()
18-
i = 0
19-
iob = IOBuffer()
20-
exception_count = 0
21-
mismatch_count = 0
22+
global exception_count, mismatch_count, file_count, t0
2223
for (r, _, files) in walkdir(pkgspath)
2324
for f in files
2425
endswith(f, ".jl") || continue
2526
fpath = joinpath(r, f)
26-
if isfile(fpath)
27-
code = read(fpath, String)
28-
expr_cache = fpath*".Expr"
29-
#e2 = JuliaSyntax.fl_parseall(code)
30-
e2 = open(deserialize, fpath*".Expr")
31-
@assert Meta.isexpr(e2, :toplevel)
32-
try
33-
e1 = JuliaSyntax.parseall(Expr, code, filename=fpath)
34-
if !exprs_roughly_equal(e2, e1)
35-
mismatch_count += 1
36-
@error("Parsers succeed but disagree",
37-
fpath,
38-
diff=Text(sprint(show_expr_text_diff, show, e1, e2)),
39-
)
40-
end
41-
catch err
42-
err isa InterruptException && rethrow()
43-
ex = (err, catch_backtrace())
44-
push!(exceptions, ex)
45-
ref_parse = "success"
46-
if length(e2.args) >= 1 && Meta.isexpr(last(e2.args), (:error, :incomplete))
47-
ref_parse = "fail"
48-
if err isa JuliaSyntax.ParseError
49-
# Both parsers agree that there's an error, and
50-
# JuliaSyntax didn't have an internal error.
51-
continue
52-
end
53-
end
27+
isfile(fpath) || continue
5428

55-
exception_count += 1
56-
parse_to_syntax = "success"
57-
try
58-
JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, code)
59-
catch err2
60-
parse_to_syntax = "fail"
29+
code = read(fpath, String)
30+
expr_cache = fpath*".Expr"
31+
#e2 = JuliaSyntax.fl_parseall(code)
32+
e2 = open(deserialize, fpath*".Expr")
33+
@assert Meta.isexpr(e2, :toplevel)
34+
try
35+
e1 = JuliaSyntax.parseall(Expr, code, filename=fpath, ignore_warnings=true)
36+
if !exprs_roughly_equal(e2, e1)
37+
mismatch_count += 1
38+
@error("Parsers succeed but disagree",
39+
fpath,
40+
diff=Text(sprint(show_expr_text_diff, show, e1, e2)),
41+
)
42+
end
43+
catch err
44+
err isa InterruptException && rethrow()
45+
ex = (err, catch_backtrace())
46+
push!(exceptions, ex)
47+
ref_parse = "success"
48+
if length(e2.args) >= 1 && Meta.isexpr(last(e2.args), (:error, :incomplete))
49+
ref_parse = "fail"
50+
if err isa JuliaSyntax.ParseError
51+
# Both parsers agree that there's an error, and
52+
# JuliaSyntax didn't have an internal error.
53+
continue
6154
end
62-
@error "Parse failed" fpath exception=ex parse_to_syntax
6355
end
56+
57+
exception_count += 1
58+
parse_to_syntax = "success"
59+
try
60+
JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, code)
61+
catch err2
62+
parse_to_syntax = "fail"
63+
end
64+
@error "Parse failed" fpath exception=ex parse_to_syntax
6465
end
65-
i += 1
66-
if i % 100 == 0
67-
runtime = time() - t
68-
avg = round(runtime/i*1000, digits = 2)
69-
print(iob, "\e[2J\e[0;0H")
70-
println(iob, "$i files parsed")
71-
println(iob, "> $(exception_count) failures compared to reference parser")
72-
println(iob, "> $(mismatch_count) Expr mismatches")
73-
println(iob, "> $(avg)ms per file, $(round(Int, runtime))s in total")
74-
println(stderr, String(take!(iob)))
66+
67+
file_count += 1
68+
if file_count % 100 == 0
69+
t_avg = round((time() - t0)/file_count*1000, digits = 2)
70+
print(stderr, "\r$file_count files parsed, $t_avg ms per file")
7571
end
7672
end
7773
end
7874
end
7975
close(logio)
76+
77+
t_avg = round((time() - t0)/file_count*1000, digits = 2)
78+
79+
println()
80+
@info """
81+
Finished parsing $file_count files.
82+
$(exception_count) failures compared to reference parser
83+
$(mismatch_count) Expr mismatches
84+
$(t_avg)ms per file"""

0 commit comments

Comments
 (0)