
Commit 37ec22b

Use specific error token kinds, not K"error"
1 parent 38f396e commit 37ec22b
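
In short: before this commit several lexer paths fell back to the generic K"error" kind; afterwards every tokenization error carries a specific kind (two new ones, K"ErrorAmbiguousNumericConstant" and K"ErrorUnknownCharacter", cover the previously generic cases), emit_error requires an explicit kind, and the kind-to-message table moves from src/tokenize.jl into src/kinds.jl as _token_error_descriptions. A rough before/after sketch of the observable effect, reusing the toks helper from test/tokenize.jl (illustrative, not part of the diff):

    # before: the ambiguous "1." lexed to the generic error kind
    toks("1.+2")[1] == ("1." => K"error")
    # after: it gets a specific kind with its own diagnostic message
    toks("1.+2")[1] == ("1." => K"ErrorAmbiguousNumericConstant")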

File tree (4 files changed, +36 −35 lines changed):
  src/kinds.jl
  src/parse_stream.jl
  src/tokenize.jl
  test/tokenize.jl


src/kinds.jl

Lines changed: 18 additions & 0 deletions
@@ -16,10 +16,12 @@ const _kind_names =
     # Tokenization errors
     "ErrorEofMultiComment"
     "ErrorInvalidNumericConstant"
+    "ErrorAmbiguousNumericConstant"
     "ErrorInvalidInterpolationTerminator"
     "ErrorNumericOverflow"
     "ErrorInvalidEscapeSequence"
     "ErrorOverLongCharacter"
+    "ErrorUnknownCharacter"
     # Generic error
     "error"
     "END_ERRORS"
@@ -1014,10 +1016,12 @@ const _nonunique_kind_names = Set([

     K"ErrorEofMultiComment"
     K"ErrorInvalidNumericConstant"
+    K"ErrorAmbiguousNumericConstant"
     K"ErrorInvalidInterpolationTerminator"
     K"ErrorNumericOverflow"
     K"ErrorInvalidEscapeSequence"
     K"ErrorOverLongCharacter"
+    K"ErrorUnknownCharacter"
     K"ErrorInvalidOperator"

     K"Integer"
@@ -1053,6 +1057,20 @@ function untokenize(k::Kind; unique=true)
     end
 end

+# Error kind => description
+_token_error_descriptions = Dict{Kind, String}(
+    K"ErrorEofMultiComment" => "unterminated multi-line comment #= ... =#",
+    K"ErrorInvalidNumericConstant" => "invalid numeric constant",
+    K"ErrorAmbiguousNumericConstant" => "ambiguous `.` syntax; add whitespace to clarify (eg `1.+2` might be `1.0+2` or `1 .+ 2`)",
+    K"ErrorInvalidInterpolationTerminator" => "interpolated variable ends with invalid character; use `\$(...)` instead",
+    K"ErrorNumericOverflow"=>"overflow in numeric literal",
+    K"ErrorInvalidEscapeSequence"=>"invalid string escape sequence",
+    K"ErrorOverLongCharacter"=>"character literal contains multiple characters",
+    K"ErrorUnknownCharacter"=>"unknown unicode character",
+    K"ErrorInvalidOperator" => "invalid operator",
+    K"Error**" => "use `x^y` instead of `x**y` for exponentiation, and `x...` instead of `**x` for splatting",
+    K"error" => "unknown error token",
+)

 #-------------------------------------------------------------------------------
 # Predicates
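
The new _token_error_descriptions table maps each specific error kind to the human-readable message used in diagnostics. A minimal lookup sketch, assuming the table is in scope as defined above (e.g. from inside the package module); not part of the diff:

    msg = _token_error_descriptions[K"ErrorUnknownCharacter"]
    # msg == "unknown unicode character"
    msg = _token_error_descriptions[K"ErrorAmbiguousNumericConstant"]
    # msg == "ambiguous `.` syntax; add whitespace to clarify (eg `1.+2` might be `1.0+2` or `1 .+ 2`)"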

src/parse_stream.jl

Lines changed: 1 addition & 1 deletion
@@ -913,7 +913,7 @@ function validate_tokens(stream::ParseStream)
         elseif is_error(k) && k != K"error"
             # Emit messages for non-generic token errors
             emit_diagnostic(stream, fbyte, lbyte,
-                            error=Tokenize.TOKEN_ERROR_DESCRIPTION[k])
+                            error=_token_error_descriptions[k])
         end
         if error_kind != K"None"
             toks[i] = SyntaxToken(SyntaxHead(error_kind, EMPTY_FLAGS),

src/tokenize.jl

Lines changed: 6 additions & 19 deletions
@@ -12,19 +12,6 @@ include("tokenize_utils.jl")
 #-------------------------------------------------------------------------------
 # Tokens

-# Error kind => description
-TOKEN_ERROR_DESCRIPTION = Dict{Kind, String}(
-    K"ErrorEofMultiComment" => "unterminated multi-line comment #= ... =#",
-    K"ErrorInvalidNumericConstant" => "invalid numeric constant",
-    K"ErrorInvalidInterpolationTerminator" => "interpolated variable ends with invalid character; use `\$(...)` instead",
-    K"ErrorNumericOverflow"=>"overflow in numeric literal",
-    K"ErrorInvalidEscapeSequence"=>"invalid string escape sequence",
-    K"ErrorOverLongCharacter"=>"character literal contains multiple characters",
-    K"ErrorInvalidOperator" => "invalid operator",
-    K"Error**" => "use `x^y` instead of `x**y` for exponentiation, and `x...` instead of `**x` for splatting",
-    K"error" => "unknown error token",
-)
-
 struct Token
     kind::Kind
     # Offsets into a string or buffer
@@ -283,11 +270,11 @@ function emit(l::Lexer, kind::Kind, maybe_op=true)
 end

 """
-    emit_error(l::Lexer, err::Kind=K"error")
+    emit_error(l::Lexer, err::Kind)

 Returns an `K"error"` token with error `err` and starts a new `Token`.
 """
-function emit_error(l::Lexer, err::Kind = K"error")
+function emit_error(l::Lexer, err::Kind)
     @assert is_error(err)
     return emit(l, err)
 end
@@ -387,7 +374,7 @@ function _next_token(l::Lexer, c)
     elseif (k = get(UNICODE_OPS, c, K"error")) != K"error"
         return emit(l, k)
     else
-        emit_error(l)
+        emit_error(l, K"ErrorUnknownCharacter")
     end
 end

@@ -785,7 +772,7 @@ function lex_digit(l::Lexer, kind)
             return emit_error(l, K"ErrorInvalidNumericConstant")
         elseif is_operator_start_char(ppc) && ppc !== ':'
             readchar(l)
-            return emit_error(l)
+            return emit_error(l, K"ErrorAmbiguousNumericConstant")
         elseif (!(isdigit(ppc) ||
             iswhitespace(ppc) ||
             is_identifier_start_char(ppc)
@@ -824,7 +811,7 @@ function lex_digit(l::Lexer, kind)
                 return emit_error(l, K"ErrorInvalidNumericConstant")
             end
         else
-            return emit_error(l)
+            return emit_error(l, K"ErrorInvalidNumericConstant")
         end
     elseif pc == '.' && (is_identifier_start_char(ppc) || ppc == EOF_CHAR)
         readchar(l)
@@ -842,7 +829,7 @@ function lex_digit(l::Lexer, kind)
                 return emit_error(l, K"ErrorInvalidNumericConstant")
             end
         else
-            return emit_error(l)
+            return emit_error(l, K"ErrorInvalidNumericConstant")
         end
     elseif position(l) - startpos(l) == 1 && l.chars[1] == '0'
         kind == K"Integer"
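
With the K"error" default removed, every emit_error call site must name the error it is reporting, and the @assert rejects non-error kinds. A small sketch of the tightened contract (l stands for a Lexer mid-tokenization; these calls are illustrative, not taken from the diff):

    emit_error(l, K"ErrorUnknownCharacter")   # ok: is_error(...) holds, the token gets a specific kind
    emit_error(l, K"Integer")                 # fails the @assert: not an error kind
    emit_error(l)                             # now a MethodError: the K"error" default is gone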

test/tokenize.jl

Lines changed: 11 additions & 15 deletions
@@ -769,10 +769,21 @@ end
     test_error(tok("0op",1), K"ErrorInvalidNumericConstant")
     test_error(tok("--",1), K"ErrorInvalidOperator")

+    @test toks("1e+") == ["1e+"=>K"ErrorInvalidNumericConstant"]
+    @test toks("1.0e+") == ["1.0e+"=>K"ErrorInvalidNumericConstant"]
+    @test toks("0x.") == ["0x."=>K"ErrorInvalidNumericConstant"]
+
     @test toks("1**2") == ["1"=>K"Integer", "**"=>K"Error**", "2"=>K"Integer"]
     @test toks("a<---b") == ["a"=>K"Identifier", "<---"=>K"ErrorInvalidOperator", "b"=>K"Identifier"]
     @test toks("a..+b") == ["a"=>K"Identifier", "..+"=>K"ErrorInvalidOperator", "b"=>K"Identifier"]
     @test toks("a..−b") == ["a"=>K"Identifier", "..−"=>K"ErrorInvalidOperator", "b"=>K"Identifier"]
+
+    @test toks("1.+2") == ["1."=>K"ErrorAmbiguousNumericConstant", "+"=>K"+", "2"=>K"Integer"]
+    @test toks("1.+ ") == ["1."=>K"ErrorAmbiguousNumericConstant", "+"=>K"+", " "=>K"Whitespace"]
+    @test toks("1.⤋") == ["1."=>K"ErrorAmbiguousNumericConstant", "⤋"=>K"⤋"]
+    @test toks("1.?") == ["1."=>K"ErrorAmbiguousNumericConstant", "?"=>K"?"]
+
+    @test toks("\x00") == ["\x00"=>K"ErrorUnknownCharacter"]
 end

 @testset "hat suffix" begin
@@ -786,21 +797,6 @@ end
     @test untokenize(collect(tokenize(s))[1], s) == s
 end

-@testset "invalid float juxt" begin
-    s = "1.+2"
-    @test tok(s, 1).kind == K"error"
-    @test is_operator(tok(s, 2).kind)
-    test_roundtrip("1234.+1", K"error", "1234.")
-    @test tok("1.+ ").kind == K"error"
-    @test tok("1.⤋").kind == K"error"
-    @test tok("1.?").kind == K"error"
-end
-
-@testset "invalid hexadecimal" begin
-    s = "0x."
-    tok(s, 1).kind === K"error"
-end
-
 @testset "circ arrow right op" begin
     s = "↻"
     @test collect(tokenize(s))[1].kind == K"↻"
