Skip to content

Commit 3a12196

Browse files
JeffBezanson and c42f authored
add more specific error for invalid identifier start chars (#421)
* add more specific error for invalid identifier start chars
* Tweak ErrorIdentifierStart message + add diagnostics test.
---------
Co-authored-by: Claire Foster <[email protected]>
1 parent 25f8eb2 commit 3a12196

File tree

5 files changed

+8
-3
lines changed

5 files changed

+8
-3
lines changed

src/kinds.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ register_kinds!(JuliaSyntax, 0, [
212212
"ErrorOverLongCharacter"
213213
"ErrorInvalidUTF8"
214214
"ErrorInvisibleChar"
215+
"ErrorIdentifierStart"
215216
"ErrorUnknownCharacter"
216217
"ErrorBidiFormatting"
217218
# Generic error
@@ -1175,6 +1176,7 @@ const _token_error_descriptions = Dict{Kind, String}(
11751176
K"ErrorOverLongCharacter"=>"character literal contains multiple characters",
11761177
K"ErrorInvalidUTF8"=>"invalid UTF-8 sequence",
11771178
K"ErrorInvisibleChar"=>"invisible character",
1179+
K"ErrorIdentifierStart" => "identifier cannot begin with character",
11781180
K"ErrorUnknownCharacter"=>"unknown unicode character",
11791181
K"ErrorBidiFormatting"=>"unbalanced bidirectional unicode formatting",
11801182
K"ErrorInvalidOperator" => "invalid operator",

src/parse_stream.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1083,7 +1083,7 @@ function validate_tokens(stream::ParseStream)
10831083
elseif is_error(k) && k != K"error"
10841084
# Emit messages for non-generic token errors
10851085
tokstr = String(txtbuf[tokrange])
1086-
msg = if k in KSet"ErrorInvisibleChar ErrorUnknownCharacter"
1086+
msg = if k in KSet"ErrorInvisibleChar ErrorUnknownCharacter ErrorIdentifierStart"
10871087
"$(_token_error_descriptions[k]) $(repr(tokstr[1]))"
10881088
elseif k in KSet"ErrorInvalidUTF8 ErrorBidiFormatting"
10891089
"$(_token_error_descriptions[k]) $(repr(tokstr))"

src/tokenize.jl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -538,8 +538,9 @@ function _next_token(l::Lexer, c)
538538
return emit(l, k)
539539
else
540540
emit(l,
541-
!isvalid(c) ? K"ErrorInvalidUTF8" :
542-
is_invisible_char(c) ? K"ErrorInvisibleChar" :
541+
!isvalid(c) ? K"ErrorInvalidUTF8" :
542+
is_invisible_char(c) ? K"ErrorInvisibleChar" :
543+
is_identifier_char(c) ? K"ErrorIdentifierStart" :
543544
K"ErrorUnknownCharacter")
544545
end
545546
end

test/diagnostics.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ end
1919
@test diagnostic("a$(c)b") ==
2020
Diagnostic(2, 1+sizeof(string(c)), :error, "invisible character $(repr(c))")
2121
end
22+
@test diagnostic("₁") == Diagnostic(1, 3, :error, "identifier cannot begin with character '₁'")
2223
@test diagnostic(":⥻") == Diagnostic(2, 4, :error, "unknown unicode character '⥻'")
2324

2425
@test diagnostic("\"X \u202a X\"") == Diagnostic(2, 8, :error, "unbalanced bidirectional unicode formatting \"X \\u202a X\"")

test/tokenize.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1009,6 +1009,7 @@ end
10091009

10101010
@testset "invalid UTF-8 characters" begin
10111011
@test onlytok("\x00") == K"ErrorUnknownCharacter"
1012+
@test onlytok("₁") == K"ErrorIdentifierStart"
10121013

10131014
bad_chars = [
10141015
first("\xe2") # malformed

0 commit comments

Comments
 (0)