Skip to content

Commit dae2d23

Browse files
authored
Fix various numeric literal token errors (#196)
A fairly big refactor of numeric literal tokenization error cases and a couple of other tokenizer errors ported from the flisp code. * For hexfloat, emit a more specific error when the `p` suffix is missing. * For octal, hex and binary, add errors for trailing invalid digits or identifier characters like `0b123` and `0xenomorph` * Emit an error for ambiguous numeric constants with dot suffix vs juxtaposition like `1.(` * Emit an error for underscore directly after dot as in `1._` * Emit an error for hexfloat without digits `0x.p0` * Add an invalid operator error for `<---` for compatibility with the reference parser.
1 parent 0b2c422 commit dae2d23

File tree

6 files changed

+210
-202
lines changed

6 files changed

+210
-202
lines changed

src/kinds.jl

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ const _kind_names =
1616
# Tokenization errors
1717
"ErrorEofMultiComment"
1818
"ErrorInvalidNumericConstant"
19+
"ErrorHexFloatMustContainP"
1920
"ErrorAmbiguousNumericConstant"
21+
"ErrorAmbiguousNumericDotMultiply"
2022
"ErrorInvalidInterpolationTerminator"
2123
"ErrorNumericOverflow"
2224
"ErrorInvalidEscapeSequence"
@@ -1016,7 +1018,9 @@ const _nonunique_kind_names = Set([
10161018

10171019
K"ErrorEofMultiComment"
10181020
K"ErrorInvalidNumericConstant"
1021+
K"ErrorHexFloatMustContainP"
10191022
K"ErrorAmbiguousNumericConstant"
1023+
K"ErrorAmbiguousNumericDotMultiply"
10201024
K"ErrorInvalidInterpolationTerminator"
10211025
K"ErrorNumericOverflow"
10221026
K"ErrorInvalidEscapeSequence"
@@ -1061,7 +1065,9 @@ end
10611065
_token_error_descriptions = Dict{Kind, String}(
10621066
K"ErrorEofMultiComment" => "unterminated multi-line comment #= ... =#",
10631067
K"ErrorInvalidNumericConstant" => "invalid numeric constant",
1068+
K"ErrorHexFloatMustContainP" => "hex float literal must contain `p` or `P`",
10641069
K"ErrorAmbiguousNumericConstant" => "ambiguous `.` syntax; add whitespace to clarify (eg `1.+2` might be `1.0+2` or `1 .+ 2`)",
1070+
K"ErrorAmbiguousNumericDotMultiply" => "numeric constant cannot be implicitly multiplied because it ends with `.`",
10651071
K"ErrorInvalidInterpolationTerminator" => "interpolated variable ends with invalid character; use `\$(...)` instead",
10661072
K"ErrorNumericOverflow"=>"overflow in numeric literal",
10671073
K"ErrorInvalidEscapeSequence"=>"invalid string escape sequence",

src/parser.jl

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1103,9 +1103,6 @@ function is_juxtapose(ps, prev_k, t)
11031103
!(is_block_form(prev_k) ||
11041104
is_syntactic_unary_op(prev_k) ||
11051105
is_initial_reserved_word(ps, prev_k) ))) &&
1106-
# https://github.com/JuliaLang/julia/issues/16356
1107-
# 0xenomorph ==> 0x0e
1108-
!(prev_k in KSet"BinInt HexInt OctInt" && (k == K"Identifier" || is_keyword(k))) &&
11091106
(!is_operator(k) || is_radical_op(k)) &&
11101107
!is_closing_token(ps, k) &&
11111108
!is_initial_reserved_word(ps, k)

src/tokenize.jl

Lines changed: 36 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,8 @@ function lex_less(l::Lexer)
614614
else
615615
if accept(l, '>')
616616
return emit(l, K"<-->")
617+
elseif accept(l, '-')
618+
return emit_error(l, K"ErrorInvalidOperator")
617619
else
618620
return emit(l, K"<--")
619621
end
@@ -772,33 +774,13 @@ function lex_digit(l::Lexer, kind)
772774
return emit_error(l, K"ErrorInvalidNumericConstant")
773775
elseif is_operator_start_char(ppc) && ppc !== ':'
774776
readchar(l)
775-
return emit_error(l, K"ErrorAmbiguousNumericConstant")
776-
elseif (!(isdigit(ppc) ||
777-
iswhitespace(ppc) ||
778-
is_identifier_start_char(ppc)
779-
|| ppc == '('
780-
|| ppc == ')'
781-
|| ppc == '['
782-
|| ppc == ']'
783-
|| ppc == '{'
784-
|| ppc == '}'
785-
|| ppc == ','
786-
|| ppc == ';'
787-
|| ppc == '@'
788-
|| ppc == '`'
789-
|| ppc == '"'
790-
|| ppc == ':'
791-
|| ppc == '?'
792-
|| ppc == '#'
793-
|| ppc == EOF_CHAR))
794-
kind = K"Integer"
795-
796-
return emit(l, kind)
777+
return emit_error(l, K"ErrorAmbiguousNumericConstant") # `1.+`
797778
end
798779
readchar(l)
799780

800781
kind = K"Float"
801-
accept_number(l, isdigit)
782+
accept(l, '_') && return emit_error(l, K"ErrorInvalidNumericConstant") # `1._`
783+
had_fraction_digs = accept_number(l, isdigit)
802784
pc, ppc = dpeekchar(l)
803785
if (pc == 'e' || pc == 'E' || pc == 'f') && (isdigit(ppc) || ppc == '+' || ppc == '-' || ppc == '')
804786
kind = pc == 'f' ? K"Float32" : K"Float"
@@ -807,17 +789,20 @@ function lex_digit(l::Lexer, kind)
807789
if accept_batch(l, isdigit)
808790
pc,ppc = dpeekchar(l)
809791
if pc === '.' && !dotop2(ppc)
810-
accept(l, '.')
811-
return emit_error(l, K"ErrorInvalidNumericConstant")
792+
readchar(l)
793+
return emit_error(l, K"ErrorInvalidNumericConstant") # `1.e1.`
812794
end
813795
else
814-
return emit_error(l, K"ErrorInvalidNumericConstant")
796+
return emit_error(l, K"ErrorInvalidNumericConstant") # `1.e`
815797
end
816-
elseif pc == '.' && (is_identifier_start_char(ppc) || ppc == EOF_CHAR)
798+
elseif pc == '.' && ppc != '.' && !is_operator_start_char(ppc)
817799
readchar(l)
818-
return emit_error(l, K"ErrorInvalidNumericConstant")
800+
return emit_error(l, K"ErrorInvalidNumericConstant") # `1.1.`
801+
elseif !had_fraction_digs && (is_identifier_start_char(pc) ||
802+
pc == '(' || pc == '[' || pc == '{' ||
803+
pc == '@' || pc == '`' || pc == '"')
804+
return emit_error(l, K"ErrorAmbiguousNumericDotMultiply") # `1.(` `1.x`
819805
end
820-
821806
elseif (pc == 'e' || pc == 'E' || pc == 'f') && (isdigit(ppc) || ppc == '+' || ppc == '-' || ppc == '')
822807
kind = pc == 'f' ? K"Float32" : K"Float"
823808
readchar(l)
@@ -826,44 +811,54 @@ function lex_digit(l::Lexer, kind)
826811
pc,ppc = dpeekchar(l)
827812
if pc === '.' && !dotop2(ppc)
828813
accept(l, '.')
829-
return emit_error(l, K"ErrorInvalidNumericConstant")
814+
return emit_error(l, K"ErrorInvalidNumericConstant") # `1e1.`
830815
end
831816
else
832-
return emit_error(l, K"ErrorInvalidNumericConstant")
817+
return emit_error(l, K"ErrorInvalidNumericConstant") # `1e+`
833818
end
834819
elseif position(l) - startpos(l) == 1 && l.chars[1] == '0'
835820
kind == K"Integer"
821+
is_bin_oct_hex_int = false
836822
if pc == 'x'
837823
kind = K"HexInt"
838824
isfloat = false
839825
readchar(l)
840-
!(ishex(ppc) || ppc == '.') && return emit_error(l, K"ErrorInvalidNumericConstant")
841-
accept_number(l, ishex)
826+
had_digits = accept_number(l, ishex)
842827
pc,ppc = dpeekchar(l)
843828
if pc == '.' && ppc != '.'
844829
readchar(l)
845-
accept_number(l, ishex)
830+
had_digits |= accept_number(l, ishex)
846831
isfloat = true
847832
end
848833
if accept(l, "pP")
849834
kind = K"Float"
850835
accept(l, "+-−")
851-
if !accept_number(l, isdigit)
852-
return emit_error(l, K"ErrorInvalidNumericConstant")
836+
if !accept_number(l, isdigit) || !had_digits
837+
return emit_error(l, K"ErrorInvalidNumericConstant") # `0x1p` `0x.p0`
853838
end
854839
elseif isfloat
855-
return emit_error(l, K"ErrorInvalidNumericConstant")
840+
return emit_error(l, K"ErrorHexFloatMustContainP") # `0x.` `0x1.0`
856841
end
842+
is_bin_oct_hex_int = !isfloat
857843
elseif pc == 'b'
858-
!isbinary(ppc) && return emit_error(l, K"ErrorInvalidNumericConstant")
859844
readchar(l)
860-
accept_number(l, isbinary)
845+
had_digits = accept_number(l, isbinary)
861846
kind = K"BinInt"
847+
is_bin_oct_hex_int = true
862848
elseif pc == 'o'
863-
!isoctal(ppc) && return emit_error(l, K"ErrorInvalidNumericConstant")
864849
readchar(l)
865-
accept_number(l, isoctal)
850+
had_digits = accept_number(l, isoctal)
866851
kind = K"OctInt"
852+
is_bin_oct_hex_int = true
853+
end
854+
if is_bin_oct_hex_int
855+
pc = peekchar(l)
856+
if !had_digits || isdigit(pc) || is_identifier_start_char(pc)
857+
accept_batch(l, c->isdigit(c) || is_identifier_start_char(c))
858+
# `0x` `0xg` `0x_` `0x-`
859+
# `0b123` `0o78p` `0xenomorph` `0xaα`
860+
return emit_error(l, K"ErrorInvalidNumericConstant")
861+
end
867862
end
868863
end
869864
return emit(l, kind)

src/utils.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,7 @@ function _fl_parse_string(text::AbstractString, filename::AbstractString,
121121
ex, offset+1
122122
end
123123

124+
# Convenience functions to mirror `JuliaSyntax.parse(Expr, ...)` in simple cases.
125+
# Accept a leading `Expr` type argument and forward all remaining positional
# and keyword arguments unchanged to the `fl_parse` methods that take no type.
function fl_parse(::Type{Expr}, args...; kws...)
    return fl_parse(args...; kws...)
end
126+
# Accept a leading `Expr` type argument and forward all remaining positional
# and keyword arguments unchanged to the `fl_parseall` methods that take no type.
function fl_parseall(::Type{Expr}, args...; kws...)
    return fl_parseall(args...; kws...)
end
127+

test/parser.jl

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,6 @@ tests = [
193193
"sqrt(2)2" => "(call sqrt 2)"
194194
"x' y" => "(call-post x ')"
195195
"x 'y" => "x"
196-
"0xenomorph" => "0x0e"
197196
],
198197
JuliaSyntax.parse_unary => [
199198
":T" => "(quote T)"
@@ -950,17 +949,6 @@ broken_tests = [
950949
"@!x" => "(macrocall @! x)"
951950
"@..x" => "(macrocall @.. x)"
952951
"@.x" => "(macrocall @__dot__ x)"
953-
# Invalid numeric literals, not juxtaposition
954-
"0b12" => "(error \"0b12\")"
955-
"0xex" => "(error \"0xex\")"
956-
# Bad character literals
957-
"'\\xff'" => "(error '\\xff')"
958-
"'\\x80'" => "(error '\\x80')"
959-
"'ab'" => "(error 'ab')"
960-
]
961-
JuliaSyntax.parse_juxtapose => [
962-
# Want: "numeric constant \"10.\" cannot be implicitly multiplied because it ends with \".\""
963-
"10.x" => "(error (call * 10.0 x))"
964952
]
965953
]
966954

0 commit comments

Comments
 (0)