Skip to content

Commit b39008a

Browse files
committed
Cleanup and fix operator predicates
Remove big lists of operator kinds and inscrutable hard-coded Unicode ranges in predicates. Instead, generate all this code directly from the table of operator kinds. This makes it much easier to add new operators, since they are now defined in a single place in the code, and thereby fixes a few bugs/inconsistencies which have crept in over time as new operators were added. Also fix several bugs in the tokenizer where `is_operator_start_char()` was used, but the check should have been restricted to dottable operators.
1 parent 69ec1d9 commit b39008a

File tree

4 files changed

+92
-871
lines changed

4 files changed

+92
-871
lines changed

src/literal_parsing.jl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -329,11 +329,11 @@ end
329329

330330
# static wrapper around user callback function
331331
function utf8proc_custom_func(codepoint::UInt32, ::Ptr{Cvoid})::UInt32
332-
(codepoint == 0x025B ? 0x03B5 :
333-
codepoint == 0x00B5 ? 0x03BC :
334-
codepoint == 0x00B7 ? 0x22C5 :
335-
codepoint == 0x0387 ? 0x22C5 :
336-
codepoint == 0x2212 ? 0x002D :
332+
(codepoint == 0x025B ? 0x03B5 : # 'ɛ' => 'ε'
333+
codepoint == 0x00B5 ? 0x03BC : # 'µ' => 'μ'
334+
codepoint == 0x00B7 ? 0x22C5 : # '·' => '⋅'
335+
codepoint == 0x0387 ? 0x22C5 : # '·' => '⋅'
336+
codepoint == 0x2212 ? 0x002D : # '−' => '-'
337337
codepoint)
338338
end
339339

src/tokenize.jl

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ module Tokenize
22

33
export tokenize, untokenize, Tokens
44

5-
using ..JuliaSyntax: Kind, @K_str
5+
using ..JuliaSyntax: JuliaSyntax, Kind, @K_str
66

77
import ..JuliaSyntax: kind,
88
is_literal, is_error, is_contextual_keyword, is_word_operator
@@ -370,7 +370,7 @@ function _next_token(l::Lexer, c)
370370
return lex_identifier(l, c)
371371
elseif isdigit(c)
372372
return lex_digit(l, K"Integer")
373-
elseif (k = get(UNICODE_OPS, c, K"error")) != K"error"
373+
elseif (k = get(_unicode_ops, c, K"error")) != K"error"
374374
return emit(l, k)
375375
else
376376
emit_error(l, K"ErrorUnknownCharacter")
@@ -416,6 +416,7 @@ function lex_string_chunk(l)
416416
!(pc == EOF_CHAR || is_operator_start_char(pc) || is_never_id_char(pc))
417417
# Only allow certain characters after interpolated vars
418418
# https://github.com/JuliaLang/julia/pull/25234
419+
readchar(l)
419420
return emit_error(l, K"ErrorInvalidInterpolationTerminator")
420421
end
421422
if pc == EOF_CHAR
@@ -771,7 +772,7 @@ function lex_digit(l::Lexer, kind)
771772
# If we enter the function with kind == K"Float" then a '.' has been parsed.
772773
readchar(l)
773774
return emit_error(l, K"ErrorInvalidNumericConstant")
774-
elseif is_operator_start_char(ppc) && ppc !== ':'
775+
elseif is_dottable_operator_start_char(ppc)
775776
readchar(l)
776777
return emit_error(l, K"ErrorAmbiguousNumericConstant") # `1.+`
777778
end
@@ -787,14 +788,14 @@ function lex_digit(l::Lexer, kind)
787788
accept(l, "+-−")
788789
if accept_batch(l, isdigit)
789790
pc,ppc = dpeekchar(l)
790-
if pc === '.' && !dotop2(ppc)
791+
if pc === '.' && !is_dottable_operator_start_char(ppc)
791792
readchar(l)
792793
return emit_error(l, K"ErrorInvalidNumericConstant") # `1.e1.`
793794
end
794795
else
795796
return emit_error(l, K"ErrorInvalidNumericConstant") # `1.e`
796797
end
797-
elseif pc == '.' && ppc != '.' && !is_operator_start_char(ppc)
798+
elseif pc == '.' && ppc != '.' && !is_dottable_operator_start_char(ppc)
798799
readchar(l)
799800
return emit_error(l, K"ErrorInvalidNumericConstant") # `1.1.`
800801
elseif !had_fraction_digs && (is_identifier_start_char(pc) ||
@@ -808,7 +809,7 @@ function lex_digit(l::Lexer, kind)
808809
accept(l, "+-−")
809810
if accept_batch(l, isdigit)
810811
pc,ppc = dpeekchar(l)
811-
if pc === '.' && !dotop2(ppc)
812+
if pc === '.' && !is_dottable_operator_start_char(ppc)
812813
accept(l, '.')
813814
return emit_error(l, K"ErrorInvalidNumericConstant") # `1e1.`
814815
end
@@ -948,7 +949,7 @@ function lex_dot(l::Lexer)
948949
if accept(l, '.')
949950
return emit(l, K"...")
950951
else
951-
if dotop2(peekchar(l))
952+
if is_dottable_operator_start_char(peekchar(l))
952953
readchar(l)
953954
return emit_error(l, K"ErrorInvalidOperator")
954955
else
@@ -959,10 +960,7 @@ function lex_dot(l::Lexer)
959960
return lex_digit(l, K"Float")
960961
else
961962
pc, dpc = dpeekchar(l)
962-
if dotop1(pc)
963-
l.dotop = true
964-
return _next_token(l, readchar(l))
965-
elseif pc =='+'
963+
if pc == '+'
966964
l.dotop = true
967965
readchar(l)
968966
return lex_plus(l)
@@ -1040,6 +1038,9 @@ function lex_dot(l::Lexer)
10401038
l.dotop = true
10411039
readchar(l)
10421040
return lex_equal(l)
1041+
elseif is_dottable_operator_start_char(pc)
1042+
l.dotop = true
1043+
return _next_token(l, readchar(l))
10431044
end
10441045
return emit(l, K".")
10451046
end

0 commit comments

Comments
 (0)