Skip to content

Commit 75d18ca

Browse files
committed
Stop emitting K".." and K"..." in lexer
Unfortunately, the sequences `..` and `...` do not always refer to the `..` operator or the `...` syntax. There are two and a half cases where they don't: 1. After `@` in macrocall, where they are both regular identifiers; 2. In `import ...A`, where the dots specify the level; 3. `:(...)` treats `...` as a quoted identifier. Case 1 was handled in a previous commit by lexing these as identifiers after `@`. However, as a result of case 2, it is problematic to tokenize these dots together; we essentially have to untokenize them in the import parser. It is also infeasible to change the lexer to have special context-sensitive lexing in `import`, because there could be arbitrary interpolations, `@eval import A, $(f(x..y)), ..b`, so deciding whether a particular `..` after import refers to the operator or a level specifier requires the parser. Currently the parser handles this by splitting the obtained tokens again in the import parser, but this is undesirable, because it invalidates the invariant that the tokens produced by the lexer correspond to the non-terminals of the final parse tree. This PR attempts to address this by only ever having the lexer emit `K"."` and having the parser decide which case it refers to. The new non-terminal `K"dots"` handles the identifier cases (ordinary `..` and quoted `:(...)`). `K"..."` is now exclusively used for splat/slurp, and is no longer used in its non-terminal form for case 3.
1 parent 6b3664c commit 75d18ca

File tree

8 files changed

+88
-66
lines changed

8 files changed

+88
-66
lines changed

src/integration/expr.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,9 @@ end
338338
return adjust_macro_name!(retexpr.args[1], k)
339339
elseif k == K"?"
340340
retexpr.head = :if
341+
elseif k == K"dots"
342+
n = numeric_flags(flags(nodehead))
343+
return n == 2 ? :(..) : :(...)
341344
elseif k == K"op=" && length(args) == 3
342345
lhs = args[1]
343346
op = args[2]

src/julia/julia_parse_stream.jl

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,8 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true)
137137
is_postfix_op_call(head) && (str = str*"-post")
138138

139139
k = kind(head)
140-
# Handle numeric flags for nrow/ncat nodes
141-
if k in KSet"nrow ncat typed_ncat"
140+
# Handle numeric flags for nodes that take them
141+
if k in KSet"nrow ncat typed_ncat dots"
142142
n = numeric_flags(head)
143143
n != 0 && (str = str*"-"*string(n))
144144
else
@@ -307,7 +307,12 @@ function peek_dotted_op_token(ps, allow_whitespace=false)
307307
isdotted = kind(t) == K"."
308308
if isdotted
309309
t2 = peek_token(ps, 2)
310-
if !is_operator(t2) || (!allow_whitespace && preceding_whitespace(t2))
310+
if (!allow_whitespace && preceding_whitespace(t2))
311+
isdotted = false
312+
elseif !is_operator(t2)
313+
isdotted = false
314+
elseif kind(t2) == K"." && peek(ps, 3) == K"."
315+
# Treat `..` as dotted K".", unless there's another dot after
311316
isdotted = false
312317
else
313318
t = t2
@@ -316,13 +321,13 @@ function peek_dotted_op_token(ps, allow_whitespace=false)
316321
return (isdotted, t)
317322
end
318323

319-
function bump_dotted(ps, isdot, flags=EMPTY_FLAGS; emit_dot_node=false, remap_kind=K"None")
324+
function bump_dotted(ps, isdot, t, flags=EMPTY_FLAGS; emit_dot_node=false, remap_kind=K"None")
320325
if isdot
321-
if emit_dot_node
322-
dotmark = position(ps)
323-
bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG
324-
else
325-
bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG
326+
dotmark = position(ps)
327+
bump(ps, TRIVIA_FLAG)
328+
if kind(t) == K"."
329+
bump(ps, TRIVIA_FLAG)
330+
return emit(ps, dotmark, K"dots", set_numeric_flags(2))
326331
end
327332
end
328333
pos = bump(ps, flags, remap_kind=remap_kind)

src/julia/kinds.jl

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -278,8 +278,6 @@ register_kinds!(JuliaSyntax, 0, [
278278
"ErrorInvalidOperator"
279279
"Error**"
280280

281-
"..."
282-
283281
# Level 1
284282
"BEGIN_ASSIGNMENTS"
285283
"BEGIN_SYNTACTIC_ASSIGNMENTS"
@@ -774,7 +772,6 @@ register_kinds!(JuliaSyntax, 0, [
774772
# Level 8
775773
"BEGIN_COLON"
776774
":"
777-
".."
778775
"…"
779776
"⁝"
780777
"⋮"
@@ -1033,6 +1030,10 @@ register_kinds!(JuliaSyntax, 0, [
10331030
"typed_ncat"
10341031
"row"
10351032
"nrow"
1033+
# splat/slurp
1034+
"..."
1035+
# ../... as an identifier
1036+
"dots"
10361037
# Comprehensions
10371038
"generator"
10381039
"filter"

src/julia/parser.jl

Lines changed: 45 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,7 @@ function parse_RtoL(ps::ParseState, down, is_op, self)
371371
down(ps)
372372
isdot, tk = peek_dotted_op_token(ps)
373373
if is_op(tk)
374-
bump_dotted(ps, isdot, remap_kind=K"Identifier")
374+
bump_dotted(ps, isdot, tk, remap_kind=K"Identifier")
375375
self(ps)
376376
emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
377377
end
@@ -598,7 +598,7 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where {
598598
# a .~ b ==> (dotcall-i a ~ b)
599599
# [a ~ b c] ==> (hcat (call-i a ~ b) c)
600600
# [a~b] ==> (vect (call-i a ~ b))
601-
bump_dotted(ps, isdot, remap_kind=K"Identifier")
601+
bump_dotted(ps, isdot, t, remap_kind=K"Identifier")
602602
bump_trivia(ps)
603603
parse_assignment(ps, down)
604604
emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
@@ -617,7 +617,7 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where {
617617
(-1, K"Identifier", EMPTY_FLAGS), # op
618618
(1, K"=", TRIVIA_FLAG))
619619
else
620-
bump_dotted(ps, isdot, TRIVIA_FLAG)
620+
bump_dotted(ps, isdot, t, TRIVIA_FLAG)
621621
end
622622
bump_trivia(ps)
623623
# Syntax Edition TODO: We'd like to call `down` here when
@@ -743,7 +743,7 @@ function parse_arrow(ps::ParseState)
743743
# x <--> y ==> (call-i x <--> y)
744744
# x .--> y ==> (dotcall-i x --> y)
745745
# x -->₁ y ==> (call-i x -->₁ y)
746-
bump_dotted(ps, isdot, remap_kind=K"Identifier")
746+
bump_dotted(ps, isdot, t, remap_kind=K"Identifier")
747747
parse_arrow(ps)
748748
emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
749749
end
@@ -771,7 +771,7 @@ function parse_lazy_cond(ps::ParseState, down, is_op, self)
771771
(isdot, t) = peek_dotted_op_token(ps)
772772
k = kind(t)
773773
if is_op(k)
774-
bump_dotted(ps, isdot, TRIVIA_FLAG)
774+
bump_dotted(ps, isdot, t, TRIVIA_FLAG)
775775
self(ps)
776776
emit(ps, mark, isdot ? dotted(k) : k, flags(t))
777777
if isdot
@@ -819,7 +819,7 @@ function parse_comparison(ps::ParseState, subtype_comparison=false)
819819
while ((isdot, t) = peek_dotted_op_token(ps); is_prec_comparison(t))
820820
n_comparisons += 1
821821
op_dotted = isdot
822-
op_pos = bump_dotted(ps, isdot, emit_dot_node=true, remap_kind=K"Identifier")
822+
op_pos = bump_dotted(ps, isdot, t, emit_dot_node=true, remap_kind=K"Identifier")
823823
parse_pipe_lt(ps)
824824
end
825825
if n_comparisons == 1
@@ -873,15 +873,16 @@ end
873873
function parse_range(ps::ParseState)
874874
mark = position(ps)
875875
parse_invalid_ops(ps)
876+
876877
(initial_dot, initial_tok) = peek_dotted_op_token(ps)
877878
initial_kind = kind(initial_tok)
878-
if initial_kind != K":" && is_prec_colon(initial_kind)
879-
# a..b ==> (call-i a .. b)
879+
if initial_kind != K":" && (is_prec_colon(initial_kind) || (initial_dot && initial_kind == K"."))
880+
# a..b ==> (call-i a (dots-2) b)
880881
# a … b ==> (call-i a … b)
881882
# a .… b ==> (dotcall-i a … b)
882-
bump_dotted(ps, initial_dot, remap_kind=K"Identifier")
883+
bump_dotted(ps, initial_dot, initial_tok, remap_kind=K"Identifier")
883884
parse_invalid_ops(ps)
884-
emit(ps, mark, initial_dot ? K"dotcall" : K"call", INFIX_FLAG)
885+
emit(ps, mark, (initial_dot && initial_kind != K".") ? K"dotcall" : K"call", INFIX_FLAG)
885886
elseif initial_kind == K":" && ps.range_colon_enabled
886887
# a ? b : c:d ==> (? a b (call-i c : d))
887888
n_colons = 0
@@ -948,8 +949,10 @@ function parse_range(ps::ParseState)
948949
# x... ==> (... x)
949950
# x:y... ==> (... (call-i x : y))
950951
# x..y... ==> (... (call-i x .. y)) # flisp parser fails here
951-
if peek(ps) == K"..."
952+
if peek(ps) == K"." && peek(ps, 2) == K"." && peek(ps, 3) == K"."
952953
bump(ps, TRIVIA_FLAG)
954+
bump(ps, TRIVIA_FLAG) # second dot
955+
bump(ps, TRIVIA_FLAG) # third dot
953956
emit(ps, mark, K"...")
954957
end
955958
end
@@ -965,7 +968,7 @@ function parse_invalid_ops(ps::ParseState)
965968
parse_expr(ps)
966969
while ((isdot, t) = peek_dotted_op_token(ps); kind(t) in KSet"ErrorInvalidOperator Error**")
967970
bump_trivia(ps)
968-
bump_dotted(ps, isdot)
971+
bump_dotted(ps, isdot, t)
969972
parse_expr(ps)
970973
emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
971974
end
@@ -1006,7 +1009,7 @@ function parse_with_chains(ps::ParseState, down, is_op, chain_ops)
10061009
# [x+y + z] ==> (vect (call-i x + y z))
10071010
break
10081011
end
1009-
bump_dotted(ps, isdot, remap_kind=K"Identifier")
1012+
bump_dotted(ps, isdot, t, remap_kind=K"Identifier")
10101013
down(ps)
10111014
if kind(t) in chain_ops && !is_suffixed(t) && !isdot
10121015
# a + b + c ==> (call-i a + b c)
@@ -1258,7 +1261,7 @@ function parse_unary(ps::ParseState)
12581261
#
12591262
# (The flisp parser only considers commas before `;` and thus gets this
12601263
# last case wrong)
1261-
op_pos = bump_dotted(ps, op_dotted, emit_dot_node=true, remap_kind=K"Identifier")
1264+
op_pos = bump_dotted(ps, op_dotted, op_t, emit_dot_node=true, remap_kind=K"Identifier")
12621265

12631266
space_before_paren = preceding_whitespace(t2)
12641267
if space_before_paren
@@ -1352,12 +1355,12 @@ function parse_unary(ps::ParseState)
13521355
# -0x1 ==> (call-pre - 0x01)
13531356
# - 2 ==> (call-pre - 2)
13541357
# .-2 ==> (dotcall-pre - 2)
1355-
op_pos = bump_dotted(ps, op_dotted, remap_kind=K"Identifier")
1358+
op_pos = bump_dotted(ps, op_dotted, op_t, remap_kind=K"Identifier")
13561359
else
13571360
# /x ==> (call-pre (error /) x)
13581361
# +₁ x ==> (call-pre (error +₁) x)
13591362
# .<: x ==> (dotcall-pre (error (. <:)) x)
1360-
bump_dotted(ps, op_dotted, emit_dot_node=true, remap_kind=K"Identifier")
1363+
bump_dotted(ps, op_dotted, op_t, emit_dot_node=true, remap_kind=K"Identifier")
13611364
op_pos = emit(ps, mark, K"error", error="not a unary operator")
13621365
end
13631366
parse_unary(ps)
@@ -1388,7 +1391,7 @@ end
13881391
function parse_factor_with_initial_ex(ps::ParseState, mark)
13891392
parse_decl_with_initial_ex(ps, mark)
13901393
if ((isdot, t) = peek_dotted_op_token(ps); is_prec_power(kind(t)))
1391-
bump_dotted(ps, isdot, remap_kind=K"Identifier")
1394+
bump_dotted(ps, isdot, t, remap_kind=K"Identifier")
13921395
parse_factor_after(ps)
13931396
emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
13941397
end
@@ -2452,11 +2455,11 @@ function parse_import_atsym(ps::ParseState, allow_quotes=true)
24522455
end
24532456
end
24542457
b = peek_behind(ps, pos)
2455-
if warn_parens && b.orig_kind != K".."
2458+
if warn_parens && b.kind != K"dots"
24562459
emit_diagnostic(ps, mark, warning="parentheses are not required here")
24572460
end
24582461
ok = (b.is_leaf && (b.kind == K"Identifier" || is_operator(b.kind))) ||
2459-
(!b.is_leaf && b.kind in KSet"$ var")
2462+
(!b.is_leaf && (b.kind in KSet"$ var" || b.kind == K"dots"))
24602463
if !ok
24612464
emit(ps, mark, K"error", error="expected identifier")
24622465
end
@@ -2565,10 +2568,6 @@ function parse_import_path(ps::ParseState)
25652568
end
25662569
if k == K"."
25672570
bump(ps)
2568-
elseif k == K".."
2569-
bump_split(ps, (1,K".",EMPTY_FLAGS), (1,K".",EMPTY_FLAGS))
2570-
elseif k == K"..."
2571-
bump_split(ps, (1,K".",EMPTY_FLAGS), (1,K".",EMPTY_FLAGS), (1,K".",EMPTY_FLAGS))
25722571
else
25732572
break
25742573
end
@@ -2587,6 +2586,17 @@ function parse_import_path(ps::ParseState)
25872586
# import A.⋆.f ==> (import (importpath A ⋆ f))
25882587
next_tok = peek_token(ps, 2)
25892588
if is_operator(kind(next_tok))
2589+
if kind(next_tok) == K"." && peek(ps, 3) == K"."
2590+
# Import the .. operator
2591+
# import A... ==> (import (importpath A (dots-2)))
2592+
bump_disallowed_space(ps)
2593+
bump(ps, TRIVIA_FLAG)
2594+
dotmark = position(ps)
2595+
bump(ps, TRIVIA_FLAG)
2596+
bump(ps, TRIVIA_FLAG)
2597+
emit(ps, dotmark, K"dots", set_numeric_flags(2))
2598+
continue
2599+
end
25902600
if preceding_whitespace(t)
25912601
# Whitespace in import path allowed but discouraged
25922602
# import A .== ==> (import (importpath A ==))
@@ -2599,10 +2609,6 @@ function parse_import_path(ps::ParseState)
25992609
end
26002610
bump(ps, TRIVIA_FLAG)
26012611
parse_import_atsym(ps)
2602-
elseif k == K"..."
2603-
# Import the .. operator
2604-
# import A... ==> (import (importpath A ..))
2605-
bump_split(ps, (1,K".",TRIVIA_FLAG), (2,K"..",EMPTY_FLAGS))
26062612
elseif k in KSet"NewlineWs ; , : EndMarker"
26072613
# import A; B ==> (import (importpath A))
26082614
break
@@ -3472,6 +3478,16 @@ function parse_atom(ps::ParseState, check_identifiers=true, has_unary_prefix=fal
34723478
# . ==> (error .)
34733479
emit(ps, mark, K"error", error="invalid identifier")
34743480
end
3481+
elseif kind(leading_tok) == K"." && peek(ps, 2) == K"." && peek(ps, 3) == K"."
3482+
# ...
3483+
bump(ps, TRIVIA_FLAG)
3484+
bump(ps, TRIVIA_FLAG)
3485+
bump(ps, TRIVIA_FLAG)
3486+
emit(ps, mark, K"dots", set_numeric_flags(3))
3487+
if check_identifiers
3488+
# ... ==> (error ...)
3489+
emit(ps, mark, K"error", error="invalid identifier")
3490+
end
34753491
elseif is_error(leading_kind)
34763492
# Errors for bad tokens are emitted in validate_tokens() rather than
34773493
# here.
@@ -3559,9 +3575,9 @@ function parse_atom(ps::ParseState, check_identifiers=true, has_unary_prefix=fal
35593575
@label is_operator
35603576
# + ==> +
35613577
# .+ ==> (. +)
3562-
bump_dotted(ps, leading_dot, emit_dot_node=true, remap_kind=
3578+
bump_dotted(ps, leading_dot, leading_tok, emit_dot_node=true, remap_kind=
35633579
is_syntactic_operator(leading_kind) ? leading_kind : K"Identifier")
3564-
if check_identifiers && !is_valid_identifier(leading_kind)
3580+
if check_identifiers && !(is_valid_identifier(leading_kind) || (leading_dot && leading_kind == K"."))
35653581
# += ==> (error (op= +))
35663582
# ? ==> (error ?)
35673583
# .+= ==> (error (. (op= +)))

src/julia/tokenize.jl

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,6 @@ end
153153
function optakessuffix(k)
154154
(K"BEGIN_OPS" <= k <= K"END_OPS") &&
155155
!(
156-
k == K"..." ||
157156
K"BEGIN_ASSIGNMENTS" <= k <= K"END_ASSIGNMENTS" ||
158157
k == K"?" ||
159158
k == K"<:" ||
@@ -165,7 +164,6 @@ function optakessuffix(k)
165164
k == K"≔" ||
166165
k == K"⩴" ||
167166
k == K":" ||
168-
k == K".." ||
169167
k == K"$" ||
170168
k == K"::" ||
171169
k == K"where" ||
@@ -987,7 +985,7 @@ function lex_digit(l::Lexer, kind)
987985
pc,ppc = dpeekchar(l)
988986
if pc == '.'
989987
if ppc == '.'
990-
# Number followed by K".." or K"..."
988+
# Number followed by K"."
991989
return emit(l, kind)
992990
elseif kind === K"Float"
993991
# If we enter the function with kind == K"Float" then a '.' has been parsed.
@@ -1166,23 +1164,19 @@ function lex_backslash(l::Lexer)
11661164
end
11671165

11681166
function lex_dot(l::Lexer)
1169-
if accept(l, '.')
1167+
if l.last_token == K"@"
11701168
if accept(l, '.')
1171-
l.last_token == K"@" && return emit(l, K"Identifier")
1172-
return emit(l, K"...")
1173-
else
1174-
if is_dottable_operator_start_char(peekchar(l))
1169+
if !accept(l, '.') && is_dottable_operator_start_char(peekchar(l))
11751170
readchar(l)
11761171
return emit(l, K"ErrorInvalidOperator")
1177-
else
1178-
l.last_token == K"@" && return emit(l, K"Identifier")
1179-
return emit(l, K"..")
11801172
end
11811173
end
1182-
elseif Base.isdigit(peekchar(l))
1174+
# Emit `.`, `..` and `...` as identifiers after `@`
1175+
emit(l, K"Identifier")
1176+
elseif l.last_token != K"." && Base.isdigit(peekchar(l))
1177+
# Only start a numeric constant if the previous token wasn't a dot
11831178
return lex_digit(l, K"Float")
11841179
else
1185-
l.last_token == K"@" && return emit(l, K"Identifier")
11861180
return emit(l, K".")
11871181
end
11881182
end

test/expr.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
@test parseatom(":(a)") == QuoteNode(:a)
1515
@test parseatom(":(:a)") == Expr(:quote, QuoteNode(:a))
1616
@test parseatom(":(1+2)") == Expr(:quote, Expr(:call, :+, 1, 2))
17+
@test parseatom(":...") == QuoteNode(Symbol("..."))
18+
@test parseatom(":(...)") == QuoteNode(Symbol("..."))
1719
# Compatibility hack for VERSION >= v"1.4"
1820
# https://github.com/JuliaLang/julia/pull/34077
1921
@test parseatom(":true") == Expr(:quote, true)

0 commit comments

Comments
 (0)