Skip to content

Commit f868dc4

Browse files
committed
Stop emitting K".." and K"..." in lexer
Unfortunately, the sequences `..` and `...` do not always refer to the `..` operator or the `...` syntax. There are two and a half cases where they don't:
1. After `@` in macrocall, where they are both regular identifiers
2. In `import ...A`, where the dots specify the level
3. `:(...)` treats `...` as a quoted identifier
Case 1 was handled in a previous commit by lexing these as identifiers after `@`. However, as a result of case 2, it is problematic to tokenize these dots together; we essentially have to untokenize them in the import parser. It is also infeasible to change the lexer to have special context-sensitive lexing in `import`, because there could be arbitrary interpolations, `@eval import A, $(f(x..y)), ..b`, so deciding whether a particular `..` after import refers to the operator or a level specifier requires the parser. Currently the parser handles this by splitting the obtained tokens again in the import parser, but this is undesirable, because it invalidates the invariant that the tokens produced by the lexer correspond to the non-terminals of the final parse tree.
This PR attempts to address this by only ever having the lexer emit `K"."` and having the parser decide which case it refers to. The new non-terminal `K"DotsIdentifier"` handles the identifier cases (ordinary `..` and quoted `:(...)`). `K"..."` is now exclusively used for splat/slurp, and is no longer used in its non-terminal form for case 3.
1 parent 63bee39 commit f868dc4

File tree

8 files changed

+98
-67
lines changed

8 files changed

+98
-67
lines changed

src/integration/expr.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,9 @@ end
338338
return adjust_macro_name!(retexpr.args[1], k)
339339
elseif k == K"?"
340340
retexpr.head = :if
341+
elseif k == K"DotsIdentifier"
342+
n = numeric_flags(flags(nodehead))
343+
return n == 2 ? :(..) : :(...)
341344
elseif k == K"op=" && length(args) == 3
342345
lhs = args[1]
343346
op = args[2]

src/julia/julia_parse_stream.jl

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,8 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true)
137137
is_postfix_op_call(head) && (str = str*"-post")
138138

139139
k = kind(head)
140-
# Handle numeric flags for nrow/ncat nodes
141-
if k in KSet"nrow ncat typed_ncat"
140+
# Handle numeric flags for nodes that take them
141+
if k in KSet"nrow ncat typed_ncat DotsIdentifier"
142142
n = numeric_flags(head)
143143
n != 0 && (str = str*"-"*string(n))
144144
else
@@ -301,13 +301,18 @@ function bump_split(stream::ParseStream, split_spec::Vararg{Any, N}) where {N}
301301
return position(stream)
302302
end
303303

304-
function peek_dotted_op_token(ps, allow_whitespace=false)
304+
function peek_dotted_op_token(ps)
305305
# Peek the next token, but if it is a dot, peek the next one as well
306306
t = peek_token(ps)
307307
isdotted = kind(t) == K"."
308308
if isdotted
309309
t2 = peek_token(ps, 2)
310-
if !is_operator(t2) || (!allow_whitespace && preceding_whitespace(t2))
310+
if preceding_whitespace(t2)
311+
isdotted = false
312+
elseif !is_operator(t2)
313+
isdotted = false
314+
elseif kind(t2) == K"." && peek(ps, 3) == K"."
315+
# Treat `..` as dotted K".", unless there's another dot after
311316
isdotted = false
312317
else
313318
t = t2
@@ -316,13 +321,21 @@ function peek_dotted_op_token(ps, allow_whitespace=false)
316321
return (isdotted, t)
317322
end
318323

319-
function bump_dotted(ps, isdot, flags=EMPTY_FLAGS; emit_dot_node=false, remap_kind=K"None")
324+
function bump_dotted(ps, isdot, t, flags=EMPTY_FLAGS; emit_dot_node=false, remap_kind=K"None")
320325
if isdot
321-
if emit_dot_node
322-
dotmark = position(ps)
323-
bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG
324-
else
325-
bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG
326+
dotmark = position(ps)
327+
bump(ps, TRIVIA_FLAG)
328+
if kind(t) == K"."
329+
# .. => DotsIdentifier-2
330+
bump(ps, TRIVIA_FLAG)
331+
pos = emit(ps, dotmark, K"DotsIdentifier", set_numeric_flags(2))
332+
nt = peek_token(ps)
333+
if is_operator(nt) && !preceding_whitespace(nt)
334+
# a..+b => (call-i a .. (error-t) (call + b))
335+
bump_invisible(ps, K"error", TRIVIA_FLAG,
336+
error="`..` here is interpreted as a binary operator. A space is required if followed by another operator.")
337+
end
338+
return pos
326339
end
327340
end
328341
pos = bump(ps, flags, remap_kind=remap_kind)

src/julia/kinds.jl

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -278,8 +278,6 @@ register_kinds!(JuliaSyntax, 0, [
278278
"ErrorInvalidOperator"
279279
"Error**"
280280

281-
"..."
282-
283281
# Level 1
284282
"BEGIN_ASSIGNMENTS"
285283
"BEGIN_SYNTACTIC_ASSIGNMENTS"
@@ -774,7 +772,6 @@ register_kinds!(JuliaSyntax, 0, [
774772
# Level 8
775773
"BEGIN_COLON"
776774
":"
777-
".."
778775
""
779776
""
780777
""
@@ -1033,6 +1030,10 @@ register_kinds!(JuliaSyntax, 0, [
10331030
"typed_ncat"
10341031
"row"
10351032
"nrow"
1033+
# splat/slurp
1034+
"..."
1035+
# ../... as an identifier
1036+
"DotsIdentifier"
10361037
# Comprehensions
10371038
"generator"
10381039
"filter"

src/julia/parser.jl

Lines changed: 45 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,7 @@ function parse_RtoL(ps::ParseState, down, is_op, self)
371371
down(ps)
372372
isdot, tk = peek_dotted_op_token(ps)
373373
if is_op(tk)
374-
bump_dotted(ps, isdot, remap_kind=K"Identifier")
374+
bump_dotted(ps, isdot, tk, remap_kind=K"Identifier")
375375
self(ps)
376376
emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
377377
end
@@ -598,7 +598,7 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where {
598598
# a .~ b ==> (dotcall-i a ~ b)
599599
# [a ~ b c] ==> (hcat (call-i a ~ b) c)
600600
# [a~b] ==> (vect (call-i a ~ b))
601-
bump_dotted(ps, isdot, remap_kind=K"Identifier")
601+
bump_dotted(ps, isdot, t, remap_kind=K"Identifier")
602602
bump_trivia(ps)
603603
parse_assignment(ps, down)
604604
emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
@@ -617,7 +617,7 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where {
617617
(-1, K"Identifier", EMPTY_FLAGS), # op
618618
(1, K"=", TRIVIA_FLAG))
619619
else
620-
bump_dotted(ps, isdot, TRIVIA_FLAG)
620+
bump_dotted(ps, isdot, t, TRIVIA_FLAG)
621621
end
622622
bump_trivia(ps)
623623
# Syntax Edition TODO: We'd like to call `down` here when
@@ -743,7 +743,7 @@ function parse_arrow(ps::ParseState)
743743
# x <--> y ==> (call-i x <--> y)
744744
# x .--> y ==> (dotcall-i x --> y)
745745
# x -->₁ y ==> (call-i x -->₁ y)
746-
bump_dotted(ps, isdot, remap_kind=K"Identifier")
746+
bump_dotted(ps, isdot, t, remap_kind=K"Identifier")
747747
parse_arrow(ps)
748748
emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
749749
end
@@ -771,7 +771,7 @@ function parse_lazy_cond(ps::ParseState, down, is_op, self)
771771
(isdot, t) = peek_dotted_op_token(ps)
772772
k = kind(t)
773773
if is_op(k)
774-
bump_dotted(ps, isdot, TRIVIA_FLAG)
774+
bump_dotted(ps, isdot, t, TRIVIA_FLAG)
775775
self(ps)
776776
emit(ps, mark, isdot ? dotted(k) : k, flags(t))
777777
if isdot
@@ -819,7 +819,7 @@ function parse_comparison(ps::ParseState, subtype_comparison=false)
819819
while ((isdot, t) = peek_dotted_op_token(ps); is_prec_comparison(t))
820820
n_comparisons += 1
821821
op_dotted = isdot
822-
op_pos = bump_dotted(ps, isdot, emit_dot_node=true, remap_kind=K"Identifier")
822+
op_pos = bump_dotted(ps, isdot, t, emit_dot_node=true, remap_kind=K"Identifier")
823823
parse_pipe_lt(ps)
824824
end
825825
if n_comparisons == 1
@@ -873,15 +873,16 @@ end
873873
function parse_range(ps::ParseState)
874874
mark = position(ps)
875875
parse_invalid_ops(ps)
876+
876877
(initial_dot, initial_tok) = peek_dotted_op_token(ps)
877878
initial_kind = kind(initial_tok)
878-
if initial_kind != K":" && is_prec_colon(initial_kind)
879-
# a..b ==> (call-i a .. b)
879+
if initial_kind != K":" && (is_prec_colon(initial_kind) || (initial_dot && initial_kind == K"."))
880+
# a..b ==> (call-i a (DotsIdentifier-2) b)
880881
# a … b ==> (call-i a … b)
881882
# a .… b ==> (dotcall-i a … b)
882-
bump_dotted(ps, initial_dot, remap_kind=K"Identifier")
883+
bump_dotted(ps, initial_dot, initial_tok, remap_kind=K"Identifier")
883884
parse_invalid_ops(ps)
884-
emit(ps, mark, initial_dot ? K"dotcall" : K"call", INFIX_FLAG)
885+
emit(ps, mark, (initial_dot && initial_kind != K".") ? K"dotcall" : K"call", INFIX_FLAG)
885886
elseif initial_kind == K":" && ps.range_colon_enabled
886887
# a ? b : c:d ==> (? a b (call-i c : d))
887888
n_colons = 0
@@ -948,8 +949,10 @@ function parse_range(ps::ParseState)
948949
# x... ==> (... x)
949950
# x:y... ==> (... (call-i x : y))
950951
# x..y... ==> (... (call-i x .. y)) # flisp parser fails here
951-
if peek(ps) == K"..."
952+
if peek(ps) == K"." && peek(ps, 2) == K"." && peek(ps, 3) == K"."
952953
bump(ps, TRIVIA_FLAG)
954+
bump(ps, TRIVIA_FLAG) # second dot
955+
bump(ps, TRIVIA_FLAG) # third dot
953956
emit(ps, mark, K"...")
954957
end
955958
end
@@ -965,7 +968,7 @@ function parse_invalid_ops(ps::ParseState)
965968
parse_expr(ps)
966969
while ((isdot, t) = peek_dotted_op_token(ps); kind(t) in KSet"ErrorInvalidOperator Error**")
967970
bump_trivia(ps)
968-
bump_dotted(ps, isdot)
971+
bump_dotted(ps, isdot, t)
969972
parse_expr(ps)
970973
emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
971974
end
@@ -1006,7 +1009,7 @@ function parse_with_chains(ps::ParseState, down, is_op, chain_ops)
10061009
# [x+y + z] ==> (vect (call-i x + y z))
10071010
break
10081011
end
1009-
bump_dotted(ps, isdot, remap_kind=K"Identifier")
1012+
bump_dotted(ps, isdot, t, remap_kind=K"Identifier")
10101013
down(ps)
10111014
if kind(t) in chain_ops && !is_suffixed(t) && !isdot
10121015
# a + b + c ==> (call-i a + b c)
@@ -1258,7 +1261,7 @@ function parse_unary(ps::ParseState)
12581261
#
12591262
# (The flisp parser only considers commas before `;` and thus gets this
12601263
# last case wrong)
1261-
op_pos = bump_dotted(ps, op_dotted, emit_dot_node=true, remap_kind=K"Identifier")
1264+
op_pos = bump_dotted(ps, op_dotted, op_t, emit_dot_node=true, remap_kind=K"Identifier")
12621265

12631266
space_before_paren = preceding_whitespace(t2)
12641267
if space_before_paren
@@ -1352,12 +1355,12 @@ function parse_unary(ps::ParseState)
13521355
# -0x1 ==> (call-pre - 0x01)
13531356
# - 2 ==> (call-pre - 2)
13541357
# .-2 ==> (dotcall-pre - 2)
1355-
op_pos = bump_dotted(ps, op_dotted, remap_kind=K"Identifier")
1358+
op_pos = bump_dotted(ps, op_dotted, op_t, remap_kind=K"Identifier")
13561359
else
13571360
# /x ==> (call-pre (error /) x)
13581361
# +₁ x ==> (call-pre (error +₁) x)
13591362
# .<: x ==> (dotcall-pre (error (. <:)) x)
1360-
bump_dotted(ps, op_dotted, emit_dot_node=true, remap_kind=K"Identifier")
1363+
bump_dotted(ps, op_dotted, op_t, emit_dot_node=true, remap_kind=K"Identifier")
13611364
op_pos = emit(ps, mark, K"error", error="not a unary operator")
13621365
end
13631366
parse_unary(ps)
@@ -1388,7 +1391,7 @@ end
13881391
function parse_factor_with_initial_ex(ps::ParseState, mark)
13891392
parse_decl_with_initial_ex(ps, mark)
13901393
if ((isdot, t) = peek_dotted_op_token(ps); is_prec_power(kind(t)))
1391-
bump_dotted(ps, isdot, remap_kind=K"Identifier")
1394+
bump_dotted(ps, isdot, t, remap_kind=K"Identifier")
13921395
parse_factor_after(ps)
13931396
emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
13941397
end
@@ -2476,11 +2479,11 @@ function parse_import_atsym(ps::ParseState, allow_quotes=true)
24762479
end
24772480
end
24782481
b = peek_behind(ps, pos)
2479-
if warn_parens && b.orig_kind != K".."
2482+
if warn_parens && b.kind != K"DotsIdentifier"
24802483
emit_diagnostic(ps, mark, warning="parentheses are not required here")
24812484
end
24822485
ok = (b.is_leaf && (b.kind == K"Identifier" || is_operator(b.kind))) ||
2483-
(!b.is_leaf && b.kind in KSet"$ var")
2486+
(!b.is_leaf && (b.kind in KSet"$ var" || b.kind == K"DotsIdentifier"))
24842487
if !ok
24852488
emit(ps, mark, K"error", error="expected identifier")
24862489
end
@@ -2589,10 +2592,6 @@ function parse_import_path(ps::ParseState)
25892592
end
25902593
if k == K"."
25912594
bump(ps)
2592-
elseif k == K".."
2593-
bump_split(ps, (1,K".",EMPTY_FLAGS), (1,K".",EMPTY_FLAGS))
2594-
elseif k == K"..."
2595-
bump_split(ps, (1,K".",EMPTY_FLAGS), (1,K".",EMPTY_FLAGS), (1,K".",EMPTY_FLAGS))
25962595
else
25972596
break
25982597
end
@@ -2611,6 +2610,17 @@ function parse_import_path(ps::ParseState)
26112610
# import A.⋆.f ==> (import (importpath A ⋆ f))
26122611
next_tok = peek_token(ps, 2)
26132612
if is_operator(kind(next_tok))
2613+
if kind(next_tok) == K"." && peek(ps, 3) == K"."
2614+
# Import the .. operator
2615+
# import A... ==> (import (importpath A (DotsIdentifier-2)))
2616+
bump_disallowed_space(ps)
2617+
bump(ps, TRIVIA_FLAG)
2618+
dotmark = position(ps)
2619+
bump(ps, TRIVIA_FLAG)
2620+
bump(ps, TRIVIA_FLAG)
2621+
emit(ps, dotmark, K"DotsIdentifier", set_numeric_flags(2))
2622+
continue
2623+
end
26142624
if preceding_whitespace(t)
26152625
# Whitespace in import path allowed but discouraged
26162626
# import A .== ==> (import (importpath A ==))
@@ -2623,10 +2633,6 @@ function parse_import_path(ps::ParseState)
26232633
end
26242634
bump(ps, TRIVIA_FLAG)
26252635
parse_import_atsym(ps)
2626-
elseif k == K"..."
2627-
# Import the .. operator
2628-
# import A... ==> (import (importpath A ..))
2629-
bump_split(ps, (1,K".",TRIVIA_FLAG), (2,K"..",EMPTY_FLAGS))
26302636
elseif k in KSet"NewlineWs ; , : EndMarker"
26312637
# import A; B ==> (import (importpath A))
26322638
break
@@ -3496,6 +3502,16 @@ function parse_atom(ps::ParseState, check_identifiers=true, has_unary_prefix=fal
34963502
# . ==> (error .)
34973503
emit(ps, mark, K"error", error="invalid identifier")
34983504
end
3505+
elseif kind(leading_tok) == K"." && peek(ps, 2) == K"." && peek(ps, 3) == K"."
3506+
# ...
3507+
bump(ps, TRIVIA_FLAG)
3508+
bump(ps, TRIVIA_FLAG)
3509+
bump(ps, TRIVIA_FLAG)
3510+
emit(ps, mark, K"DotsIdentifier", set_numeric_flags(3))
3511+
if check_identifiers
3512+
# ... ==> (error ...)
3513+
emit(ps, mark, K"error", error="invalid identifier")
3514+
end
34993515
elseif is_error(leading_kind)
35003516
# Errors for bad tokens are emitted in validate_tokens() rather than
35013517
# here.
@@ -3583,9 +3599,9 @@ function parse_atom(ps::ParseState, check_identifiers=true, has_unary_prefix=fal
35833599
@label is_operator
35843600
# + ==> +
35853601
# .+ ==> (. +)
3586-
bump_dotted(ps, leading_dot, emit_dot_node=true, remap_kind=
3602+
bump_dotted(ps, leading_dot, leading_tok, emit_dot_node=true, remap_kind=
35873603
is_syntactic_operator(leading_kind) ? leading_kind : K"Identifier")
3588-
if check_identifiers && !is_valid_identifier(leading_kind)
3604+
if check_identifiers && !(is_valid_identifier(leading_kind) || (leading_dot && leading_kind == K"."))
35893605
# += ==> (error (op= +))
35903606
# ? ==> (error ?)
35913607
# .+= ==> (error (. (op= +)))

src/julia/tokenize.jl

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,6 @@ end
153153
function optakessuffix(k)
154154
(K"BEGIN_OPS" <= k <= K"END_OPS") &&
155155
!(
156-
k == K"..." ||
157156
K"BEGIN_ASSIGNMENTS" <= k <= K"END_ASSIGNMENTS" ||
158157
k == K"?" ||
159158
k == K"<:" ||
@@ -165,7 +164,6 @@ function optakessuffix(k)
165164
k == K"≔" ||
166165
k == K"⩴" ||
167166
k == K":" ||
168-
k == K".." ||
169167
k == K"$" ||
170168
k == K"::" ||
171169
k == K"where" ||
@@ -987,7 +985,7 @@ function lex_digit(l::Lexer, kind)
987985
pc,ppc = dpeekchar(l)
988986
if pc == '.'
989987
if ppc == '.'
990-
# Number followed by K".." or K"..."
988+
# Number followed by K"."
991989
return emit(l, kind)
992990
elseif kind === K"Float"
993991
# If we enter the function with kind == K"Float" then a '.' has been parsed.
@@ -1166,23 +1164,19 @@ function lex_backslash(l::Lexer)
11661164
end
11671165

11681166
function lex_dot(l::Lexer)
1169-
if accept(l, '.')
1167+
if l.last_token == K"@"
11701168
if accept(l, '.')
1171-
l.last_token == K"@" && return emit(l, K"Identifier")
1172-
return emit(l, K"...")
1173-
else
1174-
if is_dottable_operator_start_char(peekchar(l))
1169+
if !accept(l, '.') && is_dottable_operator_start_char(peekchar(l))
11751170
readchar(l)
11761171
return emit(l, K"ErrorInvalidOperator")
1177-
else
1178-
l.last_token == K"@" && return emit(l, K"Identifier")
1179-
return emit(l, K"..")
11801172
end
11811173
end
1182-
elseif Base.isdigit(peekchar(l))
1174+
# Emit `.`, `..` and `...` as identifiers after `@`
1175+
emit(l, K"Identifier")
1176+
elseif l.last_token != K"." && Base.isdigit(peekchar(l))
1177+
# Only start a numeric constant if the previous token wasn't a dot
11831178
return lex_digit(l, K"Float")
11841179
else
1185-
l.last_token == K"@" && return emit(l, K"Identifier")
11861180
return emit(l, K".")
11871181
end
11881182
end

test/expr.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
@test parseatom(":(a)") == QuoteNode(:a)
1515
@test parseatom(":(:a)") == Expr(:quote, QuoteNode(:a))
1616
@test parseatom(":(1+2)") == Expr(:quote, Expr(:call, :+, 1, 2))
17+
@test parseatom(":...") == QuoteNode(Symbol("..."))
18+
@test parseatom(":(...)") == QuoteNode(Symbol("..."))
1719
# Compatibility hack for VERSION >= v"1.4"
1820
# https://github.com/JuliaLang/julia/pull/34077
1921
@test parseatom(":true") == Expr(:quote, true)

0 commit comments

Comments
 (0)