Skip to content

Commit 734cc37

Browse files
committed
Remove separate syntax heads for each operator
This replaces all the specialized operator heads by a single K"Operator" head that encodes the precedence level in its flags (except for operators that are also used for non-operator purposes). The operators are already K"Identifier" in the final parse tree. There is very little reason to spend all of the extra effort separating them into separate heads only to undo this later. Moreover, I think it's actively misleading, because it makes people think that they can query things about an operator by looking at the head, which doesn't work for suffixed operators. Additionally, this removes the `op=` token, replacing it by two tokens, one K"Operator" with a special precedence level and one `=`. This then removes the last use of `bump_split` (since this PR is on top of #573). As a free bonus this prepares us for having compound assignment syntax for suffixed operators, which was infeasible in the flisp parser. That syntax change is not part of this PR but would be trivial (this PR makes it an explicit error). Fixes #334
1 parent daf52ca commit 734cc37

File tree

10 files changed

+532
-1041
lines changed

10 files changed

+532
-1041
lines changed

src/JuliaSyntax.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,11 @@ export SourceFile
4141
@_public source_line_range
4242

4343
# Expression predicates, kinds and flags
44-
export @K_str, kind
44+
export @K_str, kind, PrecedenceLevel, PREC_NONE, PREC_ASSIGNMENT,
45+
PREC_PAIRARROW, PREC_CONDITIONAL, PREC_ARROW, PREC_LAZYOR, PREC_LAZYAND,
46+
PREC_COMPARISON, PREC_PIPE_LT, PREC_PIPE_GT, PREC_COLON, PREC_PLUS,
47+
PREC_BITSHIFT, PREC_TIMES, PREC_RATIONAL, PREC_POWER, PREC_DECL,
48+
PREC_WHERE, PREC_DOT, PREC_QUOTE, PREC_UNICODE_OPS, PREC_COMPOUND_ASSIGN, generic_operators_by_level
4549
@_public Kind
4650

4751
@_public flags,

src/core/parse_stream.jl

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,10 @@ function _buffer_lookahead_tokens(lexer, lookahead)
376376
was_whitespace = is_whitespace(k)
377377
had_whitespace |= was_whitespace
378378
f = EMPTY_FLAGS
379-
raw.suffix && (f |= SUFFIXED_FLAG)
379+
if k == K"Operator" && raw.op_precedence != Tokenize.PREC_NONE
380+
# Store operator precedence in numeric flags
381+
f |= set_numeric_flags(Int(raw.op_precedence))
382+
end
380383
push!(lookahead, SyntaxToken(SyntaxHead(k, f), k,
381384
had_whitespace, raw.endbyte + 2))
382385
token_count += 1

src/integration/expr.jl

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -341,20 +341,28 @@ end
341341
elseif k == K"dots"
342342
n = numeric_flags(flags(nodehead))
343343
return n == 2 ? :(..) : :(...)
344-
elseif k == K"op=" && length(args) == 3
345-
lhs = args[1]
346-
op = args[2]
347-
rhs = args[3]
348-
headstr = string(args[2], '=')
349-
retexpr.head = Symbol(headstr)
350-
retexpr.args = Any[lhs, rhs]
351-
elseif k == K".op=" && length(args) == 3
352-
lhs = args[1]
353-
op = args[2]
354-
rhs = args[3]
355-
headstr = '.' * string(args[2], '=')
356-
retexpr.head = Symbol(headstr)
357-
retexpr.args = Any[lhs, rhs]
344+
elseif k == K"op="
345+
if length(args) == 3
346+
lhs = args[1]
347+
op = args[2]
348+
rhs = args[3]
349+
headstr = string(args[2], '=')
350+
retexpr.head = Symbol(headstr)
351+
retexpr.args = Any[lhs, rhs]
352+
elseif length(args) == 1
353+
return Symbol(string(args[1], '='))
354+
end
355+
elseif k == K".op="
356+
if length(args) == 3
357+
lhs = args[1]
358+
op = args[2]
359+
rhs = args[3]
360+
headstr = '.' * string(args[2], '=')
361+
retexpr.head = Symbol(headstr)
362+
retexpr.args = Any[lhs, rhs]
363+
else
364+
return Symbol(string('.', args[1], '='))
365+
end
358366
elseif k == K"macrocall"
359367
if length(args) >= 2
360368
a2 = args[2]

src/julia/julia_parse_stream.jl

Lines changed: 6 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -262,45 +262,6 @@ function validate_tokens(stream::ParseStream)
262262
sort!(stream.diagnostics, by=first_byte)
263263
end
264264

265-
"""
266-
bump_split(stream, token_spec1, [token_spec2 ...])
267-
268-
Bump the next token, splitting it into several pieces
269-
270-
Tokens are defined by a number of `token_spec` of shape `(nbyte, kind, flags)`.
271-
If all `nbyte` are positive, the sum must equal the token length. If one
272-
`nbyte` is negative, that token is given `tok_len + nbyte` bytes and the sum of
273-
all `nbyte` must equal zero.
274-
275-
This is a hack which helps resolve the occasional lexing ambiguity. For
276-
example
277-
* Whether .+ should be a single token or the composite (. +) which is used for
278-
standalone operators.
279-
* Whether ... is splatting (most of the time) or three . tokens in import paths
280-
281-
TODO: Are these the only cases? Can we replace this general utility with a
282-
simpler one which only splits preceding dots?
283-
"""
284-
function bump_split(stream::ParseStream, split_spec::Vararg{Any, N}) where {N}
285-
tok = stream.lookahead[stream.lookahead_index]
286-
stream.lookahead_index += 1
287-
start_b = _next_byte(stream)
288-
toklen = tok.next_byte - start_b
289-
prev_b = start_b
290-
for (i, (nbyte, k, f)) in enumerate(split_spec)
291-
h = SyntaxHead(k, f)
292-
actual_nbyte = nbyte < 0 ? (toklen + nbyte) : nbyte
293-
orig_k = k == K"." ? K"." : kind(tok)
294-
node = RawGreenNode(h, actual_nbyte, orig_k)
295-
push!(stream.output, node)
296-
prev_b += actual_nbyte
297-
stream.next_byte += actual_nbyte
298-
end
299-
@assert tok.next_byte == prev_b
300-
stream.peek_count = 0
301-
return position(stream)
302-
end
303-
304265
function peek_dotted_op_token(ps, allow_whitespace=false)
305266
# Peek the next token, but if it is a dot, peek the next one as well
306267
t = peek_token(ps)
@@ -318,7 +279,12 @@ function peek_dotted_op_token(ps, allow_whitespace=false)
318279
t = t2
319280
end
320281
end
321-
return (isdotted, t)
282+
isassign = false
283+
if !allow_whitespace && is_operator(t)
284+
t3 = peek_token(ps, 2+isdotted)
285+
isassign = kind(t3) == K"=" && !preceding_whitespace(t3)
286+
end
287+
return (isdotted, isassign, t)
322288
end
323289

324290
function bump_dotted(ps, isdot, t, flags=EMPTY_FLAGS; emit_dot_node=false, remap_kind=K"None")

0 commit comments

Comments
 (0)