Skip to content

Commit 4f95341

Browse files
committed
Remove separate syntax heads for each operator
This replaces all the specialized operator heads by a single K"Operator" head that encodes the precedence level in its flags (except for operators that are also used for non-operator purposes). The operators are already K"Identifier" in the final parse tree. There is very little reason to spend all of the extra effort separating them into separate heads only to undo this later. Moreover, I think it's actively misleading, because it makes people think that they can query things about an operator by looking at the head, which doesn't work for suffixed operators. Additionally, this removes the `op=` token, replacing it by two tokens, one K"Operator" with a special precedence level and one `=`. This then removes the last use of `bump_split` (since this PR is on top of #573). As a free bonus this prepares us for having compound assignment syntax for suffixed operators, which was infeasible in the flisp parser. That syntax change is not part of this PR but would be trivial (this PR makes it an explicit error). Fixes #334
1 parent daf52ca commit 4f95341

File tree

11 files changed

+541
-1074
lines changed

11 files changed

+541
-1074
lines changed

docs/src/api.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,6 @@ JuliaSyntax.is_infix_op_call
101101
JuliaSyntax.is_prefix_op_call
102102
JuliaSyntax.is_postfix_op_call
103103
JuliaSyntax.is_dotted
104-
JuliaSyntax.is_suffixed
105104
JuliaSyntax.is_decorated
106105
JuliaSyntax.numeric_flags
107106
```

src/JuliaSyntax.jl

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,11 @@ export SourceFile
4141
@_public source_line_range
4242

4343
# Expression predicates, kinds and flags
44-
export @K_str, kind
44+
export @K_str, kind, PrecedenceLevel, PREC_NONE, PREC_ASSIGNMENT,
45+
PREC_PAIRARROW, PREC_CONDITIONAL, PREC_ARROW, PREC_LAZYOR, PREC_LAZYAND,
46+
PREC_COMPARISON, PREC_PIPE_LT, PREC_PIPE_GT, PREC_COLON, PREC_PLUS,
47+
PREC_BITSHIFT, PREC_TIMES, PREC_RATIONAL, PREC_POWER, PREC_DECL,
48+
PREC_WHERE, PREC_DOT, PREC_QUOTE, PREC_UNICODE_OPS, PREC_COMPOUND_ASSIGN, generic_operators_by_level
4549
@_public Kind
4650

4751
@_public flags,
@@ -53,7 +57,6 @@ export @K_str, kind
5357
is_prefix_op_call,
5458
is_postfix_op_call,
5559
is_dotted,
56-
is_suffixed,
5760
is_decorated,
5861
numeric_flags,
5962
has_flags,

src/core/parse_stream.jl

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ kind(head::SyntaxHead) = head.kind
4545
4646
Return the flag bits of a syntactic construct. Prefer to query these with the
4747
predicates `is_trivia`, `is_prefix_call`, `is_infix_op_call`,
48-
`is_prefix_op_call`, `is_postfix_op_call`, `is_dotted`, `is_suffixed`,
48+
`is_prefix_op_call`, `is_postfix_op_call`, `is_dotted`,
4949
`is_decorated`.
5050
5151
Or extract numeric portion of the flags with `numeric_flags`.
@@ -376,7 +376,10 @@ function _buffer_lookahead_tokens(lexer, lookahead)
376376
was_whitespace = is_whitespace(k)
377377
had_whitespace |= was_whitespace
378378
f = EMPTY_FLAGS
379-
raw.suffix && (f |= SUFFIXED_FLAG)
379+
if k == K"Operator" && raw.op_precedence != Tokenize.PREC_NONE
380+
# Store operator precedence in numeric flags
381+
f |= set_numeric_flags(Int(raw.op_precedence))
382+
end
380383
push!(lookahead, SyntaxToken(SyntaxHead(k, f), k,
381384
had_whitespace, raw.endbyte + 2))
382385
token_count += 1

src/integration/expr.jl

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -341,20 +341,28 @@ end
341341
elseif k == K"dots"
342342
n = numeric_flags(flags(nodehead))
343343
return n == 2 ? :(..) : :(...)
344-
elseif k == K"op=" && length(args) == 3
345-
lhs = args[1]
346-
op = args[2]
347-
rhs = args[3]
348-
headstr = string(args[2], '=')
349-
retexpr.head = Symbol(headstr)
350-
retexpr.args = Any[lhs, rhs]
351-
elseif k == K".op=" && length(args) == 3
352-
lhs = args[1]
353-
op = args[2]
354-
rhs = args[3]
355-
headstr = '.' * string(args[2], '=')
356-
retexpr.head = Symbol(headstr)
357-
retexpr.args = Any[lhs, rhs]
344+
elseif k == K"op="
345+
if length(args) == 3
346+
lhs = args[1]
347+
op = args[2]
348+
rhs = args[3]
349+
headstr = string(args[2], '=')
350+
retexpr.head = Symbol(headstr)
351+
retexpr.args = Any[lhs, rhs]
352+
elseif length(args) == 1
353+
return Symbol(string(args[1], '='))
354+
end
355+
elseif k == K".op="
356+
if length(args) == 3
357+
lhs = args[1]
358+
op = args[2]
359+
rhs = args[3]
360+
headstr = '.' * string(args[2], '=')
361+
retexpr.head = Symbol(headstr)
362+
retexpr.args = Any[lhs, rhs]
363+
else
364+
return Symbol(string('.', args[1], '='))
365+
end
358366
elseif k == K"macrocall"
359367
if length(args) >= 2
360368
a2 = args[2]

src/julia/julia_parse_stream.jl

Lines changed: 6 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,3 @@
1-
# Token flags - may be set for operator kinded tokens
2-
# Operator has a suffix
3-
const SUFFIXED_FLAG = RawFlags(1<<2)
4-
51
# Set for K"call", K"dotcall" or any syntactic operator heads
62
# Distinguish various syntaxes which are mapped to K"call"
73
const PREFIX_CALL_FLAG = RawFlags(0<<3)
@@ -110,15 +106,6 @@ Return true for postfix operator calls such as the `'ᵀ` call node parsed from
110106
"""
111107
is_postfix_op_call(x) = call_type_flags(x) == POSTFIX_OP_FLAG
112108

113-
114-
"""
115-
is_suffixed(x)
116-
117-
Return true for operators which have suffixes, such as `+₁`
118-
"""
119-
is_suffixed(x) = has_flags(x, SUFFIXED_FLAG)
120-
121-
122109
"""
123110
numeric_flags(x)
124111
@@ -164,7 +151,6 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true)
164151
str *= "-,"
165152
end
166153
end
167-
is_suffixed(head) && (str = str*"-suf")
168154
end
169155
str
170156
end
@@ -262,45 +248,6 @@ function validate_tokens(stream::ParseStream)
262248
sort!(stream.diagnostics, by=first_byte)
263249
end
264250

265-
"""
266-
bump_split(stream, token_spec1, [token_spec2 ...])
267-
268-
Bump the next token, splitting it into several pieces
269-
270-
Tokens are defined by a number of `token_spec` of shape `(nbyte, kind, flags)`.
271-
If all `nbyte` are positive, the sum must equal the token length. If one
272-
`nbyte` is negative, that token is given `tok_len + nbyte` bytes and the sum of
273-
all `nbyte` must equal zero.
274-
275-
This is a hack which helps resolve the occasional lexing ambiguity. For
276-
example
277-
* Whether .+ should be a single token or the composite (. +) which is used for
278-
standalone operators.
279-
* Whether ... is splatting (most of the time) or three . tokens in import paths
280-
281-
TODO: Are these the only cases? Can we replace this general utility with a
282-
simpler one which only splits preceding dots?
283-
"""
284-
function bump_split(stream::ParseStream, split_spec::Vararg{Any, N}) where {N}
285-
tok = stream.lookahead[stream.lookahead_index]
286-
stream.lookahead_index += 1
287-
start_b = _next_byte(stream)
288-
toklen = tok.next_byte - start_b
289-
prev_b = start_b
290-
for (i, (nbyte, k, f)) in enumerate(split_spec)
291-
h = SyntaxHead(k, f)
292-
actual_nbyte = nbyte < 0 ? (toklen + nbyte) : nbyte
293-
orig_k = k == K"." ? K"." : kind(tok)
294-
node = RawGreenNode(h, actual_nbyte, orig_k)
295-
push!(stream.output, node)
296-
prev_b += actual_nbyte
297-
stream.next_byte += actual_nbyte
298-
end
299-
@assert tok.next_byte == prev_b
300-
stream.peek_count = 0
301-
return position(stream)
302-
end
303-
304251
function peek_dotted_op_token(ps, allow_whitespace=false)
305252
# Peek the next token, but if it is a dot, peek the next one as well
306253
t = peek_token(ps)
@@ -318,7 +265,12 @@ function peek_dotted_op_token(ps, allow_whitespace=false)
318265
t = t2
319266
end
320267
end
321-
return (isdotted, t)
268+
isassign = false
269+
if !allow_whitespace && is_operator(t)
270+
t3 = peek_token(ps, 2+isdotted)
271+
isassign = kind(t3) == K"=" && !preceding_whitespace(t3)
272+
end
273+
return (isdotted, isassign, t)
322274
end
323275

324276
function bump_dotted(ps, isdot, t, flags=EMPTY_FLAGS; emit_dot_node=false, remap_kind=K"None")

0 commit comments

Comments
 (0)