Remove separate syntax heads for each operator #575

Open · wants to merge 2 commits into base: kf/dots

1 change: 0 additions & 1 deletion docs/src/api.md
@@ -101,7 +101,6 @@ JuliaSyntax.is_infix_op_call
JuliaSyntax.is_prefix_op_call
JuliaSyntax.is_postfix_op_call
JuliaSyntax.is_dotted
JuliaSyntax.is_suffixed
JuliaSyntax.is_decorated
JuliaSyntax.numeric_flags
```
7 changes: 5 additions & 2 deletions src/JuliaSyntax.jl
@@ -41,7 +41,11 @@ export SourceFile
@_public source_line_range

# Expression predicates, kinds and flags
export @K_str, kind
export @K_str, kind, PrecedenceLevel, PREC_NONE, PREC_ASSIGNMENT,
Review comment (Member):

Suggested change:
- export @K_str, kind, PrecedenceLevel, PREC_NONE, PREC_ASSIGNMENT,
+ export @K_str, kind
+ @_public PrecedenceLevel, PREC_NONE, PREC_ASSIGNMENT,

Just for consistency with the rest of the detailed API (e.g. is_trivia(), *_FLAGS, etc.), let's leave these public, not exported.

I'm open to exporting more symbols, but we should be consistent about it.
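
For illustration, a minimal sketch of the user-facing difference between exporting and marking public (hypothetical usage; PREC_ASSIGNMENT is taken from this PR's precedence constants, and @_public is assumed to mark names public without exporting them):

```julia
using JuliaSyntax

# Exported names come into scope directly via `using`:
kind                                 # the exported `kind` function, unqualified

# Public-but-not-exported names are still supported API, but they must be
# qualified or brought in explicitly:
JuliaSyntax.PREC_ASSIGNMENT          # qualified access
using JuliaSyntax: PREC_ASSIGNMENT   # or an explicit import
```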

PREC_PAIRARROW, PREC_CONDITIONAL, PREC_ARROW, PREC_LAZYOR, PREC_LAZYAND,
PREC_COMPARISON, PREC_PIPE_LT, PREC_PIPE_GT, PREC_COLON, PREC_PLUS,
PREC_BITSHIFT, PREC_TIMES, PREC_RATIONAL, PREC_POWER, PREC_DECL,
PREC_WHERE, PREC_DOT, PREC_QUOTE, PREC_UNICODE_OPS, PREC_COMPOUND_ASSIGN, generic_operators_by_level
@_public Kind

@_public flags,
@@ -53,7 +57,6 @@ export @K_str, kind
is_prefix_op_call,
is_postfix_op_call,
is_dotted,
is_suffixed,
is_decorated,
numeric_flags,
has_flags,
7 changes: 5 additions & 2 deletions src/core/parse_stream.jl
@@ -45,7 +45,7 @@ kind(head::SyntaxHead) = head.kind

Return the flag bits of a syntactic construct. Prefer to query these with the
predicates `is_trivia`, `is_prefix_call`, `is_infix_op_call`,
`is_prefix_op_call`, `is_postfix_op_call`, `is_dotted`, `is_suffixed`,
`is_prefix_op_call`, `is_postfix_op_call`, `is_dotted`,
`is_decorated`.

Or extract numeric portion of the flags with `numeric_flags`.
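
As a usage sketch of the predicates named in this docstring (hypothetical snippet written against the released JuliaSyntax API, not verified on this branch):

```julia
using JuliaSyntax

node = parsestmt(SyntaxNode, "a + b")

kind(node)                           # K"call"
JuliaSyntax.is_infix_op_call(node)   # true:  `a + b` parses as an infix operator call
JuliaSyntax.is_prefix_op_call(node)  # false: it is not prefix-call syntax like `+(a, b)`
```
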
@@ -376,7 +376,10 @@ function _buffer_lookahead_tokens(lexer, lookahead)
was_whitespace = is_whitespace(k)
had_whitespace |= was_whitespace
f = EMPTY_FLAGS
raw.suffix && (f |= SUFFIXED_FLAG)
if k == K"Operator" && raw.op_precedence != Tokenize.PREC_NONE
# Store operator precedence in numeric flags
f |= set_numeric_flags(Int(raw.op_precedence))
end
push!(lookahead, SyntaxToken(SyntaxHead(k, f), k,
had_whitespace, raw.endbyte + 2))
token_count += 1
39 changes: 25 additions & 14 deletions src/integration/expr.jl
@@ -338,20 +338,31 @@ end
return adjust_macro_name!(retexpr.args[1], k)
elseif k == K"?"
retexpr.head = :if
elseif k == K"op=" && length(args) == 3
lhs = args[1]
op = args[2]
rhs = args[3]
headstr = string(args[2], '=')
retexpr.head = Symbol(headstr)
retexpr.args = Any[lhs, rhs]
elseif k == K".op=" && length(args) == 3
lhs = args[1]
op = args[2]
rhs = args[3]
headstr = '.' * string(args[2], '=')
retexpr.head = Symbol(headstr)
retexpr.args = Any[lhs, rhs]
elseif k == K"dots"
n = numeric_flags(flags(nodehead))
return n == 2 ? :(..) : :(...)
elseif k == K"op="
if length(args) == 3
lhs = args[1]
op = args[2]
rhs = args[3]
headstr = string(args[2], '=')
retexpr.head = Symbol(headstr)
retexpr.args = Any[lhs, rhs]
elseif length(args) == 1
return Symbol(string(args[1], '='))
Review comment (Member):

Needs a note in the API docs about the new 1-arg form of K"op=" and K".op=".
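
For reference, a sketch of what the new 1-arg lowering yields, mirroring the Symbol(string(args[1], '=')) logic above (illustrative values only, not run against this branch):

```julia
# A 1-arg K"op=" node whose only child is the operator `+` lowers to the bare
# assignment-operator symbol, mirroring `Symbol(string(args[1], '='))`:
Symbol(string(:+, '=')) == Symbol("+=")        # true

# The dotted K".op=" counterpart just gains a leading dot:
Symbol(string('.', :+, '=')) == Symbol(".+=")  # true
```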

end
elseif k == K".op="
if length(args) == 3
lhs = args[1]
op = args[2]
rhs = args[3]
headstr = '.' * string(args[2], '=')
retexpr.head = Symbol(headstr)
retexpr.args = Any[lhs, rhs]
else
return Symbol(string('.', args[1], '='))
end
elseif k == K"macrocall"
if length(args) >= 2
a2 = args[2]
83 changes: 20 additions & 63 deletions src/julia/julia_parse_stream.jl
@@ -1,7 +1,3 @@
# Token flags - may be set for operator kinded tokens
# Operator has a suffix
const SUFFIXED_FLAG = RawFlags(1<<2)

# Set for K"call", K"dotcall" or any syntactic operator heads
# Distinguish various syntaxes which are mapped to K"call"
const PREFIX_CALL_FLAG = RawFlags(0<<3)
@@ -110,15 +106,6 @@ Return true for postfix operator calls such as the `'ᵀ` call node parsed from
"""
is_postfix_op_call(x) = call_type_flags(x) == POSTFIX_OP_FLAG


"""
is_suffixed(x)

Return true for operators which have suffixes, such as `+₁`
"""
is_suffixed(x) = has_flags(x, SUFFIXED_FLAG)


"""
numeric_flags(x)

@@ -137,8 +124,8 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true)
is_postfix_op_call(head) && (str = str*"-post")

k = kind(head)
# Handle numeric flags for nrow/ncat nodes
if k in KSet"nrow ncat typed_ncat"
# Handle numeric flags for nodes that take them
if k in KSet"nrow ncat typed_ncat dots"
n = numeric_flags(head)
n != 0 && (str = str*"-"*string(n))
else
Expand All @@ -164,7 +151,6 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true)
str *= "-,"
end
end
is_suffixed(head) && (str = str*"-suf")
end
str
end
@@ -262,67 +248,38 @@ function validate_tokens(stream::ParseStream)
sort!(stream.diagnostics, by=first_byte)
end

"""
bump_split(stream, token_spec1, [token_spec2 ...])

Bump the next token, splitting it into several pieces

Tokens are defined by a number of `token_spec` of shape `(nbyte, kind, flags)`.
If all `nbyte` are positive, the sum must equal the token length. If one
`nbyte` is negative, that token is given `tok_len + nbyte` bytes and the sum of
all `nbyte` must equal zero.

This is a hack which helps resolves the occasional lexing ambiguity. For
example
* Whether .+ should be a single token or the composite (. +) which is used for
standalone operators.
* Whether ... is splatting (most of the time) or three . tokens in import paths

TODO: Are these the only cases? Can we replace this general utility with a
simpler one which only splits preceding dots?
"""
function bump_split(stream::ParseStream, split_spec::Vararg{Any, N}) where {N}
tok = stream.lookahead[stream.lookahead_index]
stream.lookahead_index += 1
start_b = _next_byte(stream)
toklen = tok.next_byte - start_b
prev_b = start_b
for (i, (nbyte, k, f)) in enumerate(split_spec)
h = SyntaxHead(k, f)
actual_nbyte = nbyte < 0 ? (toklen + nbyte) : nbyte
orig_k = k == K"." ? K"." : kind(tok)
node = RawGreenNode(h, actual_nbyte, orig_k)
push!(stream.output, node)
prev_b += actual_nbyte
stream.next_byte += actual_nbyte
end
@assert tok.next_byte == prev_b
stream.peek_count = 0
return position(stream)
end

function peek_dotted_op_token(ps, allow_whitespace=false)
# Peek the next token, but if it is a dot, peek the next one as well
Review comment (Member):

Am I right in thinking this is never called with allow_whitespace set to true? If so, we should delete this parameter.

t = peek_token(ps)
isdotted = kind(t) == K"."
if isdotted
t2 = peek_token(ps, 2)
if !is_operator(t2) || (!allow_whitespace && preceding_whitespace(t2))
if (!allow_whitespace && preceding_whitespace(t2))
isdotted = false
elseif !is_operator(t2)
isdotted = false
elseif kind(t2) == K"." && peek(ps, 3) == K"."
# Treat `..` as dotted K".", unless there's another dot after
isdotted = false
else
t = t2
end
end
return (isdotted, t)
isassign = false
if !allow_whitespace && is_operator(t)
t3 = peek_token(ps, 2+isdotted)
isassign = kind(t3) == K"=" && !preceding_whitespace(t3)
Review comment (Member):

This is often unnecessary work because the isassign output is ignored by most parsing functions. I'm not quite sure if that matters.

end
return (isdotted, isassign, t)
end
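
A hypothetical caller sketch of how the updated return value pairs with the new bump_dotted signature below (the call pattern is an assumption for illustration, not code from this PR):

```julia
# Hypothetical caller: peek a possibly-dotted operator, then consume it.
# `isassign` lets a caller detect `op=` syntax without bumping anything yet.
isdot, isassign, t = peek_dotted_op_token(ps)
if is_operator(t) && !isassign
    bump_dotted(ps, isdot, t)  # bumps the leading `.` (if any) plus the operator
end
```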

function bump_dotted(ps, isdot, flags=EMPTY_FLAGS; emit_dot_node=false, remap_kind=K"None")
function bump_dotted(ps, isdot, t, flags=EMPTY_FLAGS; emit_dot_node=false, remap_kind=K"None")
if isdot
if emit_dot_node
dotmark = position(ps)
bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG
else
bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG
dotmark = position(ps)
bump(ps, TRIVIA_FLAG)
if kind(t) == K"."
bump(ps, TRIVIA_FLAG)
return emit(ps, dotmark, K"dots", set_numeric_flags(2))
end
end
pos = bump(ps, flags, remap_kind=remap_kind)