-
-
Notifications
You must be signed in to change notification settings - Fork 38
Remove separate syntax heads for each operator #575
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: kf/dots
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -338,20 +338,31 @@ end | |
return adjust_macro_name!(retexpr.args[1], k) | ||
elseif k == K"?" | ||
retexpr.head = :if | ||
elseif k == K"op=" && length(args) == 3 | ||
lhs = args[1] | ||
op = args[2] | ||
rhs = args[3] | ||
headstr = string(args[2], '=') | ||
retexpr.head = Symbol(headstr) | ||
retexpr.args = Any[lhs, rhs] | ||
elseif k == K".op=" && length(args) == 3 | ||
lhs = args[1] | ||
op = args[2] | ||
rhs = args[3] | ||
headstr = '.' * string(args[2], '=') | ||
retexpr.head = Symbol(headstr) | ||
retexpr.args = Any[lhs, rhs] | ||
elseif k == K"dots" | ||
n = numeric_flags(flags(nodehead)) | ||
return n == 2 ? :(..) : :(...) | ||
elseif k == K"op=" | ||
if length(args) == 3 | ||
lhs = args[1] | ||
op = args[2] | ||
rhs = args[3] | ||
headstr = string(args[2], '=') | ||
retexpr.head = Symbol(headstr) | ||
retexpr.args = Any[lhs, rhs] | ||
elseif length(args) == 1 | ||
return Symbol(string(args[1], '=')) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Needs a note in the API docs about the new 1-arg form of `K"op="`. |
||
end | ||
elseif k == K".op=" | ||
if length(args) == 3 | ||
lhs = args[1] | ||
op = args[2] | ||
rhs = args[3] | ||
headstr = '.' * string(args[2], '=') | ||
retexpr.head = Symbol(headstr) | ||
retexpr.args = Any[lhs, rhs] | ||
else | ||
return Symbol(string('.', args[1], '=')) | ||
end | ||
elseif k == K"macrocall" | ||
if length(args) >= 2 | ||
a2 = args[2] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,3 @@ | ||
# Token flags - may be set for operator kinded tokens | ||
# Operator has a suffix | ||
const SUFFIXED_FLAG = RawFlags(1<<2) | ||
|
||
# Set for K"call", K"dotcall" or any syntactic operator heads | ||
# Distinguish various syntaxes which are mapped to K"call" | ||
const PREFIX_CALL_FLAG = RawFlags(0<<3) | ||
|
@@ -110,15 +106,6 @@ Return true for postfix operator calls such as the `'ᵀ` call node parsed from | |
""" | ||
is_postfix_op_call(x) = call_type_flags(x) == POSTFIX_OP_FLAG | ||
|
||
|
||
""" | ||
is_suffixed(x) | ||
|
||
Return true for operators which have suffixes, such as `+₁` | ||
""" | ||
is_suffixed(x) = has_flags(x, SUFFIXED_FLAG) | ||
|
||
|
||
""" | ||
numeric_flags(x) | ||
|
||
|
@@ -137,8 +124,8 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) | |
is_postfix_op_call(head) && (str = str*"-post") | ||
|
||
k = kind(head) | ||
# Handle numeric flags for nrow/ncat nodes | ||
if k in KSet"nrow ncat typed_ncat" | ||
# Handle numeric flags for nodes that take them | ||
if k in KSet"nrow ncat typed_ncat dots" | ||
n = numeric_flags(head) | ||
n != 0 && (str = str*"-"*string(n)) | ||
else | ||
|
@@ -164,7 +151,6 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) | |
str *= "-," | ||
end | ||
end | ||
is_suffixed(head) && (str = str*"-suf") | ||
end | ||
str | ||
end | ||
|
@@ -262,67 +248,38 @@ function validate_tokens(stream::ParseStream) | |
sort!(stream.diagnostics, by=first_byte) | ||
end | ||
|
||
""" | ||
bump_split(stream, token_spec1, [token_spec2 ...]) | ||
|
||
Bump the next token, splitting it into several pieces | ||
|
||
Tokens are defined by a number of `token_spec` of shape `(nbyte, kind, flags)`. | ||
If all `nbyte` are positive, the sum must equal the token length. If one | ||
`nbyte` is negative, that token is given `tok_len + nbyte` bytes and the sum of | ||
all `nbyte` must equal zero. | ||
|
||
This is a hack which helps resolve the occasional lexing ambiguity. For | |
example | ||
* Whether .+ should be a single token or the composite (. +) which is used for | ||
standalone operators. | ||
* Whether ... is splatting (most of the time) or three . tokens in import paths | ||
|
||
TODO: Are these the only cases? Can we replace this general utility with a | ||
simpler one which only splits preceding dots? | ||
""" | ||
function bump_split(stream::ParseStream, split_spec::Vararg{Any, N}) where {N} | ||
tok = stream.lookahead[stream.lookahead_index] | ||
stream.lookahead_index += 1 | ||
start_b = _next_byte(stream) | ||
toklen = tok.next_byte - start_b | ||
prev_b = start_b | ||
for (i, (nbyte, k, f)) in enumerate(split_spec) | ||
h = SyntaxHead(k, f) | ||
actual_nbyte = nbyte < 0 ? (toklen + nbyte) : nbyte | ||
orig_k = k == K"." ? K"." : kind(tok) | ||
node = RawGreenNode(h, actual_nbyte, orig_k) | ||
push!(stream.output, node) | ||
prev_b += actual_nbyte | ||
stream.next_byte += actual_nbyte | ||
end | ||
@assert tok.next_byte == prev_b | ||
stream.peek_count = 0 | ||
return position(stream) | ||
end | ||
|
||
function peek_dotted_op_token(ps, allow_whitespace=false) | ||
# Peek the next token, but if it is a dot, peek the next one as well | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Am I right in thinking this is never called with |
||
t = peek_token(ps) | ||
isdotted = kind(t) == K"." | ||
if isdotted | ||
t2 = peek_token(ps, 2) | ||
if !is_operator(t2) || (!allow_whitespace && preceding_whitespace(t2)) | ||
if (!allow_whitespace && preceding_whitespace(t2)) | ||
isdotted = false | ||
elseif !is_operator(t2) | ||
isdotted = false | ||
elseif kind(t2) == K"." && peek(ps, 3) == K"." | ||
# Treat `..` as dotted K".", unless there's another dot after | ||
isdotted = false | ||
else | ||
t = t2 | ||
end | ||
end | ||
return (isdotted, t) | ||
isassign = false | ||
if !allow_whitespace && is_operator(t) | ||
t3 = peek_token(ps, 2+isdotted) | ||
isassign = kind(t3) == K"=" && !preceding_whitespace(t3) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is often unnecessary work because the |
||
end | ||
return (isdotted, isassign, t) | ||
end | ||
|
||
function bump_dotted(ps, isdot, flags=EMPTY_FLAGS; emit_dot_node=false, remap_kind=K"None") | ||
function bump_dotted(ps, isdot, t, flags=EMPTY_FLAGS; emit_dot_node=false, remap_kind=K"None") | ||
if isdot | ||
if emit_dot_node | ||
dotmark = position(ps) | ||
bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG | ||
else | ||
bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG | ||
dotmark = position(ps) | ||
bump(ps, TRIVIA_FLAG) | ||
if kind(t) == K"." | ||
bump(ps, TRIVIA_FLAG) | ||
return emit(ps, dotmark, K"dots", set_numeric_flags(2)) | ||
end | ||
end | ||
pos = bump(ps, flags, remap_kind=remap_kind) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just for consistency with the rest of the detailed API (e.g., things like `is_trivia()`, `*_FLAGS`, etc.), let's leave these public but not exported. I'm open to exporting more symbols, but we should be consistent about it.