Skip to content

Commit 700101e

Browse files
authored
Record fixity of call type in flags (#124)
We now record which precise call syntax was used, out of the four options: (1) prefix calls with parens, (2) prefix operator calls, (3) infix operator calls, and (4) postfix operator calls. This allows us to distinguish keyword arguments from assignment, fixing several bugs with `=` to `kw` conversion. Also, change the parser to emit unadorned postfix adjoint as `(call-post x ')` rather than as a syntactic operator `(' x)`, for consistency with suffixed versions like `x'ᵀ`.
1 parent 384f745 commit 700101e

File tree

5 files changed

+125
-102
lines changed

5 files changed

+125
-102
lines changed

src/expr.jl

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -99,46 +99,46 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true,
9999
args[1] = _to_expr(node_args[1], need_linenodes=false)
100100
args[2] = _to_expr(node_args[2])
101101
else
102-
eq_to_kw = headsym == :call && !has_flags(node, INFIX_FLAG) ||
103-
headsym == :ref ||
104-
(headsym == :parameters && !inside_vect_or_braces) ||
105-
(headsym == :tuple && inside_dot_expr)
102+
eq_to_kw_in_call =
103+
headsym == :call && is_prefix_call(node) ||
104+
headsym == :ref
105+
eq_to_kw_all = headsym == :parameters && !inside_vect_or_braces ||
106+
(headsym == :tuple && inside_dot_expr)
106107
in_dot = headsym == :.
107108
in_vb = headsym == :vect || headsym == :braces
108-
if insert_linenums
109-
if isempty(node_args)
110-
push!(args, source_location(LineNumberNode, node.source, node.position))
111-
else
112-
for i in 1:length(node_args)
113-
n = node_args[i]
114-
args[2*i-1] = source_location(LineNumberNode, n.source, n.position)
115-
args[2*i] = _to_expr(n,
116-
eq_to_kw=eq_to_kw,
117-
inside_dot_expr=in_dot,
118-
inside_vect_or_braces=in_vb)
119-
end
120-
end
109+
if insert_linenums && isempty(node_args)
110+
push!(args, source_location(LineNumberNode, node.source, node.position))
121111
else
122112
for i in 1:length(node_args)
123-
args[i] = _to_expr(node_args[i],
124-
eq_to_kw=eq_to_kw,
125-
inside_dot_expr=in_dot,
126-
inside_vect_or_braces=in_vb)
113+
n = node_args[i]
114+
if insert_linenums
115+
args[2*i-1] = source_location(LineNumberNode, n.source, n.position)
116+
end
117+
eq_to_kw = eq_to_kw_in_call && i > 1 || eq_to_kw_all
118+
args[insert_linenums ? 2*i : i] =
119+
_to_expr(n, eq_to_kw=eq_to_kw,
120+
inside_dot_expr=in_dot,
121+
inside_vect_or_braces=in_vb)
127122
end
128123
end
129124
end
130-
# Julia's standard `Expr` ASTs have children stored in a canonical
131-
# order which is not always source order. We permute the children
132-
# here as necessary to get the canonical order.
133-
if is_infix(node.raw)
134-
args[2], args[1] = args[1], args[2]
135-
end
136125

137126
# Special cases for various expression heads
138127
loc = source_location(LineNumberNode, node.source, node.position)
139128
if headsym == :macrocall
140129
insert!(args, 2, loc)
141130
elseif headsym in (:call, :ref)
131+
# Julia's standard `Expr` ASTs have children stored in a canonical
132+
# order which is not always source order. We permute the children
133+
# here as necessary to get the canonical order.
134+
if is_infix_op_call(node) || is_postfix_op_call(node)
135+
args[2], args[1] = args[1], args[2]
136+
end
137+
# Lower (call-post x ') to special ' head
138+
if is_postfix_op_call(node) && args[1] == Symbol("'")
139+
popfirst!(args)
140+
headsym = Symbol("'")
141+
end
142142
# Move parameters block to args[2]
143143
if length(args) > 1 && Meta.isexpr(args[end], :parameters)
144144
insert!(args, 2, args[end])

src/parse_stream.jl

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,25 @@
55
# TODO: Use `primitive type SyntaxFlags 16 end` rather than an alias?
66
const RawFlags = UInt16
77
const EMPTY_FLAGS = RawFlags(0)
8+
# Applied to tokens which are syntax trivia after parsing
89
const TRIVIA_FLAG = RawFlags(1<<0)
9-
# Some of the following flags are head-specific and could probably be allowed
10-
# to cover the same bits...
11-
const INFIX_FLAG = RawFlags(1<<1)
12-
# Record whether syntactic operators were dotted
13-
const DOTOP_FLAG = RawFlags(1<<2)
10+
11+
# Record whether operators are dotted
12+
const DOTOP_FLAG = RawFlags(1<<1)
13+
# Record whether operator has a suffix
14+
const SUFFIXED_FLAG = RawFlags(1<<2)
15+
16+
# Distinguish various syntaxes which are mapped to K"call"
17+
const PREFIX_CALL_FLAG = RawFlags(0<<3)
18+
const INFIX_FLAG = RawFlags(1<<3)
19+
const PREFIX_OP_FLAG = RawFlags(2<<3)
20+
const POSTFIX_OP_FLAG = RawFlags(3<<3)
21+
22+
# The next two bits could overlap with the previous two if necessary
1423
# Set when kind == K"String" was triple-delimited as with """ or ```
15-
const TRIPLE_STRING_FLAG = RawFlags(1<<3)
24+
const TRIPLE_STRING_FLAG = RawFlags(1<<5)
1625
# Set when a string or identifier needs "raw string" unescaping
17-
const RAW_STRING_FLAG = RawFlags(1<<4)
18-
# Record whether operator has a suffix
19-
const SUFFIXED_FLAG = RawFlags(1<<6)
26+
const RAW_STRING_FLAG = RawFlags(1<<6)
2027

2128
# Token-only flag
2229
# Record whether a token had preceding whitespace
@@ -34,6 +41,10 @@ function set_numeric_flags(n::Integer)
3441
f
3542
end
3643

44+
function call_type_flags(f::RawFlags)
45+
f & 0b11000
46+
end
47+
3748
function numeric_flags(f::RawFlags)
3849
Int((f >> 8) % UInt8)
3950
end
@@ -70,7 +81,9 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true)
7081
if include_flag_suff && suffix_flags != EMPTY_FLAGS
7182
str = str*"-"
7283
is_trivia(head) && (str = str*"t")
73-
is_infix(head) && (str = str*"i")
84+
is_infix_op_call(head) && (str = str*"i")
85+
is_prefix_op_call(head) && (str = str*"pre")
86+
is_postfix_op_call(head) && (str = str*"post")
7487
has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"s")
7588
has_flags(head, RAW_STRING_FLAG) && (str = str*"r")
7689
is_suffixed(head) && (str = str*"S")
@@ -90,8 +103,13 @@ flags(x) = flags(head(x))
90103

91104
# Predicates based on flags()
92105
has_flags(x, test_flags) = has_flags(flags(x), test_flags)
106+
call_type_flags(x) = call_type_flags(flags(x))
107+
93108
is_trivia(x) = has_flags(x, TRIVIA_FLAG)
94-
is_infix(x) = has_flags(x, INFIX_FLAG)
109+
is_prefix_call(x) = call_type_flags(x) == PREFIX_CALL_FLAG
110+
is_infix_op_call(x) = call_type_flags(x) == INFIX_FLAG
111+
is_prefix_op_call(x) = call_type_flags(x) == PREFIX_OP_FLAG
112+
is_postfix_op_call(x) = call_type_flags(x) == POSTFIX_OP_FLAG
95113
is_dotted(x) = has_flags(x, DOTOP_FLAG)
96114
is_suffixed(x) = has_flags(x, SUFFIXED_FLAG)
97115
is_decorated(x) = is_dotted(x) || is_suffixed(x)

src/parser.jl

Lines changed: 36 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -544,7 +544,7 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where {
544544
if k == K"~"
545545
if ps.space_sensitive && !preceding_whitespace(peek_token(ps, 2))
546546
# Unary ~ in space sensitive context is not assignment precedence
547-
# [a ~b] ==> (hcat a (call ~ b))
547+
# [a ~b] ==> (hcat a (call-pre ~ b))
548548
return
549549
end
550550
# ~ is the only non-syntactic assignment-precedence operator.
@@ -885,8 +885,8 @@ function parse_with_chains(ps::ParseState, down, is_op, chain_ops)
885885
is_both_unary_and_binary(t) &&
886886
!preceding_whitespace(peek_token(ps, 2))
887887
# The following is two elements of a hcat
888-
# [x +y] ==> (hcat x (call + y))
889-
# [x+y +z] ==> (hcat (call-i x + y) (call + z))
888+
# [x +y] ==> (hcat x (call-pre + y))
889+
# [x+y +z] ==> (hcat (call-i x + y) (call-pre + z))
890890
# Conversely the following are infix calls
891891
# [x +₁y] ==> (vect (call-i x +₁ y))
892892
# [x+y+z] ==> (vect (call-i x + y z))
@@ -914,7 +914,7 @@ function parse_chain(ps::ParseState, down, op_kind)
914914
if ps.space_sensitive && preceding_whitespace(t) &&
915915
is_both_unary_and_binary(t) &&
916916
!preceding_whitespace(peek_token(ps, 2))
917-
# [x +y] ==> (hcat x (call + y))
917+
# [x +y] ==> (hcat x (call-pre + y))
918918
break
919919
end
920920
bump(ps, TRIVIA_FLAG)
@@ -948,16 +948,16 @@ function parse_unary_subtype(ps::ParseState)
948948
elseif k2 in KSet"{ ("
949949
# parse <:{T}(x::T) or <:(x::T) like other unary operators
950950
# <:{T}(x::T) ==> (call (curly <: T) (:: x T))
951-
# <:(x::T) ==> (<: (:: x T))
951+
# <:(x::T) ==> (<:-pre (:: x T))
952952
parse_where(ps, parse_juxtapose)
953953
else
954-
# <: A where B ==> (<: (where A B))
954+
# <: A where B ==> (<:-pre (where A B))
955955
mark = position(ps)
956956
bump(ps, TRIVIA_FLAG)
957957
parse_where(ps, parse_juxtapose)
958958
# Flisp parser handled this, but I don't know how it can happen...
959959
@check peek_behind(ps).kind != K"tuple"
960-
emit(ps, mark, k)
960+
emit(ps, mark, k, PREFIX_OP_FLAG)
961961
end
962962
else
963963
parse_where(ps, parse_juxtapose)
@@ -1015,7 +1015,7 @@ function is_juxtapose(ps, prev_k, t)
10151015
# Not juxtaposition - parse_juxtapose will consume only the first token.
10161016
# x.3 ==> x
10171017
# sqrt(2)2 ==> (call sqrt 2)
1018-
# x' y ==> x
1018+
# x' y ==> (call-post x ')
10191019
# x 'y ==> x
10201020

10211021
return !preceding_whitespace(t) &&
@@ -1039,7 +1039,7 @@ end
10391039
# 2(x) ==> (call-i 2 * x)
10401040
# (2)(3)x ==> (call-i 2 * 3 x)
10411041
# (x-1)y ==> (call-i (call-i x - 1) * y)
1042-
# x'y ==> x
1042+
# x'y ==> (call-i (call-post x ') * y)
10431043
#
10441044
# flisp: parse-juxtapose
10451045
function parse_juxtapose(ps::ParseState)
@@ -1098,11 +1098,11 @@ function parse_unary(ps::ParseState)
10981098
if is_prec_power(k3) || k3 in KSet"[ {"
10991099
# `[`, `{` (issue #18851) and `^` have higher precedence than
11001100
# unary negation
1101-
# -2^x ==> (call - (call-i 2 ^ x))
1102-
# -2[1, 3] ==> (call - (ref 2 1 3))
1101+
# -2^x ==> (call-pre - (call-i 2 ^ x))
1102+
# -2[1, 3] ==> (call-pre - (ref 2 1 3))
11031103
bump(ps)
11041104
parse_factor(ps)
1105-
emit(ps, mark, K"call")
1105+
emit(ps, mark, K"call", PREFIX_OP_FLAG)
11061106
else
11071107
# We have a signed numeric literal. Glue the operator to the
11081108
# next token to create a signed literal:
@@ -1115,17 +1115,17 @@ function parse_unary(ps::ParseState)
11151115
end
11161116
end
11171117
# Things which are not quite negative literals result in a unary call instead
1118-
# -0x1 ==> (call - 0x01)
1119-
# - 2 ==> (call - 2)
1120-
# .-2 ==> (call .- 2)
1118+
# -0x1 ==> (call-pre - 0x01)
1119+
# - 2 ==> (call-pre - 2)
1120+
# .-2 ==> (call-pre .- 2)
11211121
parse_unary_call(ps)
11221122
end
11231123

11241124
# Parse calls to unary operators and prefix calls involving arbitrary operators
11251125
# with bracketed arglists (as opposed to infix notation)
11261126
#
1127-
# +a ==> (call + a)
1128-
# +(a,b) ==> (call + a b)
1127+
# +a ==> (call-pre + a)
1128+
# +(a,b) ==> (call + a b)
11291129
#
11301130
# flisp: parse-unary-call
11311131
function parse_unary_call(ps::ParseState)
@@ -1208,33 +1208,33 @@ function parse_unary_call(ps::ParseState)
12081208
else
12091209
# Unary function calls with brackets as grouping, not an arglist
12101210
if opts.is_block
1211-
# +(a;b) ==> (call + (block a b))
1211+
# +(a;b) ==> (call-pre + (block a b))
12121212
emit(ps, mark_before_paren, K"block")
12131213
end
12141214
# Not a prefix operator call but a block; `=` is not `kw`
1215-
# +(a=1) ==> (call + (= a 1))
1215+
# +(a=1) ==> (call-pre + (= a 1))
12161216
# Unary operators have lower precedence than ^
1217-
# +(a)^2 ==> (call + (call-i a ^ 2))
1218-
# +(a)(x,y)^2 ==> (call + (call-i (call a x y) ^ 2))
1217+
# +(a)^2 ==> (call-pre + (call-i a ^ 2))
1218+
# +(a)(x,y)^2 ==> (call-pre + (call-i (call a x y) ^ 2))
12191219
parse_call_chain(ps, mark_before_paren)
12201220
parse_factor_with_initial_ex(ps, mark_before_paren)
1221-
emit(ps, mark, op_node_kind)
1221+
emit(ps, mark, op_node_kind, PREFIX_OP_FLAG)
12221222
end
12231223
else
12241224
if is_unary_op(op_t)
12251225
# Normal unary calls
1226-
# +x ==> (call + x)
1227-
# √x ==> (call √ x)
1228-
# ±x ==> (call ± x)
1226+
# +x ==> (call-pre + x)
1227+
# √x ==> (call-pre √ x)
1228+
# ±x ==> (call-pre ± x)
12291229
bump(ps, op_tok_flags)
12301230
else
1231-
# /x ==> (call (error /) x)
1232-
# +₁ x ==> (call (error +₁) x)
1233-
# .<: x ==> (call (error .<:) x)
1231+
# /x ==> (call-pre (error /) x)
1232+
# +₁ x ==> (call-pre (error +₁) x)
1233+
# .<: x ==> (call-pre (error .<:) x)
12341234
bump(ps, error="not a unary operator")
12351235
end
12361236
parse_unary(ps)
1237-
emit(ps, mark, op_node_kind)
1237+
emit(ps, mark, op_node_kind, PREFIX_OP_FLAG)
12381238
end
12391239
end
12401240

@@ -1433,6 +1433,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
14331433
finish_macroname(ps, mark, valid_macroname, macro_name_position)
14341434
end
14351435
# f(a,b) ==> (call f a b)
1436+
# f(a; b=1) ==> (call f a (parameters (= b 1)))
1437+
# (a=1)() ==> (call (= a 1))
14361438
# f (a) ==> (call f (error-t) a)
14371439
bump_disallowed_space(ps)
14381440
bump(ps, TRIVIA_FLAG)
@@ -1457,6 +1459,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
14571459
K"]", ps.end_symbol)
14581460
# a[i] ==> (ref a i)
14591461
# a[i,j] ==> (ref a i j)
1462+
# (a=1)[] ==> (ref (= a 1))
14601463
# T[x y] ==> (typed_hcat T x y)
14611464
# T[x ; y] ==> (typed_vcat T x y)
14621465
# T[a b; c d] ==> (typed_vcat T (row a b) (row c d))
@@ -1562,15 +1565,10 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
15621565
this_iter_valid_macroname = true
15631566
end
15641567
elseif k == K"'" && !preceding_whitespace(t)
1565-
if !is_suffixed(t)
1566-
# f' ==> (' f)
1567-
bump(ps, TRIVIA_FLAG)
1568-
emit(ps, mark, k)
1569-
else
1570-
# f'ᵀ ==> (call 'ᵀ f)
1571-
bump(ps)
1572-
emit(ps, mark, K"call", INFIX_FLAG)
1573-
end
1568+
# f' ==> (call-post f ')
1569+
# f'ᵀ ==> (call-post f 'ᵀ)
1570+
bump(ps)
1571+
emit(ps, mark, K"call", POSTFIX_OP_FLAG)
15741572
elseif k == K"{"
15751573
# Type parameter curlies and macro calls
15761574
if is_macrocall

src/tokenize.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -945,7 +945,6 @@ function lex_backslash(l::Lexer)
945945
return emit(l, K"\\")
946946
end
947947

948-
# TODO .op
949948
function lex_dot(l::Lexer)
950949
if accept(l, '.')
951950
if accept(l, '.')

0 commit comments

Comments
 (0)