Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 36 additions & 31 deletions src/julia/julia_parse_stream.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,54 +9,54 @@ const INFIX_FLAG = RawFlags(1<<3)
const PREFIX_OP_FLAG = RawFlags(2<<3)
const POSTFIX_OP_FLAG = RawFlags(3<<3)

# The following flags are quite head-specific and may overlap
# The following flags are quite head-specific and may overlap with numeric flags

"""
Set when K"string" or K"cmdstring" was triple-delimited as with \"\"\" or ```
"""
const TRIPLE_STRING_FLAG = RawFlags(1<<5)
const TRIPLE_STRING_FLAG = RawFlags(1<<8)

"""
Set when a K"string", K"cmdstring" or K"Identifier" needs raw string unescaping
"""
const RAW_STRING_FLAG = RawFlags(1<<6)
const RAW_STRING_FLAG = RawFlags(1<<9)

"""
Set for K"tuple", K"block" or K"macrocall" which are delimited by parentheses
"""
const PARENS_FLAG = RawFlags(1<<5)
const PARENS_FLAG = RawFlags(1<<8)

"""
Set for various delimited constructs when they contain a trailing comma. For
example, to distinguish `(a,b,)` vs `(a,b)`, and `f(a)` vs `f(a,)`. Kinds where
this applies are: `tuple call dotcall macrocall vect curly braces <: >:`.
"""
const TRAILING_COMMA_FLAG = RawFlags(1<<6)
const TRAILING_COMMA_FLAG = RawFlags(1<<9)

"""
Set for K"quote" for the short form `:x` as opposed to long form `quote x end`
"""
const COLON_QUOTE = RawFlags(1<<5)
const COLON_QUOTE = RawFlags(1<<8)

"""
Set for K"toplevel" which is delimited by semicolons
"""
const TOPLEVEL_SEMICOLONS_FLAG = RawFlags(1<<5)
const TOPLEVEL_SEMICOLONS_FLAG = RawFlags(1<<8)

"""
Set for K"function" in short form definitions such as `f() = 1`
"""
const SHORT_FORM_FUNCTION_FLAG = RawFlags(1<<5)
const SHORT_FORM_FUNCTION_FLAG = RawFlags(1<<8)

"""
Set for K"struct" when mutable
"""
const MUTABLE_FLAG = RawFlags(1<<5)
const MUTABLE_FLAG = RawFlags(1<<8)

"""
Set for K"module" when it is bare (`baremodule`, not `module`)
"""
const BARE_MODULE_FLAG = RawFlags(1<<5)
const BARE_MODULE_FLAG = RawFlags(1<<8)

# Flags holding the dimension of an nrow or other UInt8 not held in the source
# TODO: Given this is only used for nrow/ncat, we could actually use all the flags?
Expand Down Expand Up @@ -137,29 +137,34 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true)
is_postfix_op_call(head) && (str = str*"-post")

k = kind(head)
if k in KSet"string cmdstring Identifier"
has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"-s")
has_flags(head, RAW_STRING_FLAG) && (str = str*"-r")
elseif k in KSet"tuple block macrocall"
has_flags(head, PARENS_FLAG) && (str = str*"-p")
elseif k == K"quote"
has_flags(head, COLON_QUOTE) && (str = str*"-:")
elseif k == K"toplevel"
has_flags(head, TOPLEVEL_SEMICOLONS_FLAG) && (str = str*"-;")
elseif k == K"function"
has_flags(head, SHORT_FORM_FUNCTION_FLAG) && (str = str*"-=")
elseif k == K"struct"
has_flags(head, MUTABLE_FLAG) && (str = str*"-mut")
elseif k == K"module"
has_flags(head, BARE_MODULE_FLAG) && (str = str*"-bare")
end
if k in KSet"tuple call dotcall macrocall vect curly braces <: >:" &&
has_flags(head, TRAILING_COMMA_FLAG)
str *= "-,"
# Handle numeric flags for nrow/ncat nodes
if k in KSet"nrow ncat typed_ncat"
n = numeric_flags(head)
n != 0 && (str = str*"-"*string(n))
else
# Handle head-specific flags that overlap with numeric flags
if k in KSet"string cmdstring Identifier"
has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"-s")
has_flags(head, RAW_STRING_FLAG) && (str = str*"-r")
elseif k in KSet"tuple block macrocall"
has_flags(head, PARENS_FLAG) && (str = str*"-p")
elseif k == K"quote"
has_flags(head, COLON_QUOTE) && (str = str*"-:")
elseif k == K"toplevel"
has_flags(head, TOPLEVEL_SEMICOLONS_FLAG) && (str = str*"-;")
elseif k == K"function"
has_flags(head, SHORT_FORM_FUNCTION_FLAG) && (str = str*"-=")
elseif k == K"struct"
has_flags(head, MUTABLE_FLAG) && (str = str*"-mut")
elseif k == K"module"
has_flags(head, BARE_MODULE_FLAG) && (str = str*"-bare")
end
if k in KSet"tuple call dotcall macrocall vect curly braces <: >:" &&
has_flags(head, TRAILING_COMMA_FLAG)
str *= "-,"
end
end
is_suffixed(head) && (str = str*"-suf")
n = numeric_flags(head)
n != 0 && (str = str*"-"*string(n))
end
str
end
Expand Down
44 changes: 24 additions & 20 deletions src/julia/parser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1098,8 +1098,8 @@ function parse_where_chain(ps0::ParseState, mark)
# x where {y for y in ys} ==> (where x (braces (generator y (iteration (in y ys)))))
m = position(ps)
bump(ps, TRIVIA_FLAG)
ckind, cflags = parse_cat(ps, K"}", ps.end_symbol)
emit_braces(ps, m, ckind, cflags)
ckind, cflags, dim = parse_cat(ps, K"}", ps.end_symbol)
emit_braces(ps, m, ckind, cflags, dim)
emit(ps, mark, K"where")
else
# x where T ==> (where x T)
Expand Down Expand Up @@ -1589,7 +1589,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
# a [i] ==> (ref a (error-t) i)
bump_disallowed_space(ps)
bump(ps, TRIVIA_FLAG)
ckind, cflags = parse_cat(ParseState(ps, end_symbol=true),
ckind, cflags, dim = parse_cat(ParseState(ps, end_symbol=true),
K"]", ps.end_symbol)
if is_macrocall
# @S[a,b] ==> (macrocall @S (vect a b))
Expand All @@ -1600,7 +1600,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
#v1.7: @S[a ;; b] ==> (macrocall @S (ncat-2 a b))
#v1.6: @S[a ;; b] ==> (macrocall @S (error (ncat-2 a b)))
fix_macro_name_kind!(ps, macro_name_position)
emit(ps, m, ckind, cflags)
emit(ps, m, ckind, cflags | set_numeric_flags(dim))
check_ncat_compat(ps, m, ckind)
emit(ps, mark, K"macrocall")
is_macrocall = false
Expand All @@ -1621,7 +1621,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
ckind == K"comprehension" ? K"typed_comprehension" :
ckind == K"ncat" ? K"typed_ncat" :
internal_error("unrecognized kind in parse_cat ", string(ckind))
emit(ps, mark, outk, cflags)
emit(ps, mark, outk, cflags | set_numeric_flags(dim))
check_ncat_compat(ps, mark, ckind)
end
elseif k == K"."
Expand Down Expand Up @@ -2840,7 +2840,7 @@ function parse_array(ps::ParseState, mark, closer, end_is_symbol)
if binding_power == typemin(Int)
# [x@y ==> (hcat x (error-t ✘ y))
bump_closing_token(ps, closer)
return (K"hcat", EMPTY_FLAGS)
return (K"hcat", 0)
end
while true
(next_dim, next_bp) = parse_array_inner(ps, binding_power, array_order)
Expand All @@ -2856,9 +2856,9 @@ function parse_array(ps::ParseState, mark, closer, end_is_symbol)
binding_power = next_bp
end
bump_closing_token(ps, closer)
return binding_power == -1 ? (K"vcat", EMPTY_FLAGS) :
binding_power == 0 ? (K"hcat", EMPTY_FLAGS) :
(K"ncat", set_numeric_flags(dim))
return binding_power == -1 ? (K"vcat", 0) :
binding_power == 0 ? (K"hcat", 0) :
(K"ncat", dim)
end

# Parse equal and ascending precedence chains of array concatenation operators -
Expand Down Expand Up @@ -3012,7 +3012,8 @@ function parse_cat(ps::ParseState, closer, end_is_symbol)
mark = position(ps)
if k == closer
# [] ==> (vect)
return parse_vect(ps, closer, false)
ckind, cflags = parse_vect(ps, closer, false)
return (ckind, cflags, 0)
elseif k == K";"
#v1.8: [;] ==> (ncat-1)
#v1.8: [;;] ==> (ncat-2)
Expand All @@ -3022,7 +3023,7 @@ function parse_cat(ps::ParseState, closer, end_is_symbol)
dim, _ = parse_array_separator(ps, Ref(:unknown))
min_supported_version(v"1.8", ps, mark, "empty multidimensional array syntax")
bump_closing_token(ps, closer)
return (K"ncat", set_numeric_flags(dim))
return (K"ncat", EMPTY_FLAGS, dim)
end
parse_eq_star(ps)
k = peek(ps, skip_newlines=true)
Expand All @@ -3035,15 +3036,18 @@ function parse_cat(ps::ParseState, closer, end_is_symbol)
# [x] ==> (vect x)
# [x \n ] ==> (vect x)
# [x ==> (vect x (error-t))
parse_vect(ps, closer, prefix_trailing_comma)
ckind, cflags = parse_vect(ps, closer, prefix_trailing_comma)
return (ckind, cflags, 0)
elseif k == K"for"
# [x for a in as] ==> (comprehension (generator x (iteration (in a as))))
# [x \n\n for a in as] ==> (comprehension (generator x (iteration (in a as))))
parse_comprehension(ps, mark, closer)
ckind, cflags = parse_comprehension(ps, mark, closer)
return (ckind, cflags, 0)
else
# [x y] ==> (hcat x y)
# and other forms; See parse_array.
parse_array(ps, mark, closer, end_is_symbol)
ckind, dim = parse_array(ps, mark, closer, end_is_symbol)
return (ckind, EMPTY_FLAGS, dim)
end
end

Expand Down Expand Up @@ -3448,13 +3452,13 @@ function parse_string(ps::ParseState, raw::Bool)
emit(ps, mark, string_kind, str_flags)
end

function emit_braces(ps, mark, ckind, cflags)
function emit_braces(ps, mark, ckind, cflags, dim=0)
if ckind == K"hcat"
# {x y} ==> (bracescat (row x y))
emit(ps, mark, K"row", cflags & ~TRAILING_COMMA_FLAG)
elseif ckind == K"ncat"
# {x ;;; y} ==> (bracescat (nrow-3 x y))
emit(ps, mark, K"nrow", cflags & ~TRAILING_COMMA_FLAG)
emit(ps, mark, K"nrow", set_numeric_flags(dim))
end
check_ncat_compat(ps, mark, ckind)
outk = ckind in KSet"vect comprehension" ? K"braces" : K"bracescat"
Expand Down Expand Up @@ -3638,13 +3642,13 @@ function parse_atom(ps::ParseState, check_identifiers=true, has_unary_prefix=fal
parse_paren(ps, check_identifiers, has_unary_prefix)
elseif leading_kind == K"[" # cat expression
bump(ps, TRIVIA_FLAG)
ckind, cflags = parse_cat(ps, K"]", ps.end_symbol)
emit(ps, mark, ckind, cflags)
ckind, cflags, dim = parse_cat(ps, K"]", ps.end_symbol)
emit(ps, mark, ckind, cflags | set_numeric_flags(dim))
check_ncat_compat(ps, mark, ckind)
elseif leading_kind == K"{" # cat expression
bump(ps, TRIVIA_FLAG)
ckind, cflags = parse_cat(ps, K"}", ps.end_symbol)
emit_braces(ps, mark, ckind, cflags)
ckind, cflags, dim = parse_cat(ps, K"}", ps.end_symbol)
emit_braces(ps, mark, ckind, cflags, dim)
elseif leading_kind == K"@" # macro call
# Macro names can be keywords
# @end x ==> (macrocall @end x)
Expand Down
7 changes: 7 additions & 0 deletions test/parser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -890,6 +890,8 @@ tests = [
"{x,y,}" => "(braces-, x y)"
"{x y}" => "(bracescat (row x y))"
((v=v"1.7",), "{x ;;; y}") => "(bracescat (nrow-3 x y))"
((v=v"1.7",), "{a ;; b}") => "(bracescat (nrow-2 a b))"
((v=v"1.7",), "{a ;;;; b}") => "(bracescat (nrow-4 a b))"
# Macro names can be keywords
"@end x" => "(macrocall @end x)"
# __dot__ macro
Expand Down Expand Up @@ -929,6 +931,11 @@ tests = [
# Column major
((v=v"1.7",), "[x ; y ;; z ; w ;;; a ; b ;; c ; d]") =>
"(ncat-3 (nrow-2 (nrow-1 x y) (nrow-1 z w)) (nrow-2 (nrow-1 a b) (nrow-1 c d)))"
# Dimension 4 ncat
((v=v"1.7",), "[x ;;;; y]") => "(ncat-4 x y)"
((v=v"1.7",), "[a ; b ;;;; c ; d]") => "(ncat-4 (nrow-1 a b) (nrow-1 c d))"
((v=v"1.7",), "[a b ; c d ;;;; e f ; g h]") =>
"(ncat-4 (nrow-1 (row a b) (row c d)) (nrow-1 (row e f) (row g h)))"
# Array separators
# Newlines before semicolons are not significant
"[a \n ;]" => "(vcat a)"
Expand Down
Loading