diff --git a/src/julia/julia_parse_stream.jl b/src/julia/julia_parse_stream.jl index fc3eac28..87ad0386 100644 --- a/src/julia/julia_parse_stream.jl +++ b/src/julia/julia_parse_stream.jl @@ -9,54 +9,54 @@ const INFIX_FLAG = RawFlags(1<<3) const PREFIX_OP_FLAG = RawFlags(2<<3) const POSTFIX_OP_FLAG = RawFlags(3<<3) -# The following flags are quite head-specific and may overlap +# The following flags are quite head-specific; they may overlap with each other and with the numeric flags """ Set when K"string" or K"cmdstring" was triple-delimited as with \"\"\" or ``` """ -const TRIPLE_STRING_FLAG = RawFlags(1<<5) +const TRIPLE_STRING_FLAG = RawFlags(1<<8) """ Set when a K"string", K"cmdstring" or K"Identifier" needs raw string unescaping """ -const RAW_STRING_FLAG = RawFlags(1<<6) +const RAW_STRING_FLAG = RawFlags(1<<9) """ Set for K"tuple", K"block" or K"macrocall" which are delimited by parentheses """ -const PARENS_FLAG = RawFlags(1<<5) +const PARENS_FLAG = RawFlags(1<<8) """ Set for various delimited constructs when they contains a trailing comma. For example, to distinguish `(a,b,)` vs `(a,b)`, and `f(a)` vs `f(a,)`. Kinds where this applies are: `tuple call dotcall macrocall vect curly braces <: >:`. 
""" -const TRAILING_COMMA_FLAG = RawFlags(1<<6) +const TRAILING_COMMA_FLAG = RawFlags(1<<9) """ Set for K"quote" for the short form `:x` as opposed to long form `quote x end` """ -const COLON_QUOTE = RawFlags(1<<5) +const COLON_QUOTE = RawFlags(1<<8) """ Set for K"toplevel" which is delimited by parentheses """ -const TOPLEVEL_SEMICOLONS_FLAG = RawFlags(1<<5) +const TOPLEVEL_SEMICOLONS_FLAG = RawFlags(1<<8) """ Set for K"function" in short form definitions such as `f() = 1` """ -const SHORT_FORM_FUNCTION_FLAG = RawFlags(1<<5) +const SHORT_FORM_FUNCTION_FLAG = RawFlags(1<<8) """ Set for K"struct" when mutable """ -const MUTABLE_FLAG = RawFlags(1<<5) +const MUTABLE_FLAG = RawFlags(1<<8) """ Set for K"module" when it's not bare (`module`, not `baremodule`) """ -const BARE_MODULE_FLAG = RawFlags(1<<5) +const BARE_MODULE_FLAG = RawFlags(1<<8) # Flags holding the dimension of an nrow or other UInt8 not held in the source # TODO: Given this is only used for nrow/ncat, we could actually use all the flags? 
@@ -137,29 +137,34 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) is_postfix_op_call(head) && (str = str*"-post") k = kind(head) - if k in KSet"string cmdstring Identifier" - has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"-s") - has_flags(head, RAW_STRING_FLAG) && (str = str*"-r") - elseif k in KSet"tuple block macrocall" - has_flags(head, PARENS_FLAG) && (str = str*"-p") - elseif k == K"quote" - has_flags(head, COLON_QUOTE) && (str = str*"-:") - elseif k == K"toplevel" - has_flags(head, TOPLEVEL_SEMICOLONS_FLAG) && (str = str*"-;") - elseif k == K"function" - has_flags(head, SHORT_FORM_FUNCTION_FLAG) && (str = str*"-=") - elseif k == K"struct" - has_flags(head, MUTABLE_FLAG) && (str = str*"-mut") - elseif k == K"module" - has_flags(head, BARE_MODULE_FLAG) && (str = str*"-bare") - end - if k in KSet"tuple call dotcall macrocall vect curly braces <: >:" && - has_flags(head, TRAILING_COMMA_FLAG) - str *= "-," + # Handle numeric flags for nrow/ncat/typed_ncat nodes + if k in KSet"nrow ncat typed_ncat" + n = numeric_flags(head) + n != 0 && (str = str*"-"*string(n)) + else + # Handle head-specific flags that overlap with numeric flags + if k in KSet"string cmdstring Identifier" + has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"-s") + has_flags(head, RAW_STRING_FLAG) && (str = str*"-r") + elseif k in KSet"tuple block macrocall" + has_flags(head, PARENS_FLAG) && (str = str*"-p") + elseif k == K"quote" + has_flags(head, COLON_QUOTE) && (str = str*"-:") + elseif k == K"toplevel" + has_flags(head, TOPLEVEL_SEMICOLONS_FLAG) && (str = str*"-;") + elseif k == K"function" + has_flags(head, SHORT_FORM_FUNCTION_FLAG) && (str = str*"-=") + elseif k == K"struct" + has_flags(head, MUTABLE_FLAG) && (str = str*"-mut") + elseif k == K"module" + has_flags(head, BARE_MODULE_FLAG) && (str = str*"-bare") + end + if k in KSet"tuple call dotcall macrocall vect curly braces <: >:" && + has_flags(head, TRAILING_COMMA_FLAG) + str *= "-," + end end is_suffixed(head) && 
(str = str*"-suf") - n = numeric_flags(head) - n != 0 && (str = str*"-"*string(n)) end str end diff --git a/src/julia/parser.jl b/src/julia/parser.jl index a1a0a96b..49ba902c 100644 --- a/src/julia/parser.jl +++ b/src/julia/parser.jl @@ -1098,8 +1098,8 @@ function parse_where_chain(ps0::ParseState, mark) # x where {y for y in ys} ==> (where x (braces (generator y (iteration (in y ys))))) m = position(ps) bump(ps, TRIVIA_FLAG) - ckind, cflags = parse_cat(ps, K"}", ps.end_symbol) - emit_braces(ps, m, ckind, cflags) + ckind, cflags, dim = parse_cat(ps, K"}", ps.end_symbol) + emit_braces(ps, m, ckind, cflags, dim) emit(ps, mark, K"where") else # x where T ==> (where x T) @@ -1589,7 +1589,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # a [i] ==> (ref a (error-t) i) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) - ckind, cflags = parse_cat(ParseState(ps, end_symbol=true), + ckind, cflags, dim = parse_cat(ParseState(ps, end_symbol=true), K"]", ps.end_symbol) if is_macrocall # @S[a,b] ==> (macrocall @S (vect a b)) @@ -1600,7 +1600,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) #v1.7: @S[a ;; b] ==> (macrocall @S (ncat-2 a b)) #v1.6: @S[a ;; b] ==> (macrocall @S (error (ncat-2 a b))) fix_macro_name_kind!(ps, macro_name_position) - emit(ps, m, ckind, cflags) + emit(ps, m, ckind, cflags | set_numeric_flags(dim)) check_ncat_compat(ps, m, ckind) emit(ps, mark, K"macrocall") is_macrocall = false @@ -1621,7 +1621,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) ckind == K"comprehension" ? K"typed_comprehension" : ckind == K"ncat" ? K"typed_ncat" : internal_error("unrecognized kind in parse_cat ", string(ckind)) - emit(ps, mark, outk, cflags) + emit(ps, mark, outk, cflags | set_numeric_flags(dim)) check_ncat_compat(ps, mark, ckind) end elseif k == K"." 
@@ -2840,7 +2840,7 @@ function parse_array(ps::ParseState, mark, closer, end_is_symbol) if binding_power == typemin(Int) # [x@y ==> (hcat x (error-t ✘ y)) bump_closing_token(ps, closer) - return (K"hcat", EMPTY_FLAGS) + return (K"hcat", 0) end while true (next_dim, next_bp) = parse_array_inner(ps, binding_power, array_order) @@ -2856,9 +2856,9 @@ function parse_array(ps::ParseState, mark, closer, end_is_symbol) binding_power = next_bp end bump_closing_token(ps, closer) - return binding_power == -1 ? (K"vcat", EMPTY_FLAGS) : - binding_power == 0 ? (K"hcat", EMPTY_FLAGS) : - (K"ncat", set_numeric_flags(dim)) + return binding_power == -1 ? (K"vcat", 0) : + binding_power == 0 ? (K"hcat", 0) : + (K"ncat", dim) end # Parse equal and ascending precedence chains of array concatenation operators - @@ -3012,7 +3012,8 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) mark = position(ps) if k == closer # [] ==> (vect) - return parse_vect(ps, closer, false) + ckind, cflags = parse_vect(ps, closer, false) + return (ckind, cflags, 0) elseif k == K";" #v1.8: [;] ==> (ncat-1) #v1.8: [;;] ==> (ncat-2) @@ -3022,7 +3023,7 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) dim, _ = parse_array_separator(ps, Ref(:unknown)) min_supported_version(v"1.8", ps, mark, "empty multidimensional array syntax") bump_closing_token(ps, closer) - return (K"ncat", set_numeric_flags(dim)) + return (K"ncat", EMPTY_FLAGS, dim) end parse_eq_star(ps) k = peek(ps, skip_newlines=true) @@ -3035,15 +3036,18 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) # [x] ==> (vect x) # [x \n ] ==> (vect x) # [x ==> (vect x (error-t)) - parse_vect(ps, closer, prefix_trailing_comma) + ckind, cflags = parse_vect(ps, closer, prefix_trailing_comma) + return (ckind, cflags, 0) elseif k == K"for" # [x for a in as] ==> (comprehension (generator x (iteration (in a as)))) # [x \n\n for a in as] ==> (comprehension (generator x (iteration (in a as)))) - parse_comprehension(ps, mark, closer) + ckind, 
cflags = parse_comprehension(ps, mark, closer) + return (ckind, cflags, 0) else # [x y] ==> (hcat x y) # and other forms; See parse_array. - parse_array(ps, mark, closer, end_is_symbol) + ckind, dim = parse_array(ps, mark, closer, end_is_symbol) + return (ckind, EMPTY_FLAGS, dim) end end @@ -3448,13 +3452,13 @@ function parse_string(ps::ParseState, raw::Bool) emit(ps, mark, string_kind, str_flags) end -function emit_braces(ps, mark, ckind, cflags) +function emit_braces(ps, mark, ckind, cflags, dim=0) if ckind == K"hcat" # {x y} ==> (bracescat (row x y)) emit(ps, mark, K"row", cflags & ~TRAILING_COMMA_FLAG) elseif ckind == K"ncat" # {x ;;; y} ==> (bracescat (nrow-3 x y)) - emit(ps, mark, K"nrow", cflags & ~TRAILING_COMMA_FLAG) + emit(ps, mark, K"nrow", set_numeric_flags(dim)) end check_ncat_compat(ps, mark, ckind) outk = ckind in KSet"vect comprehension" ? K"braces" : K"bracescat" @@ -3638,13 +3642,13 @@ function parse_atom(ps::ParseState, check_identifiers=true, has_unary_prefix=fal parse_paren(ps, check_identifiers, has_unary_prefix) elseif leading_kind == K"[" # cat expression bump(ps, TRIVIA_FLAG) - ckind, cflags = parse_cat(ps, K"]", ps.end_symbol) - emit(ps, mark, ckind, cflags) + ckind, cflags, dim = parse_cat(ps, K"]", ps.end_symbol) + emit(ps, mark, ckind, cflags | set_numeric_flags(dim)) check_ncat_compat(ps, mark, ckind) elseif leading_kind == K"{" # cat expression bump(ps, TRIVIA_FLAG) - ckind, cflags = parse_cat(ps, K"}", ps.end_symbol) - emit_braces(ps, mark, ckind, cflags) + ckind, cflags, dim = parse_cat(ps, K"}", ps.end_symbol) + emit_braces(ps, mark, ckind, cflags, dim) elseif leading_kind == K"@" # macro call # Macro names can be keywords # @end x ==> (macrocall @end x) diff --git a/test/parser.jl b/test/parser.jl index 3e4b8012..f8400928 100644 --- a/test/parser.jl +++ b/test/parser.jl @@ -890,6 +890,8 @@ tests = [ "{x,y,}" => "(braces-, x y)" "{x y}" => "(bracescat (row x y))" ((v=v"1.7",), "{x ;;; y}") => "(bracescat (nrow-3 x y))" + 
((v=v"1.7",), "{a ;; b}") => "(bracescat (nrow-2 a b))" + ((v=v"1.7",), "{a ;;;; b}") => "(bracescat (nrow-4 a b))" # Macro names can be keywords "@end x" => "(macrocall @end x)" # __dot__ macro @@ -929,6 +931,11 @@ tests = [ # Column major ((v=v"1.7",), "[x ; y ;; z ; w ;;; a ; b ;; c ; d]") => "(ncat-3 (nrow-2 (nrow-1 x y) (nrow-1 z w)) (nrow-2 (nrow-1 a b) (nrow-1 c d)))" + # Dimension 4 ncat + ((v=v"1.7",), "[x ;;;; y]") => "(ncat-4 x y)" + ((v=v"1.7",), "[a ; b ;;;; c ; d]") => "(ncat-4 (nrow-1 a b) (nrow-1 c d))" + ((v=v"1.7",), "[a b ; c d ;;;; e f ; g h]") => + "(ncat-4 (nrow-1 (row a b) (row c d)) (nrow-1 (row e f) (row g h)))" # Array separators # Newlines before semicolons are not significant "[a \n ;]" => "(vcat a)"