@@ -2572,10 +2572,6 @@ end
25722572# Mismatched rows
25732573# [x y ; z] ==> (vcat (row x y) z)
25742574#
2575- # Double semicolon with spaces allowed (only) for line continuation
2576- # v1.7: [x y ;;\n z w] ==> (hcat x y z w)
2577- # v1.7: [x y ;; z w] ==> (hcat x y (error) z w)
2578- #
25792575# Single elements in rows
25802576# v1.7: [x ; y ;; z ] ==> (ncat-2 (nrow-1 x y) z)
25812577# v1.7: [x y ;;; z ] ==> (ncat-3 (row x y) z)
@@ -2592,6 +2588,7 @@ end
25922588function parse_array (ps:: ParseState , mark, closer, end_is_symbol)
25932589 ps = ParseState (ps, end_symbol= end_is_symbol)
25942590
2591+ array_order = Ref (:unknown )
25952592 # Outer array parsing loop - parse chain of separators with descending
25962593 # precedence such as
25972594 # v1.7: [a ; b ;; c ;;; d ;;;; e] ==> (ncat-4 (ncat-3 (ncat-2 (ncat-1 a b) c) d) e)
@@ -2604,9 +2601,9 @@ function parse_array(ps::ParseState, mark, closer, end_is_symbol)
26042601 #
26052602 # For an excellent overview of Pratt parsing, see
26062603 # https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
2607- (dim, binding_power) = parse_array_separator (ps)
2604+ (dim, binding_power) = parse_array_separator (ps, array_order )
26082605 while true
2609- (next_dim, next_bp) = parse_array_inner (ps, binding_power)
2606+ (next_dim, next_bp) = parse_array_inner (ps, binding_power, array_order )
26102607 if next_bp == typemin (Int)
26112608 break
26122609 end
@@ -2624,20 +2621,20 @@ function parse_array(ps::ParseState, mark, closer, end_is_symbol)
26242621 (K " ncat" , set_numeric_flags (dim))
26252622end
26262623
2627- # Parse equal and ascending precedence chains of array concatenation operators
2628- # ( semicolons, newlines and whitespace) . Invariants:
2624+ # Parse equal and ascending precedence chains of array concatenation operators -
2625+ # semicolons, newlines and whitespace. Invariants:
26292626#
26302627# * The caller must have already consumed
26312628# - The left hand side
2632- # - The concatenation operator, providing the current binding_power.
2633- # So eg, we're here in the input stream
2629+ # - The concatenation operator, providing `binding_power`.
2630+ # So e.g., we're here in the input stream, either at an element or closing token
26342631# |
26352632# [a ;; b ; c ]
26362633# [a ;; ]
26372634#
26382635# * The caller must call emit() to delimit the AST node for this binding power.
26392636#
2640- function parse_array_inner (ps, binding_power)
2637+ function parse_array_inner (ps, binding_power, array_order )
26412638 mark = NO_POSITION
26422639 dim = - 1
26432640 bp = binding_power
@@ -2655,13 +2652,13 @@ function parse_array_inner(ps, binding_power)
26552652 # Parse one expression
26562653 mark = position (ps)
26572654 parse_eq_star (ps)
2658- (next_dim, next_bp) = parse_array_separator (ps)
2655+ (next_dim, next_bp) = parse_array_separator (ps, array_order )
26592656 else # bp > binding_power
26602657 # Recurse to parse a separator with greater binding power. Eg:
26612658 # [a ;; b ; c ]
26622659 # | ^------ the next input is here
26632660 # '---------- the mark is here
2664- (next_dim, next_bp) = parse_array_inner (ps, bp)
2661+ (next_dim, next_bp) = parse_array_inner (ps, bp, array_order )
26652662 if bp == 0
26662663 emit (ps, mark, K " row" )
26672664 else
@@ -2674,46 +2671,83 @@ end
26742671
26752672# Parse a separator in an array concatenation
26762673#
2677- # Here we aim to identify:
2674+ # Here we return a tuple (dim, binding_power) containing
26782675# * Dimension on which the next separator acts
26792676# * Binding power (precedence) of the separator, where whitespace binds
26802677# tightest: ... < `;;;` < `;;` < `;`,`\n` < whitespace. We choose binding
26812678# power of 0 for whitespace and negative numbers for other separators.
26822679#
26832680# FIXME : Error messages for mixed spaces and ;; delimiters
2684- function parse_array_separator (ps; skip_newlines= false )
2685- t = peek_token (ps; skip_newlines= skip_newlines)
2686- k = kind (t)
2687- if k == K " ;"
2681+ function parse_array_separator (ps, array_order)
2682+ sep_mismatch_err = " cannot mix space and ;; separators in an array expression, except to wrap a line"
2683+ mark = position (ps)
2684+ t = peek_token (ps, skip_newlines= true )
2685+ if kind (t) == K " ;"
2686+ # Newlines before semicolons are not significant
2687+ # [a \n ;] ==> (vcat a)
2688+ bump_trivia (ps)
26882689 n_semis = 1
26892690 while true
2690- bump (ps, TRIVIA_FLAG; skip_newlines = skip_newlines )
2691+ bump (ps, TRIVIA_FLAG)
26912692 t = peek_token (ps)
2692- if kind (t) != K " ;" || t . had_whitespace
2693+ if kind (t) != K " ;"
26932694 break
26942695 end
2696+ if t. had_whitespace
2697+ bump_disallowed_space (ps)
2698+ end
26952699 n_semis += 1
26962700 end
2697- # FIXME - following is ncat, not line continuation
2698- # [a ;; \n c]
2699- if n_semis == 2 && peek (ps) == K " NewlineWs"
2700- # Line continuation
2701- # [a b ;; \n \n c]
2702- while peek (ps) == K " NewlineWs"
2703- bump (ps, TRIVIA_FLAG)
2701+ had_newline = peek (ps) == K " NewlineWs"
2702+ # Newlines after semicolons are not significant
2703+ # [a ; \n] ==> (vcat a)
2704+ # [a ; \n\n b] ==> (vcat a b)
2705+ # v1.7: [a ;; \n b] ==> (ncat-2 a b)
2706+ bump_trivia (ps)
2707+ if n_semis == 2
2708+ if array_order[] === :row_major
2709+ if had_newline
2710+ # In hcat with spaces as separators, `;;` followed by a newline is a
2711+ # line continuation; a newline *before* the `;;` does not count
2712+ # v1.7: [a b ;; \n c] ==> (hcat a b c)
2713+ # v1.7: [a b \n ;; c] ==> (ncat-2 (row a b (error-t)) c)
2714+ return (2 , 0 )
2715+ else
2716+ # Can't mix spaces and multiple ;;
2717+ # v1.7: [a b ;; c] ==> (ncat-2 (row a b (error-t)) c)
2718+ emit (ps, mark, K " error" , TRIVIA_FLAG, error= sep_mismatch_err)
2719+ end
2720+ else
2721+ array_order[] = :column_major
27042722 end
2705- return (2 , 0 )
2706- else
2707- return (n_semis, - n_semis)
27082723 end
2709- elseif k == K " NewlineWs"
2724+ return (n_semis, - n_semis)
2725+ end
2726+ t = peek_token (ps)
2727+ k = kind (t)
2728+ if k == K " NewlineWs"
27102729 bump_trivia (ps)
2711- # Newlines separate the first dimension
2730+ # Treat a linebreak prior to a value as a semicolon (i.e., separator for
2731+ # the first dimension) when no semicolon adjoins the newline
2732+ # [a \n b] ==> (vcat a b)
2733+ return (1 , - 1 )
2734+ elseif k == K " ,"
2735+ # Treat `,` as semicolon for the purposes of recovery
2736+ # [a; b, c] ==> (vcat a b (error-t) c)
2737+ bump (ps, TRIVIA_FLAG, error= " unexpected comma in array expression" )
27122738 return (1 , - 1 )
27132739 else
27142740 if t. had_whitespace && ! is_closing_token (ps, k)
2741+ if array_order[] === :column_major
2742+ # Can't mix multiple ;'s and spaces
2743+ # v1.7: [a ;; b c] ==> (ncat-2 a (row b (error-t) c))
2744+ bump_trivia (ps, TRIVIA_FLAG, error= sep_mismatch_err)
2745+ else
2746+ array_order[] = :row_major
2747+ end
27152748 return (2 , 0 )
27162749 else
2750+ # Something else; use typemin to exit array parsing
27172751 return (typemin (Int), typemin (Int))
27182752 end
27192753 end
@@ -2739,10 +2773,11 @@ function parse_cat(ps::ParseState, closer, end_is_symbol)
27392773 # v1.8: [;;] ==> (ncat-2)
27402774 # v1.8: [\n ;; \n ] ==> (ncat-2)
27412775 # v1.7: [;;] ==> (ncat-2 (error))
2742- n_semis, _ = parse_array_separator (ps; skip_newlines= true )
2776+ bump_trivia (ps)
2777+ dim, _ = parse_array_separator (ps, Ref (:unknown ))
27432778 min_supported_version (v " 1.8" , ps, mark, " empty multidimensional array syntax" )
27442779 bump_closing_token (ps, closer)
2745- return (K " ncat" , set_numeric_flags (n_semis ))
2780+ return (K " ncat" , set_numeric_flags (dim ))
27462781 end
27472782 parse_eq_star (ps)
27482783 k = peek (ps, skip_newlines= true )
0 commit comments