@@ -2572,10 +2572,6 @@ end
2572
2572
# Mismatched rows
2573
2573
# [x y ; z] ==> (vcat (row x y) z)
2574
2574
#
2575
- # Double semicolon with spaces allowed (only) for line continuation
2576
- # v1.7: [x y ;;\n z w] ==> (hcat x y z w)
2577
- # v1.7: [x y ;; z w] ==> (hcat x y (error) z w)
2578
- #
2579
2575
# Single elements in rows
2580
2576
# v1.7: [x ; y ;; z ] ==> (ncat-2 (nrow-1 x y) z)
2581
2577
# v1.7: [x y ;;; z ] ==> (ncat-3 (row x y) z)
@@ -2592,6 +2588,7 @@ end
2592
2588
function parse_array (ps:: ParseState , mark, closer, end_is_symbol)
2593
2589
ps = ParseState (ps, end_symbol= end_is_symbol)
2594
2590
2591
+ array_order = Ref (:unknown )
2595
2592
# Outer array parsing loop - parse chain of separators with descending
2596
2593
# precedence such as
2597
2594
# v1.7: [a ; b ;; c ;;; d ;;;; e] ==> (ncat-4 (ncat-3 (ncat-2 (ncat-1 a b) c) d) e)
@@ -2604,9 +2601,9 @@ function parse_array(ps::ParseState, mark, closer, end_is_symbol)
2604
2601
#
2605
2602
# For an excellent overview of Pratt parsing, see
2606
2603
# https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
2607
- (dim, binding_power) = parse_array_separator (ps)
2604
+ (dim, binding_power) = parse_array_separator (ps, array_order )
2608
2605
while true
2609
- (next_dim, next_bp) = parse_array_inner (ps, binding_power)
2606
+ (next_dim, next_bp) = parse_array_inner (ps, binding_power, array_order )
2610
2607
if next_bp == typemin (Int)
2611
2608
break
2612
2609
end
@@ -2624,20 +2621,20 @@ function parse_array(ps::ParseState, mark, closer, end_is_symbol)
2624
2621
(K " ncat" , set_numeric_flags (dim))
2625
2622
end
2626
2623
2627
- # Parse equal and ascending precedence chains of array concatenation operators
2628
- # ( semicolons, newlines and whitespace) . Invariants:
2624
+ # Parse equal and ascending precedence chains of array concatenation operators -
2625
+ # semicolons, newlines and whitespace. Invariants:
2629
2626
#
2630
2627
# * The caller must have already consumed
2631
2628
# - The left hand side
2632
- # - The concatenation operator, providing the current binding_power.
2633
- # So eg, we're here in the input stream
2629
+ # - The concatenation operator, providing `binding_power`.
2630
+ # So e.g., we're here in the input stream, either at an element or a closing token
2634
2631
# |
2635
2632
# [a ;; b ; c ]
2636
2633
# [a ;; ]
2637
2634
#
2638
2635
# * The caller must call emit() to delimit the AST node for this binding power.
2639
2636
#
2640
- function parse_array_inner (ps, binding_power)
2637
+ function parse_array_inner (ps, binding_power, array_order )
2641
2638
mark = NO_POSITION
2642
2639
dim = - 1
2643
2640
bp = binding_power
@@ -2655,13 +2652,13 @@ function parse_array_inner(ps, binding_power)
2655
2652
# Parse one expression
2656
2653
mark = position (ps)
2657
2654
parse_eq_star (ps)
2658
- (next_dim, next_bp) = parse_array_separator (ps)
2655
+ (next_dim, next_bp) = parse_array_separator (ps, array_order )
2659
2656
else # bp > binding_power
2660
2657
# Recurse to parse a separator with greater binding power. Eg:
2661
2658
# [a ;; b ; c ]
2662
2659
# | ^------ the next input is here
2663
2660
# '---------- the mark is here
2664
- (next_dim, next_bp) = parse_array_inner (ps, bp)
2661
+ (next_dim, next_bp) = parse_array_inner (ps, bp, array_order )
2665
2662
if bp == 0
2666
2663
emit (ps, mark, K " row" )
2667
2664
else
@@ -2674,46 +2671,83 @@ end
2674
2671
2675
2672
# Parse a separator in an array concatenation
2676
2673
#
2677
- # Here we aim to identify:
2674
+ # Here we return a tuple (dim, binding_power) containing
2678
2675
# * Dimension on which the next separator acts
2679
2676
# * Binding power (precedence) of the separator, where whitespace binds
2680
2677
# tightest: ... < `;;;` < `;;` < `;`,`\n` < whitespace. We choose binding
2681
2678
# power of 0 for whitespace and negative numbers for other separators.
2682
2679
#
2683
2680
# FIXME: Error messages for mixed spaces and ;; delimiters
2684
- function parse_array_separator (ps; skip_newlines= false )
2685
- t = peek_token (ps; skip_newlines= skip_newlines)
2686
- k = kind (t)
2687
- if k == K " ;"
2681
+ function parse_array_separator (ps, array_order)
2682
+ sep_mismatch_err = " cannot mix space and ;; separators in an array expression, except to wrap a line"
2683
+ mark = position (ps)
2684
+ t = peek_token (ps, skip_newlines= true )
2685
+ if kind (t) == K " ;"
2686
+ # Newlines before semicolons are not significant
2687
+ # [a \n ;] ==> (vcat a)
2688
+ bump_trivia (ps)
2688
2689
n_semis = 1
2689
2690
while true
2690
- bump (ps, TRIVIA_FLAG; skip_newlines = skip_newlines )
2691
+ bump (ps, TRIVIA_FLAG)
2691
2692
t = peek_token (ps)
2692
- if kind (t) != K " ;" || t . had_whitespace
2693
+ if kind (t) != K " ;"
2693
2694
break
2694
2695
end
2696
+ if t. had_whitespace
2697
+ bump_disallowed_space (ps)
2698
+ end
2695
2699
n_semis += 1
2696
2700
end
2697
- # FIXME - following is ncat, not line continuation
2698
- # [a ;; \n c]
2699
- if n_semis == 2 && peek (ps) == K " NewlineWs"
2700
- # Line continuation
2701
- # [a b ;; \n \n c]
2702
- while peek (ps) == K " NewlineWs"
2703
- bump (ps, TRIVIA_FLAG)
2701
+ had_newline = peek (ps) == K " NewlineWs"
2702
+ # Newlines after semicolons are not significant
2703
+ # [a ; \n] ==> (vcat a)
2704
+ # [a ; \n\n b] ==> (vcat a b)
2705
+ # v1.7: [a ;; \n b] ==> (ncat-2 a b)
2706
+ bump_trivia (ps)
2707
+ if n_semis == 2
2708
+ if array_order[] === :row_major
2709
+ if had_newline
2710
+ # In hcat with spaces as separators, `;;` is a line
2711
+ # continuation character
2712
+ # v1.7: [a b ;; \n c] ==> (hcat a b c)
2713
+ # v1.7: [a b \n ;; c] ==> (ncat-2 (row a b (error-t)) c)
2714
+ return (2 , 0 )
2715
+ else
2716
+ # Can't mix spaces and multiple ;;
2717
+ # v1.7: [a b ;; c] ==> (ncat-2 (row a b (error-t)) c)
2718
+ emit (ps, mark, K " error" , TRIVIA_FLAG, error= sep_mismatch_err)
2719
+ end
2720
+ else
2721
+ array_order[] = :column_major
2704
2722
end
2705
- return (2 , 0 )
2706
- else
2707
- return (n_semis, - n_semis)
2708
2723
end
2709
- elseif k == K " NewlineWs"
2724
+ return (n_semis, - n_semis)
2725
+ end
2726
+ t = peek_token (ps)
2727
+ k = kind (t)
2728
+ if k == K " NewlineWs"
2710
2729
bump_trivia (ps)
2711
- # Newlines separate the first dimension
2730
+ # Treat a linebreak prior to a value as a semicolon (i.e., a separator for
2731
+ # the first dimension) if no previous semicolons were observed
2732
+ # [a \n b] ==> (vcat a b)
2733
+ return (1 , - 1 )
2734
+ elseif k == K " ,"
2735
+ # Treat `,` as semicolon for the purposes of recovery
2736
+ # [a; b, c] ==> (vcat a b (error-t) c)
2737
+ bump (ps, TRIVIA_FLAG, error= " unexpected comma in array expression" )
2712
2738
return (1 , - 1 )
2713
2739
else
2714
2740
if t. had_whitespace && ! is_closing_token (ps, k)
2741
+ if array_order[] === :column_major
2742
+ # Can't mix multiple ;'s and spaces
2743
+ # v1.7: [a ;; b c] ==> (ncat-2 a (row b (error-t) c))
2744
+ bump_trivia (ps, TRIVIA_FLAG, error= sep_mismatch_err)
2745
+ else
2746
+ array_order[] = :row_major
2747
+ end
2715
2748
return (2 , 0 )
2716
2749
else
2750
+ # Something else; use typemin to exit array parsing
2717
2751
return (typemin (Int), typemin (Int))
2718
2752
end
2719
2753
end
@@ -2739,10 +2773,11 @@ function parse_cat(ps::ParseState, closer, end_is_symbol)
2739
2773
# v1.8: [;;] ==> (ncat-2)
2740
2774
# v1.8: [\n ;; \n ] ==> (ncat-2)
2741
2775
# v1.7: [;;] ==> (ncat-2 (error))
2742
- n_semis, _ = parse_array_separator (ps; skip_newlines= true )
2776
+ bump_trivia (ps)
2777
+ dim, _ = parse_array_separator (ps, Ref (:unknown ))
2743
2778
min_supported_version (v " 1.8" , ps, mark, " empty multidimensional array syntax" )
2744
2779
bump_closing_token (ps, closer)
2745
- return (K " ncat" , set_numeric_flags (n_semis ))
2780
+ return (K " ncat" , set_numeric_flags (dim ))
2746
2781
end
2747
2782
parse_eq_star (ps)
2748
2783
k = peek (ps, skip_newlines= true )
0 commit comments