Skip to content

Commit e6fd15d

Browse files
authored
Fixes for ;; and spaces in array concatenation (#7)
* Detect whether array concatenation is column-major or row-major in the first and second dimensions. Report errors for mixed orderings. * Treat ;; as line continuation when used in hcat * Treat newlines as insignificant when mixed with semicolons as separators.
1 parent e7f06d2 commit e6fd15d

File tree

5 files changed

+99
-47
lines changed

5 files changed

+99
-47
lines changed

src/green_tree.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ function _show_green_node(io, node, indent, pos, str, show_trivia)
7979
if is_leaf
8080
line = string(posstr, indent, summary(node))
8181
else
82-
line = string(posstr, indent, '[', summary(node), "]")
82+
line = string(posstr, indent, '[', summary(node), ']')
8383
end
8484
if !is_trivia(node) && is_leaf
8585
line = rpad(line, 40) * ""

src/parse_stream.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,11 @@ numeric_flags(head::SyntaxHead) = numeric_flags(flags(head))
5151
is_error(head::SyntaxHead) = kind(head) == K"error"
5252

5353
function Base.summary(head::SyntaxHead)
54-
_kind_str(kind(head))
54+
untokenize(head, unique=false, include_flag_suff=false)
5555
end
5656

57-
function untokenize(head::SyntaxHead; include_flag_suff=true)
58-
str = untokenize(kind(head))
57+
function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true)
58+
str = untokenize(kind(head); unique=unique)
5959
if is_dotted(head)
6060
str = "."*str
6161
end

src/parser.jl

Lines changed: 69 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2572,10 +2572,6 @@ end
25722572
# Mismatched rows
25732573
# [x y ; z] ==> (vcat (row x y) z)
25742574
#
2575-
# Double semicolon with spaces allowed (only) for line continuation
2576-
#v1.7: [x y ;;\n z w] ==> (hcat x y z w)
2577-
#v1.7: [x y ;; z w] ==> (hcat x y (error) z w)
2578-
#
25792575
# Single elements in rows
25802576
#v1.7: [x ; y ;; z ] ==> (ncat-2 (nrow-1 x y) z)
25812577
#v1.7: [x y ;;; z ] ==> (ncat-3 (row x y) z)
@@ -2592,6 +2588,7 @@ end
25922588
function parse_array(ps::ParseState, mark, closer, end_is_symbol)
25932589
ps = ParseState(ps, end_symbol=end_is_symbol)
25942590

2591+
array_order = Ref(:unknown)
25952592
# Outer array parsing loop - parse chain of separators with descending
25962593
# precedence such as
25972594
#v1.7: [a ; b ;; c ;;; d ;;;; e] ==> (ncat-4 (ncat-3 (ncat-2 (ncat-1 a b) c) d) e)
@@ -2604,9 +2601,9 @@ function parse_array(ps::ParseState, mark, closer, end_is_symbol)
26042601
#
26052602
# For an excellent overview of Pratt parsing, see
26062603
# https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
2607-
(dim, binding_power) = parse_array_separator(ps)
2604+
(dim, binding_power) = parse_array_separator(ps, array_order)
26082605
while true
2609-
(next_dim, next_bp) = parse_array_inner(ps, binding_power)
2606+
(next_dim, next_bp) = parse_array_inner(ps, binding_power, array_order)
26102607
if next_bp == typemin(Int)
26112608
break
26122609
end
@@ -2624,20 +2621,20 @@ function parse_array(ps::ParseState, mark, closer, end_is_symbol)
26242621
(K"ncat", set_numeric_flags(dim))
26252622
end
26262623

2627-
# Parse equal and ascending precedence chains of array concatenation operators
2628-
# (semicolons, newlines and whitespace). Invariants:
2624+
# Parse equal and ascending precedence chains of array concatenation operators -
2625+
# semicolons, newlines and whitespace. Invariants:
26292626
#
26302627
# * The caller must have already consumed
26312628
# - The left hand side
2632-
# - The concatenation operator, providing the current binding_power.
2633-
# So eg, we're here in the input stream
2629+
# - The concatenation operator, providing `binding_power`.
2630+
# So eg, we're here in the input stream, either at an element or closing token
26342631
# |
26352632
# [a ;; b ; c ]
26362633
# [a ;; ]
26372634
#
26382635
# * The caller must call emit() to delimit the AST node for this binding power.
26392636
#
2640-
function parse_array_inner(ps, binding_power)
2637+
function parse_array_inner(ps, binding_power, array_order)
26412638
mark = NO_POSITION
26422639
dim = -1
26432640
bp = binding_power
@@ -2655,13 +2652,13 @@ function parse_array_inner(ps, binding_power)
26552652
# Parse one expression
26562653
mark = position(ps)
26572654
parse_eq_star(ps)
2658-
(next_dim, next_bp) = parse_array_separator(ps)
2655+
(next_dim, next_bp) = parse_array_separator(ps, array_order)
26592656
else # bp > binding_power
26602657
# Recurse to parse a separator with greater binding power. Eg:
26612658
# [a ;; b ; c ]
26622659
# | ^------ the next input is here
26632660
# '---------- the mark is here
2664-
(next_dim, next_bp) = parse_array_inner(ps, bp)
2661+
(next_dim, next_bp) = parse_array_inner(ps, bp, array_order)
26652662
if bp == 0
26662663
emit(ps, mark, K"row")
26672664
else
@@ -2674,46 +2671,83 @@ end
26742671

26752672
# Parse a separator in an array concatenation
26762673
#
2677-
# Here we aim to identify:
2674+
# Here we return a tuple (dim, binding_power) containing
26782675
# * Dimension on which the next separator acts
26792676
# * Binding power (precedence) of the separator, where whitespace binds
26802677
# tightest: ... < `;;;` < `;;` < `;`,`\n` < whitespace. We choose binding
26812678
# power of 0 for whitespace and negative numbers for other separators.
26822679
#
26832680
# FIXME: Error messages for mixed spaces and ;; delimiters
2684-
function parse_array_separator(ps; skip_newlines=false)
2685-
t = peek_token(ps; skip_newlines=skip_newlines)
2686-
k = kind(t)
2687-
if k == K";"
2681+
function parse_array_separator(ps, array_order)
2682+
sep_mismatch_err = "cannot mix space and ;; separators in an array expression, except to wrap a line"
2683+
mark = position(ps)
2684+
t = peek_token(ps, skip_newlines=true)
2685+
if kind(t) == K";"
2686+
# Newlines before semicolons are not significant
2687+
# [a \n ;] ==> (vcat a)
2688+
bump_trivia(ps)
26882689
n_semis = 1
26892690
while true
2690-
bump(ps, TRIVIA_FLAG; skip_newlines=skip_newlines)
2691+
bump(ps, TRIVIA_FLAG)
26912692
t = peek_token(ps)
2692-
if kind(t) != K";" || t.had_whitespace
2693+
if kind(t) != K";"
26932694
break
26942695
end
2696+
if t.had_whitespace
2697+
bump_disallowed_space(ps)
2698+
end
26952699
n_semis += 1
26962700
end
2697-
# FIXME - following is ncat, not line continuation
2698-
# [a ;; \n c]
2699-
if n_semis == 2 && peek(ps) == K"NewlineWs"
2700-
# Line continuation
2701-
# [a b ;; \n \n c]
2702-
while peek(ps) == K"NewlineWs"
2703-
bump(ps, TRIVIA_FLAG)
2701+
had_newline = peek(ps) == K"NewlineWs"
2702+
# Newlines after semicolons are not significant
2703+
# [a ; \n] ==> (vcat a)
2704+
# [a ; \n\n b] ==> (vcat a b)
2705+
#v1.7: [a ;; \n b] ==> (ncat-2 a b)
2706+
bump_trivia(ps)
2707+
if n_semis == 2
2708+
if array_order[] === :row_major
2709+
if had_newline
2710+
# In hcat with spaces as separators, `;;` is a line
2711+
# continuation character
2712+
#v1.7: [a b ;; \n c] ==> (hcat a b c)
2713+
#v1.7: [a b \n ;; c] ==> (ncat-2 (row a b (error-t)) c)
2714+
return (2, 0)
2715+
else
2716+
# Can't mix spaces and multiple ;;
2717+
#v1.7: [a b ;; c] ==> (ncat-2 (row a b (error-t)) c)
2718+
emit(ps, mark, K"error", TRIVIA_FLAG, error=sep_mismatch_err)
2719+
end
2720+
else
2721+
array_order[] = :column_major
27042722
end
2705-
return (2, 0)
2706-
else
2707-
return (n_semis, -n_semis)
27082723
end
2709-
elseif k == K"NewlineWs"
2724+
return (n_semis, -n_semis)
2725+
end
2726+
t = peek_token(ps)
2727+
k = kind(t)
2728+
if k == K"NewlineWs"
27102729
bump_trivia(ps)
2711-
# Newlines separate the first dimension
2730+
# Treat a linebreak prior to a value as a semicolon (ie, separator for
2731+
# the first dimension) if no previous semicolons observed
2732+
# [a \n b] ==> (vcat a b)
2733+
return (1, -1)
2734+
elseif k == K","
2735+
# Treat `,` as semicolon for the purposes of recovery
2736+
# [a; b, c] ==> (vcat a b (error-t) c)
2737+
bump(ps, TRIVIA_FLAG, error="unexpected comma in array expression")
27122738
return (1, -1)
27132739
else
27142740
if t.had_whitespace && !is_closing_token(ps, k)
2741+
if array_order[] === :column_major
2742+
# Can't mix multiple ;'s and spaces
2743+
#v1.7: [a ;; b c] ==> (ncat-2 a (row b (error-t) c))
2744+
bump_trivia(ps, TRIVIA_FLAG, error=sep_mismatch_err)
2745+
else
2746+
array_order[] = :row_major
2747+
end
27152748
return (2, 0)
27162749
else
2750+
# Something else; use typemin to exit array parsing
27172751
return (typemin(Int), typemin(Int))
27182752
end
27192753
end
@@ -2739,10 +2773,11 @@ function parse_cat(ps::ParseState, closer, end_is_symbol)
27392773
#v1.8: [;;] ==> (ncat-2)
27402774
#v1.8: [\n ;; \n ] ==> (ncat-2)
27412775
#v1.7: [;;] ==> (ncat-2 (error))
2742-
n_semis, _ = parse_array_separator(ps; skip_newlines=true)
2776+
bump_trivia(ps)
2777+
dim, _ = parse_array_separator(ps, Ref(:unknown))
27432778
min_supported_version(v"1.8", ps, mark, "empty multidimensional array syntax")
27442779
bump_closing_token(ps, closer)
2745-
return (K"ncat", set_numeric_flags(n_semis))
2780+
return (K"ncat", set_numeric_flags(dim))
27462781
end
27472782
parse_eq_star(ps)
27482783
k = peek(ps, skip_newlines=true)

src/tokens.jl

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -113,15 +113,17 @@ function is_whitespace(t)
113113
kind(t) in (K"Whitespace", K"NewlineWs")
114114
end
115115

116-
function _kind_str(k::Kind)
117-
_kind_to_str[k]
118-
end
119-
120116
"""
121117
Return the string representation of a token kind, or `nothing` if the kind
122118
represents a class of tokens like K"Identifier".
119+
120+
When `unique=true` only return a string when the kind uniquely defines the
121+
corresponding input token, otherwise return `nothing`. When `unique=false`,
122+
return the name of the kind.
123+
124+
TODO: Replace `untokenize()` with `Base.string()`?
123125
"""
124-
function untokenize(k::Kind)
125-
get(_kind_to_str_unique, k, nothing)
126+
function untokenize(k::Kind; unique=true)
127+
get(unique ? _kind_to_str_unique : _kind_to_str, k, nothing)
126128
end
127129

test/parser.jl

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -625,9 +625,6 @@ tests = [
625625
"[x y]" => "(hcat x y)"
626626
# Mismatched rows
627627
"[x y ; z]" => "(vcat (row x y) z)"
628-
# Double semicolon with spaces allowed (only) for line continuation
629-
"[x y ;;\n z w]" => "(hcat x y z w)"
630-
# "[x y ;; z w]" => "(hcat x y (error) z w)" # FIXME
631628
# Single elements in rows
632629
((v=v"1.7",), "[x ; y ;; z ]") => "(ncat-2 (nrow-1 x y) z)"
633630
((v=v"1.7",), "[x y ;;; z ]") => "(ncat-3 (row x y) z)"
@@ -638,6 +635,24 @@ tests = [
638635
# Column major
639636
((v=v"1.7",), "[x ; y ;; z ; w ;;; a ; b ;; c ; d]") =>
640637
"(ncat-3 (nrow-2 (nrow-1 x y) (nrow-1 z w)) (nrow-2 (nrow-1 a b) (nrow-1 c d)))"
638+
# Array separators
639+
# Newlines before semicolons are not significant
640+
"[a \n ;]" => "(vcat a)"
641+
# Newlines after semicolons are not significant
642+
"[a ; \n]" => "(vcat a)"
643+
"[a ; \n\n b]" => "(vcat a b)"
644+
((v=v"1.7",), "[a ;; \n b]") => "(ncat-2 a b)"
645+
# In hcat with spaces as separators, `;;` is a line
646+
# continuation character
647+
((v=v"1.7",), "[a b ;; \n c]") => "(hcat a b c)"
648+
((v=v"1.7",), "[a b \n ;; c]") => "(ncat-2 (row a b (error-t)) c)"
649+
# Can't mix spaces and multiple ;'s
650+
((v=v"1.7",), "[a b ;; c]") => "(ncat-2 (row a b (error-t)) c)"
651+
# Treat a linebreak prior to a value as a semicolon (ie, separator for
652+
# the first dimension) if no previous semicolons observed
653+
"[a \n b]" => "(vcat a b)"
654+
# Can't mix multiple ;'s and spaces
655+
((v=v"1.7",), "[a ;; b c]") => "(ncat-2 a (row b (error-t) c))"
641656
# Empty nd arrays
642657
((v=v"1.8",), "[;]") => "(ncat-1)"
643658
((v=v"1.8",), "[;;]") => "(ncat-2)"

0 commit comments

Comments
 (0)