Skip to content

Commit c606e9d

Browse files
authored
Merge branch 'main' into sp/ternary-unexpected-kw
2 parents c67f95c + 82b9705 commit c606e9d

File tree

11 files changed

+342
-227
lines changed

11 files changed

+342
-227
lines changed

README.md

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,33 @@ julia> text = "x = \"\"\"\n \$a\n b\"\"\""
435435
21:23 │ """ "\"\"\""
436436
```
437437

438+
### Less redundant `block`s
439+
440+
Sometimes `Expr` needs to contain redundant block constructs in order to have a
441+
place to store `LineNumberNode`s, but we don't need these and avoid adding them
442+
in several cases:
443+
* The right hand side of short form function syntax
444+
* The conditional in `elseif`
445+
* The body of anonymous functions after the `->`
446+
447+
### Distinct conditional ternary expression
448+
449+
The syntax `a ? b : c` is the same as `if a b else c` in `Expr`, so macros can't
450+
distinguish these cases. Instead, we use a distinct expression head `K"?"` and
451+
lower to `Expr(:if)` during `Expr` conversion.
452+
453+
### String nodes always wrapped in `K"string"` or `K"cmdstring"`
454+
455+
All strings are surrounded by a node of kind `K"string"`, even non-interpolated
456+
literals, so `"x"` parses as `(string "x")`. This makes string handling simpler
457+
and more systematic because interpolations and triple strings with embedded
458+
trivia don't need to be treated differently. It also gives a container in which
459+
to attach the delimiting quotes.
460+
461+
The same goes for command strings, which are always wrapped in `K"cmdstring"`
462+
regardless of whether they have multiple pieces (due to triple-quoted
463+
dedenting) or otherwise.
464+
438465
## More about syntax kinds
439466

440467
We generally track the type of syntax nodes with a syntax "kind", stored
@@ -549,6 +576,25 @@ name of compatibility, perhaps with a warning.)
549576
arises from `(set! pred char-hex?)` in `parse-number` accepting hex exponent
550577
digits, all of which are detected as invalid except for a trailing `f` when
551578
processed by `isnumtok_base`.
579+
* `begin` and `end` are not parsed as keywords when indexing. Typed comprehensions
580+
initially look the same, but can be distinguished from indexing once we handle
581+
a `for` token; it is safe to treat `begin` and `end` as keywords afterwards. The
582+
reference parser *only* handles this well when there's a newline before `for`:
583+
```julia
584+
Any[foo(i)
585+
for i in x if begin
586+
true
587+
end
588+
]
589+
```
590+
works, while
591+
```julia
592+
Any[foo(i) for i in x if begin
593+
true
594+
end
595+
]
596+
```
597+
does not. JuliaSyntax handles both cases.
552598

553599
## Parsing / AST oddities and warts
554600

@@ -775,7 +821,7 @@ Here are a few links to relevant Julia issues.
775821
#### Lowering
776822

777823
* A partial implementation of lowering in Julia https://github.com/JuliaLang/julia/pull/32201
778-
some of this should be ported.
824+
some of this should be ported. (Last commit at https://github.com/JuliaLang/julia/tree/df61138fcf97d03dcbbba10e962571af9700db56/ )
779825
* The closure capture problem https://github.com/JuliaLang/julia/issues/15276
780826
would be interesting to see whether we can tackle some of the harder cases in
781827
a new implementation.

src/expr.jl

Lines changed: 60 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@ function is_eventually_call(ex)
66
is_eventually_call(ex.args[1]))
77
end
88

9+
function is_stringchunk(node)
10+
k = kind(node)
11+
return k == K"String" || k == K"CmdString"
12+
end
13+
914
function _to_expr(node::SyntaxNode, iteration_spec=false, need_linenodes=true)
1015
if !haschildren(node)
1116
val = node.val
@@ -24,10 +29,62 @@ function _to_expr(node::SyntaxNode, iteration_spec=false, need_linenodes=true)
2429
return val
2530
end
2631
end
27-
headstr = untokenize(head(node), include_flag_suff=false)
28-
headsym = !isnothing(headstr) ? Symbol(headstr) :
29-
error("Can't untokenize head of kind $(kind(node))")
32+
if kind(node) == K"?"
33+
headsym = :if
34+
else
35+
headstr = untokenize(head(node), include_flag_suff=false)
36+
headsym = !isnothing(headstr) ? Symbol(headstr) :
37+
error("Can't untokenize head of kind $(kind(node))")
38+
end
3039
node_args = children(node)
40+
if headsym == :string || headsym == :cmdstring
41+
# Julia string literals may be interspersed with trivia in two situations:
42+
# 1. Triple quoted string indentation is trivia
43+
# 2. An \ before newline removes the newline and any following indentation
44+
#
45+
# Such trivia is eagerly removed by the reference parser, so here we
46+
# concatenate adjacent string chunks together for compatibility.
47+
args = Vector{Any}()
48+
i = 1
49+
while i <= length(node_args)
50+
if is_stringchunk(node_args[i])
51+
if i < length(node_args) && is_stringchunk(node_args[i+1])
52+
buf = IOBuffer()
53+
while i <= length(node_args) && is_stringchunk(node_args[i])
54+
write(buf, node_args[i].val)
55+
i += 1
56+
end
57+
push!(args, String(take!(buf)))
58+
else
59+
push!(args, node_args[i].val)
60+
i += 1
61+
end
62+
else
63+
e = _to_expr(node_args[i])
64+
if e isa String && headsym == :string
65+
# Wrap interpolated literal strings in (string) so we can
66+
# distinguish them from the surrounding text (issue #38501)
67+
# Ie, "$("str")" vs "str"
68+
# https://github.com/JuliaLang/julia/pull/38692
69+
e = Expr(:string, e)
70+
end
71+
push!(args, e)
72+
i += 1
73+
end
74+
end
75+
if length(args) == 1 && args[1] isa String
76+
# If there's a single string remaining after joining, we unwrap
77+
# to give a string literal.
78+
# """\n a\n b""" ==> "a\nb"
79+
# headsym === :cmdstring follows this branch
80+
return only(args)
81+
else
82+
@check headsym === :string
83+
return Expr(headsym, args...)
84+
end
85+
end
86+
87+
# Convert children
3188
insert_linenums = (headsym == :block || headsym == :toplevel) && need_linenodes
3289
args = Vector{Any}(undef, length(node_args)*(insert_linenums ? 2 : 1))
3390
if headsym == :for && length(node_args) == 2
@@ -121,38 +178,6 @@ function _to_expr(node::SyntaxNode, iteration_spec=false, need_linenodes=true)
121178
pushfirst!(args, numeric_flags(flags(node)))
122179
elseif headsym == :typed_ncat
123180
insert!(args, 2, numeric_flags(flags(node)))
124-
elseif headsym == :string && length(args) > 1
125-
# Julia string literals may be interspersed with trivia in two situations:
126-
# 1. Triple quoted string indentation is trivia
127-
# 2. An \ before newline removes the newline and any following indentation
128-
#
129-
# Such trivia is eagerly removed by the reference parser, so here we
130-
# concatenate adjacent string chunks together for compatibility.
131-
#
132-
# TODO: Manage the non-interpolation cases with String and CmdString
133-
# kinds instead?
134-
args2 = Vector{Any}()
135-
i = 1
136-
while i <= length(args)
137-
if args[i] isa String && i < length(args) && args[i+1] isa String
138-
buf = IOBuffer()
139-
while i <= length(args) && args[i] isa String
140-
write(buf, args[i])
141-
i += 1
142-
end
143-
push!(args2, String(take!(buf)))
144-
else
145-
push!(args2, args[i])
146-
i += 1
147-
end
148-
end
149-
args = args2
150-
if length(args2) == 1 && args2[1] isa String
151-
# If there's a single string remaining after joining we unwrap to
152-
# give a string literal.
153-
# """\n a\n b""" ==> "a\nb"
154-
return args2[1]
155-
end
156181
# elseif headsym == :string && length(args) == 1 && version <= (1,5)
157182
# Strip string from interpolations in 1.5 and lower to preserve
158183
# "hi$("ho")" ==> (string "hi" "ho")

src/kinds.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -873,6 +873,7 @@ const _kind_names =
873873
"curly"
874874
"inert" # QuoteNode; not quasiquote
875875
"string" # A string interior node (possibly containing interpolations)
876+
"cmdstring" # A cmd string node (containing delimiters plus string)
876877
"macrocall"
877878
"kw" # the = in f(a=1)
878879
"parameters" # the list after ; in f(; a=1)

0 commit comments

Comments
 (0)