Skip to content

Commit 8190fe8

Browse files
authored
Rewrite most gensym macros to use automatic hygiene instead (#59239)
Manual `gensym` code often contains a lot of mistakes, either because the user uses something like `.` or `+`, or because it combines code from multiple modules (a gensym is only unique within a pre-compile unit). This replaces most uses of `gensym` for macro-local variables with proper scope markers. I did not convert `_lift_one_interp_helper` or `replace_ref_begin_end_` to automatic hygiene, however: while that would be possible by adding `esc` to every argument that does not use a gensym value (whenever any other argument does use one), I worried that it could lead to macroexpand.scm making new mistakes, so I left it for a separate PR. Better yet, we could make an unhygienic-scope and unescape pair for marking the inverse/dual of the usual operations (marking a symbol as unescaped within a region of unhygienic (escaped) code) to make these various uses easier to implement. This PR does, however, rewrite `replace_ref_begin_end_` to respect argument order and evaluation count (similar to its julia-syntax.scm counterpart) and scoping (not adding `let` unpredictably).
2 parents 822be59 + cd02671 commit 8190fe8

File tree

17 files changed

+445
-280
lines changed

17 files changed

+445
-280
lines changed

base/Base.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ let os = ccall(:jl_get_UNAME, Any, ())
3030
end
3131
end
3232

33+
# metaprogramming
34+
include("meta.jl")
35+
3336
# subarrays
3437
include("subarray.jl")
3538
include("views.jl")
@@ -157,9 +160,6 @@ include("weakkeydict.jl")
157160
# ScopedValues
158161
include("scopedvalues.jl")
159162

160-
# metaprogramming
161-
include("meta.jl")
162-
163163
# Logging
164164
include("logging/logging.jl")
165165
using .CoreLogging

base/boot.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -746,7 +746,7 @@ end
746746

747747
# module providing the IR object model
748748
# excluding types already exported by Core (GlobalRef, QuoteNode, Expr, LineNumberNode)
749-
# any type beyond these is self-quoting (see also Base.is_ast_node)
749+
# any type beyond these is self-quoting (see also Base.isa_ast_node)
750750
module IR
751751

752752
export CodeInfo, MethodInstance, CodeInstance, GotoNode, GotoIfNot, ReturnNode,

base/cartesian.jl

Lines changed: 40 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -36,15 +36,14 @@ If you want just a post-expression, supply [`nothing`](@ref) for the pre-express
3636
parentheses and semicolons, you can supply multi-statement expressions.
3737
"""
3838
macro nloops(N, itersym, rangeexpr, args...)
39-
_nloops(N, itersym, rangeexpr, args...)
39+
_nloops(N, itersym, true, rangeexpr, args...)
4040
end
4141

42-
function _nloops(N::Int, itersym::Symbol, arraysym::Symbol, args::Expr...)
43-
@gensym d
44-
_nloops(N, itersym, :($d->Base.axes($arraysym, $d)), args...)
42+
function _nloops(N::Int, itersym::Symbol, esc_rng::Bool, arraysym::Symbol, args::Expr...)
43+
_nloops(N, itersym, false, :(d->axes($(esc(arraysym)), d)), args...)
4544
end
4645

47-
function _nloops(N::Int, itersym::Symbol, rangeexpr::Expr, args::Expr...)
46+
function _nloops(N::Int, itersym::Symbol, esc_rng::Bool, rangeexpr::Expr, args::Expr...)
4847
if rangeexpr.head !== :->
4948
throw(ArgumentError("second argument must be an anonymous function expression to compute the range"))
5049
end
@@ -55,14 +54,16 @@ function _nloops(N::Int, itersym::Symbol, rangeexpr::Expr, args::Expr...)
5554
ex = Expr(:escape, body)
5655
for dim = 1:N
5756
itervar = inlineanonymous(itersym, dim)
57+
itervar = esc(itervar)
5858
rng = inlineanonymous(rangeexpr, dim)
59-
preexpr = length(args) > 1 ? inlineanonymous(args[1], dim) : (:(nothing))
60-
postexpr = length(args) > 2 ? inlineanonymous(args[2], dim) : (:(nothing))
59+
esc_rng && (rng = esc(rng))
60+
preexpr = length(args) > 1 ? esc(inlineanonymous(args[1], dim)) : nothing
61+
postexpr = length(args) > 2 ? esc(inlineanonymous(args[2], dim)) : nothing
6162
ex = quote
62-
for $(esc(itervar)) = $(esc(rng))
63-
$(esc(preexpr))
63+
for $itervar = $rng
64+
$preexpr
6465
$ex
65-
$(esc(postexpr))
66+
$postexpr
6667
end
6768
end
6869
end
@@ -290,14 +291,15 @@ struct LReplace{S<:AbstractString}
290291
end
291292
LReplace(sym::Symbol, val::Integer) = LReplace(sym, string(sym), val)
292293

293-
lreplace(ex::Expr, sym::Symbol, val) = lreplace!(copy(ex), LReplace(sym, val))
294+
lreplace(ex::Expr, sym::Symbol, val) = lreplace!(copy(ex), LReplace(sym, val), false, 0)
294295

295-
function lreplace!(sym::Symbol, r::LReplace)
296+
function lreplace!(sym::Symbol, r::LReplace, in_quote_context::Bool, escs::Int)
297+
escs == 0 || return sym
296298
sym == r.pat_sym && return r.val
297-
Symbol(lreplace!(string(sym), r))
299+
Symbol(lreplace_string!(string(sym), r))
298300
end
299301

300-
function lreplace!(str::AbstractString, r::LReplace)
302+
function lreplace_string!(str::String, r::LReplace)
301303
i = firstindex(str)
302304
pat = r.pat_str
303305
j = firstindex(pat)
@@ -329,7 +331,7 @@ function lreplace!(str::AbstractString, r::LReplace)
329331
if matching && j > lastindex(pat)
330332
if i > lastindex(str) || str[i] == '_'
331333
# We have a match
332-
return string(str[1:prevind(str, istart)], r.val, lreplace!(str[i:end], r))
334+
return string(str[1:prevind(str, istart)], r.val, lreplace_string!(str[i:end], r))
333335
end
334336
matching = false
335337
j = firstindex(pat)
@@ -339,24 +341,42 @@ function lreplace!(str::AbstractString, r::LReplace)
339341
str
340342
end
341343

342-
function lreplace!(ex::Expr, r::LReplace)
344+
function lreplace!(ex::Expr, r::LReplace, in_quote_context::Bool, escs::Int)
343345
# Curly-brace notation, which acts like parentheses
344-
if ex.head === :curly && length(ex.args) == 2 && isa(ex.args[1], Symbol) && endswith(string(ex.args[1]::Symbol), "_")
345-
excurly = exprresolve(lreplace!(ex.args[2], r))
346+
if !in_quote_context && ex.head === :curly && length(ex.args) == 2 && isa(ex.args[1], Symbol) && endswith(string(ex.args[1]::Symbol), "_")
347+
excurly = exprresolve(lreplace!(ex.args[2], r, in_quote_context, escs))
346348
if isa(excurly, Int)
347349
return Symbol(ex.args[1]::Symbol, excurly)
348350
else
349351
ex.args[2] = excurly
350352
return ex
351353
end
354+
elseif ex.head === :meta || ex.head === :inert
355+
return ex
356+
elseif ex.head === :$
357+
# no longer an executable expression (handle all equivalent forms of :inert, :quote, and QuoteNode the same way)
358+
in_quote_context = false
359+
elseif ex.head === :quote
360+
# executable again
361+
in_quote_context = true
362+
elseif ex.head === :var"hygienic-scope"
363+
# no longer our expression
364+
escs += 1
365+
elseif ex.head === :escape
366+
# our expression again once zero
367+
escs == 0 && return ex
368+
escs -= 1
369+
elseif ex.head === :macrocall
370+
# n.b. blithely go about altering arguments to macros also, assuming that is at all what the user intended
371+
# it is probably the user's fault if they put a macro inside here and didn't mean for it to get rewritten
352372
end
353373
for i in 1:length(ex.args)
354-
ex.args[i] = lreplace!(ex.args[i], r)
374+
ex.args[i] = lreplace!(ex.args[i], r, in_quote_context, escs)
355375
end
356376
ex
357377
end
358378

359-
lreplace!(arg, r::LReplace) = arg
379+
lreplace!(@nospecialize(arg), r::LReplace, in_quote_context::Bool, escs::Int) = arg
360380

361381

362382
poplinenum(arg) = arg

base/experimental.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ within this scope, even if the compiler can't prove this to be the case.
4141
Experimental API. Subject to change without deprecation.
4242
"""
4343
macro aliasscope(body)
44-
sym = gensym()
44+
sym = :aliasscope_result
4545
quote
4646
$(Expr(:aliasscope))
4747
$sym = $(esc(body))

base/meta.jl

Lines changed: 128 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,121 @@ export quot,
1818

1919
public parse
2020

21-
using Base: isidentifier, isoperator, isunaryoperator, isbinaryoperator, ispostfixoperator
2221
import Base: isexpr
2322

23+
## AST decoding helpers ##
24+
25+
is_id_start_char(c::AbstractChar) = ccall(:jl_id_start_char, Cint, (UInt32,), c) != 0
26+
is_id_char(c::AbstractChar) = ccall(:jl_id_char, Cint, (UInt32,), c) != 0
27+
28+
"""
29+
isidentifier(s) -> Bool
30+
31+
Return whether the symbol or string `s` contains characters that are parsed as
32+
a valid ordinary identifier (not a binary/unary operator) in Julia code;
33+
see also [`Base.isoperator`](@ref).
34+
35+
Internally Julia allows any sequence of characters in a `Symbol` (except `\\0`s),
36+
and macros automatically use variable names containing `#` in order to avoid
37+
naming collision with the surrounding code. In order for the parser to
38+
recognize a variable, it uses a limited set of characters (greatly extended by
39+
Unicode). `isidentifier()` makes it possible to query the parser directly
40+
whether a symbol contains valid characters.
41+
42+
# Examples
43+
```jldoctest
44+
julia> Meta.isidentifier(:x), Meta.isidentifier("1x")
45+
(true, false)
46+
```
47+
"""
48+
function isidentifier(s::AbstractString)
49+
x = Iterators.peel(s)
50+
isnothing(x) && return false
51+
(s == "true" || s == "false") && return false
52+
c, rest = x
53+
is_id_start_char(c) || return false
54+
return all(is_id_char, rest)
55+
end
56+
isidentifier(s::Symbol) = isidentifier(string(s))
57+
58+
is_op_suffix_char(c::AbstractChar) = ccall(:jl_op_suffix_char, Cint, (UInt32,), c) != 0
59+
60+
_isoperator(s) = ccall(:jl_is_operator, Cint, (Cstring,), s) != 0
61+
62+
"""
63+
isoperator(s::Symbol)
64+
65+
Return `true` if the symbol can be used as an operator, `false` otherwise.
66+
67+
# Examples
68+
```jldoctest
69+
julia> Meta.isoperator(:+), Meta.isoperator(:f)
70+
(true, false)
71+
```
72+
"""
73+
isoperator(s::Union{Symbol,AbstractString}) = _isoperator(s) || ispostfixoperator(s)
74+
75+
"""
76+
isunaryoperator(s::Symbol)
77+
78+
Return `true` if the symbol can be used as a unary (prefix) operator, `false` otherwise.
79+
80+
# Examples
81+
```jldoctest
82+
julia> Meta.isunaryoperator(:-), Meta.isunaryoperator(:√), Meta.isunaryoperator(:f)
83+
(true, true, false)
84+
```
85+
"""
86+
isunaryoperator(s::Symbol) = ccall(:jl_is_unary_operator, Cint, (Cstring,), s) != 0
87+
is_unary_and_binary_operator(s::Symbol) = ccall(:jl_is_unary_and_binary_operator, Cint, (Cstring,), s) != 0
88+
is_syntactic_operator(s::Symbol) = ccall(:jl_is_syntactic_operator, Cint, (Cstring,), s) != 0
89+
90+
"""
91+
isbinaryoperator(s::Symbol)
92+
93+
Return `true` if the symbol can be used as a binary (infix) operator, `false` otherwise.
94+
95+
# Examples
96+
```jldoctest
97+
julia> Meta.isbinaryoperator(:-), Meta.isbinaryoperator(:√), Meta.isbinaryoperator(:f)
98+
(true, false, false)
99+
```
100+
"""
101+
function isbinaryoperator(s::Symbol)
102+
return _isoperator(s) && (!isunaryoperator(s) || is_unary_and_binary_operator(s)) &&
103+
s !== Symbol("'")
104+
end
105+
106+
"""
107+
ispostfixoperator(s::Union{Symbol,AbstractString})
108+
109+
Return `true` if the symbol can be used as a postfix operator, `false` otherwise.
110+
111+
# Examples
112+
```jldoctest
113+
julia> Meta.ispostfixoperator(Symbol("'")), Meta.ispostfixoperator(Symbol("'ᵀ")), Meta.ispostfixoperator(:-)
114+
(true, true, false)
115+
```
116+
"""
117+
function ispostfixoperator(s::Union{Symbol,AbstractString})
118+
s = String(s)::String
119+
return startswith(s, '\'') && all(is_op_suffix_char, SubString(s, 2))
120+
end
121+
122+
const keyword_syms = IdSet{Symbol}([
123+
:baremodule, :begin, :break, :catch, :const, :continue, :do, :else, :elseif,
124+
:end, :export, :var"false", :finally, :for, :function, :global, :if, :import,
125+
:let, :local, :macro, :module, :public, :quote, :return, :struct, :var"true",
126+
:try, :using, :while ])
127+
128+
function is_valid_identifier(sym)
129+
return (isidentifier(sym) && !(sym in keyword_syms)) ||
130+
(_isoperator(sym) &&
131+
!(sym in (Symbol("'"), :(::), :?)) &&
132+
!is_syntactic_operator(sym)
133+
)
134+
end
135+
24136
"""
25137
Meta.quot(ex)::Expr
26138
@@ -516,6 +628,21 @@ function unescape(@nospecialize ex)
516628
return ex
517629
end
518630

631+
"""
632+
Meta.reescape(unescaped_expr, original_expr)
633+
634+
Re-wrap `unescaped_expr` with the same level of escaping as `original_expr` had.
635+
This is the inverse operation of [`unescape`](@ref) - if the original expression
636+
was escaped, the unescaped expression is wrapped in `:escape` again.
637+
"""
638+
function reescape(@nospecialize(unescaped_expr), @nospecialize(original_expr))
639+
if isexpr(original_expr, :escape) || isexpr(original_expr, :var"hygienic-scope")
640+
return reescape(Expr(:escape, unescaped_expr), original_expr.args[1])
641+
else
642+
return unescaped_expr
643+
end
644+
end
645+
519646
"""
520647
Meta.uncurly(expr)
521648

0 commit comments

Comments
 (0)