Skip to content

Commit c510357

Browse files
committed
fix normalization and string parsing (again)
1 parent 7cd8c30 commit c510357

File tree

4 files changed

+43
-12
lines changed

4 files changed

+43
-12
lines changed

src/components/strings.jl

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ function parse_string_or_cmd(ps::ParseState, prefixed=false)
129129
elseif !isempty(str)
130130
push!(ret, ex)
131131
end
132+
!iscmd && _rm_escaped_newlines(ex)
132133
istrip && adjust_lcp(ex)
133134
startbytes = 0
134135
op = EXPR(:OPERATOR, 1, 1, "\$")
@@ -227,6 +228,7 @@ function parse_string_or_cmd(ps::ParseState, prefixed=false)
227228
# only mark non-interpolated triple u_strings
228229
ex = EXPR(length(ret) == 0 ? :TRIPLESTRING : :STRING, lspan + ps.nt.startbyte - ps.t.endbyte - 1 + startbytes, lspan + startbytes, str)
229230
# find lcp for escaped string
231+
!iscmd && _rm_escaped_newlines(ex)
230232
adjust_lcp(ex, true)
231233
# we only want to drop the leading new line if it's a literal newline, not if it's `\n`
232234
if startswith(str, "\\n")
@@ -288,6 +290,7 @@ function parse_string_or_cmd(ps::ParseState, prefixed=false)
288290
ret = unwrapped
289291
end
290292
if !iscmd && prefixed == false
293+
_rm_escaped_newlines(ret)
291294
_unescape_string_expr(ret)
292295
end
293296
update_span!(ret)
@@ -297,14 +300,24 @@ end
297300

298301
function _unescape_string_expr(expr)
299302
if headof(expr) === :STRING || headof(expr) === :TRIPLESTRING
300-
expr.val = _unescape_string(replace(valof(expr), r"(?<!\\)((?:\\\\)*)\\\n[\s\n]*" => s"\1"))
303+
expr.val = _unescape_string(valof(expr))
301304
else
302305
for a in expr
303306
_unescape_string_expr(a)
304307
end
305308
end
306309
end
307310

311+
function _rm_escaped_newlines(expr)
312+
if headof(expr) === :STRING || headof(expr) === :TRIPLESTRING
313+
expr.val = replace(valof(expr), r"(?<!\\)((?:\\\\)*)\\\n[\s\n]*" => s"\1")
314+
else
315+
for a in expr
316+
_rm_escaped_newlines(a)
317+
end
318+
end
319+
end
320+
308321
function adjustspan(x::EXPR)
309322
x.fullspan = x.span
310323
return x

src/conversion.jl

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
function julia_normalization_map(c::Int32, x::Ptr{Nothing})::Int32
33
return c == 0x00B5 ? 0x03BC : # micro sign -> greek small letter mu
44
c == 0x025B ? 0x03B5 : # latin small letter open e -> greek small letter
5+
c == 0x00B7 ? 0x22C5 :
6+
c == 0x0387 ? 0x22C5 :
7+
c == 0x2212 ? 0x002D :
58
c
69
end
710

@@ -155,7 +158,7 @@ function to_codeobject(x::EXPR)
155158
return Symbol(lowercase(string(headof(x))))
156159
end
157160
elseif isoperator(x)
158-
return Symbol(valof(x))
161+
return Symbol(normalize_julia_identifier(valof(x)))
159162
elseif ispunctuation(x)
160163
if headof(x) === :DOT
161164
if x.args === nothing
@@ -207,13 +210,11 @@ function to_codeobject(x::EXPR)
207210
# Special conversion needed - the initial text section is treated as empty for the represented string following lowest-common-prefix adjustments, but exists in the source.
208211
Expr(:string, to_codeobject.(x.args[2:end])...)
209212
elseif x.args === nothing
210-
# this is mostly useful for ncat
213+
# for ncat/nrow etc
211214
int = tryparse(Int, String(x.head))
212-
if int === nothing
213-
Expr(Symbol(lowercase(String(x.head))))
214-
else
215-
int
216-
end
215+
int !== nothing && return int
216+
217+
Expr(Symbol(lowercase(String(x.head))))
217218
elseif x.head === :errortoken
218219
Expr(:error)
219220
else

src/precompile.jl

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,9 @@ function _precompile()
2121
precompile(EXPR, (Symbol, Vector{EXPR}, Int, Int))
2222
precompile(EXPR, (Symbol, Vector{EXPR}))
2323

24-
24+
2525
precompile(INSTANCE, (ParseState,))
2626

27-
precompile(tostr, (IOBuffer,))
2827
precompile(str_value, (EXPR,))
2928

3029
precompile(CSTParser.parse_expression, (ParseState,))
@@ -109,4 +108,4 @@ function _precompile()
109108
precompile(Tokenize.Lexers.lex_star, (Tokenize.Lexers.Lexer{IOBuffer,Tokens.RawToken},))
110109
precompile(Tokenize.Lexers.lex_whitespace, (Tokenize.Lexers.Lexer{IOBuffer,Tokens.RawToken},))
111110
precompile(Tokenize.Lexers.lex_xor, (Tokenize.Lexers.Lexer{IOBuffer,Tokens.RawToken},))
112-
end
111+
end

test/parser.jl

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -738,9 +738,20 @@ end
738738
@test """throw(ArgumentError("invalid \$(m == 2 ? "hex (\\\\x)" :
739739
"unicode (\\\$u)") escape sequence"))""" |> test_expr
740740
@test "\"a\\\\\\\\\\\nb\"" |> test_expr
741-
for c in 0:12
741+
for c in 0:20
742742
@test test_expr(string("\"a", '\\'^c, "\nb\""))
743+
@test test_expr(string("\"\"\"a", '\\'^c, "\nb\"\"\""))
743744
end
745+
for c in 0:20
746+
@test test_expr(string("`a", '\\'^c, "\nb`"))
747+
@test test_expr(string("```a", '\\'^c, "\nb```"))
748+
end
749+
750+
@test "\"\"\"\n a\\\n b\"\"\"" |> test_expr
751+
@test "\"\"\"\n a\\\n b\"\"\"" |> test_expr
752+
@test "\"\"\"\na\\\n b\"\"\"" |> test_expr
753+
@test "\"\"\"\na\\\nb\"\"\"" |> test_expr
754+
@test "\"\"\"\n a\\\n b\"\"\"" |> test_expr
744755
end
745756
end
746757

@@ -1473,6 +1484,13 @@ end
14731484
end
14741485
end
14751486

1487+
if VERSION > v"1.7-"
1488+
@testset "normalized unicode ops" begin
1489+
@test "(·) == (·) == (⋅) == 5" |> test_expr
1490+
@test "(−) == (-) == 6" |> test_expr
1491+
end
1492+
end
1493+
14761494
@testset "pair tuple" begin
14771495
@test test_expr("a => b")
14781496
@test test_expr("a => b, c, d")

0 commit comments

Comments
 (0)