Skip to content

Commit 0f51a63

Browse files
authored
ascii=true and fullhex=true flags for escape_string (#55099)
1 parent 602b582 commit 0f51a63

File tree

3 files changed

+29
-9
lines changed

3 files changed

+29
-9
lines changed

NEWS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ New library features
9292
data-races. Or use the callback form of `open` to have all that handled
9393
automatically.
9494
* `@timed` now additionally returns the elapsed compilation and recompilation time ([#52889])
95+
* `escape_string` takes additional keyword arguments `ascii=true` (to escape all
96+
non-ASCII characters) and `fullhex=true` (to require full 4/8-digit hex numbers
97+
for u/U escapes, e.g. for C compatibility) ([#55099]).
9598
* `filter` can now act on a `NamedTuple` ([#50795]).
9699
* `tempname` can now take a suffix string to allow the file name to include a suffix and include that suffix in
97100
the uniquing checking ([#53474])

base/strings/io.jl

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -382,8 +382,8 @@ escape_nul(c::Union{Nothing, AbstractChar}) =
382382
(c !== nothing && '0' <= c <= '7') ? "\\x00" : "\\0"
383383

384384
"""
385-
escape_string(str::AbstractString[, esc]; keep = ())::AbstractString
386-
escape_string(io, str::AbstractString[, esc]; keep = ())::Nothing
385+
escape_string(str::AbstractString[, esc]; keep=(), ascii=false, fullhex=false)::AbstractString
386+
escape_string(io, str::AbstractString[, esc]; keep=(), ascii=false, fullhex=false)::Nothing
387387
388388
General escaping of traditional C and Unicode escape sequences. The first form returns the
389389
escaped string, the second prints the result to `io`.
@@ -398,11 +398,23 @@ escaped by a prepending backslash (`\"` is also escaped by default in the first
398398
The argument `keep` specifies a collection of characters which are to be kept as
399399
they are. Notice that `esc` has precedence here.
400400
401+
The argument `ascii` can be set to `true` to escape all non-ASCII characters,
402+
whereas the default `ascii=false` outputs printable Unicode characters as-is.
403+
(`keep` takes precedence over `ascii`.)
404+
405+
The argument `fullhex` can be set to `true` to require all `\\u` escapes to be
406+
printed with 4 hex digits, and `\\U` escapes to be printed with 8 hex digits,
407+
whereas by default (`fullhex=false`) they are printed with fewer digits if
408+
possible (omitting leading zeros).
409+
401410
See also [`unescape_string`](@ref) for the reverse operation.
402411
403412
!!! compat "Julia 1.7"
404413
The `keep` argument is available as of Julia 1.7.
405414
415+
!!! compat "Julia 1.12"
416+
The `ascii` and `fullhex` arguments require Julia 1.12.
417+
406418
# Examples
407419
```jldoctest
408420
julia> escape_string("aaa\\nbbb")
@@ -421,7 +433,7 @@ julia> escape_string(string('\\u2135','\\0','0')) # \\0 would be ambiguous
421433
"ℵ\\\\x000"
422434
```
423435
"""
424-
function escape_string(io::IO, s::AbstractString, esc=""; keep = ())
436+
function escape_string(io::IO, s::AbstractString, esc=""; keep = (), ascii::Bool=false, fullhex::Bool=false)
425437
a = Iterators.Stateful(s)
426438
for c::AbstractChar in a
427439
if c in esc
@@ -436,10 +448,10 @@ function escape_string(io::IO, s::AbstractString, esc=""; keep = ())
436448
isprint(c) ? print(io, c) :
437449
print(io, "\\x", string(UInt32(c), base = 16, pad = 2))
438450
elseif !isoverlong(c) && !ismalformed(c)
439-
isprint(c) ? print(io, c) :
440-
c <= '\x7f' ? print(io, "\\x", string(UInt32(c), base = 16, pad = 2)) :
441-
c <= '\uffff' ? print(io, "\\u", string(UInt32(c), base = 16, pad = need_full_hex(peek(a)::Union{AbstractChar,Nothing}) ? 4 : 2)) :
442-
print(io, "\\U", string(UInt32(c), base = 16, pad = need_full_hex(peek(a)::Union{AbstractChar,Nothing}) ? 8 : 4))
451+
!ascii && isprint(c) ? print(io, c) :
452+
c <= '\x7f' ? print(io, "\\x", string(UInt32(c), base = 16, pad = 2)) :
453+
c <= '\uffff' ? print(io, "\\u", string(UInt32(c), base = 16, pad = fullhex || need_full_hex(peek(a)::Union{AbstractChar,Nothing}) ? 4 : 2)) :
454+
print(io, "\\U", string(UInt32(c), base = 16, pad = fullhex || need_full_hex(peek(a)::Union{AbstractChar,Nothing}) ? 8 : 4))
443455
else # malformed or overlong
444456
u = bswap(reinterpret(UInt32, c)::UInt32)
445457
while true
@@ -450,8 +462,8 @@ function escape_string(io::IO, s::AbstractString, esc=""; keep = ())
450462
end
451463
end
452464

453-
escape_string(s::AbstractString, esc=('\"',); keep = ()) =
454-
sprint((io)->escape_string(io, s, esc; keep = keep), sizehint=lastindex(s))
465+
escape_string(s::AbstractString, esc=('\"',); keep = (), ascii::Bool=false, fullhex::Bool=false) =
466+
sprint((io)->escape_string(io, s, esc; keep, ascii, fullhex), sizehint=lastindex(s))
455467

456468
function print_quoted(io, s::AbstractString)
457469
print(io, '"')

test/strings/io.jl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,11 @@
165165
@test Base.escape_raw_string(raw"some\"string\\", '`') == "some\"string\\\\"
166166
@test Base.escape_raw_string(raw"some\"string") == "some\\\"string"
167167
@test Base.escape_raw_string(raw"some`string", '`') == "some\\`string"
168+
169+
# ascii and fullhex flags:
170+
@test escape_string("\u00e4\u00f6\u00fc") == "\u00e4\u00f6\u00fc"
171+
@test escape_string("\u00e4\u00f6\u00fc", ascii=true) == "\\ue4\\uf6\\ufc"
172+
@test escape_string("\u00e4\u00f6\u00fc", ascii=true, fullhex=true) == "\\u00e4\\u00f6\\u00fc"
168173
end
169174
@testset "join()" begin
170175
@test join([]) == join([],",") == ""

0 commit comments

Comments
 (0)