Skip to content

Commit b128daf

Browse files
committed
Add read convenience methods
These methods differ a bit from the existing ones: instead of returning strings (whose UTF-8 encoding is known from their type), they return bytes that carry no encoding information. This shouldn't be too confusing, and it can be useful, e.g. for CSV.jl, which expects a vector of UTF-8 bytes rather than a string.
1 parent cab40b5 commit b128daf

File tree

3 files changed

+35
-4
lines changed

3 files changed

+35
-4
lines changed

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,12 @@ noël
105105

106106
julia> readuntil(path, enc"UTF-16", "ë")
107107
"café\nno"
108+
109+
julia> String(read(path, enc"UTF-16"))
110+
"café\nnoël"
111+
112+
julia> String(read(path, 5, enc"UTF-16"))
113+
"café"
108114
```
109115

110116
When performing more complex operations on an encoded text file, it will often be easier to specify the encoding only once when opening it. The resulting I/O stream can then be passed to functions that are unaware of encodings (i.e. that assume UTF-8 text):

src/StringEncodings.jl

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ end
202202
203203
Returns a new write-only I/O stream, which converts any text in the encoding `from`
204204
written to it into text in the encoding `to` written to `stream`. Calling `close` on the
205-
stream is necessary to complete the encoding (but does not close `stream`).
205+
returned object is necessary to complete the encoding (but it does not close `stream`).
206206
207207
`to` and `from` can be specified either as a string or as an `Encoding` object.
208208
"""
@@ -404,13 +404,26 @@ function open(fname::AbstractString, enc::Encoding, mode::AbstractString)
404404
end
405405

406406
"""
407+
read(stream::IO, [nb::Integer,] enc::Encoding)
408+
read(filename::AbstractString, [nb::Integer,] enc::Encoding)
407409
read(stream::IO, ::Type{String}, enc::Encoding)
408410
read(filename::AbstractString, ::Type{String}, enc::Encoding)
409411
410-
Methods to read text in character encoding `enc`.
412+
Methods to read text in character encoding `enc`. See documentation for corresponding methods
413+
without the `enc` argument for details.
411414
"""
412-
Base.read(s::IO, ::Type{String}, enc::Encoding) = read(StringDecoder(s, enc), String)
413-
Base.read(filename::AbstractString, ::Type{String}, enc::Encoding) = open(io->read(io, String, enc), filename)
415+
Base.read(s::IO, enc::Encoding) =
416+
read(StringDecoder(s, enc))
417+
Base.read(filename::AbstractString, enc::Encoding) =
418+
open(io->read(io, enc), filename)
419+
Base.read(s::IO, nb::Integer, enc::Encoding) =
420+
read(StringDecoder(s, enc), nb)
421+
Base.read(filename::AbstractString, nb::Integer, enc::Encoding) =
422+
open(io->read(io, nb, enc), filename)
423+
Base.read(s::IO, ::Type{String}, enc::Encoding) =
424+
read(StringDecoder(s, enc), String)
425+
Base.read(filename::AbstractString, ::Type{String}, enc::Encoding) =
426+
open(io->read(io, String, enc), filename)
414427

415428
"""
416429
readline(stream::IO, enc::Encoding; keep::Bool=false)

test/runtests.jl

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,18 @@ mktemp() do path, io
141141
write(io, s)
142142
end
143143

144+
@test String(read(path, enc"ISO-2022-JP")) == s
145+
@test String(open(io->read(io, enc"ISO-2022-JP"), path)) == s
146+
@test String(open(io->read(io), path, enc"ISO-2022-JP")) == s
147+
148+
@test String(read(path, 1000, enc"ISO-2022-JP")) == s
149+
@test String(open(io->read(io, 1000, enc"ISO-2022-JP"), path)) == s
150+
@test String(open(io->read(io, 1000), path, enc"ISO-2022-JP")) == s
151+
152+
@test String(read(path, 10, enc"ISO-2022-JP")) == first(s, 10)
153+
@test String(open(io->read(io, 10, enc"ISO-2022-JP"), path)) == first(s, 10)
154+
@test String(open(io->read(io, 10), path, enc"ISO-2022-JP")) == first(s, 10)
155+
144156
@test read(path, String, enc"ISO-2022-JP") == s
145157
@test open(io->read(io, String, enc"ISO-2022-JP"), path) == s
146158
@test open(io->read(io, String), path, enc"ISO-2022-JP") == s

0 commit comments

Comments
 (0)