Skip to content

Commit e8f9e09

Browse files
committed
Add more convenience wrappers: open(), eachline(), readline(), readlines()
Requires a new closestream field in StringEncoder/StringDecoder so that calling close() on the result of open() also closes the underlying stream, which is not shown to the user.
1 parent 6ae7c18 commit e8f9e09

File tree

2 files changed

+133
-13
lines changed

2 files changed

+133
-13
lines changed

src/StringEncodings.jl

Lines changed: 99 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
# This file is a part of StringEncodings.jl. License is MIT: http://julialang.org/license
22

33
module StringEncodings
4-
import Base: close, eof, flush, read, readall, write, show
4+
import Base: close, eachline, eof, flush, isreadable, iswritable,
5+
open, read, readline, readlines, show, write
56
import Base.Libc: errno, strerror, E2BIG, EINVAL, EILSEQ
67
import Compat: read
78

@@ -82,6 +83,7 @@ const BUFSIZE = 100
8283

8384
type StringEncoder{S<:IO} <: IO
8485
ostream::S
86+
closestream::Bool
8587
cd::Ptr{Void}
8688
inbuf::Vector{UInt8}
8789
outbuf::Vector{UInt8}
@@ -93,6 +95,7 @@ end
9395

9496
type StringDecoder{S<:IO} <: IO
9597
istream::S
98+
closestream::Bool
9699
cd::Ptr{Void}
97100
inbuf::Vector{UInt8}
98101
outbuf::Vector{UInt8}
@@ -178,7 +181,7 @@ end
178181
StringEncoder(ostream, to, from=enc"UTF-8")
179182
180183
Returns a new write-only I/O stream, which converts any text in the encoding `from`
181-
written to it into text in the encoding `to` written to ostream. Calling `close` on the
184+
written to it into text in the encoding `to` written to `ostream`. Calling `close` on the
182185
stream is necessary to complete the encoding (but does not close `ostream`).
183186
184187
`to` and `from` can be specified either as a string or as an `Encoding` object.
@@ -187,7 +190,7 @@ function StringEncoder(ostream::IO, to::Encoding, from::Encoding=enc"UTF-8")
187190
cd = iconv_open(ASCIIString(to), ASCIIString(from))
188191
inbuf = Vector{UInt8}(BUFSIZE)
189192
outbuf = Vector{UInt8}(BUFSIZE)
190-
s = StringEncoder(ostream, cd, inbuf, outbuf,
193+
s = StringEncoder(ostream, false, cd, inbuf, outbuf,
191194
Ref{Ptr{UInt8}}(pointer(inbuf)), Ref{Ptr{UInt8}}(pointer(outbuf)),
192195
Ref{Csize_t}(0), Ref{Csize_t}(BUFSIZE))
193196
finalizer(s, finalize)
@@ -221,6 +224,9 @@ function close(s::StringEncoder)
221224
iconv_reset!(s)
222225
# Make sure C memory/resources are returned
223226
finalize(s)
227+
if s.closestream
228+
close(s.ostream)
229+
end
224230
# flush() wasn't able to empty input buffer, which cannot happen with correct data
225231
s.inbytesleft[] == 0 || throw(IncompleteSequenceError())
226232
end
@@ -238,7 +244,8 @@ end
238244
StringDecoder(istream, from, to=enc"UTF-8")
239245
240246
Returns a new read-only I/O stream, which converts text in the encoding `from`
241-
read from `istream` into text in the encoding `to`.
247+
read from `istream` into text in the encoding `to`. Calling `close` on the
248+
stream does not close `istream`.
242249
243250
`to` and `from` can be specified either as a string or as an `Encoding` object.
244251
@@ -249,7 +256,7 @@ function StringDecoder(istream::IO, from::Encoding, to::Encoding=enc"UTF-8")
249256
cd = iconv_open(ASCIIString(to), ASCIIString(from))
250257
inbuf = Vector{UInt8}(BUFSIZE)
251258
outbuf = Vector{UInt8}(BUFSIZE)
252-
s = StringDecoder(istream, cd, inbuf, outbuf,
259+
s = StringDecoder(istream, false, cd, inbuf, outbuf,
253260
Ref{Ptr{UInt8}}(pointer(inbuf)), Ref{Ptr{UInt8}}(pointer(outbuf)),
254261
Ref{Csize_t}(0), Ref{Csize_t}(BUFSIZE), 0)
255262
finalizer(s, finalize)
@@ -293,6 +300,9 @@ function close(s::StringDecoder)
293300
iconv_reset!(s)
294301
# Make sure C memory/resources are returned
295302
finalize(s)
303+
if s.closestream
304+
close(s.istream)
305+
end
296306
# iconv_reset!() wasn't able to empty input buffer, which cannot happen with correct data
297307
s.inbytesleft[] == 0 || throw(IncompleteSequenceError())
298308
end
@@ -301,26 +311,105 @@ function read(s::StringDecoder, ::Type{UInt8})
301311
eof(s) ? throw(EOFError()) : s.outbuf[s.skip+=1]
302312
end
303313

314+
# A decoder can only be read from: it is readable exactly when the stream it
# pulls bytes from is readable, and it never accepts writes.
function isreadable(s::StringDecoder)
    return isreadable(s.istream)
end
iswritable(s::StringDecoder) = false

# An encoder can only be written to: it is writable exactly when the stream it
# pushes bytes to is writable, and it never supports reads.
isreadable(s::StringEncoder) = false
function iswritable(s::StringEncoder)
    return iswritable(s.ostream)
end
319+
304320

305321
## Convenience I/O functions
322+
# Wrap the raw stream `s` in a `StringEncoder` (when `s` is writable) or in a
# `StringDecoder` (otherwise) for encoding `enc`, and set `closestream` on the
# wrapper so that closing it also closes `s`.
#
# Throws an `ArgumentError` when `s` is both readable and writable. Any error
# raised while building the wrapper is propagated unchanged.
#
# If wrapping fails, `s` is closed before the error propagates: the callers in
# this file pass a stream they have just opened and never expose it to the
# user, so nobody else would be able to close it.
function wrap_stream(s::IO, enc::Encoding)
    wrapped = try
        if iswritable(s) && isreadable(s) # Should never happen
            throw(ArgumentError("cannot open encoded text files in read and write/append modes at the same time"))
        end
        iswritable(s) ? StringEncoder(s, enc) : StringDecoder(s, enc)
    catch
        # Do not leak the underlying handle when no wrapper could be created
        close(s)
        rethrow()
    end
    # From now on the wrapper owns `s`
    wrapped.closestream = true
    wrapped
end
330+
331+
"""
332+
open(filename::AbstractString, enc::Encoding[, args...])
333+
334+
Open a text file in encoding `enc`, converting its contents to UTF-8 on the fly
335+
using `StringDecoder` (when reading) or `StringEncoder` (when writing).
336+
`args` is passed to `open`, so this function can be used as a replacement for all `open`
337+
variants for working with files.
338+
339+
Note that calling `close` on the returned I/O stream will also close the associated file handle;
340+
this operation is necessary to complete the encoding in write mode. Opening a file for both
341+
reading and writing/appending is not supported.
342+
343+
The returned I/O stream can be passed to functions working on strings without
344+
specifying the encoding again.
345+
"""
346+
function open(fname::AbstractString, enc::Encoding, args...)
    # Forward the remaining arguments to `open`, then wrap the resulting stream
    raw = open(fname, args...)
    return wrap_stream(raw, enc)
end
347+
348+
# Variant of `open` taking five `Bool` flags, which are forwarded unchanged to
# `open(fname, rd, wr, cr, tr, ff)`. Requesting `rd` together with `wr` or `ff`
# is rejected with an `ArgumentError` before the file is touched.
function open(fname::AbstractString, enc::Encoding,
              rd::Bool, wr::Bool, cr::Bool, tr::Bool, ff::Bool)
    readwrite = rd && (wr || ff)
    readwrite &&
        throw(ArgumentError("cannot open encoded text files in read and write/append modes at the same time"))
    return wrap_stream(open(fname, rd, wr, cr, tr, ff), enc)
end
355+
356+
# Variant of `open` taking a mode string, which is forwarded unchanged to
# `open(fname, mode)`. The modes "r+", "w+" and "a+" are rejected with an
# `ArgumentError` before the file is touched.
function open(fname::AbstractString, enc::Encoding, mode::AbstractString)
    rejected = mode == "r+" || mode == "w+" || mode == "a+"
    rejected &&
        throw(ArgumentError("cannot open encoded text files in read and write/append modes at the same time"))
    return wrap_stream(open(fname, mode), enc)
end
362+
306363
if isdefined(Base, :readstring)
307364
@doc """
308-
readstring(stream or filename, enc::Encoding)
365+
readstring(stream::IO, enc::Encoding)
366+
readstring(filename::AbstractString, enc::Encoding)
309367
310-
Read the entire contents of an I/O stream or a file in encoding `enc` as a string.
368+
Methods to read text in character encoding `enc`.
311369
""" ->
312370
Base.readstring(s::IO, enc::Encoding) = readstring(StringDecoder(s, enc))
313371
Base.readstring(filename::AbstractString, enc::Encoding) = open(io->readstring(io, enc), filename)
314372
else # Compatibility with Julia 0.4
315373
@doc """
316-
readall(stream or filename, enc::Encoding)
374+
readall(stream::IO, enc::Encoding)
375+
readall(filename::AbstractString, enc::Encoding)
317376
318-
Read the entire contents of an I/O stream or a file in encoding `enc` as a string.
377+
Methods to read text in character encoding `enc`.
319378
""" ->
320379
Base.readall(s::IO, enc::Encoding) = readall(StringDecoder(s, enc))
321380
Base.readall(filename::AbstractString, enc::Encoding) = open(io->readall(io, enc), filename)
322381
end
323382

383+
"""
384+
readline(stream::IO, enc::Encoding)
385+
readline(filename::AbstractString, enc::Encoding)
386+
387+
Methods to read text in character encoding `enc`.
388+
"""
389+
function readline(s::IO, enc::Encoding)
    # Decode `s` from `enc` and read one line from the decoded stream
    decoder = StringDecoder(s, enc)
    return readline(decoder)
end

function readline(filename::AbstractString, enc::Encoding)
    # Open the raw file; the do-block form closes it once the line is read
    open(filename) do io
        readline(io, enc)
    end
end
391+
392+
"""
393+
readlines(stream::IO, enc::Encoding)
394+
readlines(filename::AbstractString, enc::Encoding)
395+
396+
Methods to read text in character encoding `enc`.
397+
"""
398+
function readlines(s::IO, enc::Encoding)
    # Decode `s` from `enc` and collect every line of the decoded stream
    decoder = StringDecoder(s, enc)
    return readlines(decoder)
end

function readlines(filename::AbstractString, enc::Encoding)
    # Open the raw file; the do-block form closes it once all lines are read
    open(filename) do io
        readlines(io, enc)
    end
end
400+
401+
"""
402+
eachline(stream::IO, enc::Encoding)
403+
eachline(filename::AbstractString, enc::Encoding)
404+
405+
Methods to read text in character encoding `enc`. Decoding is performed on the fly.
406+
"""
407+
function eachline(s::IO, enc::Encoding)
    return eachline(StringDecoder(s, enc))
end

function eachline(filename::AbstractString, enc::Encoding)
    # `open(filename, enc)` returns a wrapper whose `close` also closes the file
    stream = open(filename, enc)
    # Callback handed to `EachLine`, which is expected to call it once
    # iteration finishes
    closer = () -> close(stream)
    return EachLine(stream, closer)
end
412+
324413

325414
## Functions to encode/decode strings
326415

@@ -359,7 +448,7 @@ function encode(s::AbstractString, enc::Encoding)
359448
b = IOBuffer()
360449
p = StringEncoder(b, enc, encoding(typeof(s)))
361450
write(p, s)
362-
close(p)
451+
flush(p)
363452
takebuf_array(b)
364453
end
365454

test/runtests.jl

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,12 @@ for (s, enc) in (("noël", "ISO-8859-1"),
2828
end
2929

3030
# Test that attempt to close stream in the middle of incomplete sequence throws
31-
# TODO: use more specific errors
3231
let s = "a string チャネルパートナーの選択"
32+
# First, correct version
33+
p = StringEncoder(IOBuffer(), "UTF-16LE")
34+
write(p, s.data)
35+
close(p)
36+
3337
p = StringEncoder(IOBuffer(), "UTF-16LE")
3438
write(p, s.data[1:10])
3539
@test_throws IncompleteSequenceError close(p)
@@ -129,12 +133,39 @@ catch err
129133
end
130134

131135
mktemp() do path, io
132-
s = "a string \0チャネルパ\0\0トナーの選択 with embedded and trailing nuls\0"
133-
write(io, encode(s, "ISO-2022-JP"))
136+
s = "a string \0チャネルパ\0\0トナーの選択 with embedded and trailing nuls\0\nand a second line"
134137
close(io)
138+
open(path, enc"ISO-2022-JP", "w") do io
139+
@test iswritable(io) && !isreadable(io)
140+
write(io, s)
141+
end
135142

136143
@test readstring(path, enc"ISO-2022-JP") == s
137144
@test open(io->readstring(io, enc"ISO-2022-JP"), path) == s
145+
@test open(readstring, path, enc"ISO-2022-JP") == s
146+
@test readline(path, enc"ISO-2022-JP") == string(split(s, '\n')[1], '\n')
147+
@test open(readline, path, enc"ISO-2022-JP") == string(split(s, '\n')[1], '\n')
148+
a = readlines(path, enc"ISO-2022-JP")
149+
b = open(readlines, path, enc"ISO-2022-JP")
150+
c = collect(eachline(path, enc"ISO-2022-JP"))
151+
d = open(io->collect(eachline(io, enc"ISO-2022-JP")), path)
152+
@test a[1] == b[1] == c[1] == d[1] == string(split(s, '\n')[1], '\n')
153+
@test a[2] == b[2] == c[2] == d[2] == split(s, '\n')[2]
154+
155+
# Test alternative syntaxes for open()
156+
open(path, enc"ISO-2022-JP", "r") do io
157+
@test isreadable(io) && !iswritable(io)
158+
@test readstring(io) == s
159+
end
160+
open(path, enc"ISO-2022-JP", true, false, false, false, false) do io
161+
@test isreadable(io) && !iswritable(io)
162+
@test readstring(io) == s
163+
end
164+
@test_throws ArgumentError open(path, enc"ISO-2022-JP", "r+")
165+
@test_throws ArgumentError open(path, enc"ISO-2022-JP", "w+")
166+
@test_throws ArgumentError open(path, enc"ISO-2022-JP", "a+")
167+
@test_throws ArgumentError open(path, enc"ISO-2022-JP", true, true, false, false, false)
168+
@test_throws ArgumentError open(path, enc"ISO-2022-JP", true, false, false, false, true)
138169
end
139170

140171

0 commit comments

Comments
 (0)