1
1
# This file is a part of StringEncodings.jl. License is MIT: http://julialang.org/license
2
2
3
3
module StringEncodings
4
- import Base: close, eof, flush, read, readall, write, show
4
+ import Base: close, eachline, eof, flush, isreadable, iswritable,
5
+ open, read, readline, readlines, show, write
5
6
import Base. Libc: errno, strerror, E2BIG, EINVAL, EILSEQ
6
7
import Compat: read
7
8
@@ -82,6 +83,7 @@ const BUFSIZE = 100
82
83
83
84
type StringEncoder{S<: IO } <: IO
84
85
ostream:: S
86
+ closestream:: Bool
85
87
cd:: Ptr{Void}
86
88
inbuf:: Vector{UInt8}
87
89
outbuf:: Vector{UInt8}
93
95
94
96
type StringDecoder{S<: IO } <: IO
95
97
istream:: S
98
+ closestream:: Bool
96
99
cd:: Ptr{Void}
97
100
inbuf:: Vector{UInt8}
98
101
outbuf:: Vector{UInt8}
178
181
StringEncoder(ostream, to, from=enc"UTF-8")
179
182
180
183
Returns a new write-only I/O stream, which converts any text in the encoding `from`
181
- written to it into text in the encoding `to` written to ostream. Calling `close` on the
184
+ written to it into text in the encoding `to` written to `ostream`. Calling `close` on the
182
185
stream is necessary to complete the encoding (but does not close `ostream`).
183
186
184
187
`to` and `from` can be specified either as a string or as an `Encoding` object.
@@ -187,7 +190,7 @@ function StringEncoder(ostream::IO, to::Encoding, from::Encoding=enc"UTF-8")
187
190
cd = iconv_open (ASCIIString (to), ASCIIString (from))
188
191
inbuf = Vector {UInt8} (BUFSIZE)
189
192
outbuf = Vector {UInt8} (BUFSIZE)
190
- s = StringEncoder (ostream, cd, inbuf, outbuf,
193
+ s = StringEncoder (ostream, false , cd, inbuf, outbuf,
191
194
Ref {Ptr{UInt8}} (pointer (inbuf)), Ref {Ptr{UInt8}} (pointer (outbuf)),
192
195
Ref {Csize_t} (0 ), Ref {Csize_t} (BUFSIZE))
193
196
finalizer (s, finalize)
@@ -221,6 +224,9 @@ function close(s::StringEncoder)
221
224
iconv_reset! (s)
222
225
# Make sure C memory/resources are returned
223
226
finalize (s)
227
+ if s. closestream
228
+ close (s. ostream)
229
+ end
224
230
# flush() wasn't able to empty input buffer, which cannot happen with correct data
225
231
s. inbytesleft[] == 0 || throw (IncompleteSequenceError ())
226
232
end
238
244
StringDecoder(istream, from, to=enc"UTF-8")
239
245
240
246
Returns a new read-only I/O stream, which converts text in the encoding `from`
241
- read from `istream` into text in the encoding `to`.
247
+ read from `istream` into text in the encoding `to`. Calling `close` on the
248
+ stream does not close `istream`.
242
249
243
250
`to` and `from` can be specified either as a string or as an `Encoding` object.
244
251
@@ -249,7 +256,7 @@ function StringDecoder(istream::IO, from::Encoding, to::Encoding=enc"UTF-8")
249
256
cd = iconv_open (ASCIIString (to), ASCIIString (from))
250
257
inbuf = Vector {UInt8} (BUFSIZE)
251
258
outbuf = Vector {UInt8} (BUFSIZE)
252
- s = StringDecoder (istream, cd, inbuf, outbuf,
259
+ s = StringDecoder (istream, false , cd, inbuf, outbuf,
253
260
Ref {Ptr{UInt8}} (pointer (inbuf)), Ref {Ptr{UInt8}} (pointer (outbuf)),
254
261
Ref {Csize_t} (0 ), Ref {Csize_t} (BUFSIZE), 0 )
255
262
finalizer (s, finalize)
@@ -293,6 +300,9 @@ function close(s::StringDecoder)
293
300
iconv_reset! (s)
294
301
# Make sure C memory/resources are returned
295
302
finalize (s)
303
+ if s. closestream
304
+ close (s. istream)
305
+ end
296
306
# iconv_reset!() wasn't able to empty input buffer, which cannot happen with correct data
297
307
s. inbytesleft[] == 0 || throw (IncompleteSequenceError ())
298
308
end
@@ -301,26 +311,105 @@ function read(s::StringDecoder, ::Type{UInt8})
301
311
eof (s) ? throw (EOFError ()) : s. outbuf[s. skip+= 1 ]
302
312
end
303
313
314
+ isreadable (s:: StringDecoder ) = isreadable (s. istream)
315
+ iswritable (s:: StringDecoder ) = false
316
+
317
+ isreadable (s:: StringEncoder ) = false
318
+ iswritable (s:: StringEncoder ) = iswritable (s. ostream)
319
+
304
320
305
321
# # Convenience I/O functions
322
+ function wrap_stream (s:: IO , enc:: Encoding )
323
+ if iswritable (s) && isreadable (s) # Should never happen
324
+ throw (ArgumentError (" cannot open encoded text files in read and write/append modes at the same time" ))
325
+ end
326
+ s = iswritable (s) ? StringEncoder (s, enc) : StringDecoder (s, enc)
327
+ s. closestream = true
328
+ s
329
+ end
330
+
331
+ """
332
+ open(filename::AbstractString, enc::Encoding[, args...])
333
+
334
+ Open a text file in encoding `enc`, converting its contents to UTF-8 on the fly
335
+ using `StringDecoder` (when reading) or `StringEncoder` (when writing).
336
+ `args` is passed to `open`, so this function can be used as a replacement for all `open`
337
+ variants for working with files.
338
+
339
+ Note that calling `close` on the returned I/O stream will also close the associated file handle;
340
+ this operation is necessary to complete the encoding in write mode. Opening a file for both
341
+ reading and writing/appending is not supported.
342
+
343
+ The returned I/O stream can be passed to functions working on strings without
344
+ specifying the encoding again.
345
+ """
346
+ open (fname:: AbstractString , enc:: Encoding , args... ) = wrap_stream (open (fname, args... ), enc)
347
+
348
+ function open (fname:: AbstractString , enc:: Encoding ,
349
+ rd:: Bool , wr:: Bool , cr:: Bool , tr:: Bool , ff:: Bool )
350
+ if rd && (wr || ff)
351
+ throw (ArgumentError (" cannot open encoded text files in read and write/append modes at the same time" ))
352
+ end
353
+ wrap_stream (open (fname, rd, wr, cr, tr, ff), enc)
354
+ end
355
+
356
+ function open (fname:: AbstractString , enc:: Encoding , mode:: AbstractString )
357
+ if mode in (" r+" , " w+" , " a+" )
358
+ throw (ArgumentError (" cannot open encoded text files in read and write/append modes at the same time" ))
359
+ end
360
+ wrap_stream (open (fname, mode), enc)
361
+ end
362
+
306
363
if isdefined (Base, :readstring )
307
364
@doc """
308
- readstring(stream or filename, enc::Encoding)
365
+ readstring(stream::IO, enc::Encoding)
366
+ readstring(filename::AbstractString, enc::Encoding)
309
367
310
- Read the entire contents of an I/O stream or a file in encoding `enc` as a string .
368
+ Methods to read text in character encoding `enc`.
311
369
""" ->
312
370
Base. readstring (s:: IO , enc:: Encoding ) = readstring (StringDecoder (s, enc))
313
371
Base. readstring (filename:: AbstractString , enc:: Encoding ) = open (io-> readstring (io, enc), filename)
314
372
else # Compatibility with Julia 0.4
315
373
@doc """
316
- readall(stream or filename, enc::Encoding)
374
+ readall(stream::IO, enc::Encoding)
375
+ readall(filename::AbstractString, enc::Encoding)
317
376
318
- Read the entire contents of an I/O stream or a file in encoding `enc` as a string .
377
+ Methods to read text in character encoding `enc`.
319
378
""" ->
320
379
Base. readall (s:: IO , enc:: Encoding ) = readall (StringDecoder (s, enc))
321
380
Base. readall (filename:: AbstractString , enc:: Encoding ) = open (io-> readall (io, enc), filename)
322
381
end
323
382
383
+ """
384
+ readline(stream::IO, enc::Encoding)
385
+ readline(filename::AbstractString, enc::Encoding)
386
+
387
+ Methods to read text in character encoding `enc`.
388
+ """
389
+ readline (s:: IO , enc:: Encoding ) = readline (StringDecoder (s, enc))
390
+ readline (filename:: AbstractString , enc:: Encoding ) = open (io-> readline (io, enc), filename)
391
+
392
+ """
393
+ readlines(stream::IO, enc::Encoding)
394
+ readlines(filename::AbstractString, enc::Encoding)
395
+
396
+ Methods to read text in character encoding `enc`.
397
+ """
398
+ readlines (s:: IO , enc:: Encoding ) = readlines (StringDecoder (s, enc))
399
+ readlines (filename:: AbstractString , enc:: Encoding ) = open (io-> readlines (io, enc), filename)
400
+
401
+ """
402
+ eachline(stream::IO, enc::Encoding)
403
+ eachline(filename::AbstractString, enc::Encoding)
404
+
405
+ Methods to read text in character encoding `enc`. Decoding is performed on the fly.
406
+ """
407
+ eachline (s:: IO , enc:: Encoding ) = eachline (StringDecoder (s, enc))
408
+ function eachline (filename:: AbstractString , enc:: Encoding )
409
+ s = open (filename, enc)
410
+ EachLine (s, ()-> close (s))
411
+ end
412
+
324
413
325
414
# # Functions to encode/decode strings
326
415
@@ -359,7 +448,7 @@ function encode(s::AbstractString, enc::Encoding)
359
448
b = IOBuffer ()
360
449
p = StringEncoder (b, enc, encoding (typeof (s)))
361
450
write (p, s)
362
- close (p)
451
+ flush (p)
363
452
takebuf_array (b)
364
453
end
365
454
0 commit comments