1- # This file is a part of Julia . License is MIT: http://julialang.org/license
1+ # This file is a part of StringEncodings.jl . License is MIT: http://julialang.org/license
22
33module StringEncodings
44import Base: close, eof, flush, read, readall, write, show
@@ -8,6 +8,7 @@ export StringEncoder, StringDecoder, encode, decode, encodings
88export StringEncodingError, OutputBufferError, IConvError
99export InvalidEncodingError, InvalidSequenceError, IncompleteSequenceError
1010
11+ include(" encodings.jl" )
1112
1213abstract StringEncodingError
1314
@@ -62,7 +63,7 @@ function iconv_close(cd::Ptr{Void})
6263 end
6364end
6465
65- function iconv_open(tocode, fromcode)
66+ function iconv_open(tocode:: ASCIIString , fromcode:: ASCIIString )
6667 p = ccall((:iconv_open, libiconv), Ptr{Void}, (Cstring, Cstring), tocode, fromcode)
6768 if p != Ptr{Void}(- 1 )
6869 return p
@@ -173,14 +174,16 @@ end
173174# # StringEncoder
174175
175176"""
176- StringEncoder(istream, to, from="UTF-8")
177+ StringEncoder(ostream, to, from=enc"UTF-8")
177178
178179Returns a new write-only I/O stream, which converts any text in the encoding `from`
179180written to it into text in the encoding `to` written to `ostream`. Calling `close` on the
180181stream is necessary to complete the encoding (but does not close `ostream`).
182+
183+ `to` and `from` can be specified either as a string or as an `Encoding` object.
181184"""
182- function StringEncoder(ostream:: IO , to:: ASCIIString , from:: ASCIIString = " UTF-8" )
183- cd = iconv_open(to, from)
185+ function StringEncoder(ostream:: IO , to:: Encoding , from:: Encoding = enc " UTF-8" )
186+ cd = iconv_open(ASCIIString(to), ASCIIString( from) )
184187 inbuf = Vector{UInt8}(BUFSIZE)
185188 outbuf = Vector{UInt8}(BUFSIZE)
186189 s = StringEncoder(ostream, cd, inbuf, outbuf,
@@ -190,6 +193,11 @@ function StringEncoder(ostream::IO, to::ASCIIString, from::ASCIIString="UTF-8")
190193 s
191194end
192195
# Convenience methods accepting encoding names as plain strings. Each string is
# wrapped in an `Encoding` and the call is forwarded to the method that takes
# `Encoding` objects for both `to` and `from`.
StringEncoder(ostream::IO, to::AbstractString, from::Encoding=enc"UTF-8") =
    StringEncoder(ostream, Encoding(to), from)
StringEncoder(ostream::IO, to::AbstractString, from::AbstractString) =
    StringEncoder(ostream, Encoding(to), Encoding(from))
# Mixed case: `to` is already an `Encoding` while `from` is given as a string.
# Without this method that combination raises a MethodError.
StringEncoder(ostream::IO, to::Encoding, from::AbstractString) =
    StringEncoder(ostream, to, Encoding(from))
200+
193201# Flush input buffer and convert it into output buffer
194202# Returns the number of bytes written to output buffer
195203function flush(s:: StringEncoder )
@@ -226,16 +234,18 @@ end
226234# # StringDecoder
227235
228236"""
229- StringDecoder(istream, from, to= "UTF-8")
237+ StringDecoder(istream, from, to=enc"UTF-8")
230238
231239Returns a new read-only I/O stream, which converts text in the encoding `from`
232240read from `istream` into text in the encoding `to`.
233241
242+ `to` and `from` can be specified either as a string or as an `Encoding` object.
243+
234244Note that some implementations (notably the Windows one) may accept invalid sequences
235245in the input data without raising an error.
236246"""
237- function StringDecoder(istream:: IO , from:: ASCIIString , to:: ASCIIString = " UTF-8" )
238- cd = iconv_open(to, from)
247+ function StringDecoder(istream:: IO , from:: Encoding , to:: Encoding = enc " UTF-8" )
248+ cd = iconv_open(ASCIIString(to), ASCIIString( from) )
239249 inbuf = Vector{UInt8}(BUFSIZE)
240250 outbuf = Vector{UInt8}(BUFSIZE)
241251 s = StringDecoder(istream, cd, inbuf, outbuf,
@@ -245,6 +255,11 @@ function StringDecoder(istream::IO, from::ASCIIString, to::ASCIIString="UTF-8")
245255 s
246256end
247257
# Convenience methods accepting encoding names as plain strings. Each string is
# wrapped in an `Encoding` and the call is forwarded to the method that takes
# `Encoding` objects for both `from` and `to`.
StringDecoder(istream::IO, from::AbstractString, to::Encoding=enc"UTF-8") =
    StringDecoder(istream, Encoding(from), to)
StringDecoder(istream::IO, from::AbstractString, to::AbstractString) =
    StringDecoder(istream, Encoding(from), Encoding(to))
# Mixed case: `from` is already an `Encoding` while `to` is given as a string.
# Without this method that combination raises a MethodError.
StringDecoder(istream::IO, from::Encoding, to::AbstractString) =
    StringDecoder(istream, from, Encoding(to))
262+
248263# Fill input buffer and convert it into output buffer
249264# Returns the number of bytes written to output buffer
250265function fill_buffer!(s:: StringDecoder )
@@ -289,68 +304,67 @@ end
289304# # Convenience I/O functions
if isdefined(Base, :readstring)
    @doc """
        readstring(stream or filename, enc)

    Read the entire contents of an I/O stream or a file in encoding `enc` as a string.

    `enc` can be specified either as a string or as an `Encoding` object.
    """ ->
    Base.readstring(s::IO, enc::Encoding) = readstring(StringDecoder(s, enc))
    Base.readstring(filename::AbstractString, enc::Encoding) = open(io->readstring(io, enc), filename)
    # Keep accepting the encoding name as a plain string, like `decode`/`encode` do:
    # wrap it in an `Encoding` and forward to the methods above.
    Base.readstring(s::IO, enc::AbstractString) = readstring(s, Encoding(enc))
    Base.readstring(filename::AbstractString, enc::AbstractString) = readstring(filename, Encoding(enc))
else # Compatibility with Julia 0.4
    @doc """
        readall(stream or filename, enc)

    Read the entire contents of an I/O stream or a file in encoding `enc` as a string.

    `enc` can be specified either as a string or as an `Encoding` object.
    """ ->
    Base.readall(s::IO, enc::Encoding) = readall(StringDecoder(s, enc))
    Base.readall(filename::AbstractString, enc::Encoding) = open(io->readall(io, enc), filename)
    # Keep accepting the encoding name as a plain string, like `decode`/`encode` do:
    # wrap it in an `Encoding` and forward to the methods above.
    Base.readall(s::IO, enc::AbstractString) = readall(s, Encoding(enc))
    Base.readall(filename::AbstractString, enc::AbstractString) = readall(filename, Encoding(enc))
end
307322
308323
309324# # Functions to encode/decode strings
310325
311- encoding_string(:: Type{ASCIIString} ) = " ASCII"
312- encoding_string(:: Type{UTF8String} ) = " UTF-8"
313- encoding_string(:: Type{UTF16String} ) = (ENDIAN_BOM == 0x04030201 ) ? " UTF-16LE" : " UTF-16BE"
314- encoding_string(:: Type{UTF32String} ) = (ENDIAN_BOM == 0x04030201 ) ? " UTF-32LE" : " UTF-32BE"
315-
316326"""
317- decode([T,] a::Vector{UInt8}, enc::ASCIIString )
327+ decode([T,] a::Vector{UInt8}, enc)
318328
319329Convert an array of bytes `a` representing text in encoding `enc` to a string of type `T`.
320330By default, a `UTF8String` is returned.
321331
332+ `enc` can be specified either as a string or as an `Encoding` object.
333+
322334Note that some implementations (notably the Windows one) may accept invalid sequences
323335in the input data without raising an error.
324336"""
function decode{T<:AbstractString}(::Type{T}, a::Vector{UInt8}, enc::Encoding)
    # Wrap the bytes in an in-memory stream so they can be read through a StringDecoder
    b = IOBuffer(a)
    try
        # Decode from `enc` to `encoding(T)` and build a `T` from the resulting bytes.
        # NOTE(review): `encoding` is defined outside this view; presumably it yields
        # the encoding matching string type `T` -- confirm in encodings.jl.
        T(readbytes(StringDecoder(b, enc, encoding(T))))
    finally
        # Close the buffer whether or not the conversion succeeded
        close(b)
    end
end

# Encoding given as a string: wrap it in an `Encoding` and delegate
decode{T<:AbstractString}(::Type{T}, a::Vector{UInt8}, enc::AbstractString) =
    decode(T, a, Encoding(enc))

# No explicit string type: return a `UTF8String`. The two methods are disjoint,
# so no call is covered by more than one definition.
decode(a::Vector{UInt8}, enc::Encoding) = decode(UTF8String, a, enc)
decode(a::Vector{UInt8}, enc::AbstractString) = decode(a, Encoding(enc))
335350
336351"""
337- encode(s::AbstractString, enc::ASCIIString )
352+ encode(s::AbstractString, enc)
338353
339354Convert string `s` to an array of bytes representing text in encoding `enc`.
355+ `enc` can be specified either as a string or as an `Encoding` object.
340356"""
function encode(s::AbstractString, enc::Encoding)
    buf = IOBuffer()
    # Source encoding is derived from the type of `s`; target encoding is `enc`
    encoder = StringEncoder(buf, enc, encoding(typeof(s)))
    write(encoder, s)
    # Closing the encoder completes the conversion before the bytes are collected
    close(encoder)
    return takebuf_array(buf)
end

# Encoding given as a string: wrap it in an `Encoding` and delegate
encode(s::AbstractString, enc::AbstractString) = encode(s, Encoding(enc))
349366
350- # # Function to list supported encodings
351- include(" encodings.jl" )
352-
353- function test_encoding(enc)
367+ function test_encoding(enc:: ASCIIString )
354368 # We assume that an encoding is supported if it's possible to convert from it to UTF-8:
355369 cd = ccall((:iconv_open, libiconv), Ptr{Void}, (Cstring, Cstring), enc, " UTF-8" )
356370 if cd == Ptr{Void}(- 1 )
0 commit comments