Skip to content

Commit 0866b3c

Browse files
authored
add expectedsize and minoutsize methods (#14)
* add expectedsize and minoutsize methods * more docs [ci skip] * add size test
1 parent 01709f8 commit 0866b3c

File tree

4 files changed

+67
-8
lines changed

4 files changed

+67
-8
lines changed

docs/src/references.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ TranscodingStreams.CodecIdentity.IdentityStream
3131

3232
```@docs
3333
TranscodingStreams.Codec
34+
TranscodingStreams.expectedsize
35+
TranscodingStreams.minoutsize
3436
TranscodingStreams.initialize
3537
TranscodingStreams.finalize
3638
TranscodingStreams.startproc

src/codec.jl

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,23 +12,40 @@ Transcoding protocol
1212
Transcoding proceeds by calling some functions in a specific way. We call this
1313
"transcoding protocol" and any codec must implement it as described below.
1414
15-
There are four functions for a codec to implement:
15+
There are six functions for a codec to implement:
16+
- `expectedsize`: return the expected size of transcoded data
17+
- `minoutsize`: return the minimum output size of `process`
1618
- `initialize`: initialize the codec
1719
- `finalize`: finalize the codec
1820
- `startproc`: start processing with the codec
1921
- `process`: process data with the codec.
2022
2123
These are defined in the `TranscodingStreams` module and a new codec type must extend
2224
these methods if necessary. Implementing a `process` method is mandatory but
23-
other three are optional. `initialize`, `finalize`, and `startproc` have a
24-
default implementation that does nothing.
25+
others are optional. `expectedsize`, `minoutsize`, `initialize`, `finalize`,
26+
and `startproc` have a default implementation.
2527
2628
Your codec type is denoted by `C` and its object by `codec`.
2729
2830
Errors that occur in these methods are supposed to be unrecoverable and the
2931
stream will go to the panic state. Only `Base.isopen` and `Base.close` are
3032
available in that state.
3133
34+
### `expectedsize`
35+
36+
The `expectedsize(codec::C, input::Memory)::Int` method takes `codec` and
37+
`input`, and returns the expected size of transcoded data. This method will be
38+
used as a hint to determine the size of a data buffer when `transcode` is
39+
called. A good hint will reduce the number of buffer resizes and hence result
40+
in better performance.
41+
42+
### `minoutsize`
43+
44+
The `minoutsize(codec::C, input::Memory)::Int` method takes `codec` and `input`,
45+
and returns the minimum required size of the output memory when `process` is
46+
called. For example, a base64 encoder will write at least four bytes to the
47+
output and hence it is reasonable to return 4 with this method.
48+
3249
### `initialize`
3350
3451
The `initialize(codec::C)::Void` method takes `codec` and returns `nothing`.
@@ -84,10 +101,34 @@ abstract type Codec end
84101
# Methods
85102
# -------
86103

104+
"""
105+
expectedsize(codec::Codec, input::Memory)::Int
106+
107+
Return the expected size of the transcoded `input` with `codec`.
108+
109+
The default method returns `input.size`.
110+
"""
111+
function expectedsize(codec::Codec, input::Memory)::Int
112+
return input.size
113+
end
114+
115+
"""
116+
minoutsize(codec::Codec, input::Memory)::Int
117+
118+
Return the minimum output size to be ensured when calling `process`.
119+
120+
The default method returns `max(1, div(input.size, 4))`.
121+
"""
122+
function minoutsize(codec::Codec, input::Memory)::Int
123+
return max(1, div(input.size, 4))
124+
end
125+
87126
"""
88127
initialize(codec::Codec)::Void
89128
90129
Initialize `codec`.
130+
131+
The default method does nothing.
91132
"""
92133
function initialize(codec::Codec)
93134
return nothing
@@ -97,6 +138,8 @@ end
97138
finalize(codec::Codec)::Void
98139
99140
Finalize `codec`.
141+
142+
The default method does nothing.
100143
"""
101144
function finalize(codec::Codec)::Void
102145
return nothing
@@ -106,6 +149,8 @@ end
106149
startproc(codec::Codec, state::Symbol, error::Error)::Symbol
107150
108151
Start data processing with `codec` of `state`.
152+
153+
The default method does nothing and returns `:ok`.
109154
"""
110155
function startproc(codec::Codec, state::Symbol, error::Error)::Symbol
111156
return :ok
@@ -115,6 +160,8 @@ end
115160
process(codec::Codec, input::Memory, output::Memory, error::Error)::Tuple{Int,Int,Symbol}
116161
117162
Do data processing with `codec`.
163+
164+
There is no default method.
118165
"""
119166
function process(codec::Codec, input::Memory, output::Memory, error::Error)::Tuple{Int,Int,Symbol}
120167
# no default method

src/stream.jl

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,9 @@ julia> String(decompressed)
330330
```
331331
"""
332332
function Base.transcode(codec::Codec, data::Vector{UInt8})
333-
buffer2 = Buffer(length(data))
333+
# Add `minoutsize` because `process` will be called at least two times.
334+
buffer2 = Buffer(
335+
expectedsize(codec, Memory(data)) + minoutsize(codec, Memory(C_NULL, 0)))
334336
mark!(buffer2)
335337
stream = TranscodingStream(codec, DevNull, State(Buffer(data), buffer2))
336338
write(stream, TOKEN_END)
@@ -440,8 +442,9 @@ function process_to_write(stream::TranscodingStream)
440442
end
441443

442444
function call_process(codec::Codec, state::State, inbuf::Buffer, outbuf::Buffer)
443-
makemargin!(outbuf, clamp(div(length(outbuf), 4), 1, DEFAULT_BUFFER_SIZE * 8))
444-
Δin, Δout, state.code = process(codec, buffermem(inbuf), marginmem(outbuf), state.error)
445+
input = buffermem(inbuf)
446+
makemargin!(outbuf, minoutsize(codec, input))
447+
Δin, Δout, state.code = process(codec, input, marginmem(outbuf), state.error)
445448
inbuf.bufferpos += Δin
446449
outbuf.marginpos += Δout
447450
outbuf.total += Δout

test/runtests.jl

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -203,10 +203,11 @@ end
203203

204204
struct QuadrupleCodec <: TranscodingStreams.Codec end
205205

206+
const Memory = TranscodingStreams.Memory
206207
function TranscodingStreams.process(
207208
codec :: QuadrupleCodec,
208-
input :: TranscodingStreams.Memory,
209-
output :: TranscodingStreams.Memory,
209+
input :: Memory,
210+
output :: Memory,
210211
error :: TranscodingStreams.Error)
211212
i = j = 0
212213
while i + 1 ≤ endof(input) && j + 4 ≤ endof(output)
@@ -218,10 +219,16 @@ function TranscodingStreams.process(
218219
return i, j, input.size == 0 ? (:end) : (:ok)
219220
end
220221

222+
TranscodingStreams.expectedsize(::QuadrupleCodec, input::Memory) = input.size * 4
223+
TranscodingStreams.minoutsize(::QuadrupleCodec, ::Memory) = 4
224+
221225
@testset "QuadrupleCodec" begin
222226
@test transcode(QuadrupleCodec(), b"") == b""
223227
@test transcode(QuadrupleCodec(), b"a") == b"aaaa"
224228
@test transcode(QuadrupleCodec(), b"ab") == b"aaaabbbb"
229+
data = "x"^1024
230+
transcode(QuadrupleCodec(), data)
231+
@test (@allocated transcode(QuadrupleCodec(), data)) < sizeof(data) * 5
225232
end
226233

227234
for pkg in ["CodecZlib", "CodecBzip2", "CodecXz", "CodecZstd", "CodecBase"]

0 commit comments

Comments
 (0)