Skip to content

Commit e668dbd

Browse files
committed
Improve documentation of the incremental interface
1 parent aec1d51 commit e668dbd

File tree

1 file changed

+143
-20
lines changed

1 file changed

+143
-20
lines changed

Codec/Compression/Zlib/Internal.hs

Lines changed: 143 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -12,32 +12,37 @@
1212
-----------------------------------------------------------------------------
1313
module Codec.Compression.Zlib.Internal (
1414

15-
-- * Compression
15+
-- * Pure interface
1616
compress,
17-
CompressParams(..),
18-
defaultCompressParams,
19-
20-
-- * Decompression
2117
decompress,
22-
DecompressParams(..),
23-
defaultDecompressParams,
2418

25-
-- * Incremental compression
19+
-- * Monadic incremental interface
20+
-- $incremental-compression
21+
22+
-- ** Using incremental compression
23+
-- $using-incremental-compression
24+
25+
CompressStream(..),
2626
compressST,
2727
compressIO,
28-
CompressStream(..),
2928
foldCompressStream,
3029
foldCompressStreamWithInput,
3130

32-
-- * Incremental decompression
33-
decompressST,
34-
decompressIO,
31+
-- ** Using incremental decompression
32+
-- $using-incremental-decompression
33+
3534
DecompressStream(..),
3635
DecompressError(..),
36+
decompressST,
37+
decompressIO,
3738
foldDecompressStream,
3839
foldDecompressStreamWithInput,
3940

4041
-- * The compression parameter types
42+
CompressParams(..),
43+
defaultCompressParams,
44+
DecompressParams(..),
45+
defaultDecompressParams,
4146
Stream.Format(..),
4247
Stream.gzipFormat,
4348
Stream.zlibFormat,
@@ -156,21 +161,23 @@ defaultCompressBufferSize, defaultDecompressBufferSize :: Int
156161
defaultCompressBufferSize = 16 * 1024 - L.chunkOverhead
157162
defaultDecompressBufferSize = 32 * 1024 - L.chunkOverhead
158163

159-
-- | A sequence of chunks of data produced from decompression.
160-
--
161-
-- The difference from a simple list is that it contains a representation of
162-
-- errors as data rather than as exceptions. This allows you to handle error
163-
-- conditions explicitly.
164+
-- | The unfolding of the decompression process, where you provide a sequence
165+
-- of compressed data chunks as input and receive a sequence of uncompressed
166+
-- data chunks as output. The process is incremental, in that the demand for
167+
-- input and provision of output are interleaved.
164168
--
165169
data DecompressStream m
166170
= DecompressInputRequired (S.ByteString -> m (DecompressStream m))
167171
| DecompressOutputAvailable S.ByteString (m (DecompressStream m))
172+
-- | Includes any trailing unconsumed /input/ data.
168173
| DecompressStreamEnd S.ByteString
169-
-- | An error code and a human readable error message.
174+
-- | An error code
170175
| DecompressStreamError DecompressError
171176

172177
-- | The possible error cases when decompressing a stream.
173178
--
179+
-- This can be 'show'n to give a human readable error message.
180+
--
174181
data DecompressError =
175182
-- | The compressed data stream ended prematurely. This may happen if the
176183
-- input data stream was truncated.
@@ -206,6 +213,11 @@ modprefix = ("Codec.Compression.Zlib: " ++)
206213

207214
instance Exception DecompressError
208215

216+
-- | A fold over the 'DecompressStream' in the given monad.
217+
--
218+
-- One way to look at this is that it runs the stream, using callback functions
219+
-- for the four stream events.
220+
--
209221
foldDecompressStream :: Monad m
210222
=> ((S.ByteString -> m a) -> m a)
211223
-> (S.ByteString -> m a -> m a)
@@ -223,6 +235,15 @@ foldDecompressStream input output end err = fold
223235
fold (DecompressStreamEnd inchunk) = end inchunk
224236
fold (DecompressStreamError derr) = err derr
225237

238+
-- | A variant on 'foldDecompressStream' that is pure rather than operating in a
239+
-- monad and where the input is provided by a lazy 'L.ByteString'. So we only
240+
-- have to deal with the output, end and error parts, making it like a foldr on
241+
-- a list of output chunks.
242+
--
243+
-- For example:
244+
--
245+
-- > toChunks = foldDecompressStreamWithInput (:) (const []) throw
246+
--
226247
foldDecompressStreamWithInput :: (S.ByteString -> a -> a)
227248
-> (L.ByteString -> a)
228249
-> (DecompressError -> a)
@@ -249,11 +270,90 @@ foldDecompressStreamWithInput chunk end err = \s lbs ->
249270
return $ err derr
250271

251272

273+
-- $incremental-compression
274+
-- The pure 'compress' and 'decompress' functions are streaming in the sense
275+
-- that they can produce output without demanding all input, however they need
276+
-- the input data stream as a lazy 'L.ByteString'. Having the input data
277+
-- stream as a lazy 'L.ByteString' often requires using lazy I\/O which is not
278+
-- appropriate in all circumstances.
279+
--
280+
-- For these cases an incremental interface is more appropriate. This interface
281+
-- allows both incremental input and output. Chunks of input data can be
282+
-- supplied one by one (e.g. as they are obtained from an input source like a
283+
-- file or network source). Output is also produced chunk by chunk.
284+
--
285+
-- The incremental input and output is managed via the 'CompressStream' and
286+
-- 'DecompressStream' types. They represent the unfolding of the process of
287+
-- compressing and decompressing. They operate in either the 'ST' or 'IO'
288+
-- monads. They can be lifted into other incremental abstractions like pipes or
289+
-- conduits, or they can be used directly in the following style.
290+
291+
-- $using-incremental-compression
292+
--
293+
-- In a loop:
294+
--
295+
-- * Inspect the status of the stream
296+
--
297+
-- * When it is 'CompressInputRequired' then you should call the action,
298+
-- passing a chunk of input (or 'BS.empty' when no more input is available)
299+
-- to get the next state of the stream and continue the loop.
300+
--
301+
-- * When it is 'CompressOutputAvailable' then do something with the given
302+
-- chunk of output, and call the action to get the next state of the stream
303+
-- and continue the loop.
304+
--
305+
-- * When it is 'CompressStreamEnd' then terminate the loop.
306+
--
307+
-- Note that you cannot stop as soon as you have no more input, you need to
308+
-- carry on until all the output has been collected, i.e. until you get to
309+
-- 'CompressStreamEnd'.
310+
--
311+
-- Here is an example where we get input from one file handle and send the
312+
-- compressed output to another file handle.
313+
--
314+
-- > go :: Handle -> Handle -> CompressStream IO -> IO ()
315+
-- > go inh outh (CompressInputRequired next) = do
316+
-- > inchunk <- BS.hGet inh 4096
317+
-- > go inh outh =<< next inchunk
318+
-- > go inh outh (CompressOutputAvailable outchunk next) = do
319+
-- > BS.hPut outh outchunk
320+
-- > go inh outh =<< next
321+
-- > go _ _ CompressStreamEnd = return ()
322+
--
323+
-- The same can be achieved with 'foldCompressStream':
324+
--
325+
-- > foldCompressStream
326+
-- > (\next -> do inchunk <- BS.hGet inh 4096; next inchunk)
327+
-- > (\outchunk next -> do BS.hPut outh outchunk; next)
328+
-- > (return ())
329+
330+
-- $using-incremental-decompression
331+
--
332+
-- The use of 'DecompressStream' is very similar to 'CompressStream' but with
333+
-- a few differences:
334+
--
335+
-- * There is the extra possibility of a 'DecompressStreamError'
336+
--
337+
-- * There can be extra trailing data after a compressed stream, and the
338+
-- 'DecompressStreamEnd' includes that.
339+
--
340+
-- Otherwise the same loop style applies, and there are fold functions.
341+
342+
-- | The unfolding of the compression process, where you provide a sequence
343+
-- of uncompressed data chunks as input and receive a sequence of compressed
344+
-- data chunks as output. The process is incremental, in that the demand for
345+
-- input and provision of output are interleaved.
346+
--
252347
data CompressStream m
253348
= CompressInputRequired (S.ByteString -> m (CompressStream m))
254349
| CompressOutputAvailable S.ByteString (m (CompressStream m))
255350
| CompressStreamEnd
256351

352+
-- | A fold over the 'CompressStream' in the given monad.
353+
--
354+
-- One way to look at this is that it runs the stream, using callback functions
355+
-- for the three stream events.
356+
--
257357
foldCompressStream :: Monad m
258358
=> ((S.ByteString -> m a) -> m a)
259359
-> (S.ByteString -> m a -> m a)
@@ -270,6 +370,15 @@ foldCompressStream input output end = fold
270370
fold CompressStreamEnd =
271371
end
272372

373+
-- | A variant on 'foldCompressStream' that is pure rather than operating in a
374+
-- monad and where the input is provided by a lazy 'L.ByteString'. So we only
375+
-- have to deal with the output and end parts, making it just like a foldr on a
376+
-- list of output chunks.
377+
--
378+
-- For example:
379+
--
380+
-- > toChunks = foldCompressStreamWithInput (:) []
381+
--
273382
foldCompressStreamWithInput :: (S.ByteString -> a -> a)
274383
-> a
275384
-> (forall s. CompressStream (ST s))
@@ -292,15 +401,22 @@ foldCompressStreamWithInput chunk end = \s lbs ->
292401
return end
293402

294403

295-
-- | Compress a data stream.
404+
-- | Compress a data stream provided as a lazy 'L.ByteString'.
296405
--
297406
-- There are no expected error conditions. All input data streams are valid. It
298407
-- is possible for unexpected errors to occur, such as running out of memory,
299408
-- or finding the wrong version of the zlib C library; these are thrown as
300409
-- exceptions.
301410
--
302411
compress :: Stream.Format -> CompressParams -> L.ByteString -> L.ByteString
412+
413+
-- | Incremental compression in the 'ST' monad. Using 'ST' makes it possible
414+
-- to write pure /lazy/ functions while making use of incremental compression.
415+
--
303416
compressST :: Stream.Format -> CompressParams -> CompressStream (ST s)
417+
418+
-- | Incremental compression in the 'IO' monad.
419+
--
304420
compressIO :: Stream.Format -> CompressParams -> CompressStream IO
305421

306422
compress format params = compressStreamToLBS (compressStream format params)
@@ -412,14 +528,21 @@ compressStream format (CompressParams compLevel method bits memLevel
412528
setDictionary _ = return ()
413529

414530

415-
-- | Decompress a data stream.
531+
-- | Decompress a data stream provided as a lazy 'L.ByteString'.
416532
--
417533
-- It will throw an exception if any error is encountered in the input data.
418534
-- If you need more control over error handling then use one of the incremental
419535
-- versions, 'decompressST' or 'decompressIO'.
420536
--
421537
decompress :: Stream.Format -> DecompressParams -> L.ByteString -> L.ByteString
538+
539+
-- | Incremental decompression in the 'ST' monad. Using 'ST' makes it possible
540+
-- to write pure /lazy/ functions while making use of incremental decompression.
541+
--
422542
decompressST :: Stream.Format -> DecompressParams -> DecompressStream (ST s)
543+
544+
-- | Incremental decompression in the 'IO' monad.
545+
--
423546
decompressIO :: Stream.Format -> DecompressParams -> DecompressStream IO
424547

425548
decompress format params = decompressStreamToLBS (decompressStream format params)

0 commit comments

Comments
 (0)