12
12
-----------------------------------------------------------------------------
13
13
module Codec.Compression.Zlib.Internal (
14
14
15
- -- * Compression
15
+ -- * Pure interface
16
16
compress ,
17
- CompressParams (.. ),
18
- defaultCompressParams ,
19
-
20
- -- * Decompression
21
17
decompress ,
22
- DecompressParams (.. ),
23
- defaultDecompressParams ,
24
18
25
- -- * Incremental compression
19
+ -- * Monadic incremental interface
20
+ -- $incremental-compression
21
+
22
+ -- ** Using incremental compression
23
+ -- $using-incremental-compression
24
+
25
+ CompressStream (.. ),
26
26
compressST ,
27
27
compressIO ,
28
- CompressStream (.. ),
29
28
foldCompressStream ,
30
29
foldCompressStreamWithInput ,
31
30
32
- -- * Incremental decompression
33
- decompressST ,
34
- decompressIO ,
31
+ -- ** Using incremental decompression
32
+ -- $using-incremental-decompression
33
+
35
34
DecompressStream (.. ),
36
35
DecompressError (.. ),
36
+ decompressST ,
37
+ decompressIO ,
37
38
foldDecompressStream ,
38
39
foldDecompressStreamWithInput ,
39
40
40
41
-- * The compression parameter types
42
+ CompressParams (.. ),
43
+ defaultCompressParams ,
44
+ DecompressParams (.. ),
45
+ defaultDecompressParams ,
41
46
Stream. Format (.. ),
42
47
Stream. gzipFormat ,
43
48
Stream. zlibFormat ,
@@ -156,21 +161,23 @@ defaultCompressBufferSize, defaultDecompressBufferSize :: Int
156
161
defaultCompressBufferSize = 16 * 1024 - L. chunkOverhead
157
162
defaultDecompressBufferSize = 32 * 1024 - L. chunkOverhead
158
163
159
- -- | A sequence of chunks of data produced from decompression.
160
- --
161
- -- The difference from a simple list is that it contains a representation of
162
- -- errors as data rather than as exceptions. This allows you to handle error
163
- -- conditions explicitly.
164
+ -- | The unfolding of the decompression process, where you provide a sequence
165
+ -- of compressed data chunks as input and receive a sequence of uncompressed
166
+ -- data chunks as output. The process is incremental, in that the demand for
167
+ -- input and provision of output are interleaved.
164
168
--
165
169
data DecompressStream m
166
170
= DecompressInputRequired (S. ByteString -> m (DecompressStream m ))
167
171
| DecompressOutputAvailable S. ByteString (m (DecompressStream m ))
172
+ -- | Includes any trailing unconsumed /input/ data.
168
173
| DecompressStreamEnd S. ByteString
169
- -- | An error code and a human readable error message.
174
+ -- | An error code
170
175
| DecompressStreamError DecompressError
171
176
172
177
-- | The possible error cases when decompressing a stream.
173
178
--
179
+ -- This can be 'show'n to give a human readable error message.
180
+ --
174
181
data DecompressError =
175
182
-- | The compressed data stream ended prematurely. This may happen if the
176
183
-- input data stream was truncated.
@@ -206,6 +213,11 @@ modprefix = ("Codec.Compression.Zlib: " ++)
206
213
207
214
instance Exception DecompressError
208
215
216
+ -- | A fold over the 'DecompressStream' in the given monad.
217
+ --
218
+ -- One way to look at this is that it runs the stream, using callback functions
219
+ -- for the four stream events.
220
+ --
209
221
foldDecompressStream :: Monad m
210
222
=> ((S. ByteString -> m a ) -> m a )
211
223
-> (S. ByteString -> m a -> m a )
@@ -223,6 +235,15 @@ foldDecompressStream input output end err = fold
223
235
fold (DecompressStreamEnd inchunk) = end inchunk
224
236
fold (DecompressStreamError derr) = err derr
225
237
238
+ -- | A variant on 'foldDecompressStream' that is pure rather than operating in a
239
+ -- monad and where the input is provided by a lazy 'L.ByteString'. So we only
240
+ -- have to deal with the output, end and error parts, making it like a foldr on
241
+ -- a list of output chunks.
242
+ --
243
+ -- For example:
244
+ --
245
+ -- > toChunks = foldDecompressStreamWithInput (:) (const []) throw
246
+ --
226
247
foldDecompressStreamWithInput :: (S. ByteString -> a -> a )
227
248
-> (L. ByteString -> a )
228
249
-> (DecompressError -> a )
@@ -249,11 +270,90 @@ foldDecompressStreamWithInput chunk end err = \s lbs ->
249
270
return $ err derr
250
271
251
272
273
+ -- $incremental-compression
274
+ -- The pure 'compress' and 'decompress' functions are streaming in the sense
275
+ -- that they can produce output without demanding all input, however they need
276
+ -- the input data stream as a lazy 'L.ByteString'. Having the input data
277
+ -- stream as a lazy 'L.ByteString' often requires using lazy I\/O which is not
278
+ -- appropriate in all circumstances.
279
+ --
280
+ -- For these cases an incremental interface is more appropriate. This interface
281
+ -- allows both incremental input and output. Chunks of input data can be
282
+ -- supplied one by one (e.g. as they are obtained from an input source like a
283
+ -- file or network source). Output is also produced chunk by chunk.
284
+ --
285
+ -- The incremental input and output is managed via the 'CompressStream' and
286
+ -- 'DecompressStream' types. They represent the unfolding of the process of
287
+ -- compressing and decompressing. They operate in either the 'ST' or 'IO'
288
+ -- monads. They can be lifted into other incremental abstractions like pipes or
289
+ -- conduits, or they can be used directly in the following style.
290
+
291
+ -- $using-incremental-compression
292
+ --
293
+ -- In a loop:
294
+ --
295
+ -- * Inspect the status of the stream
296
+ --
297
+ -- * When it is 'CompressInputRequired' then you should call the action,
298
+ -- passing a chunk of input (or 'BS.empty' when no more input is available)
299
+ -- to get the next state of the stream and continue the loop.
300
+ --
301
+ -- * When it is 'CompressOutputAvailable' then do something with the given
302
+ -- chunk of output, and call the action to get the next state of the stream
303
+ -- and continue the loop.
304
+ --
305
+ -- * When it is 'CompressStreamEnd' then terminate the loop.
306
+ --
307
+ -- Note that you cannot stop as soon as you have no more input, you need to
308
+ -- carry on until all the output has been collected, i.e. until you get to
309
+ -- 'CompressStreamEnd'.
310
+ --
311
+ -- Here is an example where we get input from one file handle and send the
312
+ -- compressed output to another file handle.
313
+ --
314
+ -- > go :: Handle -> Handle -> CompressStream IO -> IO ()
315
+ -- > go inh outh (CompressInputRequired next) = do
316
+ -- > inchunk <- BS.hGet inh 4096
317
+ -- > go inh outh =<< next inchunk
318
+ -- > go inh outh (CompressOutputAvailable outchunk next) = do
319
+ -- > BS.hPut outh outchunk
320
+ -- > go inh outh =<< next
321
+ -- > go _ _ CompressStreamEnd = return ()
322
+ --
323
+ -- The same can be achieved with 'foldCompressStream':
324
+ --
325
+ -- > foldCompressStream
326
+ -- > (\next -> do inchunk <- BS.hGet inh 4096; next inchunk)
327
+ -- > (\outchunk next -> do BS.hPut outh outchunk; next)
328
+ -- > (return ())
329
+
330
+ -- $using-incremental-decompression
331
+ --
332
+ -- The use of 'DecompressStream' is very similar to 'CompressStream' but with
333
+ -- a few differences:
334
+ --
335
+ -- * There is the extra possibility of a 'DecompressStreamError'
336
+ --
337
+ -- * There can be extra trailing data after a compressed stream, and the
338
+ -- 'DecompressStreamEnd' includes that.
339
+ --
340
+ -- Otherwise the same loop style applies, and there are fold functions.
341
+
342
+ -- | The unfolding of the compression process, where you provide a sequence
343
+ -- of uncompressed data chunks as input and receive a sequence of compressed
344
+ -- data chunks as output. The process is incremental, in that the demand for
345
+ -- input and provision of output are interleaved.
346
+ --
252
347
data CompressStream m
253
348
= CompressInputRequired (S. ByteString -> m (CompressStream m ))
254
349
| CompressOutputAvailable S. ByteString (m (CompressStream m ))
255
350
| CompressStreamEnd
256
351
352
+ -- | A fold over the 'CompressStream' in the given monad.
353
+ --
354
+ -- One way to look at this is that it runs the stream, using callback functions
355
+ -- for the three stream events.
356
+ --
257
357
foldCompressStream :: Monad m
258
358
=> ((S. ByteString -> m a ) -> m a )
259
359
-> (S. ByteString -> m a -> m a )
@@ -270,6 +370,15 @@ foldCompressStream input output end = fold
270
370
fold CompressStreamEnd =
271
371
end
272
372
373
+ -- | A variant on 'foldCompressStream' that is pure rather than operating in a
374
+ -- monad and where the input is provided by a lazy 'L.ByteString'. So we only
375
+ -- have to deal with the output and end parts, making it just like a foldr on a
376
+ -- list of output chunks.
377
+ --
378
+ -- For example:
379
+ --
380
+ -- > toChunks = foldCompressStreamWithInput (:) []
381
+ --
273
382
foldCompressStreamWithInput :: (S. ByteString -> a -> a )
274
383
-> a
275
384
-> (forall s . CompressStream (ST s ))
@@ -292,15 +401,22 @@ foldCompressStreamWithInput chunk end = \s lbs ->
292
401
return end
293
402
294
403
295
- -- | Compress a data stream.
404
+ -- | Compress a data stream provided as a lazy 'L.ByteString' .
296
405
--
297
406
-- There are no expected error conditions. All input data streams are valid. It
298
407
-- is possible for unexpected errors to occur, such as running out of memory,
299
408
-- or finding the wrong version of the zlib C library, these are thrown as
300
409
-- exceptions.
301
410
--
302
411
compress :: Stream. Format -> CompressParams -> L. ByteString -> L. ByteString
412
+
413
+ -- | Incremental compression in the 'ST' monad. Using 'ST' makes it possible
414
+ -- to write pure /lazy/ functions while making use of incremental compression.
415
+ --
303
416
compressST :: Stream. Format -> CompressParams -> CompressStream (ST s )
417
+
418
+ -- | Incremental compression in the 'IO' monad.
419
+ --
304
420
compressIO :: Stream. Format -> CompressParams -> CompressStream IO
305
421
306
422
compress format params = compressStreamToLBS (compressStream format params)
@@ -412,14 +528,21 @@ compressStream format (CompressParams compLevel method bits memLevel
412
528
setDictionary _ = return ()
413
529
414
530
415
- -- | Decompress a data stream.
531
+ -- | Decompress a data stream provided as a lazy 'L.ByteString' .
416
532
--
417
533
-- It will throw an exception if any error is encountered in the input data.
418
534
-- If you need more control over error handling then use one of the incremental
419
535
-- versions, 'decompressST' or 'decompressIO'.
420
536
--
421
537
decompress :: Stream. Format -> DecompressParams -> L. ByteString -> L. ByteString
538
+
539
+ -- | Incremental decompression in the 'ST' monad. Using 'ST' makes it possible
540
+ -- to write pure /lazy/ functions while making use of incremental decompression.
541
+ --
422
542
decompressST :: Stream. Format -> DecompressParams -> DecompressStream (ST s )
543
+
544
+ -- | Incremental decompression in the 'IO' monad.
545
+ --
423
546
decompressIO :: Stream. Format -> DecompressParams -> DecompressStream IO
424
547
425
548
decompress format params = decompressStreamToLBS (decompressStream format params)
0 commit comments