Improve documentation of the incremental interface

dcoutts · dcoutts · commit e668dbd0f109 · 2014-11-23T18:45:46.000Z
diff --git a/Codec/Compression/Zlib/Internal.hs b/Codec/Compression/Zlib/Internal.hs
@@ -12,32 +12,37 @@
 -----------------------------------------------------------------------------
 module Codec.Compression.Zlib.Internal (
 
-  -- * Compression
+  -- * Pure interface
   compress,
-  CompressParams(..),
-  defaultCompressParams,
-
-  -- * Decompression
   decompress,
-  DecompressParams(..),
-  defaultDecompressParams,
 
-  -- * Incremental compression
+  -- * Monadic incremental interface
+  -- $incremental-compression
+
+  -- ** Using incremental compression
+  -- $using-incremental-compression
+
+  CompressStream(..),
   compressST,
   compressIO,
-  CompressStream(..),
   foldCompressStream,
   foldCompressStreamWithInput,
 
-  -- * Incremental decompression
-  decompressST,
-  decompressIO,
+  -- ** Using incremental decompression
+  -- $using-incremental-decompression
+
   DecompressStream(..),
   DecompressError(..),
+  decompressST,
+  decompressIO,
   foldDecompressStream,
   foldDecompressStreamWithInput,
 
   -- * The compression parameter types
+  CompressParams(..),
+  defaultCompressParams,
+  DecompressParams(..),
+  defaultDecompressParams,
   Stream.Format(..),
     Stream.gzipFormat,
     Stream.zlibFormat,
@@ -156,21 +161,23 @@ defaultCompressBufferSize, defaultDecompressBufferSize :: Int
 defaultCompressBufferSize   = 16 * 1024 - L.chunkOverhead
 defaultDecompressBufferSize = 32 * 1024 - L.chunkOverhead
 
--- | A sequence of chunks of data produced from decompression.
---
--- The difference from a simple list is that it contains a representation of
--- errors as data rather than as exceptions. This allows you to handle error
--- conditions explicitly.
+-- | The unfolding of the decompression process, where you provide a sequence
+-- of compressed data chunks as input and receive a sequence of uncompressed
+-- data chunks as output. The process is incremental, in that the demand for
+-- input and provision of output are interleaved.
 --
 data DecompressStream m
    = DecompressInputRequired  (S.ByteString -> m (DecompressStream m))
    | DecompressOutputAvailable S.ByteString (m (DecompressStream m))
+   -- | Includes any trailing unconsumed /input/ data.
    | DecompressStreamEnd S.ByteString
-   -- | An error code and a human readable error message.
+   -- | An error code
    | DecompressStreamError DecompressError
 
 -- | The possible error cases when decompressing a stream.
 --
+-- This can be 'show'n to give a human readable error message.
+--
 data DecompressError =
      -- | The compressed data stream ended prematurely. This may happen if the
      -- input data stream was truncated.
@@ -206,6 +213,11 @@ modprefix = ("Codec.Compression.Zlib: " ++)
 
 instance Exception DecompressError
 
+-- | A fold over the 'DecompressStream' in the given monad.
+--
+-- One way to look at this is that it runs the stream, using callback functions
+-- for the four stream events.
+--
 foldDecompressStream :: Monad m
                      => ((S.ByteString -> m a) -> m a)
                      -> (S.ByteString -> m a -> m a)
@@ -223,6 +235,15 @@ foldDecompressStream input output end err = fold
     fold (DecompressStreamEnd inchunk) = end inchunk
     fold (DecompressStreamError derr)  = err derr
 
+-- | A variant on 'foldCompressStream' that is pure rather than operating in a
+-- monad and where the input is provided by a lazy 'L.ByteString'. So we only
+-- have to deal with the output, end and error parts, making it like a foldr on
+-- a list of output chunks.
+--
+-- For example:
+--
+-- > toChunks = foldDecompressStreamWithInput (:) [] throw
+--
 foldDecompressStreamWithInput :: (S.ByteString -> a -> a)
                               -> (L.ByteString -> a)
                               -> (DecompressError -> a)
@@ -249,11 +270,90 @@ foldDecompressStreamWithInput chunk end err = \s lbs ->
       return $ err derr
 
 
+-- $incremental-compression
+-- The pure 'compress' and 'decompress' functions are streaming in the sense
+-- that they can produce output without demanding all input, however they need
+-- the input data stream as a lazy 'L.ByteString'. Having the input data
+-- stream as a lazy 'L.ByteString' often requires using lazy I\/O which is not
+-- appropriate in all cicumstances.
+--
+-- For these cases an incremental interface is more appropriate. This interface
+-- allows both incremental input and output. Chunks of input data to be
+-- supplied one by one (e.g. as they are obtained from an input source like a
+-- file or network source). Output is also produced chunk by chunk.
+--
+-- The incremental input and output is managed via the 'CompressStream' and
+-- 'DecompressStream' types. They represents the unfolding of the process of
+-- compressing and decompressing. They operates in either the 'ST' or 'IO'
+-- monads. They can be lifted into other incremental abstractions like pipes or
+-- conduits, or they can be used directly in the following style.
+
+-- $using-incremental-compression
+--
+-- In a loop:
+--
+--  * Inspect the status of the stream
+--
+--  * When it is 'CompressInputRequired' then you should call the action,
+--    passing a chunk of input (or 'BS.empty' when no more input is available)
+--    to get the next state of the stream and continue the loop.
+--
+--  * When it is 'CompressOutputAvailable' then do something with the given
+--    chunk of output, and call the action to get the next state of the stream
+--    and continue the loop.
+--
+--  * When it is 'CompressStreamEnd' then terminate the loop.
+--
+-- Note that you cannot stop as soon as you have no more input, you need to
+-- carry on until all the output has been collected, i.e. until you get to
+-- 'CompressStreamEnd'.
+--
+-- Here is an example where we get input from one file handle and send the
+-- compressed output to another file handle.
+--
+-- > go :: Handle -> Handle -> CompressStream IO -> IO ()
+-- > go inh outh (CompressInputRequired next) = do
+-- >    inchunk <- BS.hGet inh 4096
+-- >    go inh outh =<< next inchunk
+-- > go inh outh (CompressOutputAvailable outchunk next) =
+-- >    BS.hPut outh outchunk
+-- >    go inh outh =<< next
+-- > go _ _ CompressStreamEnd = return ()
+--
+-- The same can be achieved with 'foldCompressStream':
+--
+-- > foldCompressStream
+-- >   (\next -> do inchunk <- BS.hGet inh 4096; next inchunk)
+-- >   (\outchunk next -> do BS.hPut outh outchunk; next)
+-- >   (return ())
+
+-- $using-incremental-decompression
+--
+-- The use of 'DecompressStream' is very similar to 'CompressStream' but with
+-- a few differences:
+--
+-- * There is the extra possibility of a 'DecompressStreamError'
+--
+-- * There can be extra trailing data after a compressed stream, and the
+--   'DecompressStreamEnd' includes that.
+--
+-- Otherwise the same loop style applies, and there are fold functions.
+
+-- | The unfolding of the compression process, where you provide a sequence
+-- of uncompressed data chunks as input and receive a sequence of compressed
+-- data chunks as output. The process is incremental, in that the demand for
+-- input and provision of output are interleaved.
+--
 data CompressStream m
    = CompressInputRequired  (S.ByteString -> m (CompressStream m))
    | CompressOutputAvailable S.ByteString (m (CompressStream m))
    | CompressStreamEnd
 
+-- | A fold over the 'CompressStream' in the given monad.
+--
+-- One way to look at this is that it runs the stream, using callback functions
+-- for the three stream events.
+--
 foldCompressStream :: Monad m
                    => ((S.ByteString -> m a) -> m a)
                    -> (S.ByteString -> m a -> m a)
@@ -270,6 +370,15 @@ foldCompressStream input output end = fold
     fold CompressStreamEnd =
       end
 
+-- | A variant on 'foldCompressStream' that is pure rather than operating in a
+-- monad and where the input is provided by a lazy 'L.ByteString'. So we only
+-- have to deal with the output and end parts, making it just like a foldr on a
+-- list of output chunks.
+--
+-- For example:
+--
+-- > toChunks = foldCompressStreamWithInput (:) []
+--
 foldCompressStreamWithInput :: (S.ByteString -> a -> a)
                             -> a
                             -> (forall s. CompressStream (ST s))
@@ -292,15 +401,22 @@ foldCompressStreamWithInput chunk end = \s lbs ->
       return end
 
 
--- | Compress a data stream.
+-- | Compress a data stream provided as a lazy 'L.ByteString'.
 --
 -- There are no expected error conditions. All input data streams are valid. It
 -- is possible for unexpected errors to occur, such as running out of memory,
 -- or finding the wrong version of the zlib C library, these are thrown as
 -- exceptions.
 --
 compress   :: Stream.Format -> CompressParams -> L.ByteString -> L.ByteString
+
+-- | Incremental compression in the 'ST' monad. Using 'ST' makes it possible
+-- to write pure /lazy/ functions while making use of incremental compression.
+--
 compressST :: Stream.Format -> CompressParams -> CompressStream (ST s)
+
+-- | Incremental compression in the 'IO' monad.
+--
 compressIO :: Stream.Format -> CompressParams -> CompressStream IO
 
 compress   format params = compressStreamToLBS (compressStream format params)
@@ -412,14 +528,21 @@ compressStream format (CompressParams compLevel method bits memLevel
   setDictionary _ = return ()
 
 
--- | Decompress a data stream.
+-- | Decompress a data stream provided as a lazy 'L.ByteString'.
 --
 -- It will throw an exception if any error is encountered in the input data.
 -- If you need more control over error handling then use one the incremental
 -- versions, 'decompressST' or 'decompressIO'.
 --
 decompress   :: Stream.Format -> DecompressParams -> L.ByteString -> L.ByteString
+
+-- | Incremental decompression in the 'ST' monad. Using 'ST' makes it possible
+-- to write pure /lazy/ functions while making use of incremental decompression.
+--
 decompressST :: Stream.Format -> DecompressParams -> DecompressStream (ST s)
+
+-- | Incremental decompression in the 'IO' monad.
+--
 decompressIO :: Stream.Format -> DecompressParams -> DecompressStream IO
 
 decompress   format params = decompressStreamToLBS (decompressStream format params)