From db3f4f33bb84eef16c32e9f75cf4acb6adcafedc Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Fri, 12 Sep 2025 22:49:49 +0530 Subject: [PATCH] Update the module, function docs, README --- README.md | 28 +++++++++++++----- src/Streamly/Compat/Text.hs | 25 ++++++++++++---- src/Streamly/Compat/Text/Lazy.hs | 50 ++++++++++++++++++++++---------- 3 files changed, 76 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index f4e8985..aaeb742 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,26 @@ # streamly-text -Library for streamly and text interoperation. +Efficient interoperability between +[streamly](https://hackage.haskell.org/package/streamly) arrays and +[text](https://hackage.haskell.org/package/text). -This library is to enable interoperation of streamly with existing code that -uses `Text`. +The strict `Text` type is equivalent to UTF-8 encoded `Array Word8` in Streamly +and lazy `Text` type is equivalent to a stream of `Array Word8`. -The package provides APIs to interconvert between strict `Text` and streamly -`Array Word8` and between lazy `Text` and stream of `Array Word8`. +A `Char` stream can be converted to UTF-8 encoded `Word8` stream using +`encodeUtf8` from `Streamly.Unicode.Stream` which in turn can be written as +`Array Word8`, and a stream of UTF-8 encoded `Word8` or `Array Word8` can be +decoded using `decodeUtf8` or `decodeUtf8Chunks`. -The interconversion in the case of strict `Text` and streamly `Array Word8` has -no overhead. +This library provides zero-overhead and streaming conversions between +the `Text` type and `streamly` Array types, making it easier to use +Array and Array stream based functions on `Text`. + +## Features + +- **Strict `Text` ↔ `Array Word8`** + Convert between strict `Text` and `streamly`’s `Word8` stream or + `Array Word8` without any overhead. + +- **Lazy `Text` ↔ Stream of `Array Word8`** + Convert between lazy `Text` and a stream of `Array Word8`. 
diff --git a/src/Streamly/Compat/Text.hs b/src/Streamly/Compat/Text.hs index 7ddca95..7945ef9 100644 --- a/src/Streamly/Compat/Text.hs +++ b/src/Streamly/Compat/Text.hs @@ -3,14 +3,29 @@ {-# LANGUAGE MagicHash #-} {-# LANGUAGE BangPatterns #-} +-- | Efficient interoperability between +-- [streamly](https://hackage.haskell.org/package/streamly) arrays and +-- [text](https://hackage.haskell.org/package/text). +-- +-- The strict 'Text' type is equivalent to a UTF-8 encoded 'Array' 'Word8' in +-- streamly. A 'Char' stream can be converted to a UTF-8 encoded 'Word8' stream +-- using 'Streamly.Unicode.Stream.encodeUtf8', which in turn can +-- be written as 'Array' 'Word8'. A stream of UTF-8 encoded 'Word8' or +-- 'Array' 'Word8' can be decoded using 'Streamly.Unicode.Stream.decodeUtf8' or +-- 'Streamly.Unicode.Stream.decodeUtf8Chunks', respectively. +-- +-- This module provides zero-overhead conversion between strict 'Text' +-- and streamly’s 'Word8' streams or 'Array' 'Word8'. + module Streamly.Compat.Text - ( toArray - , unsafeFromArray + ( + -- * Construction + unsafeFromArray + , unsafeCreate + -- * Elimination + , toArray , reader - - -- , unsafeCreateOf - , unsafeCreate ) where diff --git a/src/Streamly/Compat/Text/Lazy.hs b/src/Streamly/Compat/Text/Lazy.hs index cf4995f..c9cd708 100644 --- a/src/Streamly/Compat/Text/Lazy.hs +++ b/src/Streamly/Compat/Text/Lazy.hs @@ -1,12 +1,29 @@ {-# LANGUAGE CPP #-} +-- | Efficient interoperability between +-- [streamly](https://hackage.haskell.org/package/streamly) arrays and +-- [text](https://hackage.haskell.org/package/text). +-- +-- The lazy 'Text' type is equivalent to a UTF-8 encoded stream of 'Array +-- Word8' in streamly. A 'Char' stream can be converted to a UTF-8 encoded +-- 'Word8' stream using 'Streamly.Unicode.Stream.encodeUtf8', which in turn can +-- be written as 'Array' 'Word8'. A stream of UTF-8 encoded 'Word8' or 'Array' +-- 'Word8' can be decoded using 'Streamly.Unicode.Stream.decodeUtf8' or +-- 'Streamly.Unicode.Stream.decodeUtf8Chunks', respectively. +-- +-- This module provides zero-overhead conversion between lazy 'Text' and +-- streamly’s 'Array Word8' or 'Word8' streams. 
+ module Streamly.Compat.Text.Lazy - ( chunkReader - , reader + ( + -- * Construction + unsafeFromChunksIO + , unsafeFromChunks + -- * Elimination + , reader , toChunks - , unsafeFromChunks - , unsafeFromChunksIO + , chunkReader ) where @@ -33,7 +50,7 @@ import Prelude hiding (read) #define UNFOLD_EACH Unfold.many #endif --- | Unfold a lazy 'Text' to a stream of 'Array' 'Words'. +-- | Unfold a lazy 'Text' to a stream of 'Array Word8'. {-# INLINE chunkReader #-} chunkReader :: Monad m => Unfold m Text (Array Word8) chunkReader = Unfold step seed @@ -42,26 +59,25 @@ chunkReader = Unfold step seed step (Chunk bs bl) = return $ Yield (Strict.toArray bs) bl step Empty = return Stop --- | Unfold a lazy 'Text' to a stream of Word8 +-- | Unfold a lazy 'Text' to a stream of 'Word8'. {-# INLINE reader #-} reader :: Monad m => Unfold m Text Word8 reader = UNFOLD_EACH Array.reader chunkReader -- XXX Should this be called readChunks? --- | Convert a lazy 'Text' to a serial stream of 'Array' 'Word8'. +-- | Convert a lazy 'Text' to a stream of 'Array Word8'. {-# INLINE toChunks #-} toChunks :: Monad m => Text -> Stream m (Array Word8) toChunks = Stream.unfold chunkReader --- | Convert a serial stream of 'Array' 'Word8' to a lazy 'Text'. +-- | IMPORTANT NOTE: This function is lazy only for lazy monads (e.g. +-- Identity). For strict monads (e.g. /IO/) it consumes the entire input before +-- generating the output. For /IO/ monad use 'unsafeFromChunksIO' instead. -- --- This function is unsafe: the caller must ensure that each 'Array' 'Word8' --- element in the stream is a valid UTF-8 encoding. +-- Convert a stream of 'Array' 'Word8' to a lazy 'Text'. -- --- IMPORTANT NOTE: This function is lazy only for lazy monads --- (e.g. Identity). For strict monads (e.g. /IO/) it consumes the entire input --- before generating the output. For /IO/ monad please use unsafeFromChunksIO --- instead. 
+-- Unsafe because the caller must ensure that each 'Array Word8' +-- in the stream is UTF-8 encoded and terminates at Char boundary. -- -- For strict monads like /IO/ you could create a newtype wrapper to make the -- monad bind operation lazy and lift the stream to that type using hoist, then @@ -80,6 +96,7 @@ toChunks = Stream.unfold chunkReader -- @ -- -- /unsafeFromChunks/ can then be used as, +-- -- @ -- {-# INLINE unsafeFromChunksIO #-} -- unsafeFromChunksIO :: Stream IO (Array Word8) -> IO Text @@ -89,8 +106,11 @@ toChunks = Stream.unfold chunkReader unsafeFromChunks :: Monad m => Stream m (Array Word8) -> m Text unsafeFromChunks = Stream.foldr chunk Empty . fmap Strict.unsafeFromArray --- | Convert a serial stream of 'Array' 'Word8' to a lazy 'Text' in the +-- | Convert a stream of 'Array Word8' to a lazy 'Text' in the -- /IO/ monad. +-- +-- Unsafe because the caller must ensure that each 'Array Word8' +-- in the stream is UTF-8 encoded and terminates at Char boundary. {-# INLINE unsafeFromChunksIO #-} unsafeFromChunksIO :: Stream IO (Array Word8) -> IO Text unsafeFromChunksIO =