@@ -101,13 +101,35 @@ readChunkEof :: Handle__ -> CharBuffer -> IO (Text, Bool)
101101readChunkEof hh buf = do t <- readChunk hh buf
102102 return (t, False )
103103
104- -- | /Experimental./ Read a single chunk of strict text from a
104+ -- | Read a single chunk of strict text from a
105105-- 'Handle'. The size of the chunk depends on the amount of input
106106-- currently buffered.
107107--
108108-- This function blocks only if there is no data available, and EOF
109109-- has not yet been reached. Once EOF is reached, this function
110110-- returns an empty 'Text' instead of throwing an exception.
111+ --
112+ -- == Behavior
113+ --
114+ -- Unlike byte-oriented functions, 'hGetChunk' operates on complete UTF-8
115+ -- characters. Since UTF-8 characters can occupy 1 to 4 bytes, this function
116+ -- cannot guarantee reading an exact number of bytes. Instead, it reads
117+ -- complete characters up to the handle's internal buffer limit.
118+ --
119+ -- == Buffer Size
120+ --
121+ -- The maximum chunk size is determined by the handle's internal character
122+ -- buffer, which is set to 2048 characters (not bytes) by the GHC runtime
123+ -- constant @dEFAULT_CHAR_BUFFER_SIZE@. This buffer size cannot be modified
124+ -- through any public API.
125+ --
126+ -- == UTF-8 Considerations
127+ --
128+ -- When working with UTF-8 encoded text:
129+ --
130+ -- * The function will never return a partial character
131+ -- * The number of bytes consumed varies with the characters read: in UTF-8,
132+ -- an ASCII character occupies 1 byte and any other character 2-4 bytes
111133hGetChunk :: Handle -> IO Text
112134hGetChunk h = wantReadableHandle " hGetChunk" h readSingleChunk
113135 where
0 commit comments