Skip to content

Commit 1286afd

Browse files
Add lax unicode en/decoding and raw show/read for Path
1 parent f6e8907 commit 1286afd

File tree

1 file changed

+34
-14
lines changed

1 file changed

+34
-14
lines changed

core/src/Streamly/Internal/FileSystem/PosixPath.hs

Lines changed: 34 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#define WORD_TYPE Word16
88
#define UNICODE_ENCODER encodeUtf16le'
99
#define UNICODE_DECODER decodeUtf16le'
10+
#define UNICODE_DECODER_LAX decodeUtf16le
1011
#define CODEC_NAME UTF-16LE
1112
#define SEPARATORS @/, \\@
1213
#else
@@ -15,6 +16,7 @@
1516
#define WORD_TYPE Word8
1617
#define UNICODE_ENCODER encodeUtf8'
1718
#define UNICODE_DECODER decodeUtf8'
19+
#define UNICODE_DECODER_LAX decodeUtf8
1820
#define CODEC_NAME UTF-8
1921
#define SEPARATORS @/@
2022
#endif
@@ -54,6 +56,7 @@ module Streamly.Internal.FileSystem.OS_PATH
5456
, unsafeFromString
5557
-- , fromCString#
5658
-- , fromW16CString#
59+
, readRaw
5760

5861
-- * Statically Verified String Literals
5962
-- | Quasiquoters.
@@ -71,9 +74,12 @@ module Streamly.Internal.FileSystem.OS_PATH
7174
-- * Elimination
7275
, toChunk
7376
, toChars
77+
, toChars_
7478
, toString
7579
-- , toCString
7680
-- , toW16CString
81+
, toString_
82+
, showRaw
7783

7884
-- * Separators
7985
-- Do we need to export the separator functions? They are not essential if
@@ -115,9 +121,7 @@ where
115121
import Control.Monad.Catch (MonadThrow(..))
116122
import Data.Bifunctor (bimap)
117123
import Data.Functor.Identity (Identity(..))
118-
#ifdef DEBUG
119124
import Data.Maybe (fromJust)
120-
#endif
121125
import Data.Word (Word8)
122126
#if defined(IS_WINDOWS)
123127
import Data.Word (Word16)
@@ -173,18 +177,6 @@ For APIs that have not been released yet.
173177
-- the user may not have the same bytes as later returned by the file system.
174178
newtype OS_PATH = OS_PATH (Array WORD_TYPE)
175179

176-
-- Show instance is not provided because Show and Read should be inverses but
177-
-- we cannot ensure that as the path encoding may depend on the OS or the
178-
-- file system. We can print the byte values though but that won't be very
179-
-- useful. If we do not care about Show and Read being strictly faithful
180-
-- inverses we can use the default encoding/decoding to implement them.
181-
-- Otherwise we can just use toString, fromString for Show and Read purposes.
182-
--
183-
{-
184-
instance Show OS_PATH where
185-
show (OS_PATH x) = show x
186-
-}
187-
188180
-- XXX The Eq instance may be provided but it will require some sensible
189181
-- defaults for comparison. For example, should we use case sensitive or
190182
-- insensitive comparison? It depends on the underlying file system. For now
@@ -356,12 +348,40 @@ toChars p =
356348
let (OS_PATH arr) =
357349
toPath p in Common.toChars Unicode.UNICODE_DECODER arr
358350

351+
-- | Decode the path to a stream of Unicode chars using lax CODEC_NAME decoding.
--
-- Identical to 'toChars' except that the lax decoder is passed to
-- 'Common.toChars' in place of the strict one.
--
-- NOTE(review): presumably the lax decoder tolerates invalid input instead
-- of failing; confirm against the Unicode module.
352+
toChars_ :: (Monad m, IsPath OS_PATH a) => a -> Stream m Char
353+
toChars_ p =
354+
let (OS_PATH arr) =
355+
toPath p in Common.toChars Unicode.UNICODE_DECODER_LAX arr
356+
359357
-- XXX When showing, append a "/" to dir types?
360358

361359
-- | Decode the path to a Unicode string using strict CODEC_NAME decoding.
--
-- The stream produced by 'toChars' is run in the 'Identity' monad and
-- collected into a list. See 'toString_' for the lax decoding variant.
362360
toString :: IsPath OS_PATH a => a -> [Char]
363361
toString = runIdentity . Stream.toList . toChars
364362

363+
-- | Decode the path to a Unicode string using lax CODEC_NAME decoding.
--
-- The stream produced by 'toChars_' is run in the 'Identity' monad and
-- collected into a list. See 'toString' for the strict decoding variant.
364+
toString_ :: IsPath OS_PATH a => a -> [Char]
365+
toString_ = runIdentity . Stream.toList . toChars_
366+
367+
-- | Show the raw contents of the path: the path is converted with 'toPath'
-- and the underlying array is rendered with the 'Show' instance of the
-- array. No Unicode decoding is performed.
showRaw :: IsPath OS_PATH a => a -> [Char]
368+
showRaw p =
369+
let (OS_PATH arr) =
370+
toPath p in show arr
371+
372+
-- | Build a path from a string by parsing it with 'read' into an array chunk
-- and converting that chunk with 'fromChunk'.
--
-- This function is partial: 'read' raises a runtime error if the string does
-- not parse, and 'fromJust' raises a runtime error if 'fromChunk' returns
-- 'Nothing'.
--
-- NOTE(review): presumably intended as the inverse of 'showRaw'; confirm
-- that the Show and Read instances of the array round-trip.
readRaw :: IsPath OS_PATH a => [Char] -> a
373+
readRaw = fromJust . fromChunk . read
374+
375+
-- We cannot show decoded path in the Show instance as it may not always
376+
-- succeed and it depends on the encoding which we may not even know. The
377+
-- encoding may depend on the OS and the file system. Also we need Show and
378+
-- Read to be inverses. The best we can provide is to show the bytes as
379+
-- Hex or decimal values.
380+
{-
381+
instance Show OS_PATH where
382+
show (OS_PATH x) = show x
383+
-}
384+
365385
------------------------------------------------------------------------------
366386
-- Operations on Path
367387
------------------------------------------------------------------------------

0 commit comments

Comments
 (0)