Skip to content

Commit 86749e9

Browse files
committed
Merge branch 'issue-19'
2 parents 6d31aaf + 6186487 commit 86749e9

File tree

7 files changed

+158
-14
lines changed

7 files changed

+158
-14
lines changed

System/OsString.hs

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ module System.OsString
2424
, unsafeEncodeUtf
2525
, encodeWith
2626
, encodeFS
27+
, encodeLE
2728
, osstr
2829
, empty
2930
, singleton
@@ -33,6 +34,7 @@ module System.OsString
3334
, decodeUtf
3435
, decodeWith
3536
, decodeFS
37+
, decodeLE
3638
, unpack
3739

3840
-- * Word types
@@ -136,14 +138,14 @@ import System.OsString.Internal
136138
, encodeUtf
137139
, unsafeEncodeUtf
138140
, encodeWith
139-
, encodeFS
141+
, encodeLE
140142
, osstr
141143
, pack
142144
, empty
143145
, singleton
144146
, decodeUtf
145147
, decodeWith
146-
, decodeFS
148+
, decodeLE
147149
, unpack
148150
, snoc
149151
, cons
@@ -206,6 +208,38 @@ import System.OsString.Internal
206208
, findIndex
207209
, findIndices
208210
)
211+
import qualified System.OsString.Internal as SOI
209212
import System.OsString.Internal.Types
210213
( OsString, OsChar, coercionToPlatformTypes )
211-
import Prelude ()
214+
import Prelude (String, IO)
215+
216+
{-# DEPRECATED encodeFS "Use System.OsPath.encodeFS from filepath" #-}
217+
-- | Like 'encodeUtf', except this mimics the behavior of the base library when doing filesystem
218+
-- operations (usually filepaths), which is:
219+
--
220+
-- 1. on unix, uses shady PEP 383 style encoding (based on the current locale,
221+
-- but PEP 383 only works properly on UTF-8 encodings, so good luck)
222+
-- 2. on windows does permissive UTF-16 encoding, where coding errors generate
223+
-- Chars in the surrogate range
224+
--
225+
-- Looking up the locale requires IO. If you're not worried about calls
226+
-- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure
227+
-- to deeply evaluate the result to catch exceptions).
228+
encodeFS :: String -> IO OsString
229+
encodeFS = SOI.encodeFS
230+
231+
{-# DEPRECATED decodeFS "Use System.OsPath.decodeFS from filepath" #-}
232+
-- | Like 'decodeUtf', except this mimics the behavior of the base library when doing filesystem
233+
-- operations (usually filepaths), which is:
234+
--
235+
-- 1. on unix, uses shady PEP 383 style encoding (based on the current locale,
236+
-- but PEP 383 only works properly on UTF-8 encodings, so good luck)
237+
-- 2. on windows does permissive UTF-16 encoding, where coding errors generate
238+
-- Chars in the surrogate range
239+
--
240+
-- Looking up the locale requires IO. If you're not worried about calls
241+
-- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure
242+
-- to deeply evaluate the result to catch exceptions).
243+
decodeFS :: OsString -> IO String
244+
decodeFS = SOI.decodeFS
245+

System/OsString/Common.hs

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ module System.OsString.MODULE_NAME
3333
, unsafeEncodeUtf
3434
, encodeWith
3535
, encodeFS
36+
, encodeLE
3637
, fromBytes
3738
, pstr
3839
, singleton
@@ -43,6 +44,7 @@ module System.OsString.MODULE_NAME
4344
, decodeUtf
4445
, decodeWith
4546
, decodeFS
47+
, decodeLE
4648
, unpack
4749

4850
-- * Word construction
@@ -242,14 +244,14 @@ encodeWith enc str = unsafePerformIO $ do
242244

243245
#ifdef WINDOWS_DOC
244246
-- | This mimics the behavior of the base library when doing filesystem
245-
-- operations, which does permissive UTF-16 encoding, where coding errors generate
247+
-- operations (usually filepaths), which does permissive UTF-16 encoding, where coding errors generate
246248
-- Chars in the surrogate range.
247249
--
248250
-- The reason this is in IO is because it unifies with the Posix counterpart,
249251
-- which does require IO. This is safe to 'unsafePerformIO'/'unsafeDupablePerformIO'.
250252
#else
251253
-- | This mimics the behavior of the base library when doing filesystem
252-
-- operations, which uses shady PEP 383 style encoding (based on the current locale,
254+
-- operations (usually filepaths), which uses shady PEP 383 style encoding (based on the current locale,
253255
-- but PEP 383 only works properly on UTF-8 encodings, so good luck).
254256
--
255257
-- Looking up the locale requires IO. If you're not worried about calls
@@ -258,11 +260,35 @@ encodeWith enc str = unsafePerformIO $ do
258260
#endif
259261
encodeFS :: String -> IO PLATFORM_STRING
260262
#ifdef WINDOWS
263+
{-# DEPRECATED encodeFS "Use System.OsPath.Windows.encodeFS from filepath" #-}
261264
encodeFS = fmap WindowsString . encodeWithBaseWindows
262265
#else
266+
{-# DEPRECATED encodeFS "Use System.OsPath.Posix.encodeFS from filepath" #-}
263267
encodeFS = fmap PosixString . encodeWithBasePosix
264268
#endif
265269

270+
#ifdef WINDOWS_DOC
271+
-- | This mimics the behavior of the base library when doing string
272+
-- operations, which does permissive UTF-16 encoding, where coding errors generate
273+
-- Chars in the surrogate range.
274+
--
275+
-- The reason this is in IO is because it unifies with the Posix counterpart,
276+
-- which does require IO. This is safe to 'unsafePerformIO'/'unsafeDupablePerformIO'.
277+
#else
278+
-- | This mimics the behavior of the base library when doing string
279+
-- operations, which uses 'getLocaleEncoding'.
280+
--
281+
-- Looking up the locale requires IO. If you're not worried about calls
282+
-- to 'setLocaleEncoding', then 'unsafePerformIO' may be feasible (make sure
283+
-- to deeply evaluate the result to catch exceptions).
284+
#endif
285+
encodeLE :: String -> IO PLATFORM_STRING
286+
#ifdef WINDOWS
287+
encodeLE = fmap WindowsString . encodeWithBaseWindows
288+
#else
289+
encodeLE = fmap PosixString . encodeWithBasePosix'
290+
#endif
291+
266292

267293
#ifdef WINDOWS_DOC
268294
-- | Partial unicode friendly decoding.
@@ -317,7 +343,29 @@ decodeWith unixEnc (PosixString ba) = unsafePerformIO $ do
317343
-- which does require IO. 'unsafePerformIO'/'unsafeDupablePerformIO' are safe, however.
318344
#else
319345
-- | This mimics the behavior of the base library when doing string
320-
-- operations, which uses shady PEP 383 style encoding (based on the current locale,
346+
-- operations, which uses 'getLocaleEncoding'.
347+
--
348+
-- Looking up the locale requires IO. If you're not worried about calls
349+
-- to 'setLocaleEncoding', then 'unsafePerformIO' may be feasible (make sure
350+
-- to deeply evaluate the result to catch exceptions).
351+
#endif
352+
decodeLE :: PLATFORM_STRING -> IO String
353+
#ifdef WINDOWS
354+
decodeLE (WindowsString ba) = decodeWithBaseWindows ba
355+
#else
356+
decodeLE (PosixString ba) = decodeWithBasePosix' ba
357+
#endif
358+
359+
#ifdef WINDOWS_DOC
360+
-- | Like 'decodeUtf', except this mimics the behavior of the base library when doing filesystem
361+
-- operations (usually filepaths), which does permissive UTF-16 encoding, where coding errors generate
362+
-- Chars in the surrogate range.
363+
--
364+
-- The reason this is in IO is because it unifies with the Posix counterpart,
365+
-- which does require IO. 'unsafePerformIO'/'unsafeDupablePerformIO' are safe, however.
366+
#else
367+
-- | This mimics the behavior of the base library when doing filesystem
368+
-- operations (usually filepaths), which uses shady PEP 383 style encoding (based on the current locale,
321369
-- but PEP 383 only works properly on UTF-8 encodings, so good luck).
322370
--
323371
-- Looking up the locale requires IO. If you're not worried about calls
@@ -326,8 +374,10 @@ decodeWith unixEnc (PosixString ba) = unsafePerformIO $ do
326374
#endif
327375
decodeFS :: PLATFORM_STRING -> IO String
328376
#ifdef WINDOWS
377+
{-# DEPRECATED decodeFS "Use System.OsPath.Windows.decodeFS from filepath" #-}
329378
decodeFS (WindowsString ba) = decodeWithBaseWindows ba
330379
#else
380+
{-# DEPRECATED decodeFS "Use System.OsPath.Posix.decodeFS from filepath" #-}
331381
decodeFS (PosixString ba) = decodeWithBasePosix ba
332382
#endif
333383

System/OsString/Encoding.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ module System.OsString.Encoding
2323
-- * base encoding
2424
, encodeWithBasePosix
2525
, decodeWithBasePosix
26+
, encodeWithBasePosix'
27+
, decodeWithBasePosix'
2628
, encodeWithBaseWindows
2729
, decodeWithBaseWindows
2830
)

System/OsString/Encoding/Internal.hs

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ import Numeric (showHex)
3131
import Foreign.C (CStringLen)
3232
import Data.Char (chr)
3333
import Foreign
34-
import GHC.IO.Encoding (getFileSystemEncoding)
34+
import GHC.IO.Encoding (getFileSystemEncoding, getLocaleEncoding)
3535

3636
-- -----------------------------------------------------------------------------
3737
-- UCS-2 LE
@@ -270,9 +270,15 @@ peekWindowsString (cp, l) = do
270270
withPosixString :: String -> (CStringLen -> IO a) -> IO a
271271
withPosixString fp f = getFileSystemEncoding >>= \enc -> GHC.withCStringLen enc fp f
272272

273+
withPosixString' :: String -> (CStringLen -> IO a) -> IO a
274+
withPosixString' fp f = getLocaleEncoding >>= \enc -> GHC.withCStringLen enc fp f
275+
273276
peekPosixString :: CStringLen -> IO String
274277
peekPosixString fp = getFileSystemEncoding >>= \enc -> GHC.peekCStringLen enc fp
275278

279+
peekPosixString' :: CStringLen -> IO String
280+
peekPosixString' fp = getLocaleEncoding >>= \enc -> GHC.peekCStringLen enc fp
281+
276282
-- | Decode with the given 'TextEncoding'.
277283
decodeWithTE :: TextEncoding -> BS8.ShortByteString -> Either EncodingException String
278284
decodeWithTE enc ba = unsafePerformIO $ do
@@ -289,18 +295,30 @@ encodeWithTE enc str = unsafePerformIO $ do
289295
-- Encoders / decoders
290296
--
291297

292-
-- | This mimics the filepath decoder base uses on unix,
298+
-- | This mimics the filepath decoder base uses on unix (using PEP-383),
293299
-- with the small distinction that we're not truncating at NUL bytes (because we're not at
294300
-- the outer FFI layer).
295301
decodeWithBasePosix :: BS8.ShortByteString -> IO String
296302
decodeWithBasePosix ba = BS8.useAsCStringLen ba $ \fp -> peekPosixString fp
297303

298-
-- | This mimics the filepath dencoder base uses on unix,
304+
-- | This mimics the string decoder base uses on unix,
305+
-- with the small distinction that we're not truncating at NUL bytes (because we're not at
306+
-- the outer FFI layer).
307+
decodeWithBasePosix' :: BS8.ShortByteString -> IO String
308+
decodeWithBasePosix' ba = BS8.useAsCStringLen ba $ \fp -> peekPosixString' fp
309+
310+
-- | This mimics the filepath encoder base uses on unix (using PEP-383),
299311
-- with the small distinction that we're not truncating at NUL bytes (because we're not at
300312
-- the outer FFI layer).
301313
encodeWithBasePosix :: String -> IO BS8.ShortByteString
302314
encodeWithBasePosix str = withPosixString str $ \cstr -> BS8.packCStringLen cstr
303315

316+
-- | This mimics the string encoder base uses on unix,
317+
-- with the small distinction that we're not truncating at NUL bytes (because we're not at
318+
-- the outer FFI layer).
319+
encodeWithBasePosix' :: String -> IO BS8.ShortByteString
320+
encodeWithBasePosix' str = withPosixString' str $ \cstr -> BS8.packCStringLen cstr
321+
304322
-- | This mimics the filepath decoder base uses on windows,
305323
-- with the small distinction that we're not truncating at NUL bytes (because we're not at
306324
-- the outer FFI layer).

System/OsString/Internal.hs

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,11 @@ import System.OsString.Encoding ( EncodingException(..) )
2626
import GHC.IO.Encoding.Failure ( CodingFailureMode(..) )
2727
#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
2828
import GHC.IO.Encoding.UTF16 ( mkUTF16le )
29+
import System.OsString.Encoding ( encodeWithBaseWindows, decodeWithBaseWindows )
2930
import qualified System.OsString.Windows as PF
3031
#else
3132
import GHC.IO.Encoding.UTF8 ( mkUTF8 )
33+
import System.OsString.Encoding ( encodeWithBasePosix, decodeWithBasePosix )
3234
import qualified System.OsString.Posix as PF
3335
#endif
3436
import GHC.Stack (HasCallStack)
@@ -71,7 +73,7 @@ encodeWith unixEnc _ str = OsString <$> PF.encodeWith unixEnc str
7173
#endif
7274

7375
-- | Like 'encodeUtf', except this mimics the behavior of the base library when doing filesystem
74-
-- operations, which is:
76+
-- operations (usually filepaths), which is:
7577
--
7678
-- 1. on unix, uses shady PEP 383 style encoding (based on the current locale,
7779
-- but PEP 383 only works properly on UTF-8 encodings, so good luck)
@@ -82,7 +84,24 @@ encodeWith unixEnc _ str = OsString <$> PF.encodeWith unixEnc str
8284
-- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure
8385
-- to deeply evaluate the result to catch exceptions).
8486
encodeFS :: String -> IO OsString
85-
encodeFS = fmap OsString . PF.encodeFS
87+
#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
88+
encodeFS = fmap (OsString . WindowsString) . encodeWithBaseWindows
89+
#else
90+
encodeFS = fmap (OsString . PosixString) . encodeWithBasePosix
91+
#endif
92+
93+
-- | Like 'encodeUtf', except this mimics the behavior of the base library when doing string
94+
-- operations, which is:
95+
--
96+
-- 1. on unix this uses 'getLocaleEncoding'
97+
-- 2. on windows does permissive UTF-16 encoding, where coding errors generate
98+
-- Chars in the surrogate range
99+
--
100+
-- Looking up the locale requires IO. If you're not worried about calls
101+
-- to 'setLocaleEncoding', then 'unsafePerformIO' may be feasible (make sure
102+
-- to deeply evaluate the result to catch exceptions).
103+
encodeLE :: String -> IO OsString
104+
encodeLE = fmap OsString . PF.encodeLE
86105

87106

88107
-- | Partial unicode friendly decoding.
@@ -110,7 +129,7 @@ decodeWith unixEnc _ (OsString x) = PF.decodeWith unixEnc x
110129

111130

112131
-- | Like 'decodeUtf', except this mimics the behavior of the base library when doing filesystem
113-
-- operations, which is:
132+
-- operations (usually filepaths), which is:
114133
--
115134
-- 1. on unix, uses shady PEP 383 style encoding (based on the current locale,
116135
-- but PEP 383 only works properly on UTF-8 encodings, so good luck)
@@ -121,7 +140,24 @@ decodeWith unixEnc _ (OsString x) = PF.decodeWith unixEnc x
121140
-- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure
122141
-- to deeply evaluate the result to catch exceptions).
123142
decodeFS :: OsString -> IO String
124-
decodeFS (OsString x) = PF.decodeFS x
143+
#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
144+
decodeFS (OsString (WindowsString x)) = decodeWithBaseWindows x
145+
#else
146+
decodeFS (OsString (PosixString x)) = decodeWithBasePosix x
147+
#endif
148+
149+
-- | Like 'decodeUtf', except this mimics the behavior of the base library when doing string operations,
150+
-- which is:
151+
--
152+
-- 1. on unix this uses 'getLocaleEncoding'
153+
-- 2. on windows does permissive UTF-16 encoding, where coding errors generate
154+
-- Chars in the surrogate range
155+
--
156+
-- Looking up the locale requires IO. If you're not worried about calls
157+
-- to 'setLocaleEncoding', then 'unsafePerformIO' may be feasible (make sure
158+
-- to deeply evaluate the result to catch exceptions).
159+
decodeLE :: OsString -> IO String
160+
decodeLE (OsString x) = PF.decodeLE x
125161

126162

127163
-- | Constructs an @OsString@ from a ByteString.

changelog.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Changelog for [`os-string` package](http://hackage.haskell.org/package/os-string)
22

3+
## 2.0.5 *Jun 2024*
4+
5+
* Add `decodeLE`/`encodeLE` and deprecate `decodeFS`/`encodeFS` (pointing users to `System.OsPath` instead), fixes [#19](https://github.com/haskell/os-string/issues/19)
6+
37
## 2.0.4 *Jun 2024*
48

59
* Use TemplateHaskellQuotes rather than TemplateHaskell extension wrt [#21](https://github.com/haskell/os-string/issues/21)

os-string.cabal

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
cabal-version: 2.2
22
name: os-string
3-
version: 2.0.4
3+
version: 2.0.5
44

55
-- NOTE: Don't forget to update ./changelog.md
66
license: BSD-3-Clause

0 commit comments

Comments
 (0)