Skip to content

Commit f9b1285

Browse files
committed
Add encoding tests to os-string
1 parent 935e13f commit f9b1285

File tree

4 files changed

+268
-0
lines changed

4 files changed

+268
-0
lines changed

os-string.cabal

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,25 @@ test-suite bytestring-tests
9191
, os-string
9292
, QuickCheck >=2.7 && <2.15
9393

94+
test-suite encoding-tests
95+
default-language: Haskell2010
96+
ghc-options: -Wall
97+
type: exitcode-stdio-1.0
98+
main-is: Main.hs
99+
hs-source-dirs: tests tests/encoding
100+
other-modules:
101+
Arbitrary
102+
EncodingSpec
103+
TestUtil
104+
105+
build-depends:
106+
, base
107+
, bytestring >=0.11.3.0
108+
, deepseq
109+
, os-string
110+
, QuickCheck >=2.7 && <2.15
111+
, quickcheck-classes-base ^>=0.6.2
112+
94113
benchmark bench
95114
main-is: Bench.hs
96115
other-modules: BenchOsString

tests/Arbitrary.hs

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
{-# OPTIONS_GHC -Wno-orphans #-}
2+
3+
module Arbitrary where
4+
5+
import Data.Char
6+
import Data.Maybe
7+
import System.OsString
8+
import System.OsString.Internal.Types
9+
import qualified System.OsString.Posix as Posix
10+
import qualified System.OsString.Windows as Windows
11+
import Data.ByteString ( ByteString )
12+
import qualified Data.ByteString as ByteString
13+
import Test.QuickCheck
14+
15+
16+
instance Arbitrary OsString where
17+
arbitrary = fmap fromJust $ encodeUtf <$> listOf filepathChar
18+
19+
instance Arbitrary PosixString where
20+
arbitrary = fmap fromJust $ Posix.encodeUtf <$> listOf filepathChar
21+
22+
instance Arbitrary WindowsString where
23+
arbitrary = fmap fromJust $ Windows.encodeUtf <$> listOf filepathChar
24+
25+
26+
newtype NonNullString = NonNullString { nonNullString :: String }
27+
deriving Show
28+
29+
instance Arbitrary NonNullString where
30+
arbitrary = NonNullString <$> listOf filepathChar
31+
32+
filepathChar :: Gen Char
33+
filepathChar = arbitraryUnicodeChar `suchThat` (\c -> not (isNull c) && isValidUnicode c)
34+
where
35+
isNull = (== '\NUL')
36+
isValidUnicode c = case generalCategory c of
37+
Surrogate -> False
38+
NotAssigned -> False
39+
_ -> True
40+
41+
42+
newtype NonNullAsciiString = NonNullAsciiString { nonNullAsciiString :: String }
43+
deriving Show
44+
45+
instance Arbitrary NonNullAsciiString where
46+
arbitrary = NonNullAsciiString <$> listOf filepathAsciiChar
47+
48+
filepathAsciiChar :: Gen Char
49+
filepathAsciiChar = arbitraryASCIIChar `suchThat` (\c -> not (isNull c))
50+
where
51+
isNull = (== '\NUL')
52+
53+
newtype NonNullSurrogateString = NonNullSurrogateString { nonNullSurrogateString :: String }
54+
deriving Show
55+
56+
instance Arbitrary NonNullSurrogateString where
57+
arbitrary = NonNullSurrogateString <$> listOf filepathWithSurrogates
58+
59+
filepathWithSurrogates :: Gen Char
60+
filepathWithSurrogates =
61+
frequency
62+
[(3, arbitraryASCIIChar),
63+
(1, arbitraryUnicodeChar),
64+
(1, arbitraryBoundedEnum)
65+
]
66+
67+
68+
instance Arbitrary ByteString where arbitrary = ByteString.pack <$> arbitrary
69+
instance CoArbitrary ByteString where coarbitrary = coarbitrary . ByteString.unpack

tests/encoding/EncodingSpec.hs

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
{-# LANGUAGE CPP #-}
2+
{-# LANGUAGE ScopedTypeVariables #-}
3+
{-# LANGUAGE ViewPatterns #-}
4+
{-# LANGUAGE TypeApplications #-}
5+
6+
module EncodingSpec where
7+
8+
import Data.ByteString ( ByteString )
9+
import qualified Data.ByteString as BS
10+
11+
import Arbitrary
12+
import Test.QuickCheck
13+
14+
import Data.Either ( isRight )
15+
import qualified System.OsString.Data.ByteString.Short as BS8
16+
import qualified System.OsString.Data.ByteString.Short.Word16 as BS16
17+
import System.OsString.Encoding.Internal
18+
import GHC.IO (unsafePerformIO)
19+
import GHC.IO.Encoding ( setFileSystemEncoding )
20+
import System.IO
21+
( utf16le )
22+
import Control.Exception
23+
import Control.DeepSeq
24+
import Data.Bifunctor ( first )
25+
import GHC.IO.Encoding.Failure ( CodingFailureMode(..) )
26+
import GHC.IO.Encoding.UTF16 ( mkUTF16le )
27+
import GHC.IO.Encoding.UTF8 ( mkUTF8 )
28+
29+
30+
tests :: [(String, Property)]
31+
tests =
32+
[ ("ucs2le_decode . ucs2le_encode == id",
33+
property $ \(padEven -> ba) ->
34+
let decoded = decodeWithTE ucs2le (BS8.toShort ba)
35+
encoded = encodeWithTE ucs2le =<< decoded
36+
in (BS8.fromShort <$> encoded) === Right ba)
37+
, ("utf16 doesn't handle invalid surrogate pairs",
38+
property $
39+
let str = [toEnum 55296, toEnum 55297]
40+
encoded = encodeWithTE utf16le str
41+
decoded = decodeWithTE utf16le =<< encoded
42+
#if __GLASGOW_HASKELL__ >= 904
43+
in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")") Nothing))
44+
#else
45+
in decoded === Left (EncodingError "recoverEncode: invalid argument (invalid character)" Nothing))
46+
#endif
47+
, ("ucs2 handles invalid surrogate pairs",
48+
property $
49+
let str = [toEnum 55296, toEnum 55297]
50+
encoded = encodeWithTE ucs2le str
51+
decoded = decodeWithTE ucs2le =<< encoded
52+
in decoded === Right str)
53+
, ("can roundtrip arbitrary bytes through utf-8 (with RoundtripFailure)",
54+
property $
55+
\bs ->
56+
let decoded = decodeWithTE (mkUTF8 RoundtripFailure) (BS8.toShort bs)
57+
encoded = encodeWithTE (mkUTF8 RoundtripFailure) =<< decoded
58+
in (either (const 0) BS8.length encoded, encoded) === (BS8.length (BS8.toShort bs), Right (BS8.toShort bs)))
59+
60+
, ("can decode arbitrary strings through utf-8 (with RoundtripFailure)",
61+
property $
62+
\(NonNullSurrogateString str) ->
63+
let encoded = encodeWithTE (mkUTF8 RoundtripFailure) str
64+
decoded = decodeWithTE (mkUTF8 RoundtripFailure) =<< encoded
65+
in expectFailure $ (either (const 0) length decoded, decoded) === (length str, Right str))
66+
67+
, ("utf-8 roundtrip encode cannot deal with some surrogates",
68+
property $
69+
let str = [toEnum 0xDFF0, toEnum 0xDFF2]
70+
encoded = encodeWithTE (mkUTF8 RoundtripFailure) str
71+
decoded = decodeWithTE (mkUTF8 RoundtripFailure) =<< encoded
72+
#if __GLASGOW_HASKELL__ >= 904
73+
in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")") Nothing))
74+
#else
75+
in decoded === Left (EncodingError "recoverEncode: invalid argument (invalid character)" Nothing))
76+
#endif
77+
78+
, ("cannot roundtrip arbitrary bytes through utf-16 (with RoundtripFailure)",
79+
property $
80+
\(padEven -> bs) ->
81+
let decoded = decodeWithTE (mkUTF16le RoundtripFailure) (BS8.toShort bs)
82+
encoded = encodeWithTE (mkUTF16le RoundtripFailure) =<< decoded
83+
in expectFailure $ (either (const 0) BS8.length encoded, encoded) === (BS8.length (BS8.toShort bs), Right (BS8.toShort bs)))
84+
, ("encodeWithTE/decodeWithTE ErrorOnCodingFailure fails (utf16le)",
85+
property $
86+
\(padEven -> bs) ->
87+
let decoded = decodeWithTE (mkUTF16le ErrorOnCodingFailure) (BS8.toShort bs)
88+
encoded = encodeWithTE (mkUTF16le ErrorOnCodingFailure) =<< decoded
89+
in expectFailure $ (isRight encoded, isRight decoded) === (True, True))
90+
, ("encodeWithTE/decodeWithTE ErrorOnCodingFailure fails (utf8)",
91+
property $
92+
\bs ->
93+
let decoded = decodeWithTE (mkUTF8 ErrorOnCodingFailure) (BS8.toShort bs)
94+
encoded = encodeWithTE (mkUTF8 ErrorOnCodingFailure) =<< decoded
95+
in expectFailure $ (isRight encoded, isRight decoded) === (True, True))
96+
, ("encodeWithTE/decodeWithTE TransliterateCodingFailure never fails (utf16le)",
97+
property $
98+
\(padEven -> bs) ->
99+
let decoded = decodeWithTE (mkUTF16le TransliterateCodingFailure) (BS8.toShort bs)
100+
encoded = encodeWithTE (mkUTF16le TransliterateCodingFailure) =<< decoded
101+
in (isRight encoded, isRight decoded) === (True, True))
102+
, ("encodeWithTE/decodeWithTE TransliterateCodingFailure never fails (utf8)",
103+
property $
104+
\bs ->
105+
let decoded = decodeWithTE (mkUTF8 TransliterateCodingFailure) (BS8.toShort bs)
106+
encoded = encodeWithTE (mkUTF8 TransliterateCodingFailure) =<< decoded
107+
in (isRight encoded, isRight decoded) === (True, True))
108+
, ("encodeWithBaseWindows/decodeWithBaseWindows never fails (utf16le)",
109+
property $
110+
\(padEven -> bs) ->
111+
let decoded = decodeW' (BS8.toShort bs)
112+
encoded = encodeW' =<< decoded
113+
in (isRight encoded, isRight decoded) === (True, True))
114+
, ("encodeWithBasePosix/decodeWithBasePosix never fails (utf8b)",
115+
property $
116+
\bs -> ioProperty $ do
117+
setFileSystemEncoding (mkUTF8 TransliterateCodingFailure)
118+
let decoded = decodeP' (BS8.toShort bs)
119+
encoded = encodeP' =<< decoded
120+
pure $ (isRight encoded, isRight decoded) === (True, True))
121+
122+
, ("decodeWithBaseWindows == utf16le_b",
123+
property $
124+
\(BS8.toShort . padEven -> bs) ->
125+
let decoded = decodeW' bs
126+
decoded' = first displayException $ decodeWithTE (mkUTF16le_b ErrorOnCodingFailure) bs
127+
in decoded === decoded')
128+
129+
, ("encodeWithBaseWindows == utf16le_b",
130+
property $
131+
\(NonNullSurrogateString str) ->
132+
let decoded = encodeW' str
133+
decoded' = first displayException $ encodeWithTE (mkUTF16le_b ErrorOnCodingFailure) str
134+
in decoded === decoded')
135+
136+
, ("encodeWithTE/decodeWithTE never fails (utf16le_b)",
137+
property $
138+
\(padEven -> bs) ->
139+
let decoded = decodeWithTE (mkUTF16le_b ErrorOnCodingFailure) (BS8.toShort bs)
140+
encoded = encodeWithTE (mkUTF16le_b ErrorOnCodingFailure) =<< decoded
141+
in (isRight encoded, isRight decoded) === (True, True))
142+
]
143+
144+
145+
padEven :: ByteString -> ByteString
146+
padEven bs
147+
| even (BS.length bs) = bs
148+
| otherwise = bs `BS.append` BS.pack [70]
149+
150+
151+
decodeP' :: BS8.ShortByteString -> Either String String
152+
decodeP' ba = unsafePerformIO $ do
153+
r <- try @SomeException $ decodeWithBasePosix ba
154+
evaluate $ force $ first displayException r
155+
156+
encodeP' :: String -> Either String BS8.ShortByteString
157+
encodeP' str = unsafePerformIO $ do
158+
r <- try @SomeException $ encodeWithBasePosix str
159+
evaluate $ force $ first displayException r
160+
161+
decodeW' :: BS16.ShortByteString -> Either String String
162+
decodeW' ba = unsafePerformIO $ do
163+
r <- try @SomeException $ decodeWithBaseWindows ba
164+
evaluate $ force $ first displayException r
165+
166+
encodeW' :: String -> Either String BS8.ShortByteString
167+
encodeW' str = unsafePerformIO $ do
168+
r <- try @SomeException $ encodeWithBaseWindows str
169+
evaluate $ force $ first displayException r
170+

tests/encoding/Main.hs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{-# LANGUAGE TypeApplications #-}
2+
3+
module Main (main) where
4+
5+
import qualified EncodingSpec as Spec
6+
import TestUtil
7+
8+
main :: IO ()
9+
main = runTests (Spec.tests)
10+

0 commit comments

Comments
 (0)