|
| 1 | +{-# LANGUAGE CPP #-} |
| 2 | +{-# LANGUAGE ScopedTypeVariables #-} |
| 3 | +{-# LANGUAGE ViewPatterns #-} |
| 4 | +{-# LANGUAGE TypeApplications #-} |
| 5 | + |
| 6 | +module EncodingSpec where |
| 7 | + |
| 8 | +import Data.ByteString ( ByteString ) |
| 9 | +import qualified Data.ByteString as BS |
| 10 | + |
| 11 | +import Arbitrary |
| 12 | +import Test.QuickCheck |
| 13 | + |
| 14 | +import Data.Either ( isRight ) |
| 15 | +import qualified System.OsString.Data.ByteString.Short as BS8 |
| 16 | +import qualified System.OsString.Data.ByteString.Short.Word16 as BS16 |
| 17 | +import System.OsString.Encoding.Internal |
| 18 | +import GHC.IO (unsafePerformIO) |
| 19 | +import GHC.IO.Encoding ( setFileSystemEncoding ) |
| 20 | +import System.IO |
| 21 | + ( utf16le ) |
| 22 | +import Control.Exception |
| 23 | +import Control.DeepSeq |
| 24 | +import Data.Bifunctor ( first ) |
| 25 | +import GHC.IO.Encoding.Failure ( CodingFailureMode(..) ) |
| 26 | +import GHC.IO.Encoding.UTF16 ( mkUTF16le ) |
| 27 | +import GHC.IO.Encoding.UTF8 ( mkUTF8 ) |
| 28 | + |
| 29 | + |
| 30 | +tests :: [(String, Property)] |
| 31 | +tests = |
| 32 | + [ ("ucs2le_decode . ucs2le_encode == id", |
| 33 | + property $ \(padEven -> ba) -> |
| 34 | + let decoded = decodeWithTE ucs2le (BS8.toShort ba) |
| 35 | + encoded = encodeWithTE ucs2le =<< decoded |
| 36 | + in (BS8.fromShort <$> encoded) === Right ba) |
| 37 | + , ("utf16 doesn't handle invalid surrogate pairs", |
| 38 | + property $ |
| 39 | + let str = [toEnum 55296, toEnum 55297] |
| 40 | + encoded = encodeWithTE utf16le str |
| 41 | + decoded = decodeWithTE utf16le =<< encoded |
| 42 | +#if __GLASGOW_HASKELL__ >= 904 |
| 43 | + in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")") Nothing)) |
| 44 | +#else |
| 45 | + in decoded === Left (EncodingError "recoverEncode: invalid argument (invalid character)" Nothing)) |
| 46 | +#endif |
| 47 | + , ("ucs2 handles invalid surrogate pairs", |
| 48 | + property $ |
| 49 | + let str = [toEnum 55296, toEnum 55297] |
| 50 | + encoded = encodeWithTE ucs2le str |
| 51 | + decoded = decodeWithTE ucs2le =<< encoded |
| 52 | + in decoded === Right str) |
| 53 | + , ("can roundtrip arbitrary bytes through utf-8 (with RoundtripFailure)", |
| 54 | + property $ |
| 55 | + \bs -> |
| 56 | + let decoded = decodeWithTE (mkUTF8 RoundtripFailure) (BS8.toShort bs) |
| 57 | + encoded = encodeWithTE (mkUTF8 RoundtripFailure) =<< decoded |
| 58 | + in (either (const 0) BS8.length encoded, encoded) === (BS8.length (BS8.toShort bs), Right (BS8.toShort bs))) |
| 59 | + |
| 60 | + , ("can decode arbitrary strings through utf-8 (with RoundtripFailure)", |
| 61 | + property $ |
| 62 | + \(NonNullSurrogateString str) -> |
| 63 | + let encoded = encodeWithTE (mkUTF8 RoundtripFailure) str |
| 64 | + decoded = decodeWithTE (mkUTF8 RoundtripFailure) =<< encoded |
| 65 | + in expectFailure $ (either (const 0) length decoded, decoded) === (length str, Right str)) |
| 66 | + |
| 67 | + , ("utf-8 roundtrip encode cannot deal with some surrogates", |
| 68 | + property $ |
| 69 | + let str = [toEnum 0xDFF0, toEnum 0xDFF2] |
| 70 | + encoded = encodeWithTE (mkUTF8 RoundtripFailure) str |
| 71 | + decoded = decodeWithTE (mkUTF8 RoundtripFailure) =<< encoded |
| 72 | +#if __GLASGOW_HASKELL__ >= 904 |
| 73 | + in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")") Nothing)) |
| 74 | +#else |
| 75 | + in decoded === Left (EncodingError "recoverEncode: invalid argument (invalid character)" Nothing)) |
| 76 | +#endif |
| 77 | + |
| 78 | + , ("cannot roundtrip arbitrary bytes through utf-16 (with RoundtripFailure)", |
| 79 | + property $ |
| 80 | + \(padEven -> bs) -> |
| 81 | + let decoded = decodeWithTE (mkUTF16le RoundtripFailure) (BS8.toShort bs) |
| 82 | + encoded = encodeWithTE (mkUTF16le RoundtripFailure) =<< decoded |
| 83 | + in expectFailure $ (either (const 0) BS8.length encoded, encoded) === (BS8.length (BS8.toShort bs), Right (BS8.toShort bs))) |
| 84 | + , ("encodeWithTE/decodeWithTE ErrorOnCodingFailure fails (utf16le)", |
| 85 | + property $ |
| 86 | + \(padEven -> bs) -> |
| 87 | + let decoded = decodeWithTE (mkUTF16le ErrorOnCodingFailure) (BS8.toShort bs) |
| 88 | + encoded = encodeWithTE (mkUTF16le ErrorOnCodingFailure) =<< decoded |
| 89 | + in expectFailure $ (isRight encoded, isRight decoded) === (True, True)) |
| 90 | + , ("encodeWithTE/decodeWithTE ErrorOnCodingFailure fails (utf8)", |
| 91 | + property $ |
| 92 | + \bs -> |
| 93 | + let decoded = decodeWithTE (mkUTF8 ErrorOnCodingFailure) (BS8.toShort bs) |
| 94 | + encoded = encodeWithTE (mkUTF8 ErrorOnCodingFailure) =<< decoded |
| 95 | + in expectFailure $ (isRight encoded, isRight decoded) === (True, True)) |
| 96 | + , ("encodeWithTE/decodeWithTE TransliterateCodingFailure never fails (utf16le)", |
| 97 | + property $ |
| 98 | + \(padEven -> bs) -> |
| 99 | + let decoded = decodeWithTE (mkUTF16le TransliterateCodingFailure) (BS8.toShort bs) |
| 100 | + encoded = encodeWithTE (mkUTF16le TransliterateCodingFailure) =<< decoded |
| 101 | + in (isRight encoded, isRight decoded) === (True, True)) |
| 102 | + , ("encodeWithTE/decodeWithTE TransliterateCodingFailure never fails (utf8)", |
| 103 | + property $ |
| 104 | + \bs -> |
| 105 | + let decoded = decodeWithTE (mkUTF8 TransliterateCodingFailure) (BS8.toShort bs) |
| 106 | + encoded = encodeWithTE (mkUTF8 TransliterateCodingFailure) =<< decoded |
| 107 | + in (isRight encoded, isRight decoded) === (True, True)) |
| 108 | + , ("encodeWithBaseWindows/decodeWithBaseWindows never fails (utf16le)", |
| 109 | + property $ |
| 110 | + \(padEven -> bs) -> |
| 111 | + let decoded = decodeW' (BS8.toShort bs) |
| 112 | + encoded = encodeW' =<< decoded |
| 113 | + in (isRight encoded, isRight decoded) === (True, True)) |
| 114 | + , ("encodeWithBasePosix/decodeWithBasePosix never fails (utf8b)", |
| 115 | + property $ |
| 116 | + \bs -> ioProperty $ do |
| 117 | + setFileSystemEncoding (mkUTF8 TransliterateCodingFailure) |
| 118 | + let decoded = decodeP' (BS8.toShort bs) |
| 119 | + encoded = encodeP' =<< decoded |
| 120 | + pure $ (isRight encoded, isRight decoded) === (True, True)) |
| 121 | + |
| 122 | + , ("decodeWithBaseWindows == utf16le_b", |
| 123 | + property $ |
| 124 | + \(BS8.toShort . padEven -> bs) -> |
| 125 | + let decoded = decodeW' bs |
| 126 | + decoded' = first displayException $ decodeWithTE (mkUTF16le_b ErrorOnCodingFailure) bs |
| 127 | + in decoded === decoded') |
| 128 | + |
| 129 | + , ("encodeWithBaseWindows == utf16le_b", |
| 130 | + property $ |
| 131 | + \(NonNullSurrogateString str) -> |
| 132 | + let decoded = encodeW' str |
| 133 | + decoded' = first displayException $ encodeWithTE (mkUTF16le_b ErrorOnCodingFailure) str |
| 134 | + in decoded === decoded') |
| 135 | + |
| 136 | + , ("encodeWithTE/decodeWithTE never fails (utf16le_b)", |
| 137 | + property $ |
| 138 | + \(padEven -> bs) -> |
| 139 | + let decoded = decodeWithTE (mkUTF16le_b ErrorOnCodingFailure) (BS8.toShort bs) |
| 140 | + encoded = encodeWithTE (mkUTF16le_b ErrorOnCodingFailure) =<< decoded |
| 141 | + in (isRight encoded, isRight decoded) === (True, True)) |
| 142 | + ] |
| 143 | + |
| 144 | + |
| 145 | +padEven :: ByteString -> ByteString |
| 146 | +padEven bs |
| 147 | + | even (BS.length bs) = bs |
| 148 | + | otherwise = bs `BS.append` BS.pack [70] |
| 149 | + |
| 150 | + |
| 151 | +decodeP' :: BS8.ShortByteString -> Either String String |
| 152 | +decodeP' ba = unsafePerformIO $ do |
| 153 | + r <- try @SomeException $ decodeWithBasePosix ba |
| 154 | + evaluate $ force $ first displayException r |
| 155 | + |
| 156 | +encodeP' :: String -> Either String BS8.ShortByteString |
| 157 | +encodeP' str = unsafePerformIO $ do |
| 158 | + r <- try @SomeException $ encodeWithBasePosix str |
| 159 | + evaluate $ force $ first displayException r |
| 160 | + |
| 161 | +decodeW' :: BS16.ShortByteString -> Either String String |
| 162 | +decodeW' ba = unsafePerformIO $ do |
| 163 | + r <- try @SomeException $ decodeWithBaseWindows ba |
| 164 | + evaluate $ force $ first displayException r |
| 165 | + |
| 166 | +encodeW' :: String -> Either String BS8.ShortByteString |
| 167 | +encodeW' str = unsafePerformIO $ do |
| 168 | + r <- try @SomeException $ encodeWithBaseWindows str |
| 169 | + evaluate $ force $ first displayException r |
| 170 | + |
0 commit comments