@@ -64,21 +64,24 @@ import Control.Monad.ST.Unsafe (unsafeIOToST, unsafeSTToIO)
64
64
65
65
import Control.Exception (evaluate , try , throwIO , ErrorCall (ErrorCall ))
66
66
import Control.Monad.ST (runST )
67
- import Data.ByteString as B
67
+ import Data.Bits (shiftR , (.&.) )
68
+ import Data.ByteString (ByteString )
69
+ import qualified Data.ByteString as B
68
70
import qualified Data.ByteString.Internal as B
71
+ import qualified Data.ByteString.Short.Internal as SBS
69
72
import Data.Foldable (traverse_ )
70
73
import Data.Text.Encoding.Error (OnDecodeError , UnicodeException , strictDecode , lenientDecode )
71
- import Data.Text.Internal (Text (.. ), safe , text )
74
+ import Data.Text.Internal (Text (.. ), safe , empty , text )
72
75
import Data.Text.Internal.Private (runText )
73
76
import Data.Text.Internal.Unsafe (unsafeWithForeignPtr )
74
77
import Data.Text.Internal.Unsafe.Char (unsafeWrite )
75
78
import Data.Text.Show ()
76
79
import Data.Text.Unsafe (unsafeDupablePerformIO )
77
80
import Data.Word (Word8 , Word32 )
78
- import Foreign.C.Types (CSize )
81
+ import Foreign.C.Types (CSize ( .. ) )
79
82
import Foreign.Marshal.Utils (with )
80
83
import Foreign.Ptr (Ptr , minusPtr , nullPtr , plusPtr )
81
- import Foreign.Storable (Storable , peek , poke )
84
+ import Foreign.Storable (Storable , peek , poke , peekByteOff )
82
85
import GHC.Exts (MutableByteArray #, byteArrayContents #, unsafeCoerce #)
83
86
import GHC.ForeignPtr (ForeignPtr (.. ), ForeignPtrContents (PlainPtr ))
84
87
import qualified Data.ByteString.Builder as B
@@ -112,7 +115,13 @@ import GHC.Stack (HasCallStack)
112
115
-- | /Deprecated/. Decode a 'ByteString' containing 7-bit ASCII
113
116
-- encoded text.
114
117
decodeASCII :: ByteString -> Text
115
- decodeASCII = decodeUtf8
118
+ decodeASCII bs = withBS bs $ \ fp len -> if len == 0 then empty else runST $ do
119
+ asciiPrefixLen <- fmap cSizeToInt $ unsafeIOToST $ unsafeWithForeignPtr fp $ \ src ->
120
+ c_is_ascii src (src `plusPtr` len)
121
+ if asciiPrefixLen == len
122
+ then let ! (SBS. SBS arr) = SBS. toShort bs in
123
+ return (Text (A. ByteArray arr) 0 len)
124
+ else error $ " decodeASCII: detected non-ASCII codepoint at " ++ show asciiPrefixLen
116
125
{-# DEPRECATED decodeASCII "Use decodeUtf8 instead" #-}
117
126
118
127
-- | Decode a 'ByteString' containing Latin-1 (aka ISO-8859-1) encoded text.
@@ -124,13 +133,29 @@ decodeLatin1 ::
124
133
HasCallStack =>
125
134
#endif
126
135
ByteString -> Text
127
- decodeLatin1 bs = withBS bs aux where
128
- aux fp len = text a 0 actualLen
129
- where
130
- (a, actualLen) = A. run2 (A. new (2 * len) >>= unsafeIOToST . go)
131
- go (A. MutableByteArray dest) = unsafeWithForeignPtr fp $ \ src -> do
132
- destLen <- c_decode_latin1 dest src (src `plusPtr` len)
133
- return (A. MutableByteArray dest, destLen)
136
+ decodeLatin1 bs = withBS bs $ \ fp len -> runST $ do
137
+ dst <- A. new (2 * len)
138
+ let inner srcOff dstOff = if srcOff >= len then return dstOff else do
139
+ asciiPrefixLen <- fmap cSizeToInt $ unsafeIOToST $ unsafeWithForeignPtr fp $ \ src ->
140
+ c_is_ascii (src `plusPtr` srcOff) (src `plusPtr` len)
141
+ if asciiPrefixLen == 0
142
+ then do
143
+ byte <- unsafeIOToST $ unsafeWithForeignPtr fp $ \ src -> peekByteOff src srcOff
144
+ A. unsafeWrite dst dstOff (0xC0 + (byte `shiftR` 6 ))
145
+ A. unsafeWrite dst (dstOff + 1 ) (0x80 + (byte .&. 0x3F ))
146
+ inner (srcOff + 1 ) (dstOff + 2 )
147
+ else do
148
+ unsafeIOToST $ unsafeWithForeignPtr fp $ \ src ->
149
+ unsafeSTToIO $ A. copyFromPointer dst dstOff (src `plusPtr` srcOff) asciiPrefixLen
150
+ inner (srcOff + asciiPrefixLen) (dstOff + asciiPrefixLen)
151
+
152
+ actualLen <- inner 0 0
153
+ dst' <- A. resizeM dst actualLen
154
+ arr <- A. unsafeFreeze dst'
155
+ return $ Text arr 0 actualLen
156
+
157
+ foreign import ccall unsafe " _hs_text_is_ascii" c_is_ascii
158
+ :: Ptr Word8 -> Ptr Word8 -> IO CSize
134
159
135
160
-- | Decode a 'ByteString' containing UTF-8 encoded text.
136
161
--
@@ -538,6 +563,3 @@ foreign import ccall unsafe "_hs_text_decode_utf8_state" c_decode_utf8_with_stat
538
563
:: MutableByteArray # s -> Ptr CSize
539
564
-> Ptr (Ptr Word8 ) -> Ptr Word8
540
565
-> Ptr CodePoint -> Ptr DecoderState -> IO (Ptr Word8 )
541
-
542
- foreign import ccall unsafe " _hs_text_decode_latin1" c_decode_latin1
543
- :: MutableByteArray # s -> Ptr Word8 -> Ptr Word8 -> IO Int
0 commit comments