1
1
{-# LANGUAGE OverloadedStrings, CPP #-}
2
- {-# LANGUAGE Safe #-}
2
+ {-# LANGUAGE Trustworthy #-}
3
+ {-# LANGUAGE LambdaCase #-}
4
+ {-# LANGUAGE UnboxedTuples #-}
3
5
4
6
-- |
5
7
-- Module : Data.Text.Lazy.Read
@@ -21,11 +23,15 @@ module Data.Text.Lazy.Read
21
23
) where
22
24
23
25
import Control.Monad (liftM )
24
- import Data.Char (isDigit , isHexDigit )
26
+ import Data.Char (ord )
25
27
import Data.Int (Int8 , Int16 , Int32 , Int64 )
26
28
import Data.Ratio ((%) )
27
29
import Data.Text.Internal.Read
30
+ import Data.Text.Array as A
28
31
import Data.Text.Lazy as T
32
+ import Data.Text.Internal.Lazy as T (Text (.. ))
33
+ import qualified Data.Text.Internal as T (Text (.. ))
34
+ import qualified Data.Text.Internal.Private as T (spanAscii_ )
29
35
import Data.Word (Word , Word8 , Word16 , Word32 , Word64 )
30
36
31
37
-- | Read some text. If the read succeeds, return its value and the
@@ -59,7 +65,7 @@ decimal :: Integral a => Reader a
59
65
decimal txt
60
66
| T. null h = Left " input does not start with a digit"
61
67
| otherwise = Right (T. foldl' go 0 h, t)
62
- where (h,t ) = T. span isDigit txt
68
+ where (# h, t # ) = spanAscii_ ( \ w -> w - ord8 ' 0 ' < 10 ) txt
63
69
go n d = (n * 10 + fromIntegral (digitToInt d))
64
70
65
71
-- | Read a hexadecimal integer, consisting of an optional leading
@@ -97,7 +103,7 @@ hex :: Integral a => Reader a
97
103
hex txt
98
104
| T. null h = Left " input does not start with a hexadecimal digit"
99
105
| otherwise = Right (T. foldl' go 0 h, t)
100
- where (h,t ) = T. span isHexDigit txt
106
+ where (# h, t # ) = spanAscii_ ( \ w -> w - ord8 ' 0 ' < 10 || w - ord8 ' A ' < 6 || w - ord8 ' a ' < 6 ) txt
101
107
go n d = (n * 16 + fromIntegral (hexDigitToInt d))
102
108
103
109
-- | Read an optional leading sign character (@\'-\'@ or @\'+\'@) and
@@ -156,36 +162,54 @@ signa :: Num a => Parser a -> Parser a
156
162
{-# SPECIALIZE signa :: Parser Int64 -> Parser Int64 #-}
157
163
{-# SPECIALIZE signa :: Parser Integer -> Parser Integer #-}
158
164
signa p = do
159
- sign <- perhaps ' +' $ char (\ c -> c == ' -' || c == ' +' )
160
- if sign == ' +' then p else negate `liftM` p
165
+ sign <- perhaps (ord8 ' +' ) $ charAscii (\ c -> c == ord8 ' -' || c == ord8 ' +' )
166
+ if sign == ord8 ' +' then p else negate `liftM` p
161
167
162
- char :: (Char -> Bool ) -> Parser Char
163
- char p = P $ \ t -> case T. uncons t of
164
- Just (c,t') | p c -> Right (c,t')
165
- _ -> Left " character does not match"
168
-- | Consume the first byte of the input if it satisfies the predicate,
-- returning that byte together with the remaining text. The byte is read
-- straight from the first chunk's array without decoding, so @p@ is
-- expected to accept ASCII bytes only.
charAscii :: (Word8 -> Bool) -> Parser Word8
charAscii p = P step
  where
    step Empty = Left "character does not match"
    -- len is > 0, unless the internal invariant of Text is violated
    step (Chunk (T.Text arr off len) ts)
      | p c       = Right (c, rest)
      | otherwise = Left "character does not match"
      where
        c = A.unsafeIndex arr off
        -- Drop the chunk entirely once its last byte is consumed, so that
        -- no empty chunk is ever constructed.
        rest
          | len <= 1  = ts
          | otherwise = Chunk (T.Text arr (off + 1) (len - 1)) ts
166
176
167
177
-- | Shared driver for reading a number of the form
-- @[sign] digits [. digits] [(e|E) [sign] digits]@.
--
-- The callback @f@ receives the integral part, the fractional digits read
-- as an integer, and @10 ^ (number of fractional digits)@; it is only
-- consulted when a fractional part is present.
floaty :: Fractional a => (Integer -> Integer -> Integer -> a) -> Reader a
{-# INLINE floaty #-}
floaty f = runP $ do
  sign <- perhaps plus (charAscii isSign)
  real <- P decimal
  T fraction fracDigits <- perhaps (T 0 0) $ do
    _ <- charAscii (== ord8 '.')
    -- Count the fractional digits without consuming them; the following
    -- 'decimal' then reads the same digits as a number.
    digits <- P $ \t -> Right (fracLen t, t)
    n <- P decimal
    return $ T n digits
  power <- perhaps 0 (charAscii isExpMark >> signa (P decimal) :: Parser Int)
  let mantissa
        | fracDigits == 0 = fromInteger real
        | otherwise       = f real fraction (10 ^ fracDigits)
      -- Skip the multiplication altogether when there is no exponent.
      magnitude
        | power == 0 = mantissa
        | otherwise  = mantissa * (10 ^^ power)
  return $! if sign == plus
            then magnitude
            else -magnitude
  where
    plus        = ord8 '+'
    isSign c    = c == ord8 '-' || c == plus
    isExpMark c = c == ord8 'e' || c == ord8 'E'
    isDigit8 w  = w - ord8 '0' < 10
    fracLen t   = let (# hd, _ #) = spanAscii_ isDigit8 t in int64ToInt (T.length hd)
189
199
190
200
-- | Convert an 'Int64' to an 'Int' using 'fromIntegral'.
int64ToInt :: Int64 -> Int
int64ToInt n = fromIntegral n
202
+
203
-- | The code point of a character as a 'Word8'. The call sites visible in
-- this module only pass ASCII literals, whose code points fit in a byte.
ord8 :: Char -> Word8
ord8 c = fromIntegral (ord c)
205
+
206
-- | Split a lazy 'Text' into the longest prefix whose bytes all satisfy
-- @p@ and the remainder, working chunk by chunk.
--
-- For the sake of performance this function does not check
-- that a char is in ASCII range; it is a responsibility of @p@.
--
-- The prefix is 'Empty' whenever no byte matched, so callers may test it
-- with 'T.null'.
spanAscii_ :: (Word8 -> Bool) -> Text -> (# Text, Text #)
spanAscii_ p = loop
  where
    loop Empty = (# Empty, Empty #)
    loop (Chunk t ts) =
      let (# t', t''@(T.Text _ _ len) #) = T.spanAscii_ p t in
      if len == 0
        -- The whole chunk matched: keep it and continue into the tail.
        then let (# ts', ts'' #) = loop ts in (# Chunk t ts', ts'' #)
        -- The match stopped inside this chunk.
        else (# prefixChunk t', Chunk t'' ts #)

    -- The matched part of a chunk is empty when its very first byte fails
    -- @p@. Wrapping that in 'Chunk' would create an empty chunk, which
    -- lazy 'Text' is not supposed to contain, and would make the prefix
    -- look non-empty to anything that only inspects the constructor.
    prefixChunk s@(T.Text _ _ n)
      | n == 0    = Empty
      | otherwise = Chunk s Empty
0 commit comments