Skip to content

Commit eba656a

Browse files
committed
Speed up strict and lazy reading of numbers
1 parent 55358a4 commit eba656a

File tree

4 files changed

+89
-35
lines changed

4 files changed

+89
-35
lines changed

src/Data/Text/Internal/Private.hs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,14 @@ module Data.Text.Internal.Private
1313
(
1414
runText
1515
, span_
16+
, spanAscii_
1617
) where
1718

1819
import Control.Monad.ST (ST, runST)
1920
import Data.Text.Internal (Text(..), text)
2021
import Data.Text.Unsafe (Iter(..), iter)
2122
import qualified Data.Text.Array as A
23+
import Data.Word (Word8)
2224

2325
#if defined(ASSERTS)
2426
import GHC.Stack (HasCallStack)
@@ -34,6 +36,17 @@ span_ p t@(Text arr off len) = (# hd,tl #)
3436
where Iter c d = iter t i
3537
{-# INLINE span_ #-}
3638

39+
-- | For the sake of performance this function does not check
40+
-- that a char is in ASCII range; it is a responsibility of @p@.
41+
spanAscii_ :: (Word8 -> Bool) -> Text -> (# Text, Text #)
42+
spanAscii_ p (Text arr off len) = (# hd, tl #)
43+
where hd = text arr off k
44+
tl = text arr (off + k) (len - k)
45+
!k = loop 0
46+
loop !i | i < len && p (A.unsafeIndex arr (off + i)) = loop (i + 1)
47+
| otherwise = i
48+
{-# INLINE spanAscii_ #-}
49+
3750
runText ::
3851
#if defined(ASSERTS)
3952
HasCallStack =>

src/Data/Text/Internal/Read.hs

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,20 @@ perhaps def m = P $ \t -> case runP m t of
6161

6262
hexDigitToInt :: Char -> Int
6363
hexDigitToInt c
64-
| c >= '0' && c <= '9' = ord c - ord '0'
65-
| c >= 'a' && c <= 'f' = ord c - (ord 'a' - 10)
66-
| otherwise = ord c - (ord 'A' - 10)
64+
| to0 < 10 = wordToInt to0
65+
| toa < 6 = wordToInt toa + 10
66+
| otherwise = wordToInt toA + 10
67+
where
68+
ordW = intToWord (ord c)
69+
to0 = ordW - intToWord (ord '0')
70+
toa = ordW - intToWord (ord 'a')
71+
toA = ordW - intToWord (ord 'A')
6772

6873
digitToInt :: Char -> Int
6974
digitToInt c = ord c - ord '0'
75+
76+
intToWord :: Int -> Word
77+
intToWord = fromIntegral
78+
79+
wordToInt :: Word -> Int
80+
wordToInt = fromIntegral

src/Data/Text/Lazy/Read.hs

Lines changed: 40 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
{-# LANGUAGE OverloadedStrings, CPP #-}
2-
{-# LANGUAGE Safe #-}
2+
{-# LANGUAGE Trustworthy #-}
3+
{-# LANGUAGE LambdaCase #-}
4+
{-# LANGUAGE UnboxedTuples #-}
35

46
-- |
57
-- Module : Data.Text.Lazy.Read
@@ -21,11 +23,15 @@ module Data.Text.Lazy.Read
2123
) where
2224

2325
import Control.Monad (liftM)
24-
import Data.Char (isDigit, isHexDigit)
26+
import Data.Char (ord)
2527
import Data.Int (Int8, Int16, Int32, Int64)
2628
import Data.Ratio ((%))
2729
import Data.Text.Internal.Read
30+
import Data.Text.Array as A
2831
import Data.Text.Lazy as T
32+
import Data.Text.Internal.Lazy as T (Text(..))
33+
import qualified Data.Text.Internal as T (Text(..))
34+
import qualified Data.Text.Internal.Private as T (spanAscii_)
2935
import Data.Word (Word, Word8, Word16, Word32, Word64)
3036

3137
-- | Read some text. If the read succeeds, return its value and the
@@ -59,7 +65,7 @@ decimal :: Integral a => Reader a
5965
decimal txt
6066
| T.null h = Left "input does not start with a digit"
6167
| otherwise = Right (T.foldl' go 0 h, t)
62-
where (h,t) = T.span isDigit txt
68+
where (# h, t #) = spanAscii_ (\w -> w - ord8 '0' < 10) txt
6369
go n d = (n * 10 + fromIntegral (digitToInt d))
6470

6571
-- | Read a hexadecimal integer, consisting of an optional leading
@@ -97,7 +103,7 @@ hex :: Integral a => Reader a
97103
hex txt
98104
| T.null h = Left "input does not start with a hexadecimal digit"
99105
| otherwise = Right (T.foldl' go 0 h, t)
100-
where (h,t) = T.span isHexDigit txt
106+
where (# h, t #) = spanAscii_ (\w -> w - ord8 '0' < 10 || w - ord8 'A' < 6 || w - ord8 'a' < 6) txt
101107
go n d = (n * 16 + fromIntegral (hexDigitToInt d))
102108

103109
-- | Read an optional leading sign character (@\'-\'@ or @\'+\'@) and
@@ -156,36 +162,54 @@ signa :: Num a => Parser a -> Parser a
156162
{-# SPECIALIZE signa :: Parser Int64 -> Parser Int64 #-}
157163
{-# SPECIALIZE signa :: Parser Integer -> Parser Integer #-}
158164
signa p = do
159-
sign <- perhaps '+' $ char (\c -> c == '-' || c == '+')
160-
if sign == '+' then p else negate `liftM` p
165+
sign <- perhaps (ord8 '+') $ charAscii (\c -> c == ord8 '-' || c == ord8 '+')
166+
if sign == ord8 '+' then p else negate `liftM` p
161167

162-
char :: (Char -> Bool) -> Parser Char
163-
char p = P $ \t -> case T.uncons t of
164-
Just (c,t') | p c -> Right (c,t')
165-
_ -> Left "character does not match"
168+
charAscii :: (Word8 -> Bool) -> Parser Word8
169+
charAscii p = P $ \case
170+
Empty -> Left "character does not match"
171+
-- len is > 0, unless the internal invariant of Text is violated
172+
Chunk (T.Text arr off len) ts -> let c = A.unsafeIndex arr off in
173+
if p c
174+
then Right (c, if len <= 1 then ts else Chunk (T.Text arr (off + 1) (len - 1)) ts)
175+
else Left "character does not match"
166176

167177
floaty :: Fractional a => (Integer -> Integer -> Integer -> a) -> Reader a
168178
{-# INLINE floaty #-}
169179
floaty f = runP $ do
170-
sign <- perhaps '+' $ char (\c -> c == '-' || c == '+')
180+
sign <- perhaps (ord8 '+') $ charAscii (\c -> c == ord8 '-' || c == ord8 '+')
171181
real <- P decimal
172182
T fraction fracDigits <- perhaps (T 0 0) $ do
173-
_ <- char (=='.')
174-
digits <- P $ \t -> Right (int64ToInt . T.length $ T.takeWhile isDigit t, t)
183+
_ <- charAscii (== ord8 '.')
184+
digits <- P $ \t -> Right (let (# hd, _ #) = spanAscii_ (\w -> w - ord8 '0' < 10) t in int64ToInt (T.length hd), t)
175185
n <- P decimal
176186
return $ T n digits
177-
let e c = c == 'e' || c == 'E'
178-
power <- perhaps 0 (char e >> signa (P decimal) :: Parser Int)
187+
let e c = c == ord8 'e' || c == ord8 'E'
188+
power <- perhaps 0 (charAscii e >> signa (P decimal) :: Parser Int)
179189
let n = if fracDigits == 0
180190
then if power == 0
181191
then fromInteger real
182192
else fromInteger real * (10 ^^ power)
183193
else if power == 0
184194
then f real fraction (10 ^ fracDigits)
185195
else f real fraction (10 ^ fracDigits) * (10 ^^ power)
186-
return $! if sign == '+'
196+
return $! if sign == ord8 '+'
187197
then n
188198
else -n
189199

190200
int64ToInt :: Int64 -> Int
191201
int64ToInt = fromIntegral
202+
203+
ord8 :: Char -> Word8
204+
ord8 = fromIntegral . ord
205+
206+
-- | For the sake of performance this function does not check
207+
-- that a char is in ASCII range; it is a responsibility of @p@.
208+
spanAscii_ :: (Word8 -> Bool) -> Text -> (# Text, Text #)
209+
spanAscii_ p = loop
210+
where
211+
loop Empty = (# Empty, Empty #)
212+
loop (Chunk t ts) = let (# t', t''@(T.Text _ _ len) #) = T.spanAscii_ p t in
213+
if len == 0
214+
then let (# ts', ts'' #) = loop ts in (# Chunk t ts', ts'' #)
215+
else (# Chunk t' Empty, Chunk t'' ts #)

src/Data/Text/Read.hs

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,13 @@ module Data.Text.Read
2121
) where
2222

2323
import Control.Monad (liftM)
24-
import Data.Char (isDigit, isHexDigit)
24+
import Data.Char (ord)
2525
import Data.Int (Int8, Int16, Int32, Int64)
2626
import Data.Ratio ((%))
2727
import Data.Text as T
28-
import Data.Text.Internal.Private (span_)
28+
import Data.Text.Internal as T (Text(..))
29+
import Data.Text.Array as A
30+
import Data.Text.Internal.Private (spanAscii_)
2931
import Data.Text.Internal.Read
3032
import Data.Word (Word, Word8, Word16, Word32, Word64)
3133

@@ -60,7 +62,7 @@ decimal :: Integral a => Reader a
6062
decimal txt
6163
| T.null h = Left "input does not start with a digit"
6264
| otherwise = Right (T.foldl' go 0 h, t)
63-
where (# h,t #) = span_ isDigit txt
65+
where (# h,t #) = spanAscii_ (\w -> w - ord8 '0' < 10) txt
6466
go n d = (n * 10 + fromIntegral (digitToInt d))
6567

6668
-- | Read a hexadecimal integer, consisting of an optional leading
@@ -107,7 +109,7 @@ hex :: Integral a => Reader a
107109
hex txt
108110
| T.null h = Left "input does not start with a hexadecimal digit"
109111
| otherwise = Right (T.foldl' go 0 h, t)
110-
where (# h,t #) = span_ isHexDigit txt
112+
where (# h,t #) = spanAscii_ (\w -> w - ord8 '0' < 10 || w - ord8 'A' < 6 || w - ord8 'a' < 6) txt
111113
go n d = (n * 16 + fromIntegral (hexDigitToInt d))
112114

113115
-- | Read an optional leading sign character (@\'-\'@ or @\'+\'@) and
@@ -166,33 +168,37 @@ signa :: Num a => Parser a -> Parser a
166168
{-# SPECIALIZE signa :: Parser Int64 -> Parser Int64 #-}
167169
{-# SPECIALIZE signa :: Parser Integer -> Parser Integer #-}
168170
signa p = do
169-
sign <- perhaps '+' $ char (\c -> c == '-' || c == '+')
170-
if sign == '+' then p else negate `liftM` p
171+
sign <- perhaps (ord8 '+') $ charAscii (\c -> c == ord8 '-' || c == ord8 '+')
172+
if sign == ord8 '+' then p else negate `liftM` p
171173

172-
char :: (Char -> Bool) -> Parser Char
173-
char p = P $ \t -> case T.uncons t of
174-
Just (c,t') | p c -> Right (c,t')
175-
_ -> Left "character does not match"
174+
charAscii :: (Word8 -> Bool) -> Parser Word8
175+
charAscii p = P $ \(Text arr off len) -> let c = A.unsafeIndex arr off in
176+
if len > 0 && p c
177+
then Right (c, Text arr (off + 1) (len - 1))
178+
else Left "character does not match"
176179

177180
floaty :: Fractional a => (Integer -> Integer -> Integer -> a) -> Reader a
178181
{-# INLINE floaty #-}
179182
floaty f = runP $ do
180-
sign <- perhaps '+' $ char (\c -> c == '-' || c == '+')
183+
sign <- perhaps (ord8 '+') $ charAscii (\c -> c == ord8 '-' || c == ord8 '+')
181184
real <- P decimal
182185
T fraction fracDigits <- perhaps (T 0 0) $ do
183-
_ <- char (=='.')
184-
digits <- P $ \t -> Right (T.length $ T.takeWhile isDigit t, t)
186+
_ <- charAscii (== ord8 '.')
187+
digits <- P $ \t -> Right (let (# hd, _ #) = spanAscii_ (\w -> w - ord8 '0' < 10) t in T.length hd, t)
185188
n <- P decimal
186189
return $ T n digits
187-
let e c = c == 'e' || c == 'E'
188-
power <- perhaps 0 (char e >> signa (P decimal) :: Parser Int)
190+
let e c = c == ord8 'e' || c == ord8 'E'
191+
power <- perhaps 0 (charAscii e >> signa (P decimal) :: Parser Int)
189192
let n = if fracDigits == 0
190193
then if power == 0
191194
then fromInteger real
192195
else fromInteger real * (10 ^^ power)
193196
else if power == 0
194197
then f real fraction (10 ^ fracDigits)
195198
else f real fraction (10 ^ fracDigits) * (10 ^^ power)
196-
return $! if sign == '+'
199+
return $! if sign == ord8 '+'
197200
then n
198201
else -n
202+
203+
ord8 :: Char -> Word8
204+
ord8 = fromIntegral . ord

0 commit comments

Comments
 (0)