Skip to content

Commit fa1370c

Browse files
authored
Merge pull request #959 from haskell/jstring_-tweak
jstring_ tweak and ascii code cleanup
2 parents 51f3303 + a690b66 commit fa1370c

File tree

1 file changed

+122
-73
lines changed

1 file changed

+122
-73
lines changed

src/Data/Aeson/Parser/Internal.hs

Lines changed: 122 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
{-# LANGUAGE BangPatterns #-}
22
{-# LANGUAGE CPP #-}
3+
{-# LANGUAGE PatternSynonyms #-}
34
{-# LANGUAGE NoImplicitPrelude #-}
45
{-# LANGUAGE OverloadedStrings #-}
56
#if __GLASGOW_HASKELL__ <= 800 && __GLASGOW_HASKELL__ >= 706
@@ -60,6 +61,7 @@ import Data.Functor.Compat (($>))
6061
import Data.Scientific (Scientific)
6162
import Data.Text (Text)
6263
import Data.Vector (Vector)
64+
import Data.Word (Word8)
6365
import qualified Data.Vector as Vector (empty, fromList, fromListN, reverse)
6466
import qualified Data.Attoparsec.ByteString as A
6567
import qualified Data.Attoparsec.Lazy as L
@@ -77,21 +79,78 @@ import Data.Aeson.Internal.Text
7779
-- >>> :set -XOverloadedStrings
7880
-- >>> import Data.Aeson.Types
7981

80-
#define BACKSLASH 92
81-
#define CLOSE_CURLY 125
82-
#define CLOSE_SQUARE 93
83-
#define COMMA 44
84-
#define DOUBLE_QUOTE 34
85-
#define OPEN_CURLY 123
86-
#define OPEN_SQUARE 91
87-
#define C_0 48
88-
#define C_9 57
89-
#define C_A 65
90-
#define C_F 70
91-
#define C_a 97
92-
#define C_f 102
93-
#define C_n 110
94-
#define C_t 116
82+
-------------------------------------------------------------------------------
83+
-- Word8 ASCII codes as patterns
84+
-------------------------------------------------------------------------------
85+
86+
-- GHC-8.0 doesn't support giving multiple pattern synonyms type signature at once
87+
88+
-- spaces
89+
pattern W8_SPACE :: Word8
90+
pattern W8_NL :: Word8
91+
pattern W8_CR :: Word8
92+
pattern W8_TAB :: Word8
93+
94+
pattern W8_SPACE = 0x20
95+
pattern W8_NL = 0x0a
96+
pattern W8_CR = 0x0d
97+
pattern W8_TAB = 0x09
98+
99+
-- punctuation
100+
pattern W8_BACKSLASH :: Word8
101+
pattern W8_DOUBLE_QUOTE :: Word8
102+
pattern W8_DOT :: Word8
103+
pattern W8_COMMA :: Word8
104+
105+
pattern W8_BACKSLASH = 92
106+
pattern W8_COMMA = 44
107+
pattern W8_DOT = 46
108+
pattern W8_DOUBLE_QUOTE = 34
109+
110+
-- parentheses
111+
pattern W8_CLOSE_CURLY :: Word8
112+
pattern W8_CLOSE_SQUARE :: Word8
113+
pattern W8_OPEN_SQUARE :: Word8
114+
pattern W8_OPEN_CURLY :: Word8
115+
116+
pattern W8_OPEN_CURLY = 123
117+
pattern W8_OPEN_SQUARE = 91
118+
pattern W8_CLOSE_CURLY = 125
119+
pattern W8_CLOSE_SQUARE = 93
120+
121+
-- operators
122+
pattern W8_MINUS :: Word8
123+
pattern W8_PLUS :: Word8
124+
125+
pattern W8_PLUS = 43
126+
pattern W8_MINUS = 45
127+
128+
-- digits
129+
pattern W8_0 :: Word8
130+
pattern W8_9 :: Word8
131+
132+
pattern W8_0 = 48
133+
pattern W8_9 = 57
134+
135+
-- lower case
136+
pattern W8_e :: Word8
137+
pattern W8_f :: Word8
138+
pattern W8_n :: Word8
139+
pattern W8_t :: Word8
140+
141+
pattern W8_e = 101
142+
pattern W8_f = 102
143+
pattern W8_n = 110
144+
pattern W8_t = 116
145+
146+
-- upper case
147+
pattern W8_E :: Word8
148+
pattern W8_E = 69
149+
150+
151+
-------------------------------------------------------------------------------
152+
-- Parsers
153+
-------------------------------------------------------------------------------
95154

96155
-- | Parse any JSON value.
97156
--
@@ -151,7 +210,7 @@ objectValues :: ([(Key, Value)] -> Either String Object)
151210
objectValues mkObject str val = do
152211
skipSpace
153212
w <- A.peekWord8'
154-
if w == CLOSE_CURLY
213+
if w == W8_CLOSE_CURLY
155214
then A.anyWord8 >> return KM.empty
156215
else loop []
157216
where
@@ -162,9 +221,9 @@ objectValues mkObject str val = do
162221
loop acc = do
163222
k <- (str A.<?> "object key") <* skipSpace <* (char ':' A.<?> "':'")
164223
v <- (val A.<?> "object value") <* skipSpace
165-
ch <- A.satisfy (\w -> w == COMMA || w == CLOSE_CURLY) A.<?> "',' or '}'"
224+
ch <- A.satisfy (\w -> w == W8_COMMA || w == W8_CLOSE_CURLY) A.<?> "',' or '}'"
166225
let acc' = (k, v) : acc
167-
if ch == COMMA
226+
if ch == W8_COMMA
168227
then skipSpace >> loop acc'
169228
else case mkObject acc' of
170229
Left err -> fail err
@@ -185,14 +244,14 @@ arrayValues :: Parser Value -> Parser (Vector Value)
185244
arrayValues val = do
186245
skipSpace
187246
w <- A.peekWord8'
188-
if w == CLOSE_SQUARE
247+
if w == W8_CLOSE_SQUARE
189248
then A.anyWord8 >> return Vector.empty
190249
else loop [] 1
191250
where
192251
loop acc !len = do
193252
v <- (val A.<?> "json list value") <* skipSpace
194-
ch <- A.satisfy (\w -> w == COMMA || w == CLOSE_SQUARE) A.<?> "',' or ']'"
195-
if ch == COMMA
253+
ch <- A.satisfy (\w -> w == W8_COMMA || w == W8_CLOSE_SQUARE) A.<?> "',' or ']'"
254+
if ch == W8_COMMA
196255
then skipSpace >> loop (v:acc) (len+1)
197256
else return (Vector.reverse (Vector.fromListN len (v:acc)))
198257
{-# INLINE arrayValues #-}
@@ -239,15 +298,15 @@ jsonWith mkObject = fix $ \value_ -> do
239298
skipSpace
240299
w <- A.peekWord8'
241300
case w of
242-
DOUBLE_QUOTE -> A.anyWord8 *> (String <$> jstring_)
243-
OPEN_CURLY -> A.anyWord8 *> object_ mkObject value_
244-
OPEN_SQUARE -> A.anyWord8 *> array_ value_
245-
C_f -> string "false" $> Bool False
246-
C_t -> string "true" $> Bool True
247-
C_n -> string "null" $> Null
248-
_ | w >= 48 && w <= 57 || w == 45
249-
-> Number <$> scientific
250-
| otherwise -> fail "not a valid json value"
301+
W8_DOUBLE_QUOTE -> A.anyWord8 *> (String <$> jstring_)
302+
W8_OPEN_CURLY -> A.anyWord8 *> object_ mkObject value_
303+
W8_OPEN_SQUARE -> A.anyWord8 *> array_ value_
304+
W8_f -> string "false" $> Bool False
305+
W8_t -> string "true" $> Bool True
306+
W8_n -> string "null" $> Null
307+
_ | w >= W8_0 && w <= W8_9 || w == W8_MINUS
308+
-> Number <$> scientific
309+
| otherwise -> fail "not a valid json value"
251310
{-# INLINE jsonWith #-}
252311

253312
-- | Variant of 'json' which keeps only the last occurence of every key.
@@ -291,19 +350,19 @@ jsonWith' mkObject = fix $ \value_ -> do
291350
skipSpace
292351
w <- A.peekWord8'
293352
case w of
294-
DOUBLE_QUOTE -> do
295-
!s <- A.anyWord8 *> jstring_
296-
return (String s)
297-
OPEN_CURLY -> A.anyWord8 *> object_' mkObject value_
298-
OPEN_SQUARE -> A.anyWord8 *> array_' value_
299-
C_f -> string "false" $> Bool False
300-
C_t -> string "true" $> Bool True
301-
C_n -> string "null" $> Null
302-
_ | w >= 48 && w <= 57 || w == 45
303-
-> do
304-
!n <- scientific
305-
return (Number n)
306-
| otherwise -> fail "not a valid json value"
353+
W8_DOUBLE_QUOTE -> do
354+
!s <- A.anyWord8 *> jstring_
355+
return (String s)
356+
W8_OPEN_CURLY -> A.anyWord8 *> object_' mkObject value_
357+
W8_OPEN_SQUARE -> A.anyWord8 *> array_' value_
358+
W8_f -> string "false" $> Bool False
359+
W8_t -> string "true" $> Bool True
360+
W8_n -> string "null" $> Null
361+
_ | w >= W8_0 && w <= W8_9 || w == W8_MINUS
362+
-> do
363+
!n <- scientific
364+
return (Number n)
365+
| otherwise -> fail "not a valid json value"
307366
{-# INLINE jsonWith' #-}
308367

309368
-- | Variant of 'json'' which keeps only the last occurence of every key.
@@ -321,7 +380,7 @@ jsonNoDup' = jsonWith' parseListNoDup
321380

322381
-- | Parse a quoted JSON string.
323382
jstring :: Parser Text
324-
jstring = A.word8 DOUBLE_QUOTE *> jstring_
383+
jstring = A.word8 W8_DOUBLE_QUOTE *> jstring_
325384

326385
-- | Parse a JSON Key
327386
key :: Parser Key
@@ -331,16 +390,13 @@ key = Key.fromText <$> jstring
331390
jstring_ :: Parser Text
332391
{-# INLINE jstring_ #-}
333392
jstring_ = do
334-
-- not sure whether >= or bit hackery is faster
335-
-- perfectly, we shouldn't care, it's compiler job.
336-
s <- A.takeWhile (\w -> w /= DOUBLE_QUOTE && w /= BACKSLASH && w >= 0x20 && w < 0x80)
337-
let txt = unsafeDecodeASCII s
393+
s <- A.takeWhile (\w -> w /= W8_DOUBLE_QUOTE && w /= W8_BACKSLASH && w >= 0x20 && w < 0x80)
338394
mw <- A.peekWord8
339395
case mw of
340-
Nothing -> fail "string without end"
341-
Just DOUBLE_QUOTE -> A.anyWord8 $> txt
342-
Just w | w < 0x20 -> fail "unescaped control character"
343-
_ -> jstringSlow s
396+
Nothing -> fail "string without end"
397+
Just W8_DOUBLE_QUOTE -> A.anyWord8 $> unsafeDecodeASCII s
398+
Just w | w < 0x20 -> fail "unescaped control character"
399+
_ -> jstringSlow s
344400

345401
jstringSlow :: B.ByteString -> Parser Text
346402
{-# INLINE jstringSlow #-}
@@ -350,13 +406,12 @@ jstringSlow s' = do
350406
Right r -> return r
351407
Left err -> fail $ show err
352408
where
353-
startState = False
409+
startState = False
354410
go a c
355-
| a = Just False
356-
| c == DOUBLE_QUOTE = Nothing
357-
| otherwise = let a' = c == backslash
411+
| a = Just False
412+
| c == W8_DOUBLE_QUOTE = Nothing
413+
| otherwise = let a' = c == W8_BACKSLASH
358414
in Just a'
359-
where backslash = BACKSLASH
360415

361416
decodeWith :: Parser Value -> (Value -> Result a) -> L.ByteString -> Maybe a
362417
decodeWith p to s =
@@ -451,7 +506,7 @@ jsonEOF' = json' <* skipSpace <* endOfInput
451506
-- | The only valid whitespace in a JSON document is space, newline,
452507
-- carriage return, and tab.
453508
skipSpace :: Parser ()
454-
skipSpace = A.skipWhile $ \w -> w == 0x20 || w == 0x0a || w == 0x0d || w == 0x09
509+
skipSpace = A.skipWhile $ \w -> w == W8_SPACE || w == W8_NL || w == W8_CR || w == W8_TAB
455510
{-# INLINE skipSpace #-}
456511

457512
------------------ Copy-pasted and adapted from attoparsec ------------------
@@ -461,40 +516,34 @@ data SP = SP !Integer {-# UNPACK #-}!Int
461516

462517
decimal0 :: Parser Integer
463518
decimal0 = do
464-
let zero = 48
465519
digits <- A.takeWhile1 isDigit_w8
466-
if B.length digits > 1 && B.unsafeHead digits == zero
520+
if B.length digits > 1 && B.unsafeHead digits == W8_0
467521
then fail "leading zero"
468522
else return (bsToInteger digits)
469523

470524
-- | Parse a JSON number.
471525
scientific :: Parser Scientific
472526
scientific = do
473-
let minus = 45
474-
plus = 43
475527
sign <- A.peekWord8'
476-
let !positive = sign == plus || sign /= minus
477-
when (sign == plus || sign == minus) $
528+
let !positive = not (sign == W8_MINUS)
529+
when (sign == W8_PLUS || sign == W8_MINUS) $
478530
void A.anyWord8
479531

480532
n <- decimal0
481533

482534
let f fracDigits = SP (B.foldl' step n fracDigits)
483535
(negate $ B.length fracDigits)
484-
step a w = a * 10 + fromIntegral (w - 48)
536+
step a w = a * 10 + fromIntegral (w - W8_0)
485537

486538
dotty <- A.peekWord8
487-
-- '.' -> ascii 46
488539
SP c e <- case dotty of
489-
Just 46 -> A.anyWord8 *> (f <$> A.takeWhile1 isDigit_w8)
490-
_ -> pure (SP n 0)
540+
Just W8_DOT -> A.anyWord8 *> (f <$> A.takeWhile1 isDigit_w8)
541+
_ -> pure (SP n 0)
491542

492543
let !signedCoeff | positive = c
493544
| otherwise = -c
494545

495-
let littleE = 101
496-
bigE = 69
497-
(A.satisfy (\ex -> ex == littleE || ex == bigE) *>
546+
(A.satisfy (\ex -> case ex of W8_e -> True; W8_E -> True; _ -> False) *>
498547
fmap (Sci.scientific signedCoeff . (e +)) (signed decimal)) <|>
499548
return (Sci.scientific signedCoeff e)
500549
{-# INLINE scientific #-}
@@ -503,14 +552,14 @@ scientific = do
503552

504553
bsToInteger :: B.ByteString -> Integer
505554
bsToInteger bs
506-
| l > 40 = valInteger 10 l [ fromIntegral (w - 48) | w <- B.unpack bs ]
555+
| l > 40 = valInteger 10 l [ fromIntegral (w - W8_0) | w <- B.unpack bs ]
507556
| otherwise = bsToIntegerSimple bs
508557
where
509558
l = B.length bs
510559

511560
bsToIntegerSimple :: B.ByteString -> Integer
512561
bsToIntegerSimple = B.foldl' step 0 where
513-
step a b = a * 10 + fromIntegral (b - 48) -- 48 = '0'
562+
step a b = a * 10 + fromIntegral (b - W8_0)
514563

515564
-- A sub-quadratic algorithm for Integer. Pairs of adjacent radix b
516565
-- digits are combined into a single radix b^2 digit. This process is

0 commit comments

Comments
 (0)