Skip to content

Commit 33f0315

Browse files
committed
Add Data.Aeson.Decoding.Text, decodeStrictText :: Text -> ...
We avoid intermediate ByteString copy by not doing `decode . TE.encodeUtf8`, but instead working on `Text` value directly. As we know that the stream is valid Unicode (UTF8 or UTF16), we can also take some shortcuts. One gotcha is that internal Text values (in Keys or Value Strings) most likely retain the original input `Text` value (its Array). It shouldn't be an issue if the Value is actually decoded so these `Text` values disapper, but if not (e.g. `Object` keys survive) then users might want to use `Data.Text.copy`. With GHC-9.6.2 (text-2.0.2; UTF-8) the speedup is not huge, but noticeable anyway: aeson/strict: OK (0.26s) 462 μs ± 23 μs aeson/text: OK (0.22s) 399 μs ± 25 μs aeson/text-via-bs: OK (0.14s) 473 μs ± 45 μs With GHC-8.6.5 (text-1.2.3.0; UTF-16) the speedup is relatively more: aeson/strict: OK (0.22s) 819 μs ± 74 μs aeson/text: OK (0.17s) 593 μs ± 46 μs aeson/text-via-bs: OK (0.23s) 875 μs ± 62 μs
1 parent a50c64f commit 33f0315

File tree

228 files changed

+401633
-12
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

228 files changed

+401633
-12
lines changed

aeson.cabal

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ library
5959
Data.Aeson.Decoding
6060
Data.Aeson.Decoding.ByteString
6161
Data.Aeson.Decoding.ByteString.Lazy
62+
Data.Aeson.Decoding.Text
6263
Data.Aeson.Decoding.Tokens
6364
Data.Aeson.Encoding
6465
Data.Aeson.Encoding.Internal
@@ -81,7 +82,9 @@ library
8182
Data.Aeson.Internal.Text
8283
Data.Aeson.Internal.TH
8384
Data.Aeson.Internal.Unescape
85+
Data.Aeson.Internal.UnescapeFromText
8486
Data.Aeson.Internal.Word8
87+
Data.Aeson.Internal.Word16
8588
Data.Aeson.Parser.Time
8689
Data.Aeson.Types.Class
8790
Data.Aeson.Types.FromJSON

benchmarks/bench/CompareWithJSON.hs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ import qualified Data.Aeson.Text as A
1616
import qualified Data.Aeson.Parser.Internal as I
1717
import qualified Data.ByteString as BS
1818
import qualified Data.ByteString.Lazy as BL
19+
import qualified Data.Text as T
20+
import qualified Data.Text.Encoding as TE
1921
import qualified Data.Text.Lazy as TL
2022
import qualified Data.Text.Lazy.Builder as TLB
2123
import qualified Data.Text.Lazy.Encoding as TLE
@@ -52,6 +54,12 @@ decodeS s = fromMaybe (error "fail to parse via Aeson") $ A.decodeStrict s
5254
decodeS' :: BS.ByteString -> A.Value
5355
decodeS' s = fromMaybe (error "fail to parse via Aeson") $ A.decodeStrict' s
5456

57+
decodeT :: T.Text -> A.Value
58+
decodeT t = fromMaybe (error "fail to parse via Aeson") $ A.decodeStrictText t
59+
60+
decodeTviaBS :: T.Text -> A.Value
61+
decodeTviaBS t = fromMaybe (error "fail to parse via Aeson") $ A.decodeStrict $ TE.encodeUtf8 t
62+
5563
decodeAtto :: BL.ByteString -> A.Value
5664
decodeAtto s = fromMaybe (error "fail to parse via Parser.decodeWith") $
5765
I.decodeWith I.jsonEOF A.fromJSON s
@@ -82,9 +90,11 @@ benchmark =
8290
env (readL enFile) $ \enA ->
8391
env (readS enFile) $ \enS ->
8492
env (readStr enFile) $ \enJ ->
93+
env (readT enFile) $ \enT ->
8594
env (readL jpFile) $ \jpA ->
8695
env (readS jpFile) $ \jpS ->
8796
env (readStr jpFile) $ \jpJ ->
97+
env (readT jpFile) $ \jpT ->
8898
bgroup "compare-json" [
8999
bgroup "decode" [
90100
bgroup "whnf" [
@@ -94,6 +104,8 @@ benchmark =
94104
, bench "aeson/normal'" $ whnf decode' enA
95105
, bench "aeson/strict" $ whnf decodeS enS
96106
, bench "aeson/strict'" $ whnf decodeS' enS
107+
, bench "aeson/text" $ whnf decodeT enT
108+
, bench "aeson/text-via-bs" $ whnf decodeTviaBS enT
97109

98110
-- attoparsec-aeson package
99111
, bench "aeson/atto" $ whnf decodeAtto enA
@@ -123,6 +135,8 @@ benchmark =
123135
, bgroup "jp" [
124136
bench "aeson/normal" $ whnf decode jpA
125137
, bench "aeson/strict" $ whnf decodeS jpS
138+
, bench "aeson/text" $ whnf decodeT jpT
139+
, bench "aeson/text-via-bs" $ whnf decodeTviaBS jpT
126140
, bench "json" $ whnf decodeJ jpJ
127141
]
128142
]

benchmarks/bench/Utils.hs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,21 +13,25 @@ import qualified Data.Aeson as A
1313
import qualified Data.ByteString as BS
1414
import qualified Data.ByteString.Char8 as BS8
1515
import qualified Data.ByteString.Lazy as LBS
16+
import qualified Data.Text as T
17+
import qualified Data.Text.Encoding as TE
1618

1719
readStr :: FilePath -> IO String
1820
readStr fp = do
19-
dataDir <- lookupEnv "AESON_BENCH_DATADIR"
20-
fmap BS8.unpack $ BS.readFile $ fromMaybe "json-data" dataDir </> fp
21+
fmap BS8.unpack $ readS fp
2122

2223
readS :: FilePath -> IO BS.ByteString
2324
readS fp = do
2425
dataDir <- lookupEnv "AESON_BENCH_DATADIR"
25-
BS.readFile $ fromMaybe "json-data" dataDir </> fp
26+
BS.readFile $ fromMaybe "benchmarks/json-data" dataDir </> fp
2627

2728
readL :: FilePath -> IO LBS.ByteString
2829
readL fp = do
2930
dataDir <- lookupEnv "AESON_BENCH_DATADIR"
30-
LBS.readFile $ fromMaybe "json-data" dataDir </> fp
31+
LBS.readFile $ fromMaybe "benchmarks/json-data" dataDir </> fp
32+
33+
readT :: FilePath -> IO T.Text
34+
readT fp = fmap TE.decodeUtf8 $ readS fp
3135

3236
readV :: A.FromJSON a => FilePath -> IO a
3337
readV fileName = do

src/Data/Aeson.hs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,14 @@ module Data.Aeson
5555
, eitherDecodeFileStrict
5656
, eitherDecodeStrict'
5757
, eitherDecodeFileStrict'
58+
-- ** Variants for strict text
59+
, decodeStrictText
60+
, eitherDecodeStrictText
5861
-- ** Exception throwing variants
5962
, AesonException (..)
6063
, throwDecode
6164
, throwDecodeStrict
65+
, throwDecodeStrictText
6266
, throwDecode'
6367
, throwDecodeStrict'
6468
-- * Core JSON types
@@ -171,6 +175,7 @@ import Data.Aeson.Types
171175
import qualified Data.ByteString as B
172176
import qualified Data.ByteString.Lazy as L
173177
import Data.Aeson.Decoding (decode, eitherDecode, throwDecode, decodeStrict, eitherDecodeStrict, throwDecodeStrict)
178+
import Data.Aeson.Decoding (decodeStrictText, eitherDecodeStrictText, throwDecodeStrictText)
174179

175180
-- $setup
176181
-- >>> :set -XOverloadedStrings

src/Data/Aeson/Decoding.hs

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ module Data.Aeson.Decoding (
99
decodeStrict,
1010
eitherDecodeStrict,
1111
throwDecodeStrict,
12+
decodeStrictText,
13+
eitherDecodeStrictText,
14+
throwDecodeStrictText,
1215
toEitherValue,
1316
unescapeText,
1417
) where
@@ -17,11 +20,13 @@ import Control.Monad.Catch (MonadThrow (..))
1720
import Data.Aeson.Types.Internal (AesonException (..), formatError)
1821

1922
import qualified Data.Aeson.Types as A
20-
import qualified Data.ByteString as B
21-
import qualified Data.ByteString.Lazy as L
23+
import qualified Data.ByteString as BS
24+
import qualified Data.ByteString.Lazy as LBS
25+
import qualified Data.Text as T
2226

2327
import Data.Aeson.Decoding.ByteString
2428
import Data.Aeson.Decoding.ByteString.Lazy
29+
import Data.Aeson.Decoding.Text
2530
import Data.Aeson.Decoding.Conversion
2631
import Data.Aeson.Internal.Unescape (unescapeText)
2732

@@ -32,23 +37,23 @@ import Data.Aeson.Internal.Unescape (unescapeText)
3237
-- | Efficiently deserialize a JSON value from a strict 'B.ByteString'.
3338
-- If this fails due to incomplete or invalid input, 'Nothing' is
3439
-- returned.
35-
decodeStrict :: (A.FromJSON a) => B.ByteString -> Maybe a
40+
decodeStrict :: (A.FromJSON a) => BS.ByteString -> Maybe a
3641
decodeStrict bs = unResult (toResultValue (bsToTokens bs)) (\_ -> Nothing) $ \v bs' -> case A.ifromJSON v of
3742
A.ISuccess x
3843
| bsSpace bs' -> Just x
3944
| otherwise -> Nothing
4045
A.IError _ _ -> Nothing
4146

4247
-- | Like 'decodeStrict' but returns an error message when decoding fails.
43-
eitherDecodeStrict :: (A.FromJSON a) => B.ByteString -> Either String a
48+
eitherDecodeStrict :: (A.FromJSON a) => BS.ByteString -> Either String a
4449
eitherDecodeStrict bs = unResult (toResultValue (bsToTokens bs)) Left $ \v bs' -> case A.ifromJSON v of
4550
A.ISuccess x
4651
| bsSpace bs' -> Right x
4752
| otherwise -> Left "Trailing garbage"
4853
A.IError path msg -> Left $ formatError path msg
4954

5055
-- | Like 'decodeStrict' but throws an 'AesonException' when decoding fails.
51-
throwDecodeStrict :: forall a m. (A.FromJSON a, MonadThrow m) => B.ByteString -> m a
56+
throwDecodeStrict :: forall a m. (A.FromJSON a, MonadThrow m) => BS.ByteString -> m a
5257
throwDecodeStrict bs = unResult (toResultValue (bsToTokens bs)) (throwM . AesonException) $ \v bs' -> case A.ifromJSON v of
5358
A.ISuccess x
5459
| bsSpace bs' -> pure x
@@ -62,15 +67,15 @@ throwDecodeStrict bs = unResult (toResultValue (bsToTokens bs)) (throwM . AesonE
6267
-- | Efficiently deserialize a JSON value from a lazy 'L.ByteString'.
6368
-- If this fails due to incomplete or invalid input, 'Nothing' is
6469
-- returned.
65-
decode :: (A.FromJSON a) => L.ByteString -> Maybe a
70+
decode :: (A.FromJSON a) => LBS.ByteString -> Maybe a
6671
decode bs = unResult (toResultValue (lbsToTokens bs)) (\_ -> Nothing) $ \v bs' -> case A.ifromJSON v of
6772
A.ISuccess x
6873
| lbsSpace bs' -> Just x
6974
| otherwise -> Nothing
7075
A.IError _ _ -> Nothing
7176

7277
-- | Like 'decode' but returns an error message when decoding fails.
73-
eitherDecode :: (A.FromJSON a) => L.ByteString -> Either String a
78+
eitherDecode :: (A.FromJSON a) => LBS.ByteString -> Either String a
7479
eitherDecode bs = unResult (toResultValue (lbsToTokens bs)) Left $ \v bs' -> case A.ifromJSON v of
7580
A.ISuccess x
7681
| lbsSpace bs' -> Right x
@@ -80,9 +85,45 @@ eitherDecode bs = unResult (toResultValue (lbsToTokens bs)) Left $ \v bs' -> cas
8085
-- | Like 'decode' but throws an 'AesonException' when decoding fails.
8186
--
8287
-- 'throwDecode' is in @aeson@ since 2.1.2.0, but this variant is added later.
83-
throwDecode :: forall a m. (A.FromJSON a, MonadThrow m) => L.ByteString -> m a
88+
throwDecode :: forall a m. (A.FromJSON a, MonadThrow m) => LBS.ByteString -> m a
8489
throwDecode bs = unResult (toResultValue (lbsToTokens bs)) (throwM . AesonException) $ \v bs' -> case A.ifromJSON v of
8590
A.ISuccess x
8691
| lbsSpace bs' -> pure x
8792
| otherwise -> throwM $ AesonException "Trailing garbage"
8893
A.IError path msg -> throwM $ AesonException $ formatError path msg
94+
95+
-------------------------------------------------------------------------------
96+
-- Decoding: strict text
97+
-------------------------------------------------------------------------------
98+
99+
-- | Efficiently deserialize a JSON value from a strict 'B.ByteString'.
100+
-- If this fails due to incomplete or invalid input, 'Nothing' is
101+
-- returned.
102+
--
103+
-- @since 2.2.1.0
104+
decodeStrictText :: (A.FromJSON a) => T.Text -> Maybe a
105+
decodeStrictText bs = unResult (toResultValue (textToTokens bs)) (\_ -> Nothing) $ \v bs' -> case A.ifromJSON v of
106+
A.ISuccess x
107+
| textSpace bs' -> Just x
108+
| otherwise -> Nothing
109+
A.IError _ _ -> Nothing
110+
111+
-- | Like 'decodeStrictText' but returns an error message when decoding fails.
112+
--
113+
-- @since 2.2.1.0
114+
eitherDecodeStrictText :: (A.FromJSON a) => T.Text -> Either String a
115+
eitherDecodeStrictText bs = unResult (toResultValue (textToTokens bs)) Left $ \v bs' -> case A.ifromJSON v of
116+
A.ISuccess x
117+
| textSpace bs' -> Right x
118+
| otherwise -> Left "Trailing garbage"
119+
A.IError path msg -> Left $ formatError path msg
120+
121+
-- | Like 'decodeStrictText' but throws an 'AesonException' when decoding fails.
122+
--
123+
-- @since 2.2.1.0
124+
throwDecodeStrictText :: forall a m. (A.FromJSON a, MonadThrow m) => T.Text -> m a
125+
throwDecodeStrictText bs = unResult (toResultValue (textToTokens bs)) (throwM . AesonException) $ \v bs' -> case A.ifromJSON v of
126+
A.ISuccess x
127+
| textSpace bs' -> pure x
128+
| otherwise -> throwM $ AesonException "Trailing garbage"
129+
A.IError path msg -> throwM $ AesonException $ formatError path msg

src/Data/Aeson/Decoding/Conversion.hs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
module Data.Aeson.Decoding.Conversion (
44
bsSpace,
55
lbsSpace,
6+
textSpace,
7+
ltextSpace,
68
toEitherValue,
79
toResultValue,
810
Result (..),
@@ -16,6 +18,8 @@ import qualified Data.Aeson.Types.Internal as A
1618
import qualified Data.ByteString as B
1719
import qualified Data.ByteString.Lazy as L
1820
import qualified Data.Vector as V
21+
import qualified Data.Text as T
22+
import qualified Data.Text.Lazy as LT
1923

2024
import Data.Aeson.Decoding.Tokens
2125

@@ -25,6 +29,12 @@ bsSpace = B.all (\w -> w == 0x20 || w == 0x0a || w == 0x0d || w == 0x09)
2529
lbsSpace :: L.ByteString -> Bool
2630
lbsSpace = L.all (\w -> w == 0x20 || w == 0x0a || w == 0x0d || w == 0x09)
2731

32+
textSpace :: T.Text -> Bool
33+
textSpace = T.all (\c -> c == ' ' || c == '\r' || c == '\n' || c == '\t')
34+
35+
ltextSpace :: LT.Text -> Bool
36+
ltextSpace = LT.all (\c -> c == ' ' || c == '\r' || c == '\n' || c == '\t')
37+
2838
-- | Convert 'Tokens' to 'A.Value'.
2939
--
3040
-- The resulting value will be in normal form if its forced.

0 commit comments

Comments
 (0)