Skip to content

Commit a50c64f

Browse files
authored
Merge pull request #1070 from haskell/rfc8785
Implementation of RFC 8785: JSON Canonicalization Scheme
2 parents 54d3c33 + 3f2a79a commit a50c64f

File tree

9 files changed

+612
-4
lines changed

9 files changed

+612
-4
lines changed

.hlint.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,4 @@
4646
- ignore: { name: "Use const" }
4747
- ignore: { name: "Use -" }
4848
- ignore: { name: "Use /=" }
49+
- ignore: { name: "Use uncurry" }

aeson.cabal

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ library
6565
Data.Aeson.Key
6666
Data.Aeson.KeyMap
6767
Data.Aeson.QQ.Simple
68+
Data.Aeson.RFC8785
6869
Data.Aeson.Text
6970
Data.Aeson.TH
7071
Data.Aeson.Types
@@ -108,13 +109,17 @@ library
108109
if !impl(ghc >=8.6)
109110
build-depends: contravariant >=1.4.1 && <1.6
110111

112+
if !impl(ghc >=9.0)
113+
build-depends: integer-gmp
114+
111115
-- Other dependencies
112116
build-depends:
113117
data-fix >=0.3.2 && <0.4
114118
, dlist >=1.0 && <1.1
115119
, hashable >=1.4.2.0 && <1.5
116120
, indexed-traversable >=0.1.2 && <0.2
117121
, integer-conversion >=0.1 && <0.2
122+
, integer-logarithms >=1.0.3.1 && <1.1
118123
, network-uri >=2.6.4.1 && <2.7
119124
, OneTuple >=0.4.1.1 && <0.5
120125
, primitive >=0.8.0.0 && <0.9
@@ -146,10 +151,12 @@ test-suite aeson-tests
146151
main-is: Tests.hs
147152
ghc-options: -Wall -threaded -rtsopts
148153
other-modules:
154+
CastFloat
149155
DataFamilies.Encoders
150156
DataFamilies.Instances
151157
DataFamilies.Properties
152158
DataFamilies.Types
159+
DoubleToScientific
153160
Encoders
154161
ErrorMessages
155162
Functions
@@ -168,13 +175,15 @@ test-suite aeson-tests
168175
Regression.Issue571
169176
Regression.Issue687
170177
Regression.Issue967
178+
RFC8785
171179
SerializationFormatSpec
172180
Types
173181
UnitTests
174182
UnitTests.FromJSONKey
175183
UnitTests.Hashable
176184
UnitTests.KeyMapInsertWith
177185
UnitTests.MonadFix
186+
UnitTests.NoThunks
178187
UnitTests.NullaryConstructors
179188
UnitTests.OmitNothingFieldsNote
180189
UnitTests.OptionalFields
@@ -183,18 +192,17 @@ test-suite aeson-tests
183192
UnitTests.OptionalFields.Manual
184193
UnitTests.OptionalFields.TH
185194
UnitTests.UTCTime
186-
UnitTests.NoThunks
187195

188196
build-depends:
189197
aeson
190198
, base
191199
, base-compat
192-
, deepseq
193200
, base-orphans >=0.5.3 && <0.10
194201
, base16-bytestring
195202
, bytestring
196203
, containers
197204
, data-fix
205+
, deepseq
198206
, Diff >=0.4 && <0.5
199207
, directory
200208
, dlist
@@ -227,6 +235,9 @@ test-suite aeson-tests
227235
, uuid-types
228236
, vector
229237

238+
if !impl(ghc >=9.0)
239+
build-depends: integer-gmp
240+
230241
if impl(ghc >=9.2 && <9.7)
231242
build-depends: nothunks >=0.1.4 && <0.2
232243

changelog.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
For the latest version of this document, please see [https://github.com/haskell/aeson/blob/master/changelog.md](https://github.com/haskell/aeson/blob/master/changelog.md).
22

3+
### 2.2.1.0
4+
5+
* Add `Data.Aeson.RFC8785`, a JSON Canonicalization Scheme implementation
6+
https://datatracker.ietf.org/doc/html/rfc8785
7+
38
### 2.2.0.0
49

510
* Rework how `omitNothingFields` works. Add `allowOmittedFields` as a parsing counterpart.

src/Data/Aeson/RFC8785.hs

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
{-# LANGUAGE UnboxedTuples, BangPatterns #-}
2+
-- | JSON Canonicalization Scheme https://datatracker.ietf.org/doc/html/rfc8785
3+
module Data.Aeson.RFC8785 (
4+
encodeCanonical,
5+
) where
6+
7+
import Data.List (sortBy)
8+
import Data.Ord (comparing)
9+
import GHC.Integer (quotRemInteger)
10+
import Math.NumberTheory.Logarithms (integerLog10)
11+
12+
import Data.Aeson
13+
import Data.Aeson.Encoding
14+
import Data.Aeson.Encoding.Internal
15+
import Data.Aeson.Internal.Prelude
16+
import Data.Aeson.Internal.Word8
17+
18+
import qualified Data.Aeson.Key as Key
19+
import qualified Data.Aeson.KeyMap as KM
20+
import qualified Data.ByteString as BS
21+
import qualified Data.ByteString.Builder as B
22+
import qualified Data.ByteString.Builder.Prim as BP
23+
import qualified Data.ByteString.Lazy as LBS
24+
import qualified Data.Scientific as Sci
25+
import qualified Data.Text.Encoding as TE
26+
import qualified Data.Vector as V
27+
28+
-- $setup
29+
-- >>> import Data.Aeson
30+
31+
-- | Encode to JSON according to RFC 8785 canonicalization scheme.
32+
-- https://datatracker.ietf.org/doc/html/rfc8785
33+
--
34+
-- 'encodeCanonical' uses 'toJSON' to produce intermediate 'Value',
35+
-- as 'toEncoding' may (and most likely) produces non-canonical JSON.
36+
--
37+
-- Note: @decode (encodeCanonical v) === Just v@ for all @v :: Value@,
38+
-- i.e. 'encodeCanonical' doesn't lose any information.
39+
--
40+
-- However, the example in RFC8785 /loses/ information as the intermediate
41+
-- number representation is 'Double', also current @toJSON :: Double -> Value@
42+
-- sometimes produces too precise values. For example
43+
--
44+
-- >>> toJSON (1e23 :: Double)
45+
-- Number 9.999999999999999e22
46+
--
47+
-- 'show' also behaves the same:
48+
--
49+
-- >>> 1e23 :: Double
50+
-- 9.999999999999999e22
51+
--
52+
-- Note: RFC8785 is __not the same scheme__ as used in
53+
-- [canonical-json](https://hackage.haskell.org/package/canonical-json) package
54+
-- (https://wiki.laptop.org/go/Canonical_JSON).
55+
-- That scheme produces /invalid/ JSON (e.g. control characters encoded as is, not escaped)
56+
-- and cannot encode non-integral numbers.
57+
--
58+
-- @since 2.2.1.0
59+
--
60+
encodeCanonical :: ToJSON a => a -> LBS.ByteString
61+
encodeCanonical = encodingToLazyByteString . toCanonical . toJSON
62+
63+
toCanonical :: Value -> Encoding
64+
toCanonical Null = null_
65+
toCanonical (Bool b) = bool b
66+
toCanonical (Number n) = canonicalNumber n
67+
toCanonical (String s) = canonicalString s
68+
toCanonical (Array v) = list toCanonical (V.toList v)
69+
toCanonical (Object m) = dict (canonicalString . Key.toText) toCanonical ifr $
70+
sortBy (\(k1, _) (k2, _) -> propertyCmp k1 k2) (KM.toList m)
71+
72+
ifr :: (k -> v -> a -> a) -> a -> [(k, v)] -> a
73+
ifr f z = foldr (\(k, v) -> f k v) z
74+
{-# INLINE ifr #-}
75+
76+
-- Property name strings to be sorted are formatted as arrays of UTF-16 code units.
77+
propertyCmp :: Key -> Key -> Ordering
78+
propertyCmp = comparing f where
79+
-- this is slow implementation, but it's obviously not wrong.
80+
f :: Key -> BS.ByteString
81+
f = TE.encodeUtf16BE . Key.toText
82+
83+
-- strings are already serialized canonically.
84+
canonicalString :: Text -> Encoding' a
85+
canonicalString = text
86+
87+
-- RFC 8785 is outsourcing number format to ECMA-262.
88+
-- 10th edition, 7.1.12.1 NumberToString
89+
-- https://262.ecma-international.org/10.0/#sec-tostring-applied-to-the-number-type
90+
--
91+
-- Note: this specification is not lossy
92+
-- Given 'Scientific' we can choose n,k,s uniquely: 'nks'.
93+
--
94+
-- RFC8785 Appendix D says "don't use bignums".
95+
canonicalNumber :: Scientific -> Encoding
96+
canonicalNumber m = case compare m 0 of
97+
EQ -> Encoding (B.word8 W8_0)
98+
LT -> Encoding (B.word8 W8_MINUS <> fromEncoding (canonicalNumber' (negate m)))
99+
GT -> canonicalNumber' m
100+
101+
-- input: Positive number
102+
canonicalNumber' :: Scientific -> Encoding
103+
canonicalNumber' m
104+
| k <= n, n <= 21
105+
= Encoding $
106+
BP.primMapListFixed BP.word8 ds <>
107+
BP.primMapListFixed BP.word8 (replicate (n - k) W8_0)
108+
109+
| 0 < n, n <= 21
110+
, let (pfx, sfx) = splitAt n ds
111+
= Encoding $
112+
BP.primMapListFixed BP.word8 pfx <>
113+
B.word8 W8_DOT <>
114+
BP.primMapListFixed BP.word8 sfx
115+
116+
| -6 < n, n <= 0
117+
= Encoding $
118+
B.word8 W8_0 <>
119+
B.word8 W8_DOT <>
120+
BP.primMapListFixed BP.word8 (replicate (negate n) W8_0) <>
121+
BP.primMapListFixed BP.word8 ds
122+
123+
| k == 1, [d] <- ds
124+
= Encoding $
125+
B.word8 d <>
126+
B.word8 W8_e <>
127+
B.word8 (if (n - 1) >= 0 then W8_PLUS else W8_MINUS) <>
128+
BP.primMapListFixed BP.word8 (integerToDecimalDigits (abs (toInteger n - 1)))
129+
130+
| (d:ds') <- ds
131+
= Encoding $
132+
B.word8 d <>
133+
B.word8 W8_DOT <>
134+
BP.primMapListFixed BP.word8 ds' <>
135+
B.word8 W8_e <>
136+
B.word8 (if (n - 1) >= 0 then W8_PLUS else W8_MINUS) <>
137+
BP.primMapListFixed BP.word8 (integerToDecimalDigits (abs (toInteger n - 1)))
138+
139+
| otherwise
140+
= string "0" -- shouldn't happen, but we need a default case.
141+
142+
where
143+
-- 5. Otherwise, let n, k, and s be integers such that
144+
-- k ≥ 1, 10k - 1 ≤ s < 10k, the Number value for s × 10n - k is m,
145+
-- and k is as small as possible.
146+
-- Note that k is the number of digits in the decimal representation of s,
147+
-- that s is not divisible by 10, and that the least significant digit of s
148+
-- is not necessarily uniquely determined by these criteria.
149+
(n, k, s) = nks m
150+
ds = integerToDecimalDigits s
151+
152+
-- 5. Otherwise, let n, k, and s be integers such that k ≥ 1, 10^(k - 1) ≤ s < 10^k,
153+
-- the Number value for s × 10^(n - k) is m, and k is as small as possible.
154+
-- Note that k is the number of digits in the decimal representation of s,
155+
-- that s is not divisible by 10, and that the least significant digit of s
156+
-- is not necessarily uniquely determined by these criteria.
157+
nks :: Scientific -> (Int, Int, Integer)
158+
nks m = (e + k, k, c)
159+
where
160+
m' = Sci.normalize m
161+
c = Sci.coefficient m'
162+
e = Sci.base10Exponent m'
163+
k = integerLog10 c + 1
164+
165+
integerToDecimalDigits :: Integer -> [Word8]
166+
integerToDecimalDigits = go [] where
167+
go acc 0 = acc
168+
go acc i = case quotRemInteger i 10 of
169+
(# q, r #) -> go (d:acc) q where !d = fromIntegral r + W8_0

tests/CastFloat.hs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
{-# LANGUAGE CPP #-}
2+
module CastFloat (
3+
castDoubleToWord64,
4+
castWord64ToDouble,
5+
castFloatTests,
6+
) where
7+
8+
import Test.Tasty (TestTree, testGroup)
9+
import Test.Tasty.HUnit (testCase, (@?=))
10+
import Test.Tasty.QuickCheck (testProperty, (===))
11+
12+
import Types (UniformWord64 (..))
13+
14+
#if MIN_VERSION_base(4,11,0)
15+
import GHC.Float (castDoubleToWord64, castWord64ToDouble)
16+
#else
17+
18+
import Data.Word (Word64)
19+
import Foreign.Storable (Storable (peek, poke))
20+
import Foreign.Ptr (castPtr)
21+
import Foreign.Marshal (alloca)
22+
import System.IO.Unsafe (unsafeDupablePerformIO)
23+
24+
castDoubleToWord64 :: Double -> Word64
25+
castDoubleToWord64 = reinterpretCast
26+
27+
castWord64ToDouble :: Word64 -> Double
28+
castWord64ToDouble = reinterpretCast
29+
30+
reinterpretCast :: (Storable a, Storable b) => a -> b
31+
reinterpretCast x = unsafeDupablePerformIO $ alloca $ \ptr -> do
32+
poke ptr x
33+
peek (castPtr ptr)
34+
35+
#endif
36+
37+
castFloatTests :: TestTree
38+
castFloatTests = testGroup "castDoubleToWord64"
39+
[ testCase "5e-324" $ castDoubleToWord64 5e-324 @?= 1
40+
, testProperty "roundtrip1" $ \d -> castWord64ToDouble (castDoubleToWord64 d) === d
41+
, testProperty "roundtrip2" $ \(U64 w) -> castDoubleToWord64 (castWord64ToDouble w) === w
42+
]

0 commit comments

Comments
 (0)