Use only fixed-width uints in the C itoa functions (#702)

clyring · web-flow · commit aca65b394e5f · 2025-01-01T12:38:53.000Z
* Use only fixed-width uints in the C itoa functions

The existing logic for decimal encoding of signed ints was a bit
more complicated than necessary in its handling for negative numbers,
mostly because of negation overflowing for INT_MIN.

But the absolute value of the smallest signed Int16 does fit into
an unsigned Word16 without overflowing, allowing some simplification.

Additionally, on hardware with slow integer division instructions,
fast division-by-known-divisor is typically faster for unsigned types,
so this change may lead to a slight speed-up on such platforms.

(We could almost certainly produce slightly better code still for
these platforms by hand, for example by exploiting the fact that
after the first division the numbers are small enough that a quotient
by ten can be extracted with a single mulhi and no shift.)

* Remove a dead branch in `integerDec`

If the absolute value of the input is small enough to enter this
branch, then it fits in an Int and takes the very first branch instead.
diff --git a/Data/ByteString/Builder/ASCII.hs b/Data/ByteString/Builder/ASCII.hs
@@ -79,7 +79,7 @@ import           Data.ByteString.Builder.Internal (Builder)
 import qualified Data.ByteString.Builder.Prim                   as P
 import qualified Data.ByteString.Builder.Prim.Internal          as P
 import           Data.ByteString.Builder.RealFloat (floatDec, doubleDec)
-import           Data.ByteString.Internal.Type (c_int_dec_padded9, c_long_long_int_dec_padded18)
+import           Data.ByteString.Internal.Type (c_uint32_dec_padded9, c_uint64_dec_padded18)
 
 import           Foreign
 import           Data.List.NonEmpty (NonEmpty(..))
@@ -275,10 +275,8 @@ integerDec i
     | otherwise =                                   go i
   where
     go :: Integer -> Builder
-    go n | n < maxPow10 = intDec (fromInteger n)
-         | otherwise    =
-             case putH (splitf (maxPow10 * maxPow10) n) of
-               x:|xs -> intDec x `mappend` P.primMapListBounded intDecPadded xs
+    go n = case putH (splitf (maxPow10 * maxPow10) n) of
+             x:|xs -> intDec x `mappend` P.primMapListBounded intDecPadded xs
 
     splitf :: Integer -> Integer -> NonEmpty Integer
     splitf pow10 n0
@@ -311,5 +309,5 @@ integerDec i
 {-# INLINE intDecPadded #-}
 intDecPadded :: P.BoundedPrim Int
 intDecPadded = P.liftFixedToBounded $ P.caseWordSize_32_64
-    (P.fixedPrim  9 $ c_int_dec_padded9            . fromIntegral)
-    (P.fixedPrim 18 $ c_long_long_int_dec_padded18 . fromIntegral)
+    (P.fixedPrim  9 $ c_uint32_dec_padded9  . fromIntegral)
+    (P.fixedPrim 18 $ c_uint64_dec_padded18 . fromIntegral)
diff --git a/Data/ByteString/Builder/Prim/ASCII.hs b/Data/ByteString/Builder/Prim/ASCII.hs
@@ -1,3 +1,5 @@
+{-# LANGUAGE TypeFamilies #-}
+
 -- | Copyright   : (c) 2010 Jasper Van der Jeugt
 --                 (c) 2010 - 2011 Simon Meier
 -- License       : BSD3-style (see LICENSE)
@@ -99,30 +101,50 @@ char7 = (\c -> fromIntegral $ ord c .&. 0x7f) >$< word8
 -- Signed integers
 ------------------
 
-{-# INLINE encodeIntDecimal #-}
-encodeIntDecimal :: Integral a => Int -> BoundedPrim a
-encodeIntDecimal bound = boundedPrim bound $ c_int_dec . fromIntegral
+type family CorrespondingUnsigned s where
+  CorrespondingUnsigned Int8  = Word8
+  CorrespondingUnsigned Int16 = Word16
+  CorrespondingUnsigned Int32 = Word32
+  CorrespondingUnsigned Int   = Word
+  CorrespondingUnsigned Int64 = Word64
+
+{-# INLINE encodeSignedViaUnsigned #-}
+encodeSignedViaUnsigned ::
+  forall s.
+  (Integral s, Num (CorrespondingUnsigned s)) =>
+  Int -> (BoundedPrim (CorrespondingUnsigned s)) -> BoundedPrim s
+encodeSignedViaUnsigned bound writeUnsigned = boundedPrim bound $ \sval ptr ->
+  if sval < 0 then do
+    poke ptr (c2w '-')
+    runB writeUnsigned (makeUnsigned (negate sval)) (ptr `plusPtr` 1)
+    -- This call to 'negate' may overflow if `sval == minBound`.
+    -- But since we insist that the unsigned type has the same width,
+    -- this causes no trouble.
+  else do
+    runB writeUnsigned (makeUnsigned sval) ptr
+  where
+    makeUnsigned = fromIntegral @s @(CorrespondingUnsigned s)
 
 -- | Decimal encoding of an 'Int8'.
 {-# INLINE int8Dec #-}
 int8Dec :: BoundedPrim Int8
-int8Dec = encodeIntDecimal 4
+int8Dec = encodeSignedViaUnsigned 4 word8Dec
 
 -- | Decimal encoding of an 'Int16'.
 {-# INLINE int16Dec #-}
 int16Dec :: BoundedPrim Int16
-int16Dec = encodeIntDecimal 6
+int16Dec = encodeSignedViaUnsigned 6 word16Dec
 
 
 -- | Decimal encoding of an 'Int32'.
 {-# INLINE int32Dec #-}
 int32Dec :: BoundedPrim Int32
-int32Dec = encodeIntDecimal 11
+int32Dec = encodeSignedViaUnsigned 11 word32Dec
 
 -- | Decimal encoding of an 'Int64'.
 {-# INLINE int64Dec #-}
 int64Dec :: BoundedPrim Int64
-int64Dec = boundedPrim 20 $ c_long_long_int_dec . fromIntegral
+int64Dec = encodeSignedViaUnsigned 20 word64Dec
 
 -- | Decimal encoding of an 'Int'.
 {-# INLINE intDec #-}
@@ -135,29 +157,29 @@ intDec = caseWordSize_32_64
 -- Unsigned integers
 --------------------
 
-{-# INLINE encodeWordDecimal #-}
-encodeWordDecimal :: Integral a => Int -> BoundedPrim a
-encodeWordDecimal bound = boundedPrim bound $ c_uint_dec . fromIntegral
+{-# INLINE encodeWord32Decimal #-}
+encodeWord32Decimal :: Integral a => Int -> BoundedPrim a
+encodeWord32Decimal bound = boundedPrim bound $ c_uint32_dec . fromIntegral
 
 -- | Decimal encoding of a 'Word8'.
 {-# INLINE word8Dec #-}
 word8Dec :: BoundedPrim Word8
-word8Dec = encodeWordDecimal 3
+word8Dec = encodeWord32Decimal 3
 
 -- | Decimal encoding of a 'Word16'.
 {-# INLINE word16Dec #-}
 word16Dec :: BoundedPrim Word16
-word16Dec = encodeWordDecimal 5
+word16Dec = encodeWord32Decimal 5
 
 -- | Decimal encoding of a 'Word32'.
 {-# INLINE word32Dec #-}
 word32Dec :: BoundedPrim Word32
-word32Dec = encodeWordDecimal 10
+word32Dec = encodeWord32Decimal 10
 
 -- | Decimal encoding of a 'Word64'.
 {-# INLINE word64Dec #-}
 word64Dec :: BoundedPrim Word64
-word64Dec = boundedPrim 20 $ c_long_long_uint_dec . fromIntegral
+word64Dec = boundedPrim 20 c_uint64_dec
 
 -- | Decimal encoding of a 'Word'.
 {-# INLINE wordDec #-}
@@ -173,30 +195,30 @@ wordDec = caseWordSize_32_64
 -- without lead
 ---------------
 
-{-# INLINE encodeWordHex #-}
-encodeWordHex :: forall a. (Storable a, Integral a) => BoundedPrim a
-encodeWordHex =
-    boundedPrim (2 * sizeOf (undefined :: a)) $ c_uint_hex  . fromIntegral
+{-# INLINE encodeWord32Hex #-}
+encodeWord32Hex :: forall a. (Storable a, Integral a) => BoundedPrim a
+encodeWord32Hex =
+    boundedPrim (2 * sizeOf @a undefined) $ c_uint32_hex . fromIntegral
 
 -- | Hexadecimal encoding of a 'Word8'.
 {-# INLINE word8Hex #-}
 word8Hex :: BoundedPrim Word8
-word8Hex = encodeWordHex
+word8Hex = encodeWord32Hex
 
 -- | Hexadecimal encoding of a 'Word16'.
 {-# INLINE word16Hex #-}
 word16Hex :: BoundedPrim Word16
-word16Hex = encodeWordHex
+word16Hex = encodeWord32Hex
 
 -- | Hexadecimal encoding of a 'Word32'.
 {-# INLINE word32Hex #-}
 word32Hex :: BoundedPrim Word32
-word32Hex = encodeWordHex
+word32Hex = encodeWord32Hex
 
 -- | Hexadecimal encoding of a 'Word64'.
 {-# INLINE word64Hex #-}
 word64Hex :: BoundedPrim Word64
-word64Hex = boundedPrim 16 $ c_long_long_uint_hex . fromIntegral
+word64Hex = boundedPrim 16 c_uint64_hex
 
 -- | Hexadecimal encoding of a 'Word'.
 {-# INLINE wordHex #-}
diff --git a/Data/ByteString/Internal/Pure.hs b/Data/ByteString/Internal/Pure.hs
@@ -19,7 +19,6 @@ module Data.ByteString.Internal.Pure
   , isValidUtf8
   , isValidUtf8BA
   -- * itoa.c
-  , encodeSignedDec
   , encodeUnsignedDec
   , encodeUnsignedDecPadded
   , encodeUnsignedHex
@@ -307,22 +306,6 @@ reverseBytesInplace !p1 !p2
     reverseBytesInplace (plusPtr p1 1) (plusPtr p2 (-1))
   | otherwise = pure ()
 
--- | Encode signed number as decimal
-encodeSignedDec :: (Eq a, Num a, Integral a) => a -> Ptr Word8 -> IO (Ptr Word8)
-{-# INLINABLE encodeSignedDec #-} -- for specialization
-encodeSignedDec !x !buf
-  | x >= 0    = encodeUnsignedDec x buf
-  | otherwise = do
-    -- we cannot negate directly as  0 - (minBound :: Int) = minBound
-    -- So we write the sign and the first digit.
-    pokeByteOff buf 0 '-'
-    let !(q,r) = quotRem x (-10)
-    putDigit buf 1 (fromIntegral (abs r))
-    case q of
-      0 -> pure (plusPtr buf 2)
-      _ -> encodeUnsignedDec' q (plusPtr buf 1) (plusPtr buf 2)
-
-
 -- | Encode positive number as decimal
 encodeUnsignedDec :: (Eq a, Num a, Integral a) => a -> Ptr Word8 -> IO (Ptr Word8)
 {-# INLINABLE encodeUnsignedDec #-} -- for specialization
@@ -331,7 +314,6 @@ encodeUnsignedDec !v !next_ptr = encodeUnsignedDec' v next_ptr next_ptr
 -- | Encode positive number as little-endian decimal, then reverse it.
 --
 -- Take two pointers (orig_ptr, next_ptr) to support already encoded digits
--- (e.g. used by encodeSignedDec to avoid overflows)
 --
 encodeUnsignedDec' :: (Eq a, Num a, Integral a) => a -> Ptr Word8 -> Ptr Word8 -> IO (Ptr Word8)
 {-# INLINABLE encodeUnsignedDec' #-} -- for specialization
diff --git a/Data/ByteString/Internal/Type.hs b/Data/ByteString/Internal/Type.hs
@@ -103,14 +103,12 @@ module Data.ByteString.Internal.Type (
         c_count_ba,
         c_elem_index,
         c_sort,
-        c_int_dec,
-        c_int_dec_padded9,
-        c_uint_dec,
-        c_uint_hex,
-        c_long_long_int_dec,
-        c_long_long_int_dec_padded18,
-        c_long_long_uint_dec,
-        c_long_long_uint_hex,
+        c_uint32_dec,
+        c_uint64_dec,
+        c_uint32_dec_padded9,
+        c_uint64_dec_padded18,
+        c_uint32_hex,
+        c_uint64_hex,
         cIsValidUtf8BA,
         cIsValidUtf8BASafe,
         cIsValidUtf8,
@@ -1164,29 +1162,23 @@ foreign import ccall unsafe "static sbs_elem_index"
 
 
 
-foreign import ccall unsafe "static _hs_bytestring_uint_dec" c_uint_dec
-    :: CUInt -> Ptr Word8 -> IO (Ptr Word8)
+foreign import ccall unsafe "static _hs_bytestring_uint32_dec" c_uint32_dec
+    :: Word32 -> Ptr Word8 -> IO (Ptr Word8)
 
-foreign import ccall unsafe "static _hs_bytestring_long_long_uint_dec" c_long_long_uint_dec
-    :: CULLong -> Ptr Word8 -> IO (Ptr Word8)
+foreign import ccall unsafe "static _hs_bytestring_uint64_dec" c_uint64_dec
+    :: Word64 -> Ptr Word8 -> IO (Ptr Word8)
 
-foreign import ccall unsafe "static _hs_bytestring_int_dec" c_int_dec
-    :: CInt -> Ptr Word8 -> IO (Ptr Word8)
+foreign import ccall unsafe "static _hs_bytestring_uint32_hex" c_uint32_hex
+    :: Word32 -> Ptr Word8 -> IO (Ptr Word8)
 
-foreign import ccall unsafe "static _hs_bytestring_long_long_int_dec" c_long_long_int_dec
-    :: CLLong -> Ptr Word8 -> IO (Ptr Word8)
+foreign import ccall unsafe "static _hs_bytestring_uint64_hex" c_uint64_hex
+    :: Word64 -> Ptr Word8 -> IO (Ptr Word8)
 
-foreign import ccall unsafe "static _hs_bytestring_uint_hex" c_uint_hex
-    :: CUInt -> Ptr Word8 -> IO (Ptr Word8)
+foreign import ccall unsafe "static _hs_bytestring_uint32_dec_padded9"
+    c_uint32_dec_padded9  :: Word32 -> Ptr Word8 -> IO ()
 
-foreign import ccall unsafe "static _hs_bytestring_long_long_uint_hex" c_long_long_uint_hex
-    :: CULLong -> Ptr Word8 -> IO (Ptr Word8)
-
-foreign import ccall unsafe "static _hs_bytestring_int_dec_padded9"
-    c_int_dec_padded9 :: CInt -> Ptr Word8 -> IO ()
-
-foreign import ccall unsafe "static _hs_bytestring_long_long_int_dec_padded18"
-    c_long_long_int_dec_padded18 :: CLLong -> Ptr Word8 -> IO ()
+foreign import ccall unsafe "static _hs_bytestring_uint64_dec_padded18"
+    c_uint64_dec_padded18 :: Word64 -> Ptr Word8 -> IO ()
 
 -- We import bytestring_is_valid_utf8 both unsafe and safe. For small inputs
 -- we can use the unsafe version to get a bit more performance, but for large
@@ -1272,28 +1264,22 @@ checkedCast x =
 -- Haskell version of functions in itoa.c
 ----------------------------------------------------------------
 
-c_int_dec :: CInt -> Ptr Word8 -> IO (Ptr Word8)
-c_int_dec = Pure.encodeSignedDec
-
-c_long_long_int_dec :: CLLong -> Ptr Word8 -> IO (Ptr Word8)
-c_long_long_int_dec = Pure.encodeSignedDec
-
-c_uint_dec :: CUInt -> Ptr Word8 -> IO (Ptr Word8)
-c_uint_dec = Pure.encodeUnsignedDec
+c_uint32_dec :: Word32 -> Ptr Word8 -> IO (Ptr Word8)
+c_uint32_dec = Pure.encodeUnsignedDec
 
-c_long_long_uint_dec :: CULLong -> Ptr Word8 -> IO (Ptr Word8)
-c_long_long_uint_dec = Pure.encodeUnsignedDec
+c_uint64_dec :: Word64 -> Ptr Word8 -> IO (Ptr Word8)
+c_uint64_dec = Pure.encodeUnsignedDec
 
-c_uint_hex :: CUInt -> Ptr Word8 -> IO (Ptr Word8)
-c_uint_hex = Pure.encodeUnsignedHex
+c_uint32_hex :: Word32 -> Ptr Word8 -> IO (Ptr Word8)
+c_uint32_hex = Pure.encodeUnsignedHex
 
-c_long_long_uint_hex :: CULLong -> Ptr Word8 -> IO (Ptr Word8)
-c_long_long_uint_hex = Pure.encodeUnsignedHex
+c_uint64_hex :: Word64 -> Ptr Word8 -> IO (Ptr Word8)
+c_uint64_hex = Pure.encodeUnsignedHex
 
-c_int_dec_padded9 :: CInt -> Ptr Word8 -> IO ()
-c_int_dec_padded9 = Pure.encodeUnsignedDecPadded 9
+c_uint32_dec_padded9 :: Word32 -> Ptr Word8 -> IO ()
+c_uint32_dec_padded9 = Pure.encodeUnsignedDecPadded 9
 
-c_long_long_int_dec_padded18 :: CLLong -> Ptr Word8 -> IO ()
-c_long_long_int_dec_padded18 = Pure.encodeUnsignedDecPadded 18
+c_uint64_dec_padded18 :: Word64 -> Ptr Word8 -> IO ()
+c_uint64_dec_padded18 = Pure.encodeUnsignedDecPadded 18
 
 #endif
diff --git a/cbits/itoa.c b/cbits/itoa.c