Skip to content

Commit ec7c738

Browse files
hsyl20clyring
andcommitted
Add pure Haskell implementation (#631)
bytestring used to rely on C functions. This patch adds equivalent functions implemented in Haskell. The main purpose is for the JavaScript backend to fully support bytestring. Pure Haskell implementation can be enabled explicitly with a cabal flag. It's automatically enabled for the JavaScript platform. Thanks to Matthew Craven for the thorough review and the many suggestions. Co-authored-by: Matthew Craven <[email protected]> (cherry picked from commit d497f39)
1 parent ed7a9d4 commit ec7c738

File tree

14 files changed

+1398
-99
lines changed

14 files changed

+1398
-99
lines changed

.github/workflows/ci.yml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,27 @@ jobs:
167167
- name: Test
168168
run: cabal test --ghc-options='-fcheck-prim-bounds -fno-ignore-asserts -DHS_BYTESTRING_ASSERTIONS'
169169

170+
pure-haskell:
171+
needs: build
172+
runs-on: ubuntu-latest
173+
steps:
174+
- uses: actions/checkout@v3
175+
- uses: haskell/actions/setup@v2
176+
id: setup-haskell-cabal
177+
with:
178+
ghc-version: 'latest'
179+
- name: Update cabal package database
180+
run: cabal update
181+
- uses: actions/cache@v3
182+
name: Cache cabal stuff
183+
with:
184+
path: |
185+
${{ steps.setup-haskell-cabal.outputs.cabal-store }}
186+
dist-newstyle
187+
key: ${{ runner.os }}-latest-pure-haskell
188+
- name: Test
189+
run: cabal test -fpure-haskell --ghc-options=-fno-ignore-asserts --enable-tests --test-show-details=direct all
190+
170191
old-gcc:
171192
needs: build
172193
runs-on: ubuntu-latest

Data/ByteString.hs

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,6 @@ import Control.Exception (IOException, catch, finally, assert, throwIO)
264264
import Control.Monad (when)
265265

266266
import Foreign.C.String (CString, CStringLen)
267-
import Foreign.C.Types (CSize (CSize), CInt (CInt))
268267
import Foreign.ForeignPtr (ForeignPtr, touchForeignPtr)
269268
import Foreign.ForeignPtr.Unsafe(unsafeForeignPtrToPtr)
270269
import Foreign.Marshal.Alloc (allocaBytes)
@@ -1562,17 +1561,6 @@ isValidUtf8 (BS ptr len) = accursedUnutterablePerformIO $ unsafeWithForeignPtr p
15621561
else cIsValidUtf8Safe p (fromIntegral len)
15631562
pure $ i /= 0
15641563

1565-
-- We import bytestring_is_valid_utf8 both unsafe and safe. For small inputs
1566-
-- we can use the unsafe version to get a bit more performance, but for large
1567-
-- inputs the safe version should be used to avoid GC synchronization pauses
1568-
-- in multithreaded contexts.
1569-
1570-
foreign import ccall unsafe "bytestring_is_valid_utf8" cIsValidUtf8
1571-
:: Ptr Word8 -> CSize -> IO CInt
1572-
1573-
foreign import ccall safe "bytestring_is_valid_utf8" cIsValidUtf8Safe
1574-
:: Ptr Word8 -> CSize -> IO CInt
1575-
15761564
-- | Break a string on a substring, returning a pair of the part of the
15771565
-- string prior to the match, and the rest of the string.
15781566
--

Data/ByteString/Builder/ASCII.hs

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,9 @@ import Data.ByteString.Builder.Internal (Builder)
8282
import qualified Data.ByteString.Builder.Prim as P
8383
import qualified Data.ByteString.Builder.Prim.Internal as P
8484
import Data.ByteString.Builder.RealFloat (floatDec, doubleDec)
85+
import Data.ByteString.Internal.Type (c_int_dec_padded9, c_long_long_int_dec_padded18)
8586

8687
import Foreign
87-
import Foreign.C.Types
8888
import Data.List.NonEmpty (NonEmpty(..))
8989

9090
------------------------------------------------------------------------------
@@ -311,12 +311,6 @@ integerDec i
311311
(q,r) -> fromInteger q : fromInteger r : putB ns
312312

313313

314-
foreign import ccall unsafe "static _hs_bytestring_int_dec_padded9"
315-
c_int_dec_padded9 :: CInt -> Ptr Word8 -> IO ()
316-
317-
foreign import ccall unsafe "static _hs_bytestring_long_long_int_dec_padded18"
318-
c_long_long_int_dec_padded18 :: CLLong -> Ptr Word8 -> IO ()
319-
320314
{-# INLINE intDecPadded #-}
321315
intDecPadded :: P.BoundedPrim Int
322316
intDecPadded = P.liftFixedToBounded $ P.caseWordSize_32_64

Data/ByteString/Builder/Prim/ASCII.hs

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ module Data.ByteString.Builder.Prim.ASCII
7777

7878
) where
7979

80+
import Data.ByteString.Internal.Type
8081
import Data.ByteString.Builder.Prim.Binary
8182
import Data.ByteString.Builder.Prim.Internal
8283
import Data.ByteString.Builder.Prim.Internal.Floating
@@ -86,7 +87,6 @@ import Data.ByteString.Utils.UnalignedWrite
8687
import Data.Char (ord)
8788

8889
import Foreign
89-
import Foreign.C.Types
9090

9191
-- | Encode the least 7-bits of a 'Char' using the ASCII encoding.
9292
{-# INLINE char7 #-}
@@ -101,12 +101,6 @@ char7 = (\c -> fromIntegral $ ord c .&. 0x7f) >$< word8
101101
-- Signed integers
102102
------------------
103103

104-
foreign import ccall unsafe "static _hs_bytestring_int_dec" c_int_dec
105-
:: CInt -> Ptr Word8 -> IO (Ptr Word8)
106-
107-
foreign import ccall unsafe "static _hs_bytestring_long_long_int_dec" c_long_long_int_dec
108-
:: CLLong -> Ptr Word8 -> IO (Ptr Word8)
109-
110104
{-# INLINE encodeIntDecimal #-}
111105
encodeIntDecimal :: Integral a => Int -> BoundedPrim a
112106
encodeIntDecimal bound = boundedPrim bound $ c_int_dec . fromIntegral
@@ -143,12 +137,6 @@ intDec = caseWordSize_32_64
143137
-- Unsigned integers
144138
--------------------
145139

146-
foreign import ccall unsafe "static _hs_bytestring_uint_dec" c_uint_dec
147-
:: CUInt -> Ptr Word8 -> IO (Ptr Word8)
148-
149-
foreign import ccall unsafe "static _hs_bytestring_long_long_uint_dec" c_long_long_uint_dec
150-
:: CULLong -> Ptr Word8 -> IO (Ptr Word8)
151-
152140
{-# INLINE encodeWordDecimal #-}
153141
encodeWordDecimal :: Integral a => Int -> BoundedPrim a
154142
encodeWordDecimal bound = boundedPrim bound $ c_uint_dec . fromIntegral
@@ -187,12 +175,6 @@ wordDec = caseWordSize_32_64
187175
-- without lead
188176
---------------
189177

190-
foreign import ccall unsafe "static _hs_bytestring_uint_hex" c_uint_hex
191-
:: CUInt -> Ptr Word8 -> IO (Ptr Word8)
192-
193-
foreign import ccall unsafe "static _hs_bytestring_long_long_uint_hex" c_long_long_uint_hex
194-
:: CULLong -> Ptr Word8 -> IO (Ptr Word8)
195-
196178
{-# INLINE encodeWordHex #-}
197179
encodeWordHex :: forall a. (Storable a, Integral a) => BoundedPrim a
198180
encodeWordHex =

Data/ByteString/Builder/Prim/Internal/Base16.hs

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
{-# LANGUAGE MagicHash #-}
2+
{-# LANGUAGE CPP #-}
23
-- |
34
-- Copyright : (c) 2011 Simon Meier
45
-- License : BSD3-style (see LICENSE)
@@ -22,23 +23,33 @@ module Data.ByteString.Builder.Prim.Internal.Base16 (
2223
) where
2324

2425
import Foreign
25-
import Foreign.C.Types
2626
import GHC.Exts (Addr#, Ptr(..))
27+
#if PURE_HASKELL
28+
import qualified Data.ByteString.Internal.Pure as Pure
29+
#else
30+
import Foreign.C.Types
31+
#endif
2732

2833
-- Creating the encoding table
2934
------------------------------
3035

3136
-- | An encoding table for Base16 encoding.
3237
data EncodingTable = EncodingTable Addr#
3338

34-
foreign import ccall "&hs_bytestring_lower_hex_table"
35-
c_lower_hex_table :: Ptr CChar
36-
3739
-- | The encoding table for hexadecimal values with lower-case characters;
3840
-- e.g., deadbeef.
3941
lowerTable :: EncodingTable
40-
lowerTable = case c_lower_hex_table of
41-
Ptr p# -> EncodingTable p#
42+
lowerTable =
43+
#if PURE_HASKELL
44+
case Pure.lower_hex_table of
45+
Ptr p# -> EncodingTable p#
46+
#else
47+
case c_lower_hex_table of
48+
Ptr p# -> EncodingTable p#
49+
50+
foreign import ccall "&hs_bytestring_lower_hex_table"
51+
c_lower_hex_table :: Ptr CChar
52+
#endif
4253

4354
-- | Encode an octet as 16bit word comprising both encoded nibbles ordered
4455
-- according to the host endianness. Writing these 16bit to memory will write

0 commit comments

Comments
 (0)