Skip to content

Commit 1d17918

Browse files
authored
Merge pull request #82 from haskell-works/move-repartitionMod8-function-to-internal-bytestring-module
Move repartitionMod8 function to internal ByteString module. Move tables to internal Tables module.
2 parents e0d878b + 941524c commit 1d17918

File tree

7 files changed

+66
-45
lines changed

7 files changed

+66
-45
lines changed

.vscode/tasks.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"label": "Build",
66
"type": "shell",
77
"command": "bash",
8-
"args": ["-lc", "cabal new-build && echo 'Done'"],
8+
"args": ["-lc", "cabal new-build --enable-tests --enable-benchmarks && echo 'Done'"],
99
"group": {
1010
"kind": "build",
1111
"isDefault": true
@@ -37,7 +37,7 @@
3737
"label": "Test",
3838
"type": "shell",
3939
"command": "bash",
40-
"args": ["-lc", "cabal new-test --enable-tests && echo 'Done'"],
40+
"args": ["-lc", "cabal new-test --enable-tests --enable-benchmarks && echo 'Done'"],
4141
"group": {
4242
"kind": "test",
4343
"isDefault": true

bench/Main.hs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import HaskellWorks.Data.Bits.BitShown
1111
import HaskellWorks.Data.FromByteString
1212
import HaskellWorks.Data.Xml.Conduit
1313
import HaskellWorks.Data.Xml.Conduit.Blank
14+
import HaskellWorks.Data.Xml.Internal.Tables
1415
import HaskellWorks.Data.Xml.Succinct.Cursor
1516
import System.IO.MMap
1617

hw-xml.cabal

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ cabal-version: 2.2
22

33
name: hw-xml
44
version: 0.4.0.1
5-
synopsis: Conduits for tokenizing streams.
6-
description: Conduits for tokenizing streams. Please see README.md
5+
synopsis: XML parser based on succinct data structures.
6+
description: XML parser based on succinct data structures. Please see README.md
77
category: Data, XML, Succinct Data Structures, Data Structures
88
homepage: http://github.com/haskell-works/hw-xml#readme
99
bug-reports: https://github.com/haskell-works/hw-xml/issues
@@ -94,6 +94,8 @@ library
9494
HaskellWorks.Data.Xml.Grammar
9595
HaskellWorks.Data.Xml.Index
9696
HaskellWorks.Data.Xml.Internal.BalancedParens
97+
HaskellWorks.Data.Xml.Internal.ByteString
98+
HaskellWorks.Data.Xml.Internal.Tables
9799
HaskellWorks.Data.Xml.Internal.ToIbBp64
98100
HaskellWorks.Data.Xml.Lens
99101
HaskellWorks.Data.Xml.Succinct

project.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ case "$cmd" in
1717

1818
build)
1919
cabal new-build all -j8 \
20-
--disable-tests --disable-benchmarks \
20+
--enable-tests --enable-benchmarks \
2121
$CABAL_FLAGS "$@"
2222
;;
2323

src/HaskellWorks/Data/Xml/Conduit.hs

Lines changed: 12 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -5,45 +5,23 @@ module HaskellWorks.Data.Xml.Conduit
55
( blankedXmlToInterestBits
66
, byteStringToBits
77
, compressWordAsBit
8-
, interestingWord8s
9-
, isInterestingWord8
108
) where
119

12-
import Data.ByteString as BS
10+
import Data.ByteString (ByteString)
1311
import Data.Word
14-
import Data.Word8
15-
import HaskellWorks.Data.AtIndex ((!!!))
1612
import HaskellWorks.Data.Bits.BitWise
17-
import Prelude as P
13+
import HaskellWorks.Data.Xml.Internal.ByteString
14+
import HaskellWorks.Data.Xml.Internal.Tables
15+
import Prelude
1816

19-
import qualified Data.Bits as BITS
20-
import qualified Data.Vector.Storable as DVS
17+
import qualified Data.Bits as BITS
18+
import qualified Data.ByteString as BS
19+
import qualified Prelude as P
2120

22-
interestingWord8s :: DVS.Vector Word8
23-
interestingWord8s = DVS.constructN 256 go
24-
where go :: DVS.Vector Word8 -> Word8
25-
go v = if w == _bracketleft
26-
|| w == _braceleft
27-
|| w == _parenleft
28-
|| w == _bracketleft
29-
|| w == _less
30-
|| w == _a
31-
|| w == _v
32-
|| w == _t
33-
then 1
34-
else 0
35-
where w :: Word8
36-
w = fromIntegral (DVS.length v)
37-
{-# NOINLINE interestingWord8s #-}
38-
39-
isInterestingWord8 :: Word8 -> Word8
40-
isInterestingWord8 b = fromIntegral (interestingWord8s !!! fromIntegral b)
41-
{-# INLINABLE isInterestingWord8 #-}
42-
43-
blankedXmlToInterestBits :: [BS.ByteString] -> [BS.ByteString]
21+
blankedXmlToInterestBits :: [ByteString] -> [ByteString]
4422
blankedXmlToInterestBits = blankedXmlToInterestBits' ""
4523

46-
blankedXmlToInterestBits' :: BS.ByteString -> [BS.ByteString] -> [BS.ByteString]
24+
blankedXmlToInterestBits' :: ByteString -> [ByteString] -> [ByteString]
4725
blankedXmlToInterestBits' rs is = case is of
4826
(bs:bss) -> do
4927
let cs = if BS.length rs /= 0 then BS.concat [rs, bs] else bs
@@ -63,16 +41,10 @@ blankedXmlToInterestBits' rs is = case is of
6341
, BS.drop 8 as
6442
)
6543

66-
repartitionMod8 :: BS.ByteString -> BS.ByteString -> (BS.ByteString, BS.ByteString)
67-
repartitionMod8 aBS bBS = (BS.take cLen abBS, BS.drop cLen abBS)
68-
where abBS = BS.concat [aBS, bBS]
69-
abLen = BS.length abBS
70-
cLen = (abLen `div` 8) * 8
71-
72-
compressWordAsBit :: [BS.ByteString] -> [BS.ByteString]
44+
compressWordAsBit :: [ByteString] -> [ByteString]
7345
compressWordAsBit = compressWordAsBit' BS.empty
7446

75-
compressWordAsBit' :: BS.ByteString -> [BS.ByteString] -> [BS.ByteString]
47+
compressWordAsBit' :: ByteString -> [ByteString] -> [ByteString]
7648
compressWordAsBit' aBS iBS = case iBS of
7749
(bBS:bBSs) -> do
7850
let (cBS, dBS) = repartitionMod8 aBS bBS
@@ -103,7 +75,7 @@ yieldBitsOfWord8 w =
10375
yieldBitsofWord8s :: [Word8] -> [Bool]
10476
yieldBitsofWord8s = P.foldr ((++) . yieldBitsOfWord8) []
10577

106-
byteStringToBits :: [BS.ByteString] -> [Bool]
78+
byteStringToBits :: [ByteString] -> [Bool]
10779
byteStringToBits is = case is of
10880
(bs:bss) -> yieldBitsofWord8s (BS.unpack bs) ++ byteStringToBits bss
10981
[] -> []

src/HaskellWorks/Data/Xml/Internal/ByteString.hs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
module HaskellWorks.Data.Xml.Internal.ByteString
2+
( repartitionMod8
3+
) where
4+
5+
import Data.ByteString (ByteString)
6+
7+
import qualified Data.ByteString as BS
8+
9+
repartitionMod8 :: ByteString -> ByteString -> (ByteString, ByteString)
10+
repartitionMod8 aBS bBS = (BS.take cLen abBS, BS.drop cLen abBS)
11+
where abBS = BS.concat [aBS, bBS]
12+
abLen = BS.length abBS
13+
cLen = (abLen `div` 8) * 8
14+
{-# INLINE repartitionMod8 #-}

src/HaskellWorks/Data/Xml/Internal/Tables.hs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
module HaskellWorks.Data.Xml.Internal.Tables
2+
( interestingWord8s
3+
, isInterestingWord8
4+
) where
5+
6+
import Data.Word
7+
import Data.Word8
8+
import HaskellWorks.Data.AtIndex ((!!!))
9+
import Prelude as P
10+
11+
import qualified Data.Vector.Storable as DVS
12+
13+
interestingWord8s :: DVS.Vector Word8
14+
interestingWord8s = DVS.constructN 256 go
15+
where go :: DVS.Vector Word8 -> Word8
16+
go v = if w == _bracketleft
17+
|| w == _braceleft
18+
|| w == _parenleft
19+
|| w == _bracketleft
20+
|| w == _less
21+
|| w == _a
22+
|| w == _v
23+
|| w == _t
24+
then 1
25+
else 0
26+
where w :: Word8
27+
w = fromIntegral (DVS.length v)
28+
{-# NOINLINE interestingWord8s #-}
29+
30+
isInterestingWord8 :: Word8 -> Word8
31+
isInterestingWord8 b = fromIntegral (interestingWord8s !!! fromIntegral b)
32+
{-# INLINABLE isInterestingWord8 #-}

0 commit comments

Comments
 (0)