Skip to content

Commit b0e251b

Browse files
committed
Make topBits64 safe for raw bytes of size less than 8
When the size of the raw bytes is less than 8, then a fallback method is used that is likely slower. When the size of the raw bytes is 8 or higher, then an additional integer comparison is made, but this has very little impact on performance, which I verified using micro-benchmarks. The upside to making the function safe for any input raw bytes is that the API and test generators become simpler, because there is one fewer constraint to satisfy: the minimum size of 8 bytes for serialised keys.
1 parent 89170eb commit b0e251b

File tree

2 files changed

+51
-7
lines changed

2 files changed

+51
-7
lines changed

src/Database/LSMTree/Internal/RawBytes.hs

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ module Database.LSMTree.Internal.RawBytes (
5151
) where
5252

5353
import Control.DeepSeq (NFData)
54-
import Control.Exception (assert)
54+
import Data.Bits (Bits (..))
5555
import Data.BloomFilter.Hash (Hashable (..), hashByteArray)
5656
import qualified Data.ByteString as BS
5757
import qualified Data.ByteString.Builder as BB
@@ -71,6 +71,9 @@ import GHC.Stack
7171
import GHC.Word
7272
import Text.Printf (printf)
7373

74+
-- $setup
75+
-- >>> import Numeric
76+
7477
{- Note: [Export structure]
7578
~~~~~~~~~~~~~~~~~~~~~~~
7679
Since RawBytes are very similar to Primitive Vectors, the code is sectioned
@@ -172,14 +175,29 @@ drop = coerce VP.drop
172175
--
173176
-- The /top/ corresponds to the most significant bit (big-endian).
174177
--
175-
-- PRECONDITION: The byte-size of the raw bytes should be at least 8 bytes.
178+
-- If the number of bits is smaller than @64@, then any missing bits default to
179+
-- @0@s.
180+
--
181+
-- >>> showHex (topBits64 (pack [1,0,0,0,0,0,0,0])) ""
182+
-- "100000000000000"
183+
--
184+
-- >>> showHex (topBits64 (pack [1,0,0])) ""
185+
-- "100000000000000"
176186
--
177187
-- TODO: optimisation ideas: use unsafe shift/byteswap primops, look at GHC
178188
-- core, find other opportunities for using primops.
179189
--
180190
topBits64 :: RawBytes -> Word64
181-
topBits64 rb@(RawBytes (VP.Vector (I# off#) _size (ByteArray k#))) =
182-
assert (size rb >= 8) $ toWord64 (indexWord8ArrayAsWord64# k# off#)
191+
topBits64 rb@(RawBytes v@(VP.Vector (I# off#) _size (ByteArray k#)))
192+
| n >= 8
193+
= toWord64 (indexWord8ArrayAsWord64# k# off#)
194+
| otherwise
195+
= VP.foldl' f 0 v `unsafeShiftL` ((8 - n) * 8)
196+
where
197+
!n = size rb
198+
199+
f :: Word64 -> Word8 -> Word64
200+
f acc w = acc `unsafeShiftL` 8 + fromIntegral w
183201

184202
#if (MIN_VERSION_GLASGOW_HASKELL(9, 4, 0, 0))
185203
toWord64 :: Word64# -> Word64

test/Test/Database/LSMTree/Internal/RawBytes.hs

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
1+
{-# LANGUAGE OverloadedLists #-}
2+
13
module Test.Database.LSMTree.Internal.RawBytes (tests) where
24

5+
import Data.Bits (Bits (shiftL))
6+
import qualified Data.List as List
7+
import qualified Data.Vector.Primitive as VP
38
import Database.LSMTree.Extras.Generators ()
4-
import Database.LSMTree.Internal.RawBytes (RawBytes)
5-
import qualified Database.LSMTree.Internal.RawBytes as RB (size)
9+
import Database.LSMTree.Internal.RawBytes (RawBytes (RawBytes))
10+
import qualified Database.LSMTree.Internal.RawBytes as RB
611
import Test.QuickCheck (Property, classify, collect, mapSize,
712
withDiscardRatio, withMaxSuccess, (.||.), (===), (==>))
813
import Test.Tasty (TestTree, testGroup)
@@ -26,7 +31,9 @@ tests = testGroup "Test.Database.LSMTree.Internal.RawBytes" $
2631
testProperty "Transitivity" prop_ordTransitivity,
2732
testProperty "Reflexivity" prop_ordReflexivity,
2833
testProperty "Antisymmetry" prop_ordAntisymmetry
29-
]
34+
],
35+
testProperty "prop_topBits64" prop_topBits64,
36+
testProperty "prop_topBits64_default0s" prop_topBits64_default0s
3037
]
3138

3239
-- * Utilities
@@ -92,3 +99,22 @@ prop_ordAntisymmetry = mapSize (const 4) $
9299
untunedProp block1 block2
93100
= withFirstBlockSizeInfo block1 $
94101
block1 <= block2 && block2 <= block1 ==> block1 === block2
102+
103+
{-------------------------------------------------------------------------------
104+
Accessors
105+
-------------------------------------------------------------------------------}
106+
107+
-- | Compare 'topBits64' against a model
108+
prop_topBits64 :: RawBytes -> Property
109+
prop_topBits64 x@(RawBytes v) =
110+
expected === RB.topBits64 x
111+
where
112+
expected =
113+
let ws = take 8 (VP.toList v ++ repeat 0)
114+
in List.foldl' (\acc w -> acc `shiftL` 8 + fromIntegral w) 0 ws
115+
116+
-- | If @x@ has fewer than 8 bytes, then all missing bits in the result default
117+
-- to 0s.
118+
prop_topBits64_default0s :: RawBytes -> Property
119+
prop_topBits64_default0s x =
120+
RB.topBits64 x === RB.topBits64 (x <> mconcat (replicate 8 [0]))

0 commit comments

Comments
 (0)