Skip to content

Commit 170ae9b

Browse files
jorisdralwenkokke
andcommitted
bloomfilter: make the hash salt configurable
Co-authored-by: Wen Kokke <[email protected]>
1 parent 2e7635c commit 170ae9b

File tree

9 files changed

+119
-81
lines changed

9 files changed

+119
-81
lines changed

bloomfilter/bench/bloomfilter-bench.hs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
module Main where
22

3+
import Criterion.Main (bench, bgroup, defaultMain, env, whnf)
34
import qualified Data.BloomFilter.Blocked as B.Blocked
45
import qualified Data.BloomFilter.Classic as B.Classic
5-
import Data.BloomFilter.Hash (Hashable (..), hash64)
6-
6+
import Data.BloomFilter.Hash (Hashable (..))
77
import Data.Word (Word64)
8-
import System.Random
9-
10-
import Criterion.Main
8+
import System.Random (StdGen, newStdGen, uniform)
119

1210
main :: IO ()
1311
main =
@@ -42,11 +40,13 @@ main =
4240

4341
constructBloom_classic :: Int -> Double -> StdGen -> B.Classic.Bloom Word64
4442
constructBloom_classic n fpr g0 =
45-
B.Classic.unfold (B.Classic.sizeForFPR fpr n) (nextElement n) (g0, 0)
43+
let (!salt, !g1) = uniform g0 in
44+
B.Classic.unfold (B.Classic.sizeForFPR fpr n) salt (nextElement n) (g1, 0)
4645

4746
constructBloom_blocked :: Int -> Double -> StdGen -> B.Blocked.Bloom Word64
4847
constructBloom_blocked n fpr g0 =
49-
B.Blocked.unfold (B.Blocked.sizeForFPR fpr n) (nextElement n) (g0, 0)
48+
let (!salt, !g1) = uniform g0 in
49+
B.Blocked.unfold (B.Blocked.sizeForFPR fpr n) salt (nextElement n) (g1, 0)
5050

5151
{-# INLINE nextElement #-}
5252
nextElement :: Int -> (StdGen, Int) -> Maybe (Word64, (StdGen, Int))

bloomfilter/examples/spell.hs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@ main :: IO ()
1010
main = do
1111
files <- getArgs
1212
dictionary <- readFile "/usr/share/dict/words"
13-
let !bloom = B.fromList (B.policyForFPR 0.01) (words dictionary)
13+
let !bloom = B.fromList (B.policyForFPR 0.01) bSalt (words dictionary)
1414
forM_ files $ \file ->
1515
putStrLn . unlines . filter (`B.notElem` bloom) . words
1616
=<< readFile file
17+
18+
bSalt :: B.Salt
19+
bSalt = 4

bloomfilter/src/Data/BloomFilter/Blocked.hs

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
module Data.BloomFilter.Blocked (
1717
-- * Types
1818
Hash,
19+
Salt,
1920
Hashable,
2021

2122
-- * Immutable Bloom filters
@@ -64,7 +65,7 @@ module Data.BloomFilter.Blocked (
6465

6566
-- * Low level variants
6667
Hashes,
67-
hashes,
68+
hashesWithSalt,
6869
insertHashes,
6970
elemHashes,
7071
-- ** Prefetching
@@ -92,34 +93,35 @@ import Prelude hiding (elem, notElem)
9293
-- Example:
9394
--
9495
-- @
95-
--filter = create (sizeForBits 16 2) $ \mf -> do
96+
-- filter = create (sizeForBits 16 2) 4 $ \mf -> do
9697
-- insert mf \"foo\"
9798
-- insert mf \"bar\"
9899
-- @
99100
--
100101
-- Note that the result of the setup function is not used.
101102
create :: BloomSize
103+
-> Salt
102104
-> (forall s. (MBloom s a -> ST s ())) -- ^ setup function
103105
-> Bloom a
104106
{-# INLINE create #-}
105-
create bloomsize body =
107+
create bloomsize bloomsalt body =
106108
runST $ do
107-
mb <- new bloomsize
109+
mb <- new bloomsize bloomsalt
108110
body mb
109111
unsafeFreeze mb
110112

111113
{-# INLINEABLE insert #-}
112114
-- | Insert a value into a mutable Bloom filter. Afterwards, a
113115
-- membership query for the same value is guaranteed to return @True@.
114116
insert :: Hashable a => MBloom s a -> a -> ST s ()
115-
insert = \ !mb !x -> insertHashes mb (hashes x)
117+
insert = \ !mb !x -> insertHashes mb (hashesWithSalt (mbHashSalt mb) x)
116118

117119
{-# INLINE elem #-}
118120
-- | Query an immutable Bloom filter for membership. If the value is
119121
-- present, return @True@. If the value is not present, there is
120122
-- /still/ some possibility that @True@ will be returned.
121123
elem :: Hashable a => a -> Bloom a -> Bool
122-
elem = \ !x !b -> elemHashes b (hashes x)
124+
elem = \ !x !b -> elemHashes b (hashesWithSalt (hashSalt b) x)
123125

124126
-- | Same as 'elem' but with the opposite argument order:
125127
--
@@ -150,12 +152,13 @@ notElem = \x b -> not (x `elem` b)
150152
unfold :: forall a b.
151153
Hashable a
152154
=> BloomSize
155+
-> Salt
153156
-> (b -> Maybe (a, b)) -- ^ seeding function
154157
-> b -- ^ initial seed
155158
-> Bloom a
156159
{-# INLINE unfold #-}
157-
unfold bloomsize f k =
158-
create bloomsize body
160+
unfold bloomsize bloomsalt f k =
161+
create bloomsize bloomsalt body
159162
where
160163
body :: forall s. MBloom s a -> ST s ()
161164
body mb = loop k
@@ -170,26 +173,29 @@ unfold bloomsize f k =
170173
-- For example
171174
--
172175
-- @
173-
-- filt = fromList (policyForBits 10) [\"foo\", \"bar\", \"quux\"]
176+
-- filter = fromList (policyForBits 10) 4 [\"foo\", \"bar\", \"quux\"]
174177
-- @
175178
fromList :: (Foldable t, Hashable a)
176179
=> BloomPolicy
180+
-> Salt
177181
-> t a -- ^ values to populate with
178182
-> Bloom a
179-
fromList policy xs =
180-
create bsize (\b -> mapM_ (insert b) xs)
183+
fromList policy bloomsalt xs =
184+
create bsize bloomsalt (\b -> mapM_ (insert b) xs)
181185
where
182186
bsize = sizeForPolicy policy (length xs)
183187

184188
{-# SPECIALISE deserialise :: BloomSize
189+
-> Salt
185190
-> (MutableByteArray RealWorld -> Int -> Int -> IO ())
186191
-> IO (Bloom a) #-}
187192
deserialise :: PrimMonad m
188193
=> BloomSize
194+
-> Salt
189195
-> (MutableByteArray (PrimState m) -> Int -> Int -> m ())
190196
-> m (Bloom a)
191-
deserialise bloomsize fill = do
192-
mbloom <- stToPrim $ new bloomsize
197+
deserialise bloomsize bloomsalt fill = do
198+
mbloom <- stToPrim $ new bloomsize bloomsalt
193199
Internal.deserialise mbloom fill
194200
stToPrim $ unsafeFreeze mbloom
195201

@@ -235,7 +241,7 @@ insertMany bloom key n =
235241
prepareProbes !i !i_w
236242
| i_w < 0x0f && i < n = do
237243
k <- key i
238-
let !kh = hashes k
244+
let !kh = hashesWithSalt (mbHashSalt bloom) k
239245
prefetchInsert bloom kh
240246
P.writePrimArray buf i_w kh
241247
prepareProbes (i+1) (i_w+1)
@@ -258,7 +264,7 @@ insertMany bloom key n =
258264
-- (from the read end of the buffer).
259265
| i < n = do
260266
k <- key i
261-
let !kh = hashes k
267+
let !kh = hashesWithSalt (mbHashSalt bloom) k
262268
prefetchInsert bloom kh
263269
P.writePrimArray buf i_w kh
264270
insertProbe

bloomfilter/src/Data/BloomFilter/Blocked/Internal.hs

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,19 @@
88
-- the trusted base.
99
module Data.BloomFilter.Blocked.Internal (
1010
-- * Mutable Bloom filters
11-
MBloom,
11+
MBloom (mbHashSalt),
1212
new,
1313
maxSizeBits,
1414

1515
-- * Immutable Bloom filters
16-
Bloom,
16+
Bloom (hashSalt),
1717
bloomInvariant,
1818
size,
1919

2020
-- * Hash-based operations
2121
Hashes,
22-
hashes,
22+
Salt,
23+
hashesWithSalt,
2324
insertHashes,
2425
prefetchInsert,
2526
elemHashes,
@@ -84,6 +85,7 @@ type MBloom :: Type -> Type -> Type
8485
data MBloom s a = MBloom {
8586
mbNumBlocks :: {-# UNPACK #-} !NumBlocks -- ^ non-zero
8687
, mbNumHashes :: {-# UNPACK #-} !Int
88+
, mbHashSalt :: {-# UNPACK #-} !Salt
8789
, mbBitArray :: {-# UNPACK #-} !(MBitArray s)
8890
}
8991
type role MBloom nominal nominal
@@ -100,13 +102,14 @@ instance NFData (MBloom s a) where
100102
--
101103
-- The filter size is capped at 'maxSizeBits'.
102104
--
103-
new :: BloomSize -> ST s (MBloom s a)
104-
new BloomSize { sizeBits, sizeHashes } = do
105+
new :: BloomSize -> Salt -> ST s (MBloom s a)
106+
new BloomSize { sizeBits, sizeHashes } mbHashSalt = do
105107
let numBlocks = bitsToBlocks (max 1 (min maxSizeBits sizeBits))
106108
mbBitArray <- BitArray.new numBlocks
107109
pure MBloom {
108110
mbNumBlocks = numBlocks,
109111
mbNumHashes = max 1 sizeHashes,
112+
mbHashSalt,
110113
mbBitArray
111114
}
112115

@@ -174,6 +177,7 @@ type Bloom :: Type -> Type
174177
data Bloom a = Bloom {
175178
numBlocks :: {-# UNPACK #-} !NumBlocks -- ^ non-zero
176179
, numHashes :: {-# UNPACK #-} !Int
180+
, hashSalt :: {-# UNPACK #-} !Salt
177181
, bitArray :: {-# UNPACK #-} !BitArray
178182
}
179183
deriving stock Eq
@@ -239,9 +243,9 @@ prefetchElem Bloom { numBlocks, bitArray } !h =
239243
--
240244
-- See also 'formatVersion' for compatibility advice.
241245
--
242-
serialise :: Bloom a -> (BloomSize, ByteArray, Int, Int)
246+
serialise :: Bloom a -> (BloomSize, Salt, ByteArray, Int, Int)
243247
serialise b@Bloom{bitArray} =
244-
(size b, ba, off, len)
248+
(size b, hashSalt b, ba, off, len)
245249
where
246250
(ba, off, len) = BitArray.serialise bitArray
247251

@@ -253,11 +257,12 @@ serialise b@Bloom{bitArray} =
253257
-- | Create an immutable Bloom filter from a mutable one. The mutable
254258
-- filter may be modified afterwards.
255259
freeze :: MBloom s a -> ST s (Bloom a)
256-
freeze MBloom { mbNumBlocks, mbNumHashes, mbBitArray } = do
260+
freeze MBloom { mbNumBlocks, mbNumHashes, mbHashSalt, mbBitArray } = do
257261
bitArray <- BitArray.freeze mbBitArray
258262
let !bf = Bloom {
259263
numBlocks = mbNumBlocks,
260264
numHashes = mbNumHashes,
265+
hashSalt = mbHashSalt,
261266
bitArray
262267
}
263268
assert (bloomInvariant bf) $ pure bf
@@ -266,23 +271,25 @@ freeze MBloom { mbNumBlocks, mbNumHashes, mbBitArray } = do
266271
-- mutable filter /must not/ be modified afterwards. For a safer creation
267272
-- interface, use 'freeze' or 'create'.
268273
unsafeFreeze :: MBloom s a -> ST s (Bloom a)
269-
unsafeFreeze MBloom { mbNumBlocks, mbNumHashes, mbBitArray } = do
274+
unsafeFreeze MBloom { mbNumBlocks, mbNumHashes, mbHashSalt, mbBitArray } = do
270275
bitArray <- BitArray.unsafeFreeze mbBitArray
271276
let !bf = Bloom {
272277
numBlocks = mbNumBlocks,
273278
numHashes = mbNumHashes,
279+
hashSalt = mbHashSalt,
274280
bitArray
275281
}
276282
assert (bloomInvariant bf) $ pure bf
277283

278284
-- | Copy an immutable Bloom filter to create a mutable one. There is
279285
-- no non-copying equivalent.
280286
thaw :: Bloom a -> ST s (MBloom s a)
281-
thaw Bloom { numBlocks, numHashes, bitArray } = do
287+
thaw Bloom { numBlocks, numHashes, hashSalt, bitArray } = do
282288
mbBitArray <- BitArray.thaw bitArray
283289
pure MBloom {
284290
mbNumBlocks = numBlocks,
285291
mbNumHashes = numHashes,
292+
mbHashSalt = hashSalt,
286293
mbBitArray
287294
}
288295

@@ -317,9 +324,9 @@ newtype Hashes a = Hashes Hash
317324
deriving newtype Prim
318325
type role Hashes nominal
319326

320-
{-# INLINE hashes #-}
321-
hashes :: Hashable a => a -> Hashes a
322-
hashes = Hashes . hash64
327+
{-# INLINE hashesWithSalt #-}
328+
hashesWithSalt :: Hashable a => Salt -> a -> Hashes a
329+
hashesWithSalt = \ !salt !x -> Hashes (hashSalt64 salt x)
323330

324331
{-# INLINE blockIxAndBitGen #-}
325332
-- | The scheme for turning 'Hashes' into block and bit indexes is as follows:

0 commit comments

Comments
 (0)