Skip to content

Commit 08445e8

Browse files
committed
feat(bloomfilter): add salt
1 parent 9982e14 commit 08445e8

File tree

5 files changed

+90
-56
lines changed

5 files changed

+90
-56
lines changed

bloomfilter/src/Data/BloomFilter/Blocked.hs

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
module Data.BloomFilter.Blocked (
1717
-- * Types
1818
Hash,
19+
Salt (Salt),
1920
Hashable,
2021

2122
-- * Immutable Bloom filters
@@ -92,34 +93,35 @@ import Prelude hiding (elem, notElem)
9293
-- Example:
9394
--
9495
-- @
95-
--filter = create (sizeForBits 16 2) $ \mf -> do
96+
--filter = create salt (sizeForBits 16 2) $ \mf -> do
9697
-- insert mf \"foo\"
9798
-- insert mf \"bar\"
9899
-- @
99100
--
100101
-- Note that the result of the setup function is not used.
101-
create :: BloomSize
102+
create :: Salt
103+
-> BloomSize
102104
-> (forall s. (MBloom s a -> ST s ())) -- ^ setup function
103105
-> Bloom a
104106
{-# INLINE create #-}
105-
create bloomsize body =
107+
create bloomsalt bloomsize body =
106108
runST $ do
107-
mb <- new bloomsize
109+
mb <- new bloomsalt bloomsize
108110
body mb
109111
unsafeFreeze mb
110112

111113
{-# INLINEABLE insert #-}
112114
-- | Insert a value into a mutable Bloom filter. Afterwards, a
113115
-- membership query for the same value is guaranteed to return @True@.
114116
insert :: Hashable a => MBloom s a -> a -> ST s ()
115-
insert = \ !mb !x -> insertHashes mb (hashes x)
117+
insert = \ !mb !x -> insertHashes mb (hashes (mbHashSalt mb) x)
116118

117119
{-# INLINE elem #-}
118120
-- | Query an immutable Bloom filter for membership. If the value is
119121
-- present, return @True@. If the value is not present, there is
120122
-- /still/ some possibility that @True@ will be returned.
121123
elem :: Hashable a => a -> Bloom a -> Bool
122-
elem = \ !x !b -> elemHashes b (hashes x)
124+
elem = \ !x !b -> elemHashes b (hashes (hashSalt b) x)
123125

124126
-- | Same as 'elem' but with the opposite argument order:
125127
--
@@ -149,13 +151,14 @@ notElem = \x b -> not (x `elem` b)
149151
-- @b@ is used as a new seed.
150152
unfold :: forall a b.
151153
Hashable a
152-
=> BloomSize
154+
=> Salt
155+
-> BloomSize
153156
-> (b -> Maybe (a, b)) -- ^ seeding function
154157
-> b -- ^ initial seed
155158
-> Bloom a
156159
{-# INLINE unfold #-}
157-
unfold bloomsize f k =
158-
create bloomsize body
160+
unfold bloomsalt bloomsize f k =
161+
create bloomsalt bloomsize body
159162
where
160163
body :: forall s. MBloom s a -> ST s ()
161164
body mb = loop k
@@ -173,23 +176,26 @@ unfold bloomsize f k =
173176
-- filt = fromList (policyForBits 10) [\"foo\", \"bar\", \"quux\"]
174177
-- @
175178
fromList :: (Foldable t, Hashable a)
176-
=> BloomPolicy
179+
=> Salt
180+
-> BloomPolicy
177181
-> t a -- ^ values to populate with
178182
-> Bloom a
179-
fromList policy xs =
180-
create bsize (\b -> mapM_ (insert b) xs)
183+
fromList bloomsalt policy xs =
184+
create bloomsalt bsize (\b -> mapM_ (insert b) xs)
181185
where
182186
bsize = sizeForPolicy policy (length xs)
183187

184-
{-# SPECIALISE deserialise :: BloomSize
188+
{-# SPECIALISE deserialise :: Salt
189+
-> BloomSize
185190
-> (MutableByteArray RealWorld -> Int -> Int -> IO ())
186191
-> IO (Bloom a) #-}
187192
deserialise :: PrimMonad m
188-
=> BloomSize
193+
=> Salt
194+
-> BloomSize
189195
-> (MutableByteArray (PrimState m) -> Int -> Int -> m ())
190196
-> m (Bloom a)
191-
deserialise bloomsize fill = do
192-
mbloom <- stToPrim $ new bloomsize
197+
deserialise bloomsalt bloomsize fill = do
198+
mbloom <- stToPrim $ new bloomsalt bloomsize
193199
Internal.deserialise mbloom fill
194200
stToPrim $ unsafeFreeze mbloom
195201

@@ -235,7 +241,7 @@ insertMany bloom key n =
235241
prepareProbes !i !i_w
236242
| i_w < 0x0f && i < n = do
237243
k <- key i
238-
let !kh = hashes k
244+
let !kh = hashes (mbHashSalt bloom) k
239245
prefetchInsert bloom kh
240246
P.writePrimArray buf i_w kh
241247
prepareProbes (i+1) (i_w+1)
@@ -258,7 +264,7 @@ insertMany bloom key n =
258264
-- (from the read end of the buffer).
259265
| i < n = do
260266
k <- key i
261-
let !kh = hashes k
267+
let !kh = hashes (mbHashSalt bloom) k
262268
prefetchInsert bloom kh
263269
P.writePrimArray buf i_w kh
264270
insertProbe

bloomfilter/src/Data/BloomFilter/Blocked/Internal.hs

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,19 @@
99
module Data.BloomFilter.Blocked.Internal (
1010
-- * Mutable Bloom filters
1111
MBloom,
12+
mbHashSalt,
1213
new,
1314
maxSizeBits,
1415

1516
-- * Immutable Bloom filters
1617
Bloom,
18+
hashSalt,
1719
bloomInvariant,
1820
size,
1921

2022
-- * Hash-based operations
2123
Hashes,
24+
Salt (Salt),
2225
hashes,
2326
insertHashes,
2427
prefetchInsert,
@@ -52,6 +55,7 @@ import Data.BloomFilter.Blocked.BitArray (BitArray, BitIx (..),
5255
import qualified Data.BloomFilter.Blocked.BitArray as BitArray
5356
import Data.BloomFilter.Classic.Calc
5457
import Data.BloomFilter.Hash
58+
import Data.Word (Word64)
5559

5660
-- | The version of the format used by 'serialise' and 'deserialise'. The
5761
-- format number will change when there is an incompatible change in the
@@ -84,6 +88,7 @@ type MBloom :: Type -> Type -> Type
8488
data MBloom s a = MBloom {
8589
mbNumBlocks :: {-# UNPACK #-} !NumBlocks -- ^ non-zero
8690
, mbNumHashes :: {-# UNPACK #-} !Int
91+
, mbHashSalt :: {-# UNPACK #-} !Salt
8792
, mbBitArray :: {-# UNPACK #-} !(MBitArray s)
8893
}
8994
type role MBloom nominal nominal
@@ -100,13 +105,14 @@ instance NFData (MBloom s a) where
100105
--
101106
-- The filter size is capped at 'maxSizeBits'.
102107
--
103-
new :: BloomSize -> ST s (MBloom s a)
104-
new BloomSize { sizeBits, sizeHashes } = do
108+
new :: Salt -> BloomSize -> ST s (MBloom s a)
109+
new hashSalt BloomSize { sizeBits, sizeHashes } = do
105110
let numBlocks = bitsToBlocks (max 1 (min maxSizeBits sizeBits))
106111
mbBitArray <- BitArray.new numBlocks
107112
pure MBloom {
108113
mbNumBlocks = numBlocks,
109114
mbNumHashes = max 1 sizeHashes,
115+
mbHashSalt = hashSalt,
110116
mbBitArray
111117
}
112118

@@ -174,6 +180,7 @@ type Bloom :: Type -> Type
174180
data Bloom a = Bloom {
175181
numBlocks :: {-# UNPACK #-} !NumBlocks -- ^ non-zero
176182
, numHashes :: {-# UNPACK #-} !Int
183+
, hashSalt :: {-# UNPACK #-} !Salt
177184
, bitArray :: {-# UNPACK #-} !BitArray
178185
}
179186
deriving stock Eq
@@ -253,11 +260,12 @@ serialise b@Bloom{bitArray} =
253260
-- | Create an immutable Bloom filter from a mutable one. The mutable
254261
-- filter may be modified afterwards.
255262
freeze :: MBloom s a -> ST s (Bloom a)
256-
freeze MBloom { mbNumBlocks, mbNumHashes, mbBitArray } = do
263+
freeze MBloom { mbNumBlocks, mbNumHashes, mbHashSalt, mbBitArray } = do
257264
bitArray <- BitArray.freeze mbBitArray
258265
let !bf = Bloom {
259266
numBlocks = mbNumBlocks,
260267
numHashes = mbNumHashes,
268+
hashSalt = mbHashSalt,
261269
bitArray
262270
}
263271
assert (bloomInvariant bf) $ pure bf
@@ -266,23 +274,25 @@ freeze MBloom { mbNumBlocks, mbNumHashes, mbBitArray } = do
266274
-- mutable filter /must not/ be modified afterwards. For a safer creation
267275
-- interface, use 'freeze' or 'create'.
268276
unsafeFreeze :: MBloom s a -> ST s (Bloom a)
269-
unsafeFreeze MBloom { mbNumBlocks, mbNumHashes, mbBitArray } = do
277+
unsafeFreeze MBloom { mbNumBlocks, mbNumHashes, mbHashSalt, mbBitArray } = do
270278
bitArray <- BitArray.unsafeFreeze mbBitArray
271279
let !bf = Bloom {
272280
numBlocks = mbNumBlocks,
273281
numHashes = mbNumHashes,
282+
hashSalt = mbHashSalt,
274283
bitArray
275284
}
276285
assert (bloomInvariant bf) $ pure bf
277286

278287
-- | Copy an immutable Bloom filter to create a mutable one. There is
279288
-- no non-copying equivalent.
280289
thaw :: Bloom a -> ST s (MBloom s a)
281-
thaw Bloom { numBlocks, numHashes, bitArray } = do
290+
thaw Bloom { numBlocks, numHashes, hashSalt, bitArray } = do
282291
mbBitArray <- BitArray.thaw bitArray
283292
pure MBloom {
284293
mbNumBlocks = numBlocks,
285294
mbNumHashes = numHashes,
295+
mbHashSalt = hashSalt,
286296
mbBitArray
287297
}
288298

@@ -317,9 +327,13 @@ newtype Hashes a = Hashes Hash
317327
deriving newtype Prim
318328
type role Hashes nominal
319329

330+
-- | The salt value to be used for hashes.
331+
newtype Salt = Salt Word64
332+
deriving stock (Eq, Show)
333+
320334
{-# INLINE hashes #-}
321-
hashes :: Hashable a => a -> Hashes a
322-
hashes = Hashes . hash64
335+
hashes :: (Hashable a) => Salt -> a -> Hashes a
336+
hashes = \ (Salt !salt) !x -> Hashes (hashSalt64 salt x)
323337

324338
{-# INLINE blockIxAndBitGen #-}
325339
-- | The scheme for turning 'Hashes' into block and bit indexes is as follows:

bloomfilter/src/Data/BloomFilter/Classic.hs

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ module Data.BloomFilter.Classic (
2222

2323
-- * Types
2424
Hash,
25+
Salt (Salt),
2526
Hashable,
2627

2728
-- * Immutable Bloom filters
@@ -100,26 +101,27 @@ import Prelude hiding (elem, notElem, read)
100101
-- @
101102
--
102103
-- Note that the result of the setup function is not used.
103-
create :: BloomSize
104+
create :: Salt
105+
-> BloomSize
104106
-> (forall s. (MBloom s a -> ST s ())) -- ^ setup function
105107
-> Bloom a
106108
{-# INLINE create #-}
107-
create bloomsize body =
109+
create bloomsalt bloomsize body =
108110
runST $ do
109-
mb <- new bloomsize
111+
mb <- new bloomsalt bloomsize
110112
body mb
111113
unsafeFreeze mb
112114

113115
-- | Insert a value into a mutable Bloom filter. Afterwards, a
114116
-- membership query for the same value is guaranteed to return @True@.
115117
insert :: Hashable a => MBloom s a -> a -> ST s ()
116-
insert !mb !x = insertHashes mb (hashes x)
118+
insert !mb !x = insertHashes mb (hashes (mbHashSalt mb) x)
117119

118120
-- | Query an immutable Bloom filter for membership. If the value is
119121
-- present, return @True@. If the value is not present, there is
120122
-- /still/ some possibility that @True@ will be returned.
121123
elem :: Hashable a => a -> Bloom a -> Bool
122-
elem = \ !x !b -> elemHashes b (hashes x)
124+
elem = \ !x !b -> elemHashes b (hashes (hashSalt b) x)
123125

124126
-- | Same as 'elem' but with the opposite argument order:
125127
--
@@ -142,7 +144,7 @@ notElem = \ x b -> not (x `elem` b)
142144
-- present, return @True@. If the value is not present, there is
143145
-- /still/ some possibility that @True@ will be returned.
144146
read :: Hashable a => MBloom s a -> a -> ST s Bool
145-
read !mb !x = readHashes mb (hashes x)
147+
read !mb !x = readHashes mb (hashes (mbHashSalt mb) x)
146148

147149
-- | Build an immutable Bloom filter from a seed value. The seeding
148150
-- function populates the filter as follows.
@@ -154,13 +156,14 @@ read !mb !x = readHashes mb (hashes x)
154156
-- @b@ is used as a new seed.
155157
unfold :: forall a b.
156158
Hashable a
157-
=> BloomSize
159+
=> Salt
160+
-> BloomSize
158161
-> (b -> Maybe (a, b)) -- ^ seeding function
159162
-> b -- ^ initial seed
160163
-> Bloom a
161164
{-# INLINE unfold #-}
162-
unfold bloomsize f k =
163-
create bloomsize body
165+
unfold bloomsalt bloomsize f k =
166+
create bloomsalt bloomsize body
164167
where
165168
body :: forall s. MBloom s a -> ST s ()
166169
body mb = loop k
@@ -180,23 +183,26 @@ unfold bloomsize f k =
180183
-- filt = fromList (policyForBits 10) [\"foo\", \"bar\", \"quux\"]
181184
-- @
182185
fromList :: (Foldable t, Hashable a)
183-
=> BloomPolicy
186+
=> Salt
187+
-> BloomPolicy
184188
-> t a -- ^ values to populate with
185189
-> Bloom a
186-
fromList policy xs =
187-
create bsize (\b -> mapM_ (insert b) xs)
190+
fromList bsalt policy xs =
191+
create bsalt bsize (\b -> mapM_ (insert b) xs)
188192
where
189193
bsize = sizeForPolicy policy (length xs)
190194

191-
{-# SPECIALISE deserialise :: BloomSize
195+
{-# SPECIALISE deserialise :: Salt
196+
-> BloomSize
192197
-> (MutableByteArray RealWorld -> Int -> Int -> IO ())
193198
-> IO (Bloom a) #-}
194199
deserialise :: PrimMonad m
195-
=> BloomSize
200+
=> Salt
201+
-> BloomSize
196202
-> (MutableByteArray (PrimState m) -> Int -> Int -> m ())
197203
-> m (Bloom a)
198-
deserialise bloomsize fill = do
199-
mbloom <- stToPrim $ new bloomsize
204+
deserialise bloomsalt bloomsize fill = do
205+
mbloom <- stToPrim $ new bloomsalt bloomsize
200206
Internal.deserialise mbloom fill
201207
stToPrim $ unsafeFreeze mbloom
202208

0 commit comments

Comments
 (0)