1- -- |
1+ -- | A fast, space efficient Bloom filter implementation. A Bloom filter is a
2+ -- set-like data structure that provides a probabilistic membership test.
23--
3- -- A fast, space efficient Bloom filter implementation. A Bloom
4- -- filter is a set-like data structure that provides a probabilistic
5- -- membership test.
4+ -- * Queries do not give false negatives. When an element is added to a filter,
5+ -- a subsequent membership test will definitely return 'True'.
66--
7- -- * Queries do not give false negatives. When an element is added to
8- -- a filter, a subsequent membership test will definitely return
9- -- 'True' .
7+ -- * False positives /are/ possible. If an element has not been added to a
8+ -- filter, a membership test /may/ nevertheless indicate that the element is
9+ -- present.
1010--
11- -- * False positives /are/ possible. If an element has not been added
12- -- to a filter, a membership test /may/ nevertheless indicate that
13- -- the element is present.
14- --
15-
1611module Data.BloomFilter.Blocked (
12+ -- * Overview
13+ -- $overview
14+
1715 -- * Types
1816 Hash ,
1917 Salt ,
@@ -57,6 +55,7 @@ module Data.BloomFilter.Blocked (
5755 maxSizeBits ,
5856 insert ,
5957 insertMany ,
58+ read ,
6059
6160 -- ** Conversion
6261 freeze ,
@@ -68,6 +67,7 @@ module Data.BloomFilter.Blocked (
6867 hashesWithSalt ,
6968 insertHashes ,
7069 elemHashes ,
70+ readHashes ,
7171 -- ** Prefetching
7272 prefetchInsert ,
7373 prefetchElem ,
@@ -80,23 +80,60 @@ import Data.Bits ((.&.))
8080import Data.Primitive.ByteArray (MutableByteArray )
8181import qualified Data.Primitive.PrimArray as P
8282
83- import Data.BloomFilter.Blocked.Calc
83+ import Data.BloomFilter.Blocked.Calc (BitsPerEntry , BloomPolicy (.. ),
84+ BloomSize (.. ), FPR , NumEntries , policyFPR , policyForBits ,
85+ policyForFPR , sizeForBits , sizeForFPR , sizeForPolicy )
8486import Data.BloomFilter.Blocked.Internal hiding (deserialise )
8587import qualified Data.BloomFilter.Blocked.Internal as Internal
8688import Data.BloomFilter.Hash
8789
88- import Prelude hiding (elem , notElem )
90+ import Prelude hiding (elem , notElem , read )
91+
92+ -- $setup
93+ --
94+ -- >>> import Text.Printf
95+
96+ -- $overview
97+ --
98+ -- Each of the functions for creating Bloom filters accepts a 'BloomSize'. The
99+ -- size determines the number of bits that should be used for the filter. Note
100+ -- that a filter is fixed in size; it cannot be resized after creation.
101+ --
102+ -- The size can be specified by asking for a target false positive rate (FPR)
103+ -- or a number of bits per element, and the number of elements in the filter.
104+ -- For example:
105+ --
106+ -- * @'sizeForFPR' 1e-3 10_000@ for a Bloom filter sized for 10,000 elements
107+ -- with a false positive rate of 1 in 1000
108+ --
109+ -- * @'sizeForBits' 10 10_000@ for a Bloom filter sized for 10,000 elements
110+ -- with 10 bits per element
111+ --
112+ -- Depending on the application it may be more important to target a fixed
113+ -- amount of memory to use, or target a specific FPR.
114+ --
115+ -- As a very rough guide for filter sizes, here is a range of FPRs and bits
116+ -- per element:
117+ --
118+ -- * FPR of 1e-1 requires approximately 4.8 bits per element
119+ -- * FPR of 1e-2 requires approximately 9.8 bits per element
120+ -- * FPR of 1e-3 requires approximately 15.8 bits per element
121+ -- * FPR of 1e-4 requires approximately 22.6 bits per element
122+ -- * FPR of 1e-5 requires approximately 30.2 bits per element
123+ --
124+ -- >>> fmap (printf "%0.1f" . policyBits . policyForFPR) [1e-1, 1e-2, 1e-3, 1e-4, 1e-5]
125+ -- ["4.8","9.8","15.8","22.6","30.2"]
89126
90127-- | Create an immutable Bloom filter, using the given setup function
91128-- which executes in the 'ST' monad.
92129--
93130-- Example:
94131--
95- -- @
132+ -- >>> :{
96133-- filter = create (sizeForBits 16 2) 4 $ \mf -> do
97- -- insert mf \ "foo\ "
98- -- insert mf \ "bar\ "
99- -- @
134+ -- insert mf "foo"
135+ -- insert mf "bar"
136+ -- :}
100137--
101138-- Note that the result of the setup function is not used.
102139create :: BloomSize
@@ -141,6 +178,12 @@ elem = \ !x !b -> elemHashes b (hashesWithSalt (hashSalt b) x)
141178notElem :: Hashable a => a -> Bloom a -> Bool
142179notElem = \ item bloom -> not (elem item bloom) -- plain negation of 'elem'
143180
181+ -- | Query a mutable Bloom filter for membership. A value that has been
182+ -- inserted always yields @True@; a value that has not been inserted may
183+ -- /still/ yield @True@. The value is hashed with the filter's own salt.
184+ read :: Hashable a => MBloom s a -> a -> ST s Bool
185+ read !mb !x = readHashes mb (hashesWithSalt (mbHashSalt mb) x)
186+
144187-- | Build an immutable Bloom filter from a seed value. The seeding
145188-- function populates the filter as follows.
146189--
@@ -168,6 +211,7 @@ unfold bloomsize bloomsalt f k =
168211 Nothing -> pure ()
169212 Just (a, j') -> insert mb a >> loop j'
170213
214+ {-# INLINEABLE fromList #-}
171215-- | Create a Bloom filter, populating it from a sequence of values.
172216--
173217-- For example
@@ -185,10 +229,11 @@ fromList policy bloomsalt xs =
185229 where
186230 bsize = sizeForPolicy policy (length xs)
187231
188- {-# SPECIALISE deserialise :: BloomSize
189- -> Salt
190- -> (MutableByteArray RealWorld -> Int -> Int -> IO ())
191- -> IO (Bloom a) #-}
232+ {-# SPECIALISE deserialise ::
233+ BloomSize
234+ -> Salt
235+ -> (MutableByteArray RealWorld -> Int -> Int -> IO ())
236+ -> IO (Bloom a) #-}
192237deserialise :: PrimMonad m
193238 => BloomSize
194239 -> Salt
0 commit comments