diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5af5241 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*.swp +cabal.project.local* +dist-newstyle diff --git a/bench/Cuckoo.hs b/bench/Cuckoo.hs new file mode 100644 index 0000000..e1e81cd --- /dev/null +++ b/bench/Cuckoo.hs @@ -0,0 +1,83 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} + +{-# OPTIONS_GHC -fno-warn-orphans #-} + +-- | +-- Module: Main +-- Copyright: Copyright © 2019-2021 Lars Kuhtz +-- License: BSD3 +-- Maintainer: Lars Kuhtz +-- Stability: experimental +-- +module Main +( main +) where + +import Control.Monad (filterM) +import Control.Monad.Primitive (PrimState) +import Control.Monad.ST (runST) +import Criterion +import Criterion.Main +import Data.Foldable (traverse_) + +import Data.Cuckoo + +-- -------------------------------------------------------------------------- -- +-- Main + +main :: IO () +main = do + cf10k <- do + cf <- newCuckooFilter @4 @10 @Int 0 12000 + traverse_ (insert cf) [1..10000] + pure $! cf + + defaultMain + [ bgroup "insert" + [ insertBench 10000 12000 [1..10000] + ] + , bgroup "member" + [ memberBench "1:1 hit:hiss" 10000 cf10k (interleave [5000..10000] [10001..15000]) + , memberBench "only hit" 10000 cf10k [1..10000] + ] +#ifdef RANDOM_STANDARD + , bgroup "delete" + [ deleteBench "all" 10000 cf10k [1..10000] + ] +#endif + ] + +-- -------------------------------------------------------------------------- -- + +instance CuckooFilterHash Int + +insertBench :: Int -> Int -> [Int] -> Benchmark +insertBench n cap xs0 = bench ("n-" <> show n <> "-cap-" <> show cap) $ whnf f xs0 + where + f xs = runST $ do + cf <- newCuckooFilter @4 @10 @Int 0 (fromIntegral cap) + failed <- filterM (fmap not . insert cf) xs + pure $! length failed + +memberBench :: String -> Int -> CuckooFilter (PrimState IO) 4 10 Int -> [Int] -> Benchmark +memberBench label n cf xs = bench (label <> " - " <> show n) $ whnfIO f + where + f = fmap length (filterM (member cf) xs) + +#ifdef RANDOM_STANDARD +deleteBench :: String -> Int -> CuckooFilter (PrimState IO) 4 10 Int -> [Int] -> Benchmark +deleteBench label n cf0 xs = bench (label <> " - " <> show n) $ whnfIO f + where + f = do + (cf, _) <- splitCuckooFilter cf0 + fmap length (filterM (delete cf) xs) +#endif + +-- | Doesn't cause actual difference in bench. +interleave :: [a] -> [a] -> [a] +interleave (a:as) (b:bs) = a:b:interleave as bs +interleave as bs = as <> bs + diff --git a/cuckoo.cabal b/cuckoo.cabal index 255480f..6b8e448 100644 --- a/cuckoo.cabal +++ b/cuckoo.cabal @@ -70,6 +70,7 @@ library random-internal , vector >=0.12 cpp-options: -DRANDOM_MWC else + cpp-options: -DRANDOM_STANDARD build-depends: , random >=1.1 @@ -166,3 +167,22 @@ benchmark internal-benchmarks , base >=4.10 && <5 , criterion >= 1.5 +benchmark cuckoo-benchmarks + type: exitcode-stdio-1.0 + hs-source-dirs: bench + main-is: Cuckoo.hs + default-language: Haskell2010 + ghc-options: + -Wall + -rtsopts + -threaded + -with-rtsopts=-N + build-depends: + -- internal + , cuckoo + + -- external + , base >=4.10 && <5 + , criterion >= 1.5 + , primitive >=0.6.4.0 + diff --git a/lib/random/System/Random/Internal.hs b/lib/random/System/Random/Internal.hs index 95c296b..60aed38 100644 --- a/lib/random/System/Random/Internal.hs +++ b/lib/random/System/Random/Internal.hs @@ -20,6 +20,7 @@ module System.Random.Internal , initialize , uniform , uniformR +, genSplit ) where -- -------------------------------------------------------------------------- -- @@ -36,6 +37,12 @@ initialize -> m (Gen (PrimState m)) initialize salt = PCG.initialize 0 (fromIntegral salt) +genSplit + :: PrimMonad m + => Gen (PrimState m) + -> m (Gen (PrimState m), Gen (PrimState m)) +genSplit = error "genSplit: not available in RANDOM_PCG" + -- -------------------------------------------------------------------------- -- -- MWC #elif defined RANDOM_MWC @@ -51,6 +58,12 @@ initialize -> m (Gen (PrimState m)) initialize salt = MWC.initialize (singleton $ fromIntegral salt) +genSplit + :: PrimMonad m + => Gen (PrimState m) + -> m (Gen (PrimState m), Gen (PrimState m)) +genSplit = error "genSplit: not available in RANDOM_MWC: see https://github.com/haskell/mwc-random/issues/39" + -- -------------------------------------------------------------------------- -- -- Random #else @@ -95,5 +108,17 @@ uniform gen = stToPrim $ do writeSTRef gen g return r +-- | Odd name to avoid collision with splitGen. +-- Using legacy 'split' name for version compatibility. +genSplit + :: PrimMonad m + => Gen (PrimState m) + -> m (Gen (PrimState m), Gen (PrimState m)) +genSplit gen = stToPrim $ do + g <- readSTRef gen + let (!g0, !g1) = split g + s0 <- newSTRef g0 + s1 <- newSTRef g1 + return $ s0 `seq` s1 `seq` (s0, s1) #endif diff --git a/src/Data/Cuckoo.hs b/src/Data/Cuckoo.hs index 7c05bba..a9cff4e 100644 --- a/src/Data/Cuckoo.hs +++ b/src/Data/Cuckoo.hs @@ -109,6 +109,9 @@ module Data.Cuckoo -- * Debugging Utils , showFilter , itemHashes + +-- * Testing Utils +, splitCuckooFilter ) where import Control.Applicative @@ -332,6 +335,24 @@ newCuckooFilter salt n = do | not (64 <= n) = error "Seriously? Are you kidding me? If you need to represent such a tiny set, you'll have to pick another data structure for that" | otherwise = return () +-- | Creates two new 'CuckooFilter's with identical data as the original, and with the RNG state split. +-- +-- This function is not thread-safe. The original CuckooFilter must not be written concurrently during the split operation. +-- +-- /IMPORTANT/ For testing purposes only - not all random lib choices support RNG state splitting. +-- +splitCuckooFilter + :: PrimMonad m + => CuckooFilter (PrimState m) b f a + -> m (CuckooFilter (PrimState m) b f a, CuckooFilter (PrimState m) b f a) +splitCuckooFilter (CuckooFilter bc s rng d) = do + len <- getSizeofMutableByteArray d + d0 <- cloneMutableByteArray d 0 len + d1 <- cloneMutableByteArray d 0 len + (g0, g1) <- genSplit rng + return $! (CuckooFilter bc s g0 d0, CuckooFilter bc s g1 d1) + + -- -------------------------------------------------------------------------- -- -- Insert @@ -356,6 +377,11 @@ newCuckooFilter salt n = do -- function. If this function is used in the presence of asynchronous exceptions -- it should be apprioriately masked. -- +-- Note: if the potentially inserted elements have many repetitions, and you +-- don't intend to delete items, rather just use the filter as a set, then it is +-- advised to perform a 'member' check before insert. Otherwise repeated items +-- will take up slots and cause high load or even premature insertion failure. +-- -- >>> f <- newCuckooFilter @4 @10 @Int 0 1000 -- >>> insert f 0 -- True @@ -401,6 +427,7 @@ insert f a = do setFingerprint f b i k return k' {-# INLINE swapFingerprint #-} +{-# INLINABLE insert #-} -- -------------------------------------------------------------------------- -- -- Member Test @@ -492,6 +519,7 @@ delete f a = do Nothing -> checkBucket f b2 fp >>= \case Just i -> True <$ setFingerprint f b2 i null Nothing -> return False +{-# INLINABLE delete #-} -- -------------------------------------------------------------------------- -- -- Internal