Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
*.swp
cabal.project.local*
dist-newstyle
83 changes: 83 additions & 0 deletions bench/Cuckoo.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}

{-# OPTIONS_GHC -fno-warn-orphans #-}

-- |
-- Module: Main
-- Copyright: Copyright © 2019-2021 Lars Kuhtz <lakuhtz@gmail.com>
-- License: BSD3
-- Maintainer: Lars Kuhtz <lakuhtz@gmail.com>
-- Stability: experimental
--
module Main
( main
) where

import Control.Monad (filterM)
import Control.Monad.Primitive (PrimState)
import Control.Monad.ST (runST)
import Criterion
import Criterion.Main
import Data.Foldable (traverse_)

import Data.Cuckoo

-- -------------------------------------------------------------------------- --
-- Main

main :: IO ()
main = do
cf10k <- do
cf <- newCuckooFilter @4 @10 @Int 0 12000
traverse_ (insert cf) [1..10000]
pure $! cf

defaultMain
[ bgroup "insert"
[ insertBench 10000 12000 [1..10000]
]
, bgroup "member"
[ memberBench "1:1 hit:hiss" 10000 cf10k (interleave [5000..10000] [10001..15000])
, memberBench "only hit" 10000 cf10k [1..10000]
]
#ifdef RANDOM_STANDARD
, bgroup "delete"
[ deleteBench "all" 10000 cf10k [1..10000]
]
#endif
]

-- -------------------------------------------------------------------------- --

instance CuckooFilterHash Int

insertBench :: Int -> Int -> [Int] -> Benchmark
insertBench n cap xs0 = bench ("n-" <> show n <> "-cap-" <> show cap) $ whnf f xs0
where
f xs = runST $ do
cf <- newCuckooFilter @4 @10 @Int 0 (fromIntegral cap)
failed <- filterM (fmap not . insert cf) xs
pure $! length failed

memberBench :: String -> Int -> CuckooFilter (PrimState IO) 4 10 Int -> [Int] -> Benchmark
memberBench label n cf xs = bench (label <> " - " <> show n) $ whnfIO f
where
f = fmap length (filterM (member cf) xs)

#ifdef RANDOM_STANDARD
deleteBench :: String -> Int -> CuckooFilter (PrimState IO) 4 10 Int -> [Int] -> Benchmark
deleteBench label n cf0 xs = bench (label <> " - " <> show n) $ whnfIO f
where
f = do
(cf, _) <- splitCuckooFilter cf0
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the purpose of splitting the filter here before doing the deletes?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea was to pre-create the filter once, and then the benchmark would only time the deletions (not the filter creation). The split was just a way to prevent the original filter from being mutated across the different bench iterations.

But maybe there are other ways to get this, without split - I learned there is perRunEnv in criterion that lets one create the input separately from the measurement. That would let one get rid of the split functionality.

I'm open to whichever way is preferred, or alternatives.

fmap length (filterM (delete cf) xs)
#endif

-- | Doesn't cause actual difference in bench.
interleave :: [a] -> [a] -> [a]
interleave (a:as) (b:bs) = a:b:interleave as bs
interleave as bs = as <> bs

20 changes: 20 additions & 0 deletions cuckoo.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ library random-internal
, vector >=0.12
cpp-options: -DRANDOM_MWC
else
cpp-options: -DRANDOM_STANDARD
build-depends:
, random >=1.1

Expand Down Expand Up @@ -166,3 +167,22 @@ benchmark internal-benchmarks
, base >=4.10 && <5
, criterion >= 1.5

benchmark cuckoo-benchmarks
type: exitcode-stdio-1.0
hs-source-dirs: bench
main-is: Cuckoo.hs
default-language: Haskell2010
ghc-options:
-Wall
-rtsopts
-threaded
-with-rtsopts=-N
build-depends:
-- internal
, cuckoo

-- external
, base >=4.10 && <5
, criterion >= 1.5
, primitive >=0.6.4.0

25 changes: 25 additions & 0 deletions lib/random/System/Random/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ module System.Random.Internal
, initialize
, uniform
, uniformR
, genSplit
) where

-- -------------------------------------------------------------------------- --
Expand All @@ -36,6 +37,12 @@ initialize
-> m (Gen (PrimState m))
initialize salt = PCG.initialize 0 (fromIntegral salt)

genSplit
:: PrimMonad m
=> Gen (PrimState m)
-> m (Gen (PrimState m), Gen (PrimState m))
genSplit = error "genSplit: not available in RANDOM_PCG"

-- -------------------------------------------------------------------------- --
-- MWC
#elif defined RANDOM_MWC
Expand All @@ -51,6 +58,12 @@ initialize
-> m (Gen (PrimState m))
initialize salt = MWC.initialize (singleton $ fromIntegral salt)

genSplit
:: PrimMonad m
=> Gen (PrimState m)
-> m (Gen (PrimState m), Gen (PrimState m))
genSplit = error "genSplit: not available in RANDOM_MWC: see https://github.com/haskell/mwc-random/issues/39"

-- -------------------------------------------------------------------------- --
-- Random
#else
Expand Down Expand Up @@ -95,5 +108,17 @@ uniform gen = stToPrim $ do
writeSTRef gen g
return r

-- | Odd name to avoid collision with splitGen.
-- Using legacy 'split' name for version compatibility.
genSplit
:: PrimMonad m
=> Gen (PrimState m)
-> m (Gen (PrimState m), Gen (PrimState m))
genSplit gen = stToPrim $ do
g <- readSTRef gen
let (!g0, !g1) = split g
s0 <- newSTRef g0
s1 <- newSTRef g1
return $ s0 `seq` s1 `seq` (s0, s1)
#endif

28 changes: 28 additions & 0 deletions src/Data/Cuckoo.hs
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@ module Data.Cuckoo
-- * Debugging Utils
, showFilter
, itemHashes

-- * Testing Utils
, splitCuckooFilter
) where

import Control.Applicative
Expand Down Expand Up @@ -332,6 +335,24 @@ newCuckooFilter salt n = do
| not (64 <= n) = error "Seriously? Are you kidding me? If you need to represent such a tiny set, you'll have to pick another data structure for that"
| otherwise = return ()

-- | Creates two new 'CuckooFilter's with identical data as the original, and with the RNG state split.
--
-- This function is not thread-safe. The original CuckooFilter must not be written concurrently during the split operation.
--
-- /IMPORTANT/ For testing purposes only - not all random lib choices support RNG state splitting.
--
splitCuckooFilter
:: PrimMonad m
=> CuckooFilter (PrimState m) b f a
-> m (CuckooFilter (PrimState m) b f a, CuckooFilter (PrimState m) b f a)
splitCuckooFilter (CuckooFilter bc s rng d) = do
len <- getSizeofMutableByteArray d
d0 <- cloneMutableByteArray d 0 len
d1 <- cloneMutableByteArray d 0 len
(g0, g1) <- genSplit rng
return $! (CuckooFilter bc s g0 d0, CuckooFilter bc s g1 d1)


-- -------------------------------------------------------------------------- --
-- Insert

Expand All @@ -356,6 +377,11 @@ newCuckooFilter salt n = do
-- function. If this function is used in the presence of asynchronous exceptions
-- it should be apprioriately masked.
--
-- Note: if the potentially inserted elements have many repetitions, and you
-- don't intend to delete items, rather just use the filter as a set, then it is
-- advised to perform a 'member' check before insert. Otherwise repeated items
-- will take up slots and cause high load or even premature insertion failure.
--
Comment on lines +380 to +384
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

-- >>> f <- newCuckooFilter @4 @10 @Int 0 1000
-- >>> insert f 0
-- True
Expand Down Expand Up @@ -401,6 +427,7 @@ insert f a = do
setFingerprint f b i k
return k'
{-# INLINE swapFingerprint #-}
{-# INLINABLE insert #-}

-- -------------------------------------------------------------------------- --
-- Member Test
Expand Down Expand Up @@ -492,6 +519,7 @@ delete f a = do
Nothing -> checkBucket f b2 fp >>= \case
Just i -> True <$ setFingerprint f b2 i null
Nothing -> return False
{-# INLINABLE delete #-}

-- -------------------------------------------------------------------------- --
-- Internal
Expand Down