Skip to content

Commit 8562003

Browse files
authored
Optimize IntSet.Bin (#998)
* Optimize IntSet.Bin * Replace the separate Prefix and Mask Int fields in the Bin constructor with a single Int field which contains both merged together. This reduces the memory required by a Bin from 5 to 4 words, at the cost of more computations (which are cheap bitwise ops) being necessary for certains operations. This follows a similar change done for IntMap.Bin. * Benchmarks show that runtimes for most operations remain unchanged or decrease by a small amount (<10%). As expected, allocations are consistently lower by 11-16% for all set operations that have to make O(log n) allocations. * The functions and types used by both IntSet and IntMap have been moved into a IntTreeCommons module. * IntSet validity: Tip cannot be empty * Generate large keys in Arbitrary IntSet * Fix subsetCmp error * union, intersection, difference tests using Arbitrary IntSet * Fix prefixOk not checking all Bins
1 parent c651094 commit 8562003

File tree

13 files changed

+592
-630
lines changed

13 files changed

+592
-630
lines changed

containers-tests/benchmarks/LookupGE/LookupGE_IntMap.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
module LookupGE_IntMap where
33

44
import Prelude hiding (null)
5+
import Data.IntSet.Internal.IntTreeCommons
6+
(Key, Prefix(..), nomatch, signBranch, left)
57
import Data.IntMap.Internal
68

79
lookupGE1 :: Key -> IntMap a -> Maybe (Key,a)

containers-tests/containers-tests.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ library
9696
Data.IntMap.Strict.Internal
9797
Data.IntSet
9898
Data.IntSet.Internal
99+
Data.IntSet.Internal.IntTreeCommons
99100
Data.Map
100101
Data.Map.Internal
101102
Data.Map.Internal.Debug

containers-tests/tests/IntMapValidity.hs

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ module IntMapValidity
66

77
import Data.Bits (finiteBitSize, testBit, xor, (.&.))
88
import Data.List (intercalate, elemIndex)
9+
import Data.IntSet.Internal.IntTreeCommons (Prefix(..), nomatch)
910
import Data.IntMap.Internal
1011
import Numeric (showHex)
1112
import Test.Tasty.QuickCheck (Property, counterexample, property, (.&&.))
@@ -17,7 +18,7 @@ import Test.Tasty.QuickCheck (Property, counterexample, property, (.&&.))
1718
valid :: IntMap a -> Property
1819
valid t =
1920
counterexample "nilNeverChildOfBin" (nilNeverChildOfBin t) .&&.
20-
counterexample "prefixOk" (prefixOk t)
21+
counterexample "prefixesOk" (prefixesOk t)
2122

2223
-- Invariant: Nil is never found as a child of Bin.
2324
nilNeverChildOfBin :: IntMap a -> Bool
@@ -37,26 +38,26 @@ nilNeverChildOfBin t =
3738
-- * All keys in a Bin start with the Bin's shared prefix.
3839
-- * All keys in the Bin's left child have the Prefix's mask bit unset.
3940
-- * All keys in the Bin's right child have the Prefix's mask bit set.
40-
prefixOk :: IntMap a -> Property
41-
prefixOk t =
42-
case t of
43-
Nil -> property ()
44-
Tip _ _ -> property ()
45-
Bin p l r ->
46-
let px = unPrefix p
47-
m = px .&. (-px)
48-
keysl = keys l
49-
keysr = keys r
50-
debugStr = concat
51-
[ "px=" ++ showIntHex px
52-
, ", keysl=[" ++ intercalate "," (fmap showIntHex keysl) ++ "]"
53-
, ", keysr=[" ++ intercalate "," (fmap showIntHex keysr) ++ "]"
54-
]
55-
in counterexample debugStr $
56-
counterexample "mask bit absent" (px /= 0) .&&.
57-
counterexample "prefix not shared" (all (`hasPrefix` p) (keysl ++ keysr)) .&&.
58-
counterexample "left child, mask found set" (all (\x -> x .&. m == 0) keysl) .&&.
59-
counterexample "right child, mask found unset" (all (\x -> x .&. m /= 0) keysr)
41+
prefixesOk :: IntMap a -> Property
42+
prefixesOk t = case t of
43+
Nil -> property ()
44+
Tip _ _ -> property ()
45+
Bin p l r -> currentOk .&&. prefixesOk l .&&. prefixesOk r
46+
where
47+
px = unPrefix p
48+
m = px .&. (-px)
49+
keysl = keys l
50+
keysr = keys r
51+
debugStr = concat
52+
[ "px=" ++ showIntHex px
53+
, ", keysl=[" ++ intercalate "," (fmap showIntHex keysl) ++ "]"
54+
, ", keysr=[" ++ intercalate "," (fmap showIntHex keysr) ++ "]"
55+
]
56+
currentOk = counterexample debugStr $
57+
counterexample "mask bit absent" (px /= 0) .&&.
58+
counterexample "prefix not shared" (all (`hasPrefix` p) (keysl ++ keysr)) .&&.
59+
counterexample "left child, mask found set" (all (\x -> x .&. m == 0) keysl) .&&.
60+
counterexample "right child, mask found unset" (all (\x -> x .&. m /= 0) keysr)
6061

6162
hasPrefix :: Int -> Prefix -> Bool
6263
hasPrefix i p = not (nomatch i p)

containers-tests/tests/IntSetValidity.hs

Lines changed: 38 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22
module IntSetValidity (valid) where
33

44
import Data.Bits (xor, (.&.))
5+
import Data.IntSet.Internal.IntTreeCommons (Prefix(..), nomatch)
56
import Data.IntSet.Internal
7+
import Data.List (intercalate)
8+
import Numeric (showHex)
69
import Test.Tasty.QuickCheck (Property, counterexample, property, (.&&.))
710
import Utils.Containers.Internal.BitUtil (bitcount)
811

@@ -13,9 +16,7 @@ import Utils.Containers.Internal.BitUtil (bitcount)
1316
valid :: IntSet -> Property
1417
valid t =
1518
counterexample "nilNeverChildOfBin" (nilNeverChildOfBin t) .&&.
16-
counterexample "maskPowerOfTwo" (maskPowerOfTwo t) .&&.
17-
counterexample "commonPrefix" (commonPrefix t) .&&.
18-
counterexample "markRespected" (maskRespected t) .&&.
19+
counterexample "prefixesOk" (prefixesOk t) .&&.
1920
counterexample "tipsValid" (tipsValid t)
2021

2122
-- Invariant: Nil is never found as a child of Bin.
@@ -24,48 +25,41 @@ nilNeverChildOfBin t =
2425
case t of
2526
Nil -> True
2627
Tip _ _ -> True
27-
Bin _ _ l r -> noNilInSet l && noNilInSet r
28+
Bin _ l r -> noNilInSet l && noNilInSet r
2829
where
2930
noNilInSet t' =
3031
case t' of
3132
Nil -> False
3233
Tip _ _ -> True
33-
Bin _ _ l' r' -> noNilInSet l' && noNilInSet r'
34+
Bin _ l' r' -> noNilInSet l' && noNilInSet r'
3435

35-
-- Invariant: The Mask is a power of 2. It is the largest bit position at which
36-
-- two elements of the set differ.
37-
maskPowerOfTwo :: IntSet -> Bool
38-
maskPowerOfTwo t =
39-
case t of
40-
Nil -> True
41-
Tip _ _ -> True
42-
Bin _ m l r ->
43-
bitcount 0 (fromIntegral m) == 1 && maskPowerOfTwo l && maskPowerOfTwo r
44-
45-
-- Invariant: Prefix is the common high-order bits that all elements share to
46-
-- the left of the Mask bit.
47-
commonPrefix :: IntSet -> Bool
48-
commonPrefix t =
49-
case t of
50-
Nil -> True
51-
Tip _ _ -> True
52-
b@(Bin p _ l r) -> all (sharedPrefix p) (elems b) && commonPrefix l && commonPrefix r
53-
where
54-
sharedPrefix :: Prefix -> Int -> Bool
55-
sharedPrefix p a = p == p .&. a
36+
-- Invariants:
37+
-- * All keys in a Bin start with the Bin's shared prefix.
38+
-- * All keys in the Bin's left child have the Prefix's mask bit unset.
39+
-- * All keys in the Bin's right child have the Prefix's mask bit set.
40+
prefixesOk :: IntSet -> Property
41+
prefixesOk t = case t of
42+
Nil -> property ()
43+
Tip _ _ -> property ()
44+
Bin p l r -> currentOk .&&. prefixesOk l .&&. prefixesOk r
45+
where
46+
px = unPrefix p
47+
m = px .&. (-px)
48+
keysl = elems l
49+
keysr = elems r
50+
debugStr = concat
51+
[ "px=" ++ showIntHex px
52+
, ", keysl=[" ++ intercalate "," (fmap showIntHex keysl) ++ "]"
53+
, ", keysr=[" ++ intercalate "," (fmap showIntHex keysr) ++ "]"
54+
]
55+
currentOk = counterexample debugStr $
56+
counterexample "mask bit absent" (px /= 0) .&&.
57+
counterexample "prefix not shared" (all (`hasPrefix` p) (keysl ++ keysr)) .&&.
58+
counterexample "left child, mask found set" (all (\x -> x .&. m == 0) keysl) .&&.
59+
counterexample "right child, mask found unset" (all (\x -> x .&. m /= 0) keysr)
5660

57-
-- Invariant: In Bin prefix mask left right, left consists of the elements that
58-
-- don't have the mask bit set; right is all the elements that do.
59-
maskRespected :: IntSet -> Bool
60-
maskRespected t =
61-
case t of
62-
Nil -> True
63-
Tip _ _ -> True
64-
Bin _ binMask l r ->
65-
all (\x -> zero x binMask) (elems l) &&
66-
all (\x -> not (zero x binMask)) (elems r) &&
67-
maskRespected l &&
68-
maskRespected r
61+
hasPrefix :: Int -> Prefix -> Bool
62+
hasPrefix i p = not (nomatch i p)
6963

7064
-- Invariant: The Prefix is zero for the last 5 (on 32 bit arches) or 6 bits
7165
-- (on 64 bit arches). The values of the set represented by a tip
@@ -76,14 +70,17 @@ tipsValid :: IntSet -> Bool
7670
tipsValid t =
7771
case t of
7872
Nil -> True
79-
tip@(Tip p b) -> validTipPrefix p
80-
Bin _ _ l r -> tipsValid l && tipsValid r
73+
tip@(Tip p b) -> validTipPrefix p && b /= 0
74+
Bin _ l r -> tipsValid l && tipsValid r
8175

82-
validTipPrefix :: Prefix -> Bool
76+
validTipPrefix :: Int -> Bool
8377
#if WORD_SIZE_IN_BITS==32
8478
-- Last 5 bits of the prefix must be zero for 32 bit arches.
8579
validTipPrefix p = (0x0000001F .&. p) == 0
8680
#else
8781
-- Last 6 bits of the prefix must be zero for 64 bit arches.
8882
validTipPrefix p = (0x000000000000003F .&. p) == 0
8983
#endif
84+
85+
showIntHex :: Int -> String
86+
showIntHex x = "0x" ++ showHex (fromIntegral x :: Word) ""

containers-tests/tests/intmap-properties.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import Data.IntMap.Internal (traverseMaybeWithKey)
1010
import Data.IntMap.Merge.Lazy
1111
#endif
1212
import Data.IntMap.Internal.Debug (showTree)
13-
import Data.IntMap.Internal (Prefix(..))
13+
import Data.IntSet.Internal.IntTreeCommons (Prefix(..), nomatch)
1414
import IntMapValidity (hasPrefix, hasPrefixSimple, valid)
1515

1616
import Control.Applicative (Applicative(..))

containers-tests/tests/intset-properties.hs

Lines changed: 20 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -38,17 +38,15 @@ main = defaultMain $ testGroup "intset-properties"
3838
, testProperty "prop_UnionInsert" prop_UnionInsert
3939
, testProperty "prop_UnionAssoc" prop_UnionAssoc
4040
, testProperty "prop_UnionComm" prop_UnionComm
41-
, testProperty "prop_Diff" prop_Diff
42-
, testProperty "prop_Int" prop_Int
41+
, testProperty "prop_union" prop_union
42+
, testProperty "prop_difference" prop_difference
43+
, testProperty "prop_intersection" prop_intersection
4344
, testProperty "prop_Ordered" prop_Ordered
4445
, testProperty "prop_List" prop_List
4546
, testProperty "prop_DescList" prop_DescList
4647
, testProperty "prop_AscDescList" prop_AscDescList
4748
, testProperty "prop_fromList" prop_fromList
4849
, testProperty "prop_fromRange" prop_fromRange
49-
, testProperty "prop_MaskPow2" prop_MaskPow2
50-
, testProperty "prop_Prefix" prop_Prefix
51-
, testProperty "prop_LeftRight" prop_LeftRight
5250
, testProperty "prop_isProperSubsetOf" prop_isProperSubsetOf
5351
, testProperty "prop_isProperSubsetOf2" prop_isProperSubsetOf2
5452
, testProperty "prop_isSubsetOf" prop_isSubsetOf
@@ -113,9 +111,8 @@ test_split = do
113111
Arbitrary, reasonably balanced trees
114112
--------------------------------------------------------------------}
115113
instance Arbitrary IntSet where
116-
arbitrary = do{ xs <- arbitrary
117-
; return (fromList xs)
118-
}
114+
arbitrary = fromList <$> oneof [arbitrary, fmap (fmap getLarge) arbitrary]
115+
shrink = fmap fromList . shrink . toAscList
119116

120117
{--------------------------------------------------------------------
121118
Valid IntMaps
@@ -232,19 +229,26 @@ prop_UnionComm :: IntSet -> IntSet -> Bool
232229
prop_UnionComm t1 t2
233230
= (union t1 t2 == union t2 t1)
234231

235-
prop_Diff :: [Int] -> [Int] -> Property
236-
prop_Diff xs ys =
237-
case difference (fromList xs) (fromList ys) of
232+
prop_union :: IntSet -> IntSet -> Property
233+
prop_union xs ys =
234+
case union xs ys of
238235
t ->
239236
valid t .&&.
240-
toAscList t === List.sort ((List.\\) (nub xs) (nub ys))
237+
toAscList t === List.nub (List.sort (toAscList xs ++ toAscList ys))
241238

242-
prop_Int :: [Int] -> [Int] -> Property
243-
prop_Int xs ys =
244-
case intersection (fromList xs) (fromList ys) of
239+
prop_difference :: IntSet -> IntSet -> Property
240+
prop_difference xs ys =
241+
case difference xs ys of
245242
t ->
246243
valid t .&&.
247-
toAscList t === List.sort (nub ((List.intersect) (xs) (ys)))
244+
toAscList t === (toAscList xs List.\\ toAscList ys)
245+
246+
prop_intersection :: IntSet -> IntSet -> Property
247+
prop_intersection xs ys =
248+
case intersection xs ys of
249+
t ->
250+
valid t .&&.
251+
toAscList t === (toAscList xs `List.intersect` toAscList ys)
248252

249253
prop_disjoint :: IntSet -> IntSet -> Bool
250254
prop_disjoint a b = a `disjoint` b == null (a `intersection` b)
@@ -284,28 +288,6 @@ prop_fromRange = forAll (scale (*100) arbitrary) go
284288
go (l,h) = valid t .&&. t === fromAscList [l..h]
285289
where t = fromRange (l,h)
286290

287-
{--------------------------------------------------------------------
288-
Bin invariants
289-
--------------------------------------------------------------------}
290-
powersOf2 :: IntSet
291-
powersOf2 = fromList [2^i | i <- [0..63]]
292-
293-
-- Check the invariant that the mask is a power of 2.
294-
prop_MaskPow2 :: IntSet -> Bool
295-
prop_MaskPow2 (Bin _ msk left right) = member msk powersOf2 && prop_MaskPow2 left && prop_MaskPow2 right
296-
prop_MaskPow2 _ = True
297-
298-
-- Check that the prefix satisfies its invariant.
299-
prop_Prefix :: IntSet -> Bool
300-
prop_Prefix s@(Bin prefix msk left right) = all (\elem -> match elem prefix msk) (toList s) && prop_Prefix left && prop_Prefix right
301-
prop_Prefix _ = True
302-
303-
-- Check that the left elements don't have the mask bit set, and the right
304-
-- ones do.
305-
prop_LeftRight :: IntSet -> Bool
306-
prop_LeftRight (Bin _ msk left right) = and [x .&. msk == 0 | x <- toList left] && and [x .&. msk == msk | x <- toList right]
307-
prop_LeftRight _ = True
308-
309291
{--------------------------------------------------------------------
310292
IntSet operations are like Set operations
311293
--------------------------------------------------------------------}

containers/containers.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ Library
5353
Data.IntMap.Merge.Lazy
5454
Data.IntMap.Merge.Strict
5555
Data.IntSet.Internal
56+
Data.IntSet.Internal.IntTreeCommons
5657
Data.IntSet
5758
Data.Map
5859
Data.Map.Lazy

0 commit comments

Comments
 (0)