Skip to content

Commit 06761b1

Browse files
authored
Improve folds over bits for IntSet (#1079)
* Make foldr and foldl short-circuit instead of lazily accumulating thunks. * Switch to a non-empty style to avoid unnecessary comparisons. This also helps GHC with arity analysis (somehow), which greatly improves performance of CPS-style foldr and foldl. * Change the bitwise operations used from bitmask = bm .&. -bm; bi = ctz bitmask; bm' = bm `xor` bitmask to bi = ctz bm; bm' = bm .&. (bm-1) which is slightly faster.
1 parent 2538b79 commit 06761b1

File tree

4 files changed

+61
-51
lines changed

4 files changed

+61
-51
lines changed

containers-tests/benchmarks/Utils/Fold.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ foldBenchmarks foldr foldl foldr' foldl' foldMap xs =
3737
, bench "foldr_traverseSum" $ whnf foldr_traverseSum xs
3838

3939
-- foldl
40-
, bench "foldl_skip" $ whnf foldl_elem xs
40+
, bench "foldl_elem" $ whnf foldl_elem xs
4141
, bench "foldl_cpsSum" $ whnf foldl_cpsSum xs
4242
, bench "foldl_cpsOneShotSum" $ whnf foldl_cpsOneShotSum xs
4343
, bench "foldl_traverseSum" $ whnf foldl_traverseSum xs

containers-tests/tests/intset-properties.hs

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,10 @@ main = defaultMain $ testGroup "intset-properties"
6464
, testProperty "prop_findMin" prop_findMin
6565
, testProperty "prop_ord" prop_ord
6666
, testProperty "prop_readShow" prop_readShow
67-
, testProperty "prop_foldR" prop_foldR
68-
, testProperty "prop_foldR'" prop_foldR'
69-
, testProperty "prop_foldL" prop_foldL
70-
, testProperty "prop_foldL'" prop_foldL'
67+
, testProperty "prop_foldr" prop_foldr
68+
, testProperty "prop_foldr'" prop_foldr'
69+
, testProperty "prop_foldl" prop_foldl
70+
, testProperty "prop_foldl'" prop_foldl'
7171
, testProperty "prop_foldMap" prop_foldMap
7272
, testProperty "prop_map" prop_map
7373
, testProperty "prop_mapMonotonicId" prop_mapMonotonicId
@@ -370,17 +370,17 @@ prop_ord s1 s2 = s1 `compare` s2 == toList s1 `compare` toList s2
370370
prop_readShow :: IntSet -> Bool
371371
prop_readShow s = s == read (show s)
372372

373-
prop_foldR :: IntSet -> Bool
374-
prop_foldR s = foldr (:) [] s == toList s
373+
prop_foldr :: IntSet -> Property
374+
prop_foldr s = foldr (:) [] s === toList s
375375

376-
prop_foldR' :: IntSet -> Bool
377-
prop_foldR' s = foldr' (:) [] s == toList s
376+
prop_foldr' :: IntSet -> Property
377+
prop_foldr' s = foldr' (:) [] s === toList s
378378

379-
prop_foldL :: IntSet -> Bool
380-
prop_foldL s = foldl (flip (:)) [] s == List.foldl (flip (:)) [] (toList s)
379+
prop_foldl :: IntSet -> Property
380+
prop_foldl s = foldl (flip (:)) [] s === toDescList s
381381

382-
prop_foldL' :: IntSet -> Bool
383-
prop_foldL' s = foldl' (flip (:)) [] s == List.foldl' (flip (:)) [] (toList s)
382+
prop_foldl' :: IntSet -> Property
383+
prop_foldl' s = foldl' (flip (:)) [] s === toDescList s
384384

385385
prop_foldMap :: IntSet -> Property
386386
prop_foldMap s = foldMap (:[]) s === toList s

containers/changelog.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@
5858
* For `Data.Graph.SCC`, `Foldable.toList` and `Foldable1.toNonEmpty` now
5959
do not perform a copy. (Soumik Sarkar)
6060

61+
* Improved performance for `Data.Intset`'s `foldr`, `foldl'`, `foldl`, `foldr'`.
62+
6163
## Unreleased with `@since` annotation for 0.7.1:
6264

6365
### Additions

containers/src/Data/IntSet/Internal.hs

Lines changed: 46 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1703,10 +1703,6 @@ takeWhileAntitoneBits :: Int -> (Int -> Bool) -> Word -> Word
17031703

17041704
#if defined(__GLASGOW_HASKELL__)
17051705

1706-
lowestBitMask :: Word -> Word
1707-
lowestBitMask x = x .&. negate x
1708-
{-# INLINE lowestBitMask #-}
1709-
17101706
lowestBitSet x = countTrailingZeros x
17111707

17121708
highestBitSet x = WORD_SIZE_IN_BITS - 1 - countLeadingZeros x
@@ -1728,45 +1724,57 @@ revWord x1 = case ((x1 `shiftRL` 1) .&. 0x5555555555555555) .|. ((x1 .&. 0x55555
17281724
x6 -> ( x6 `shiftRL` 32 ) .|. ( x6 `shiftLL` 32);
17291725
#endif
17301726

1731-
foldlBits prefix f z bitmap = go bitmap z
1732-
where go 0 acc = acc
1733-
go bm acc = go (bm `xor` bitmask) ((f acc) $! (prefix+bi))
1734-
where
1735-
!bitmask = lowestBitMask bm
1736-
!bi = countTrailingZeros bitmask
1737-
1738-
foldl'Bits prefix f z bitmap = go bitmap z
1739-
where go 0 acc = acc
1740-
go bm !acc = go (bm `xor` bitmask) ((f acc) $! (prefix+bi))
1741-
where !bitmask = lowestBitMask bm
1742-
!bi = countTrailingZeros bitmask
1743-
1744-
foldrBits prefix f z bitmap = go (revWord bitmap) z
1745-
where go 0 acc = acc
1746-
go bm acc = go (bm `xor` bitmask) ((f $! (prefix+(WORD_SIZE_IN_BITS-1)-bi)) acc)
1747-
where !bitmask = lowestBitMask bm
1748-
!bi = countTrailingZeros bitmask
1749-
1750-
1751-
foldr'Bits prefix f z bitmap = go (revWord bitmap) z
1752-
where go 0 acc = acc
1753-
go bm !acc = go (bm `xor` bitmask) ((f $! (prefix+(WORD_SIZE_IN_BITS-1)-bi)) acc)
1754-
where !bitmask = lowestBitMask bm
1755-
!bi = countTrailingZeros bitmask
1756-
1757-
foldMapBits prefix f bitmap = go (prefix + bi0) (bitmap `xor` bitmask0)
1727+
foldlBits prefix f z0 bitmap = go z0 $! revWord bitmap
1728+
where
1729+
-- Note: We pass the z as a static argument because it helps GHC with demand
1730+
-- analysis. See GHC #25578 for details.
1731+
go z !bm = f (if bm' == 0 then z else go z bm') x
1732+
where
1733+
bi = WORD_SIZE_IN_BITS - 1 - countTrailingZeros bm
1734+
!x = prefix .|. bi
1735+
bm' = bm .&. (bm-1)
1736+
1737+
foldl'Bits prefix f z0 bitmap = go z0 bitmap
1738+
where
1739+
go !z !bm = if bm' == 0 then z' else go z' bm'
1740+
where
1741+
bi = countTrailingZeros bm
1742+
!x = prefix .|. bi
1743+
!z' = f z x
1744+
bm' = bm .&. (bm-1)
1745+
1746+
foldrBits prefix f z0 bitmap = go bitmap z0
1747+
where
1748+
-- Note: We pass the z as a static argument because it helps GHC with demand
1749+
-- analysis. See GHC #25578 for details.
1750+
go !bm z = f x (if bm' == 0 then z else go bm' z)
1751+
where
1752+
bi = countTrailingZeros bm
1753+
!x = prefix .|. bi
1754+
bm' = bm .&. (bm-1)
1755+
1756+
foldr'Bits prefix f z0 bitmap = (go $! revWord bitmap) z0
1757+
where
1758+
go !bm !z = if bm' == 0 then z' else go bm' z'
1759+
where
1760+
bi = WORD_SIZE_IN_BITS - 1 - countTrailingZeros bm
1761+
!x = prefix .|. bi
1762+
!z' = f x z
1763+
bm' = bm .&. (bm-1)
1764+
1765+
foldMapBits prefix f bitmap = go bitmap
17581766
where
1759-
bitmask0 = lowestBitMask bitmap
1760-
bi0 = countTrailingZeros bitmask0
1761-
go !x 0 = f x
1767+
go !bm = if bm' == 0
1768+
then f x
17621769
#if MIN_VERSION_base(4,11,0)
1763-
go !x bm = f x <> go (prefix + bi) (bm `xor` bitmask)
1770+
else f x <> go bm'
17641771
#else
1765-
go !x bm = f x `mappend` go (prefix + bi) (bm `xor` bitmask)
1772+
else f x `mappend` go bm'
17661773
#endif
17671774
where
1768-
bitmask = lowestBitMask bm
1769-
bi = countTrailingZeros bitmask
1775+
bi = countTrailingZeros bm
1776+
!x = prefix .|. bi
1777+
bm' = bm .&. (bm-1)
17701778

17711779
takeWhileAntitoneBits prefix predicate bitmap =
17721780
-- Binary search for the first index where the predicate returns false, but skip a predicate

0 commit comments

Comments
 (0)