Skip to content

Commit 17d354f

Browse files
authored
Make Int{Map,Set} folds friendlier to optimizations (#1149)
Move the Nil branch to the top-level and error on Nil in go. The Note [IntMap folds] added in Data.IntMap.Internal explains the details. For the "maximum" benchmarks added here, the time improves by ~40% for IntMap and ~30% for sparse IntSets. "traverseSum" and "cpsSum" benchmarks also improve by 30-60% for IntMaps and sparse IntSets. Dense IntSets are barely affected.
1 parent 0855d51 commit 17d354f

File tree

3 files changed

+169
-45
lines changed

3 files changed

+169
-45
lines changed

containers-tests/benchmarks/Utils/Fold.hs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,11 @@ foldBenchmarks foldr foldl foldr' foldl' foldMap xs =
4444

4545
-- foldr'
4646
, bench "foldr'_sum" $ whnf (foldr' (+) 0) xs
47+
, bench "foldr'_maximum" $ whnf foldr'_maximum xs
4748

4849
-- foldl'
4950
, bench "foldl'_sum" $ whnf (foldl' (+) 0) xs
51+
, bench "foldl'_maximum" $ whnf foldl'_maximum xs
5052

5153
-- foldMap
5254
, bench "foldMap_elem" $ whnf foldMap_elem xs
@@ -81,6 +83,12 @@ foldBenchmarks foldr foldl foldr' foldl' foldMap xs =
8183
foldl_traverseSum xs =
8284
execState (foldl (\z x -> modify' (+x) *> z) (pure ()) xs) 0
8385

86+
foldr'_maximum :: f -> Maybe Int
87+
foldr'_maximum = foldr' (\x z -> Just $! maybe x (max x) z) Nothing
88+
89+
foldl'_maximum :: f -> Maybe Int
90+
foldl'_maximum = foldl' (\z x -> Just $! maybe x (max x) z) Nothing
91+
8492
foldMap_elem :: f -> Any
8593
foldMap_elem = foldMap (\x -> Any (x == minBound))
8694

@@ -138,9 +146,12 @@ instance Applicative f => Monoid (Effect f) where
138146
-- Folding with an effect. In practice:
139147
-- * Folds defined using foldr, such as Data.Foldable.traverse_ and friends
140148
--
141-
-- foldl', foldr'
149+
-- foldl'_sum, foldr'_sum
142150
-- Strict folds.
143151
--
152+
-- foldl'_maximum, foldr'_maximum
153+
-- Strict folds with a `Maybe` as accumulator which could be optimized away.
154+
--
144155
-- foldMap_elem
145156
-- Simple lazy fold that visits every element. In practice:
146157
-- * Worst case for lazy folds defined using foldMap, such as

containers/src/Data/IntMap/Internal.hs

Lines changed: 117 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ import Data.Bits
326326
import qualified Data.Foldable as Foldable
327327
import Data.Maybe (fromMaybe)
328328
import Utils.Containers.Internal.Prelude hiding
329-
(lookup, map, filter, foldr, foldl, foldl', null)
329+
(lookup, map, filter, foldr, foldl, foldl', foldMap, null)
330330
import Prelude ()
331331

332332
import Data.IntSet.Internal (IntSet)
@@ -470,23 +470,13 @@ instance Semigroup (IntMap a) where
470470

471471
-- | Folds in order of increasing key.
472472
instance Foldable.Foldable IntMap where
473-
fold = go
474-
where go Nil = mempty
475-
go (Tip _ v) = v
476-
go (Bin p l r)
477-
| signBranch p = go r `mappend` go l
478-
| otherwise = go l `mappend` go r
473+
fold = foldMap id
479474
{-# INLINABLE fold #-}
480475
foldr = foldr
481476
{-# INLINE foldr #-}
482477
foldl = foldl
483478
{-# INLINE foldl #-}
484-
foldMap f t = go t
485-
where go Nil = mempty
486-
go (Tip _ v) = f v
487-
go (Bin p l r)
488-
| signBranch p = go r `mappend` go l
489-
| otherwise = go l `mappend` go r
479+
foldMap = foldMap
490480
{-# INLINE foldMap #-}
491481
foldl' = foldl'
492482
{-# INLINE foldl' #-}
@@ -3033,31 +3023,37 @@ splitLookup k t =
30333023
--
30343024
-- > let f a len = len + (length a)
30353025
-- > foldr f 0 (fromList [(5,"a"), (3,"bbb")]) == 4
3026+
3027+
-- See Note [IntMap folds]
30363028
foldr :: (a -> b -> b) -> b -> IntMap a -> b
30373029
foldr f z = \t -> -- Use lambda t to be inlinable with two arguments only.
30383030
case t of
3031+
Nil -> z
30393032
Bin p l r
30403033
| signBranch p -> go (go z l) r -- put negative numbers before
30413034
| otherwise -> go (go z r) l
30423035
_ -> go z t
30433036
where
3044-
go z' Nil = z'
3037+
go _ Nil = error "foldr.go: Nil"
30453038
go z' (Tip _ x) = f x z'
30463039
go z' (Bin _ l r) = go (go z' r) l
30473040
{-# INLINE foldr #-}
30483041

30493042
-- | \(O(n)\). A strict version of 'foldr'. Each application of the operator is
30503043
-- evaluated before using the result in the next application. This
30513044
-- function is strict in the starting value.
3045+
3046+
-- See Note [IntMap folds]
30523047
foldr' :: (a -> b -> b) -> b -> IntMap a -> b
30533048
foldr' f z = \t -> -- Use lambda t to be inlinable with two arguments only.
30543049
case t of
3050+
Nil -> z
30553051
Bin p l r
30563052
| signBranch p -> go (go z l) r -- put negative numbers before
30573053
| otherwise -> go (go z r) l
30583054
_ -> go z t
30593055
where
3060-
go !z' Nil = z'
3056+
go !_ Nil = error "foldr'.go: Nil"
30613057
go z' (Tip _ x) = f x z'
30623058
go z' (Bin _ l r) = go (go z' r) l
30633059
{-# INLINE foldr' #-}
@@ -3071,35 +3067,65 @@ foldr' f z = \t -> -- Use lambda t to be inlinable with two arguments only.
30713067
--
30723068
-- > let f len a = len + (length a)
30733069
-- > foldl f 0 (fromList [(5,"a"), (3,"bbb")]) == 4
3070+
3071+
-- See Note [IntMap folds]
30743072
foldl :: (a -> b -> a) -> a -> IntMap b -> a
30753073
foldl f z = \t -> -- Use lambda t to be inlinable with two arguments only.
30763074
case t of
3075+
Nil -> z
30773076
Bin p l r
30783077
| signBranch p -> go (go z r) l -- put negative numbers before
30793078
| otherwise -> go (go z l) r
30803079
_ -> go z t
30813080
where
3082-
go z' Nil = z'
3081+
go _ Nil = error "foldl.go: Nil"
30833082
go z' (Tip _ x) = f z' x
30843083
go z' (Bin _ l r) = go (go z' l) r
30853084
{-# INLINE foldl #-}
30863085

30873086
-- | \(O(n)\). A strict version of 'foldl'. Each application of the operator is
30883087
-- evaluated before using the result in the next application. This
30893088
-- function is strict in the starting value.
3089+
3090+
-- See Note [IntMap folds]
30903091
foldl' :: (a -> b -> a) -> a -> IntMap b -> a
30913092
foldl' f z = \t -> -- Use lambda t to be inlinable with two arguments only.
30923093
case t of
3094+
Nil -> z
30933095
Bin p l r
30943096
| signBranch p -> go (go z r) l -- put negative numbers before
30953097
| otherwise -> go (go z l) r
30963098
_ -> go z t
30973099
where
3098-
go !z' Nil = z'
3100+
go !_ Nil = error "foldl'.go: Nil"
30993101
go z' (Tip _ x) = f z' x
31003102
go z' (Bin _ l r) = go (go z' l) r
31013103
{-# INLINE foldl' #-}
31023104

3105+
-- See Note [IntMap folds]
3106+
foldMap :: Monoid m => (a -> m) -> IntMap a -> m
3107+
foldMap f = \t -> -- Use lambda to be inlinable with two arguments.
3108+
case t of
3109+
Nil -> mempty
3110+
Bin p l r
3111+
#if MIN_VERSION_base(4,11,0)
3112+
| signBranch p -> go r <> go l
3113+
| otherwise -> go l <> go r
3114+
#else
3115+
| signBranch p -> go r `mappend` go l
3116+
| otherwise -> go l `mappend` go r
3117+
#endif
3118+
_ -> go t
3119+
where
3120+
go Nil = error "foldMap.go: Nil"
3121+
go (Tip _ x) = f x
3122+
#if MIN_VERSION_base(4,11,0)
3123+
go (Bin _ l r) = go l <> go r
3124+
#else
3125+
go (Bin _ l r) = go l `mappend` go r
3126+
#endif
3127+
{-# INLINE foldMap #-}
3128+
31033129
-- | \(O(n)\). Fold the keys and values in the map using the given right-associative
31043130
-- binary operator, such that
31053131
-- @'foldrWithKey' f z == 'Prelude.foldr' ('uncurry' f) z . 'toAscList'@.
@@ -3110,31 +3136,37 @@ foldl' f z = \t -> -- Use lambda t to be inlinable with two arguments only.
31103136
--
31113137
-- > let f k a result = result ++ "(" ++ (show k) ++ ":" ++ a ++ ")"
31123138
-- > foldrWithKey f "Map: " (fromList [(5,"a"), (3,"b")]) == "Map: (5:a)(3:b)"
3139+
3140+
-- See Note [IntMap folds]
31133141
foldrWithKey :: (Key -> a -> b -> b) -> b -> IntMap a -> b
31143142
foldrWithKey f z = \t -> -- Use lambda t to be inlinable with two arguments only.
31153143
case t of
3144+
Nil -> z
31163145
Bin p l r
31173146
| signBranch p -> go (go z l) r -- put negative numbers before
31183147
| otherwise -> go (go z r) l
31193148
_ -> go z t
31203149
where
3121-
go z' Nil = z'
3150+
go _ Nil = error "foldrWithKey.go: Nil"
31223151
go z' (Tip kx x) = f kx x z'
31233152
go z' (Bin _ l r) = go (go z' r) l
31243153
{-# INLINE foldrWithKey #-}
31253154

31263155
-- | \(O(n)\). A strict version of 'foldrWithKey'. Each application of the operator is
31273156
-- evaluated before using the result in the next application. This
31283157
-- function is strict in the starting value.
3158+
3159+
-- See Note [IntMap folds]
31293160
foldrWithKey' :: (Key -> a -> b -> b) -> b -> IntMap a -> b
31303161
foldrWithKey' f z = \t -> -- Use lambda t to be inlinable with two arguments only.
31313162
case t of
3163+
Nil -> z
31323164
Bin p l r
31333165
| signBranch p -> go (go z l) r -- put negative numbers before
31343166
| otherwise -> go (go z r) l
31353167
_ -> go z t
31363168
where
3137-
go !z' Nil = z'
3169+
go !_ Nil = error "foldrWithKey'.go: Nil"
31383170
go z' (Tip kx x) = f kx x z'
31393171
go z' (Bin _ l r) = go (go z' r) l
31403172
{-# INLINE foldrWithKey' #-}
@@ -3149,31 +3181,37 @@ foldrWithKey' f z = \t -> -- Use lambda t to be inlinable with two argument
31493181
--
31503182
-- > let f result k a = result ++ "(" ++ (show k) ++ ":" ++ a ++ ")"
31513183
-- > foldlWithKey f "Map: " (fromList [(5,"a"), (3,"b")]) == "Map: (3:b)(5:a)"
3184+
3185+
-- See Note [IntMap folds]
31523186
foldlWithKey :: (a -> Key -> b -> a) -> a -> IntMap b -> a
31533187
foldlWithKey f z = \t -> -- Use lambda t to be inlinable with two arguments only.
31543188
case t of
3189+
Nil -> z
31553190
Bin p l r
31563191
| signBranch p -> go (go z r) l -- put negative numbers before
31573192
| otherwise -> go (go z l) r
31583193
_ -> go z t
31593194
where
3160-
go z' Nil = z'
3195+
go _ Nil = error "foldlWithKey.go: Nil"
31613196
go z' (Tip kx x) = f z' kx x
31623197
go z' (Bin _ l r) = go (go z' l) r
31633198
{-# INLINE foldlWithKey #-}
31643199

31653200
-- | \(O(n)\). A strict version of 'foldlWithKey'. Each application of the operator is
31663201
-- evaluated before using the result in the next application. This
31673202
-- function is strict in the starting value.
3203+
3204+
-- See Note [IntMap folds]
31683205
foldlWithKey' :: (a -> Key -> b -> a) -> a -> IntMap b -> a
31693206
foldlWithKey' f z = \t -> -- Use lambda t to be inlinable with two arguments only.
31703207
case t of
3208+
Nil -> z
31713209
Bin p l r
31723210
| signBranch p -> go (go z r) l -- put negative numbers before
31733211
| otherwise -> go (go z l) r
31743212
_ -> go z t
31753213
where
3176-
go !z' Nil = z'
3214+
go !_ Nil = error "foldlWithKey'.go: Nil"
31773215
go z' (Tip kx x) = f z' kx x
31783216
go z' (Bin _ l r) = go (go z' l) r
31793217
{-# INLINE foldlWithKey' #-}
@@ -3185,14 +3223,29 @@ foldlWithKey' f z = \t -> -- Use lambda t to be inlinable with two argument
31853223
-- This can be an asymptotically faster than 'foldrWithKey' or 'foldlWithKey' for some monoids.
31863224
--
31873225
-- @since 0.5.4
3226+
3227+
-- See Note [IntMap folds]
31883228
foldMapWithKey :: Monoid m => (Key -> a -> m) -> IntMap a -> m
3189-
foldMapWithKey f = go
3229+
foldMapWithKey f = \t -> -- Use lambda to be inlinable with two arguments.
3230+
case t of
3231+
Nil -> mempty
3232+
Bin p l r
3233+
#if MIN_VERSION_base(4,11,0)
3234+
| signBranch p -> go r <> go l
3235+
| otherwise -> go l <> go r
3236+
#else
3237+
| signBranch p -> go r `mappend` go l
3238+
| otherwise -> go l `mappend` go r
3239+
#endif
3240+
_ -> go t
31903241
where
3191-
go Nil = mempty
3192-
go (Tip kx x) = f kx x
3193-
go (Bin p l r)
3194-
| signBranch p = go r `mappend` go l
3195-
| otherwise = go l `mappend` go r
3242+
go Nil = error "foldMap.go: Nil"
3243+
go (Tip kx x) = f kx x
3244+
#if MIN_VERSION_base(4,11,0)
3245+
go (Bin _ l r) = go l <> go r
3246+
#else
3247+
go (Bin _ l r) = go l `mappend` go r
3248+
#endif
31963249
{-# INLINE foldMapWithKey #-}
31973250

31983251
{--------------------------------------------------------------------
@@ -4069,3 +4122,40 @@ withEmpty bars = " ":bars
40694122
--
40704123
-- The implementation is defined as a foldl' over the input list, which makes
40714124
-- it a good consumer in list fusion.
4125+
4126+
-- Note [IntMap folds]
4127+
-- ~~~~~~~~~~~~~~~~~~~
4128+
-- Folds on IntMap are defined in a particular way for a few reasons.
4129+
--
4130+
-- foldl' :: (a -> b -> a) -> a -> IntMap b -> a
4131+
-- foldl' f z = \t ->
4132+
-- case t of
4133+
-- Nil -> z
4134+
-- Bin p l r
4135+
-- | signBranch p -> go (go z r) l
4136+
-- | otherwise -> go (go z l) r
4137+
-- _ -> go z t
4138+
-- where
4139+
-- go !_ Nil = error "foldl'.go: Nil"
4140+
-- go z' (Tip _ x) = f z' x
4141+
-- go z' (Bin _ l r) = go (go z' l) r
4142+
-- {-# INLINE foldl' #-}
4143+
--
4144+
-- 1. We first check if the Bin separates negative and positive keys, and fold
4145+
-- over the children accordingly. This check is not inside `go` because it
4146+
-- can only happen at the top level and we don't need to check every Bin.
4147+
-- 2. We also check for Nil at the top level instead of, say, `go z Nil = z`.
4148+
-- That's because `Nil` is also allowed only at the top-level, but more
4149+
-- importantly it allows for better optimizations if the `Nil` branch errors
4150+
-- in `go`. For example, if we have
4151+
-- maximum :: Ord a => IntMap a -> Maybe a
4152+
-- maximum = foldl' (\m x -> Just $! maybe x (max x) m) Nothing
4153+
-- because `go` certainly returns a `Just` (or errors), CPR analysis will
4154+
-- optimize it to return `(# a #)` instead of `Maybe a`. This makes it
4155+
-- satisfy the conditions for SpecConstr, which generates two specializations
4156+
-- of `go` for `Nothing` and `Just` inputs. Now both `Maybe`s have been
4157+
-- optimized out of `go`.
4158+
-- 3. The `Tip` is not matched on at the top-level to avoid using `f` more than
4159+
-- once. This allows `f` to be inlined into `go` even if `f` is big, since
4160+
-- it's likely to be the only place `f` is used, and not inlining `f` means
4161+
-- missing out on optimizations. See GHC #25259 for more on this.

0 commit comments

Comments
 (0)