Skip to content

Commit 48196fb

Browse files
authored
Improve {Set,Map}.fromDistinct{Asc,Desc}List (#950)
* Benchmarks for {Set,Map}.fromDistinct{Asc,Desc}List * Improve {Set,Map}.fromDistinct{Asc,Desc}List A faster and fusion-friendly implemention of the current strategy. On GHC 9.2.5: For Set this takes 56% less time when there is fusion and 30% when not. For Map this takes 55% less time when there is fusion and 16% when not. * Remove a now inaccurate claim in {Set,Map}.fromList * Add a note explaining the implementation * Pull out linkTop and linkAll helpers as INLINABLE And leave further optimization to GHC.
1 parent f6e81f2 commit 48196fb

File tree

7 files changed

+226
-105
lines changed

7 files changed

+226
-105
lines changed

containers-tests/benchmarks/Map.hs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ main = do
2121
m_even = M.fromAscList elems_even :: M.Map Int Int
2222
m_odd = M.fromAscList elems_odd :: M.Map Int Int
2323
evaluate $ rnf [m, m_even, m_odd]
24+
evaluate $ rnf elems_rev
2425
defaultMain
2526
[ bench "lookup absent" $ whnf (lookup evens) m_odd
2627
, bench "lookup present" $ whnf (lookup evens) m_even
@@ -90,13 +91,17 @@ main = do
9091
, bench "fromList-desc" $ whnf M.fromList (reverse elems)
9192
, bench "fromAscList" $ whnf M.fromAscList elems
9293
, bench "fromDistinctAscList" $ whnf M.fromDistinctAscList elems
94+
, bench "fromDistinctAscList:fusion" $ whnf (\n -> M.fromDistinctAscList [(i,i) | i <- [1..n]]) bound
95+
, bench "fromDistinctDescList" $ whnf M.fromDistinctDescList elems_rev
96+
, bench "fromDistinctDescList:fusion" $ whnf (\n -> M.fromDistinctDescList [(i,i) | i <- [n,n-1..1]]) bound
9397
, bench "minView" $ whnf (\m' -> case M.minViewWithKey m' of {Nothing -> 0; Just ((k,v),m'') -> k+v+M.size m''}) (M.fromAscList $ zip [1..10::Int] [100..110::Int])
9498
]
9599
where
96100
bound = 2^12
97101
elems = zip keys values
98102
elems_even = zip evens evens
99103
elems_odd = zip odds odds
104+
elems_rev = reverse elems
100105
keys = [1..bound]
101106
evens = [2,4..bound]
102107
odds = [1,3..bound]

containers-tests/benchmarks/Set.hs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ main = do
1414
s_odd = S.fromAscList elems_odd :: S.Set Int
1515
strings_s = S.fromList strings
1616
evaluate $ rnf [s, s_even, s_odd]
17+
evaluate $ rnf elems_rev
1718
defaultMain
1819
[ bench "member" $ whnf (member elems) s
1920
, bench "insert" $ whnf (ins elems) S.empty
@@ -34,6 +35,9 @@ main = do
3435
, bench "fromList-desc" $ whnf S.fromList (reverse elems)
3536
, bench "fromAscList" $ whnf S.fromAscList elems
3637
, bench "fromDistinctAscList" $ whnf S.fromDistinctAscList elems
38+
, bench "fromDistinctAscList:fusion" $ whnf (\n -> S.fromDistinctAscList [1..n]) bound
39+
, bench "fromDistinctDescList" $ whnf S.fromDistinctDescList elems_rev
40+
, bench "fromDistinctDescList:fusion" $ whnf (\n -> S.fromDistinctDescList [n,n-1..1]) bound
3741
, bench "disjoint:false" $ whnf (S.disjoint s) s_even
3842
, bench "disjoint:true" $ whnf (S.disjoint s_odd) s_even
3943
, bench "null.intersection:false" $ whnf (S.null. S.intersection s) s_even
@@ -53,9 +57,11 @@ main = do
5357
, bench "member.powerSet (18)" $ whnf (\ s -> all (flip S.member s) s) (S.powerSet (S.fromList [1..18]))
5458
]
5559
where
56-
elems = [1..2^12]
57-
elems_even = [2,4..2^12]
58-
elems_odd = [1,3..2^12]
60+
bound = 2^12
61+
elems = [1..bound]
62+
elems_even = [2,4..bound]
63+
elems_odd = [1,3..bound]
64+
elems_rev = reverse elems
5965
strings = map show elems
6066

6167
member :: [Int] -> S.Set Int -> Int

containers-tests/tests/map-properties.hs

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ main = defaultMain $ testGroup "map-properties"
187187
, testProperty "mergeWithKey model" prop_mergeWithKeyModel
188188
, testProperty "mergeA effects" prop_mergeA_effects
189189
, testProperty "fromAscList" prop_ordered
190+
, testProperty "fromDistinctAscList" prop_fromDistinctAscList
190191
, testProperty "fromDescList" prop_rev_ordered
191192
, testProperty "fromDistinctDescList" prop_fromDistinctDescList
192193
, testProperty "fromList then toList" prop_list
@@ -1243,10 +1244,13 @@ prop_list xs = (sort (nub xs) == [x | (x,()) <- toList (fromList [(x,()) | x <-
12431244
prop_descList :: [Int] -> Bool
12441245
prop_descList xs = (reverse (sort (nub xs)) == [x | (x,()) <- toDescList (fromList [(x,()) | x <- xs])])
12451246

1246-
prop_fromDistinctDescList :: Int -> [A] -> Property
1247-
prop_fromDistinctDescList top lst = valid converted .&&. (toList converted === reverse original) where
1248-
original = zip [top, (top-1)..0] lst
1249-
converted = fromDistinctDescList original
1247+
prop_fromDistinctDescList :: [(Int, A)] -> Property
1248+
prop_fromDistinctDescList xs =
1249+
valid t .&&.
1250+
toList t === nub_sort_xs
1251+
where
1252+
t = fromDistinctDescList (reverse nub_sort_xs)
1253+
nub_sort_xs = List.map List.head $ List.groupBy ((==) `on` fst) $ List.sortBy (comparing fst) xs
12501254

12511255
prop_ascDescList :: [Int] -> Bool
12521256
prop_ascDescList xs = toAscList m == reverse (toDescList m)
@@ -1256,10 +1260,16 @@ prop_fromList :: [Int] -> Bool
12561260
prop_fromList xs
12571261
= case fromList (zip xs xs) of
12581262
t -> t == fromAscList (zip sort_xs sort_xs) &&
1259-
t == fromDistinctAscList (zip nub_sort_xs nub_sort_xs) &&
12601263
t == List.foldr (uncurry insert) empty (zip xs xs)
12611264
where sort_xs = sort xs
1262-
nub_sort_xs = List.map List.head $ List.group sort_xs
1265+
1266+
prop_fromDistinctAscList :: [(Int, A)] -> Property
1267+
prop_fromDistinctAscList xs =
1268+
valid t .&&.
1269+
toList t === nub_sort_xs
1270+
where
1271+
t = fromDistinctAscList nub_sort_xs
1272+
nub_sort_xs = List.map List.head $ List.groupBy ((==) `on` fst) $ List.sortBy (comparing fst) xs
12631273

12641274
----------------------------------------------------------------
12651275

containers-tests/tests/set-properties.hs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,9 @@ main = defaultMain $ testGroup "set-properties"
6767
, testProperty "prop_DescList" prop_DescList
6868
, testProperty "prop_AscDescList" prop_AscDescList
6969
, testProperty "prop_fromList" prop_fromList
70+
, testProperty "prop_fromDistinctAscList" prop_fromDistinctAscList
7071
, testProperty "prop_fromListDesc" prop_fromListDesc
72+
, testProperty "prop_fromDistinctDescList" prop_fromDistinctDescList
7173
, testProperty "prop_isProperSubsetOf" prop_isProperSubsetOf
7274
, testProperty "prop_isProperSubsetOf2" prop_isProperSubsetOf2
7375
, testProperty "prop_isSubsetOf" prop_isSubsetOf
@@ -514,11 +516,17 @@ prop_AscDescList xs = toAscList s == reverse (toDescList s)
514516
prop_fromList :: [Int] -> Property
515517
prop_fromList xs =
516518
t === fromAscList sort_xs .&&.
517-
t === fromDistinctAscList nub_sort_xs .&&.
518519
t === List.foldr insert empty xs
519520
where t = fromList xs
520521
sort_xs = sort xs
521-
nub_sort_xs = List.map List.head $ List.group sort_xs
522+
523+
prop_fromDistinctAscList :: [Int] -> Property
524+
prop_fromDistinctAscList xs =
525+
valid t .&&.
526+
toList t === nub_sort_xs
527+
where
528+
t = fromDistinctAscList nub_sort_xs
529+
nub_sort_xs = List.map List.head $ List.group $ sort xs
522530

523531
prop_fromListDesc :: [Int] -> Property
524532
prop_fromListDesc xs =
@@ -529,6 +537,14 @@ prop_fromListDesc xs =
529537
sort_xs = reverse (sort xs)
530538
nub_sort_xs = List.map List.head $ List.group sort_xs
531539

540+
prop_fromDistinctDescList :: [Int] -> Property
541+
prop_fromDistinctDescList xs =
542+
valid t .&&.
543+
toList t === nub_sort_xs
544+
where
545+
t = fromDistinctDescList (reverse nub_sort_xs)
546+
nub_sort_xs = List.map List.head $ List.group $ sort xs
547+
532548
{--------------------------------------------------------------------
533549
Set operations are like IntSet operations
534550
--------------------------------------------------------------------}

containers/src/Data/Map/Internal.hs

Lines changed: 57 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -355,8 +355,15 @@ module Data.Map.Internal (
355355
, link
356356
, link2
357357
, glue
358+
, fromDistinctAscList_linkTop
359+
, fromDistinctAscList_linkAll
360+
, fromDistinctDescList_linkTop
361+
, fromDistinctDescList_linkAll
358362
, MaybeS(..)
359363
, Identity(..)
364+
, FromDistinctMonoState(..)
365+
, Stack(..)
366+
, foldl'Stack
360367

361368
-- Used by Map.Merge.Lazy
362369
, mapWhenMissing
@@ -3410,8 +3417,7 @@ instance (Ord k) => GHCExts.IsList (Map k v) where
34103417
-- If the list contains more than one value for the same key, the last value
34113418
-- for the key is retained.
34123419
--
3413-
-- If the keys of the list are ordered, linear-time implementation is used,
3414-
-- with the performance equal to 'fromDistinctAscList'.
3420+
-- If the keys of the list are ordered, a linear-time implementation is used.
34153421
--
34163422
-- > fromList [] == empty
34173423
-- > fromList [(5,"a"), (3,"b"), (5, "c")] == fromList [(5,"c"), (3,"b")]
@@ -3701,22 +3707,26 @@ fromDescListWithKey f xs
37013707

37023708
-- For some reason, when 'singleton' is used in fromDistinctAscList or in
37033709
-- create, it is not inlined, so we inline it manually.
3710+
3711+
-- See Note [fromDistinctAscList implementation] in Data.Set.Internal.
37043712
fromDistinctAscList :: [(k,a)] -> Map k a
3705-
fromDistinctAscList [] = Tip
3706-
fromDistinctAscList ((kx0, x0) : xs0) = go (1::Int) (Bin 1 kx0 x0 Tip Tip) xs0
3713+
fromDistinctAscList = fromDistinctAscList_linkAll . Foldable.foldl' next (State0 Nada)
37073714
where
3708-
go !_ t [] = t
3709-
go s l ((kx, x) : xs) = case create s xs of
3710-
(r :*: ys) -> let !t' = link kx x l r
3711-
in go (s `shiftL` 1) t' ys
3712-
3713-
create !_ [] = (Tip :*: [])
3714-
create s xs@(x' : xs')
3715-
| s == 1 = case x' of (kx, x) -> (Bin 1 kx x Tip Tip :*: xs')
3716-
| otherwise = case create (s `shiftR` 1) xs of
3717-
res@(_ :*: []) -> res
3718-
(l :*: (ky, y):ys) -> case create (s `shiftR` 1) ys of
3719-
(r :*: zs) -> (link ky y l r :*: zs)
3715+
next :: FromDistinctMonoState k a -> (k,a) -> FromDistinctMonoState k a
3716+
next (State0 stk) (!kx, x) = fromDistinctAscList_linkTop (Bin 1 kx x Tip Tip) stk
3717+
next (State1 l stk) (kx, x) = State0 (Push kx x l stk)
3718+
{-# INLINE fromDistinctAscList #-} -- INLINE for fusion
3719+
3720+
fromDistinctAscList_linkTop :: Map k a -> Stack k a -> FromDistinctMonoState k a
3721+
fromDistinctAscList_linkTop r@(Bin rsz _ _ _ _) (Push kx x l@(Bin lsz _ _ _ _) stk)
3722+
| rsz == lsz = fromDistinctAscList_linkTop (bin kx x l r) stk
3723+
fromDistinctAscList_linkTop l stk = State1 l stk
3724+
{-# INLINABLE fromDistinctAscList_linkTop #-}
3725+
3726+
fromDistinctAscList_linkAll :: FromDistinctMonoState k a -> Map k a
3727+
fromDistinctAscList_linkAll (State0 stk) = foldl'Stack (\r kx x l -> link kx x l r) Tip stk
3728+
fromDistinctAscList_linkAll (State1 r0 stk) = foldl'Stack (\r kx x l -> link kx x l r) r0 stk
3729+
{-# INLINABLE fromDistinctAscList_linkAll #-}
37203730

37213731
-- | \(O(n)\). Build a map from a descending list of distinct elements in linear time.
37223732
-- /The precondition is not checked./
@@ -3729,22 +3739,39 @@ fromDistinctAscList ((kx0, x0) : xs0) = go (1::Int) (Bin 1 kx0 x0 Tip Tip) xs0
37293739

37303740
-- For some reason, when 'singleton' is used in fromDistinctDescList or in
37313741
-- create, it is not inlined, so we inline it manually.
3742+
3743+
-- See Note [fromDistinctAscList implementation] in Data.Set.Internal.
37323744
fromDistinctDescList :: [(k,a)] -> Map k a
3733-
fromDistinctDescList [] = Tip
3734-
fromDistinctDescList ((kx0, x0) : xs0) = go (1 :: Int) (Bin 1 kx0 x0 Tip Tip) xs0
3745+
fromDistinctDescList = fromDistinctDescList_linkAll . Foldable.foldl' next (State0 Nada)
3746+
where
3747+
next :: FromDistinctMonoState k a -> (k,a) -> FromDistinctMonoState k a
3748+
next (State0 stk) (!kx, x) = fromDistinctDescList_linkTop (Bin 1 kx x Tip Tip) stk
3749+
next (State1 r stk) (kx, x) = State0 (Push kx x r stk)
3750+
{-# INLINE fromDistinctDescList #-} -- INLINE for fusion
3751+
3752+
fromDistinctDescList_linkTop :: Map k a -> Stack k a -> FromDistinctMonoState k a
3753+
fromDistinctDescList_linkTop l@(Bin lsz _ _ _ _) (Push kx x r@(Bin rsz _ _ _ _) stk)
3754+
| lsz == rsz = fromDistinctDescList_linkTop (bin kx x l r) stk
3755+
fromDistinctDescList_linkTop r stk = State1 r stk
3756+
{-# INLINABLE fromDistinctDescList_linkTop #-}
3757+
3758+
fromDistinctDescList_linkAll :: FromDistinctMonoState k a -> Map k a
3759+
fromDistinctDescList_linkAll (State0 stk) = foldl'Stack (\l kx x r -> link kx x l r) Tip stk
3760+
fromDistinctDescList_linkAll (State1 l0 stk) = foldl'Stack (\l kx x r -> link kx x l r) l0 stk
3761+
{-# INLINABLE fromDistinctDescList_linkAll #-}
3762+
3763+
data FromDistinctMonoState k a
3764+
= State0 !(Stack k a)
3765+
| State1 !(Map k a) !(Stack k a)
3766+
3767+
data Stack k a = Push !k a !(Map k a) !(Stack k a) | Nada
3768+
3769+
foldl'Stack :: (b -> k -> a -> Map k a -> b) -> b -> Stack k a -> b
3770+
foldl'Stack f = go
37353771
where
3736-
go !_ t [] = t
3737-
go s r ((kx, x) : xs) = case create s xs of
3738-
(l :*: ys) -> let !t' = link kx x l r
3739-
in go (s `shiftL` 1) t' ys
3740-
3741-
create !_ [] = (Tip :*: [])
3742-
create s xs@(x' : xs')
3743-
| s == 1 = case x' of (kx, x) -> (Bin 1 kx x Tip Tip :*: xs')
3744-
| otherwise = case create (s `shiftR` 1) xs of
3745-
res@(_ :*: []) -> res
3746-
(r :*: (ky, y):ys) -> case create (s `shiftR` 1) ys of
3747-
(l :*: zs) -> (link ky y l r :*: zs)
3772+
go !z Nada = z
3773+
go z (Push kx x t stk) = go (f z kx x t) stk
3774+
{-# INLINE foldl'Stack #-}
37483775

37493776
{-
37503777
-- Functions very similar to these were used to implement

containers/src/Data/Map/Strict/Internal.hs

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,12 @@ import Data.Map.Internal
328328
, filterAMissing
329329
, merge
330330
, mergeA
331+
, fromDistinctAscList_linkTop
332+
, fromDistinctAscList_linkAll
333+
, fromDistinctDescList_linkTop
334+
, fromDistinctDescList_linkAll
335+
, FromDistinctMonoState (..)
336+
, Stack (..)
331337
, (!)
332338
, (!?)
333339
, (\\)
@@ -1489,8 +1495,7 @@ fromArgSet (Set.Bin sz (Arg x v) l r) = v `seq` Bin sz x v (fromArgSet l) (fromA
14891495
-- If the list contains more than one value for the same key, the last value
14901496
-- for the key is retained.
14911497
--
1492-
-- If the keys of the list are ordered, linear-time implementation is used,
1493-
-- with the performance equal to 'fromDistinctAscList'.
1498+
-- If the keys of the list are ordered, a linear-time implementation is used.
14941499
--
14951500
-- > fromList [] == empty
14961501
-- > fromList [(5,"a"), (3,"b"), (5, "c")] == fromList [(5,"c"), (3,"b")]
@@ -1697,23 +1702,15 @@ fromDescListWithKey f xs
16971702

16981703
-- For some reason, when 'singleton' is used in fromDistinctAscList or in
16991704
-- create, it is not inlined, so we inline it manually.
1705+
1706+
-- See Note [fromDistinctAscList implementation] in Data.Set.Internal.
17001707
fromDistinctAscList :: [(k,a)] -> Map k a
1701-
fromDistinctAscList [] = Tip
1702-
fromDistinctAscList ((kx0, x0) : xs0) = x0 `seq` go (1::Int) (Bin 1 kx0 x0 Tip Tip) xs0
1708+
fromDistinctAscList = fromDistinctAscList_linkAll . Foldable.foldl' next (State0 Nada)
17031709
where
1704-
go !_ t [] = t
1705-
go s l ((kx, x) : xs) =
1706-
case create s xs of
1707-
(r :*: ys) -> x `seq` let !t' = link kx x l r
1708-
in go (s `shiftL` 1) t' ys
1709-
1710-
create !_ [] = (Tip :*: [])
1711-
create s xs@(x' : xs')
1712-
| s == 1 = case x' of (kx, x) -> x `seq` (Bin 1 kx x Tip Tip :*: xs')
1713-
| otherwise = case create (s `shiftR` 1) xs of
1714-
res@(_ :*: []) -> res
1715-
(l :*: (ky, y):ys) -> case create (s `shiftR` 1) ys of
1716-
(r :*: zs) -> y `seq` (link ky y l r :*: zs)
1710+
next :: FromDistinctMonoState k a -> (k,a) -> FromDistinctMonoState k a
1711+
next (State0 stk) (!kx, !x) = fromDistinctAscList_linkTop (Bin 1 kx x Tip Tip) stk
1712+
next (State1 l stk) (kx, x) = State0 (Push kx x l stk)
1713+
{-# INLINE fromDistinctAscList #-} -- INLINE for fusion
17171714

17181715
-- | \(O(n)\). Build a map from a descending list of distinct elements in linear time.
17191716
-- /The precondition is not checked./
@@ -1724,20 +1721,12 @@ fromDistinctAscList ((kx0, x0) : xs0) = x0 `seq` go (1::Int) (Bin 1 kx0 x0 Tip T
17241721

17251722
-- For some reason, when 'singleton' is used in fromDistinctDescList or in
17261723
-- create, it is not inlined, so we inline it manually.
1724+
1725+
-- See Note [fromDistinctAscList implementation] in Data.Set.Internal.
17271726
fromDistinctDescList :: [(k,a)] -> Map k a
1728-
fromDistinctDescList [] = Tip
1729-
fromDistinctDescList ((kx0, x0) : xs0) = x0 `seq` go (1::Int) (Bin 1 kx0 x0 Tip Tip) xs0
1727+
fromDistinctDescList = fromDistinctDescList_linkAll . Foldable.foldl' next (State0 Nada)
17301728
where
1731-
go !_ t [] = t
1732-
go s r ((kx, x) : xs) =
1733-
case create s xs of
1734-
(l :*: ys) -> x `seq` let !t' = link kx x l r
1735-
in go (s `shiftL` 1) t' ys
1736-
1737-
create !_ [] = (Tip :*: [])
1738-
create s xs@(x' : xs')
1739-
| s == 1 = case x' of (kx, x) -> x `seq` (Bin 1 kx x Tip Tip :*: xs')
1740-
| otherwise = case create (s `shiftR` 1) xs of
1741-
res@(_ :*: []) -> res
1742-
(r :*: (ky, y):ys) -> case create (s `shiftR` 1) ys of
1743-
(l :*: zs) -> y `seq` (link ky y l r :*: zs)
1729+
next :: FromDistinctMonoState k a -> (k,a) -> FromDistinctMonoState k a
1730+
next (State0 stk) (!kx, !x) = fromDistinctDescList_linkTop (Bin 1 kx x Tip Tip) stk
1731+
next (State1 r stk) (kx, x) = State0 (Push kx x r stk)
1732+
{-# INLINE fromDistinctDescList #-} -- INLINE for fusion

0 commit comments

Comments
 (0)