Skip to content

Commit 41005b5

Browse files
authored
Simplify {Set,Map}.fromDistinct{Asc,Desc}List (#1029)
Uses only the Stack, making FromDistinctMonoState unnecessary. This implementation also allows for quick access to the last element, which may be used in fromAscListWith, mapKeysWith, etc.
1 parent 4af12df commit 41005b5

File tree

3 files changed

+86
-108
lines changed

3 files changed

+86
-108
lines changed

containers/src/Data/Map/Internal.hs

Lines changed: 31 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -358,13 +358,12 @@ module Data.Map.Internal (
358358
, link
359359
, link2
360360
, glue
361-
, fromDistinctAscList_linkTop
362-
, fromDistinctAscList_linkAll
363-
, fromDistinctDescList_linkTop
364-
, fromDistinctDescList_linkAll
361+
, ascLinkTop
362+
, ascLinkAll
363+
, descLinkTop
364+
, descLinkAll
365365
, MaybeS(..)
366366
, Identity(..)
367-
, FromDistinctMonoState(..)
368367
, Stack(..)
369368
, foldl'Stack
370369

@@ -3833,28 +3832,25 @@ fromDescListWithKey f xs
38333832
-- > valid (fromDistinctAscList [(3,"b"), (5,"a")]) == True
38343833
-- > valid (fromDistinctAscList [(3,"b"), (5,"a"), (5,"b")]) == False
38353834

3836-
-- For some reason, when 'singleton' is used in fromDistinctAscList or in
3837-
-- create, it is not inlined, so we inline it manually.
3838-
38393835
-- See Note [fromDistinctAscList implementation] in Data.Set.Internal.
38403836
fromDistinctAscList :: [(k,a)] -> Map k a
3841-
fromDistinctAscList = fromDistinctAscList_linkAll . Foldable.foldl' next (State0 Nada)
3837+
fromDistinctAscList = ascLinkAll . Foldable.foldl' next Nada
38423838
where
3843-
next :: FromDistinctMonoState k a -> (k,a) -> FromDistinctMonoState k a
3844-
next (State0 stk) (!kx, x) = fromDistinctAscList_linkTop (Bin 1 kx x Tip Tip) stk
3845-
next (State1 l stk) (kx, x) = State0 (Push kx x l stk)
3839+
next :: Stack k a -> (k, a) -> Stack k a
3840+
next (Push kx x Tip stk) (!ky, y) = ascLinkTop stk 1 (singleton kx x) ky y
3841+
next stk (!kx, x) = Push kx x Tip stk
38463842
{-# INLINE fromDistinctAscList #-} -- INLINE for fusion
38473843

3848-
fromDistinctAscList_linkTop :: Map k a -> Stack k a -> FromDistinctMonoState k a
3849-
fromDistinctAscList_linkTop r@(Bin rsz _ _ _ _) (Push kx x l@(Bin lsz _ _ _ _) stk)
3850-
| rsz == lsz = fromDistinctAscList_linkTop (bin kx x l r) stk
3851-
fromDistinctAscList_linkTop l stk = State1 l stk
3852-
{-# INLINABLE fromDistinctAscList_linkTop #-}
3844+
ascLinkTop :: Stack k a -> Int -> Map k a -> k -> a -> Stack k a
3845+
ascLinkTop (Push kx x l@(Bin lsz _ _ _ _) stk) !rsz r ky y
3846+
| lsz == rsz = ascLinkTop stk sz (Bin sz kx x l r) ky y
3847+
where
3848+
sz = lsz + rsz + 1
3849+
ascLinkTop stk !_ l kx x = Push kx x l stk
38533850

3854-
fromDistinctAscList_linkAll :: FromDistinctMonoState k a -> Map k a
3855-
fromDistinctAscList_linkAll (State0 stk) = foldl'Stack (\r kx x l -> link kx x l r) Tip stk
3856-
fromDistinctAscList_linkAll (State1 r0 stk) = foldl'Stack (\r kx x l -> link kx x l r) r0 stk
3857-
{-# INLINABLE fromDistinctAscList_linkAll #-}
3851+
ascLinkAll :: Stack k a -> Map k a
3852+
ascLinkAll stk = foldl'Stack (\r kx x l -> link kx x l r) Tip stk
3853+
{-# INLINABLE ascLinkAll #-}
38583854

38593855
-- | \(O(n)\). Build a map from a descending list of distinct elements in linear time.
38603856
-- /The precondition is not checked./
@@ -3865,32 +3861,26 @@ fromDistinctAscList_linkAll (State1 r0 stk) = foldl'Stack (\r kx x l -> link kx
38653861
--
38663862
-- @since 0.5.8
38673863

3868-
-- For some reason, when 'singleton' is used in fromDistinctDescList or in
3869-
-- create, it is not inlined, so we inline it manually.
3870-
38713864
-- See Note [fromDistinctAscList implementation] in Data.Set.Internal.
38723865
fromDistinctDescList :: [(k,a)] -> Map k a
3873-
fromDistinctDescList = fromDistinctDescList_linkAll . Foldable.foldl' next (State0 Nada)
3866+
fromDistinctDescList = descLinkAll . Foldable.foldl' next Nada
38743867
where
3875-
next :: FromDistinctMonoState k a -> (k,a) -> FromDistinctMonoState k a
3876-
next (State0 stk) (!kx, x) = fromDistinctDescList_linkTop (Bin 1 kx x Tip Tip) stk
3877-
next (State1 r stk) (kx, x) = State0 (Push kx x r stk)
3868+
next :: Stack k a -> (k, a) -> Stack k a
3869+
next (Push ky y Tip stk) (!kx, x) = descLinkTop kx x 1 (singleton ky y) stk
3870+
next stk (!ky, y) = Push ky y Tip stk
38783871
{-# INLINE fromDistinctDescList #-} -- INLINE for fusion
38793872

3880-
fromDistinctDescList_linkTop :: Map k a -> Stack k a -> FromDistinctMonoState k a
3881-
fromDistinctDescList_linkTop l@(Bin lsz _ _ _ _) (Push kx x r@(Bin rsz _ _ _ _) stk)
3882-
| lsz == rsz = fromDistinctDescList_linkTop (bin kx x l r) stk
3883-
fromDistinctDescList_linkTop r stk = State1 r stk
3884-
{-# INLINABLE fromDistinctDescList_linkTop #-}
3885-
3886-
fromDistinctDescList_linkAll :: FromDistinctMonoState k a -> Map k a
3887-
fromDistinctDescList_linkAll (State0 stk) = foldl'Stack (\l kx x r -> link kx x l r) Tip stk
3888-
fromDistinctDescList_linkAll (State1 l0 stk) = foldl'Stack (\l kx x r -> link kx x l r) l0 stk
3889-
{-# INLINABLE fromDistinctDescList_linkAll #-}
3873+
descLinkTop :: k -> a -> Int -> Map k a -> Stack k a -> Stack k a
3874+
descLinkTop kx x !lsz l (Push ky y r@(Bin rsz _ _ _ _) stk)
3875+
| lsz == rsz = descLinkTop kx x sz (Bin sz ky y l r) stk
3876+
where
3877+
sz = lsz + rsz + 1
3878+
descLinkTop ky y !_ r stk = Push ky y r stk
3879+
{-# INLINABLE descLinkTop #-}
38903880

3891-
data FromDistinctMonoState k a
3892-
= State0 !(Stack k a)
3893-
| State1 !(Map k a) !(Stack k a)
3881+
descLinkAll :: Stack k a -> Map k a
3882+
descLinkAll stk = foldl'Stack (\l kx x r -> link kx x l r) Tip stk
3883+
{-# INLINABLE descLinkAll #-}
38943884

38953885
data Stack k a = Push !k a !(Map k a) !(Stack k a) | Nada
38963886

containers/src/Data/Map/Strict/Internal.hs

Lines changed: 12 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -331,11 +331,10 @@ import Data.Map.Internal
331331
, filterAMissing
332332
, merge
333333
, mergeA
334-
, fromDistinctAscList_linkTop
335-
, fromDistinctAscList_linkAll
336-
, fromDistinctDescList_linkTop
337-
, fromDistinctDescList_linkAll
338-
, FromDistinctMonoState (..)
334+
, ascLinkTop
335+
, ascLinkAll
336+
, descLinkTop
337+
, descLinkAll
339338
, Stack (..)
340339
, (!)
341340
, (!?)
@@ -1733,16 +1732,13 @@ fromDescListWithKey f xs0 = fromDistinctDescList xs1
17331732
-- > valid (fromDistinctAscList [(3,"b"), (5,"a")]) == True
17341733
-- > valid (fromDistinctAscList [(3,"b"), (5,"a"), (5,"b")]) == False
17351734

1736-
-- For some reason, when 'singleton' is used in fromDistinctAscList or in
1737-
-- create, it is not inlined, so we inline it manually.
1738-
17391735
-- See Note [fromDistinctAscList implementation] in Data.Set.Internal.
17401736
fromDistinctAscList :: [(k,a)] -> Map k a
1741-
fromDistinctAscList = fromDistinctAscList_linkAll . Foldable.foldl' next (State0 Nada)
1737+
fromDistinctAscList = ascLinkAll . Foldable.foldl' next Nada
17421738
where
1743-
next :: FromDistinctMonoState k a -> (k,a) -> FromDistinctMonoState k a
1744-
next (State0 stk) (!kx, !x) = fromDistinctAscList_linkTop (Bin 1 kx x Tip Tip) stk
1745-
next (State1 l stk) (!kx, !x) = State0 (Push kx x l stk)
1739+
next :: Stack k a -> (k, a) -> Stack k a
1740+
next (Push kx x Tip stk) (!ky, !y) = ascLinkTop stk 1 (singleton kx x) ky y
1741+
next stk (!kx, !x) = Push kx x Tip stk
17461742
{-# INLINE fromDistinctAscList #-} -- INLINE for fusion
17471743

17481744
-- | \(O(n)\). Build a map from a descending list of distinct elements in linear time.
@@ -1752,14 +1748,11 @@ fromDistinctAscList = fromDistinctAscList_linkAll . Foldable.foldl' next (State0
17521748
-- > valid (fromDistinctDescList [(5,"a"), (3,"b")]) == True
17531749
-- > valid (fromDistinctDescList [(5,"a"), (3,"b"), (3,"a")]) == False
17541750

1755-
-- For some reason, when 'singleton' is used in fromDistinctDescList or in
1756-
-- create, it is not inlined, so we inline it manually.
1757-
17581751
-- See Note [fromDistinctAscList implementation] in Data.Set.Internal.
17591752
fromDistinctDescList :: [(k,a)] -> Map k a
1760-
fromDistinctDescList = fromDistinctDescList_linkAll . Foldable.foldl' next (State0 Nada)
1753+
fromDistinctDescList = descLinkAll . Foldable.foldl' next Nada
17611754
where
1762-
next :: FromDistinctMonoState k a -> (k,a) -> FromDistinctMonoState k a
1763-
next (State0 stk) (!kx, !x) = fromDistinctDescList_linkTop (Bin 1 kx x Tip Tip) stk
1764-
next (State1 r stk) (!kx, !x) = State0 (Push kx x r stk)
1755+
next :: Stack k a -> (k, a) -> Stack k a
1756+
next (Push ky y Tip stk) (!kx, !x) = descLinkTop kx x 1 (singleton ky y) stk
1757+
next stk (!ky, !y) = Push ky y Tip stk
17651758
{-# INLINE fromDistinctDescList #-} -- INLINE for fusion

containers/src/Data/Set/Internal.hs

Lines changed: 43 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1208,60 +1208,50 @@ combineEq (x : xs) = combineEq' x xs
12081208
-- | \(O(n)\). Build a set from an ascending list of distinct elements in linear time.
12091209
-- /The precondition (input list is strictly ascending) is not checked./
12101210

1211-
-- For some reason, when 'singleton' is used in fromDistinctAscList or in
1212-
-- create, it is not inlined, so we inline it manually.
1213-
12141211
-- See Note [fromDistinctAscList implementation]
12151212
fromDistinctAscList :: [a] -> Set a
1216-
fromDistinctAscList = fromDistinctAscList_linkAll . Foldable.foldl' next (State0 Nada)
1213+
fromDistinctAscList = ascLinkAll . Foldable.foldl' next Nada
12171214
where
1218-
next :: FromDistinctMonoState a -> a -> FromDistinctMonoState a
1219-
next (State0 stk) !x = fromDistinctAscList_linkTop (Bin 1 x Tip Tip) stk
1220-
next (State1 l stk) x = State0 (Push x l stk)
1215+
next :: Stack a -> a -> Stack a
1216+
next (Push x Tip stk) !y = ascLinkTop stk 1 (singleton x) y
1217+
next stk !x = Push x Tip stk
12211218
{-# INLINE fromDistinctAscList #-} -- INLINE for fusion
12221219

1223-
fromDistinctAscList_linkTop :: Set a -> Stack a -> FromDistinctMonoState a
1224-
fromDistinctAscList_linkTop r@(Bin rsz _ _ _) (Push x l@(Bin lsz _ _ _) stk)
1225-
| rsz == lsz = fromDistinctAscList_linkTop (bin x l r) stk
1226-
fromDistinctAscList_linkTop l stk = State1 l stk
1227-
{-# INLINABLE fromDistinctAscList_linkTop #-}
1220+
ascLinkTop :: Stack a -> Int -> Set a -> a -> Stack a
1221+
ascLinkTop (Push x l@(Bin lsz _ _ _) stk) !rsz r y
1222+
| lsz == rsz = ascLinkTop stk sz (Bin sz x l r) y
1223+
where
1224+
sz = lsz + rsz + 1
1225+
ascLinkTop stk !_ r y = Push y r stk
12281226

1229-
fromDistinctAscList_linkAll :: FromDistinctMonoState a -> Set a
1230-
fromDistinctAscList_linkAll (State0 stk) = foldl'Stack (\r x l -> link x l r) Tip stk
1231-
fromDistinctAscList_linkAll (State1 r0 stk) = foldl'Stack (\r x l -> link x l r) r0 stk
1232-
{-# INLINABLE fromDistinctAscList_linkAll #-}
1227+
ascLinkAll :: Stack a -> Set a
1228+
ascLinkAll stk = foldl'Stack (\r x l -> link x l r) Tip stk
1229+
{-# INLINABLE ascLinkAll #-}
12331230

12341231
-- | \(O(n)\). Build a set from a descending list of distinct elements in linear time.
12351232
-- /The precondition (input list is strictly descending) is not checked./
12361233
--
12371234
-- @since 0.5.8
12381235

1239-
-- For some reason, when 'singleton' is used in fromDistinctDescList or in
1240-
-- create, it is not inlined, so we inline it manually.
1241-
12421236
-- See Note [fromDistinctAscList implementation]
12431237
fromDistinctDescList :: [a] -> Set a
1244-
fromDistinctDescList = fromDistinctDescList_linkAll . Foldable.foldl' next (State0 Nada)
1238+
fromDistinctDescList = descLinkAll . Foldable.foldl' next Nada
12451239
where
1246-
next :: FromDistinctMonoState a -> a -> FromDistinctMonoState a
1247-
next (State0 stk) !x = fromDistinctDescList_linkTop (Bin 1 x Tip Tip) stk
1248-
next (State1 r stk) x = State0 (Push x r stk)
1240+
next :: Stack a -> a -> Stack a
1241+
next (Push y Tip stk) !x = descLinkTop x 1 (singleton y) stk
1242+
next stk !y = Push y Tip stk
12491243
{-# INLINE fromDistinctDescList #-} -- INLINE for fusion
12501244

1251-
fromDistinctDescList_linkTop :: Set a -> Stack a -> FromDistinctMonoState a
1252-
fromDistinctDescList_linkTop l@(Bin lsz _ _ _) (Push x r@(Bin rsz _ _ _) stk)
1253-
| lsz == rsz = fromDistinctDescList_linkTop (bin x l r) stk
1254-
fromDistinctDescList_linkTop r stk = State1 r stk
1255-
{-# INLINABLE fromDistinctDescList_linkTop #-}
1256-
1257-
fromDistinctDescList_linkAll :: FromDistinctMonoState a -> Set a
1258-
fromDistinctDescList_linkAll (State0 stk) = foldl'Stack (\l x r -> link x l r) Tip stk
1259-
fromDistinctDescList_linkAll (State1 l0 stk) = foldl'Stack (\l x r -> link x l r) l0 stk
1260-
{-# INLINABLE fromDistinctDescList_linkAll #-}
1245+
descLinkTop :: a -> Int -> Set a -> Stack a -> Stack a
1246+
descLinkTop x !lsz l (Push y r@(Bin rsz _ _ _) stk)
1247+
| lsz == rsz = descLinkTop x sz (Bin sz y l r) stk
1248+
where
1249+
sz = lsz + rsz + 1
1250+
descLinkTop y !_ r stk = Push y r stk
12611251

1262-
data FromDistinctMonoState a
1263-
= State0 !(Stack a)
1264-
| State1 !(Set a) !(Stack a)
1252+
descLinkAll :: Stack a -> Set a
1253+
descLinkAll stk = foldl'Stack (\l x r -> link x l r) Tip stk
1254+
{-# INLINABLE descLinkAll #-}
12651255

12661256
data Stack a = Push !a !(Set a) !(Stack a) | Nada
12671257

@@ -2183,24 +2173,29 @@ validsize t
21832173
-- fromDistinctAscList is implemented by building up perfectly balanced trees
21842174
-- while we consume elements from the list one by one. A stack of
21852175
-- (root, perfectly balanced left branch) pairs is maintained, in increasing
2186-
-- order of size from top to bottom.
2187-
--
2188-
-- When we get an element from the list, we attempt to link it as the right
2189-
-- branch with the top (root, perfect left branch) of the stack to create a new
2190-
-- perfect tree. We can only do this if the left branch has size 1. If we link
2191-
-- it, we get a perfect tree of size 3. We repeat this process, merging with the
2192-
-- top of the stack as long as the sizes match. When we can't link any more, the
2193-
-- perfect tree we built so far is a potential left branch. The next element
2194-
-- we find becomes the root, and we push this new (root, left branch) on the
2195-
-- stack.
2176+
-- order of size from top to bottom. The stack reflects the binary
2177+
-- representation of the total number of elements in it, with every level having
2178+
-- a power of 2 number of elements.
2179+
--
2180+
-- When we get an element from the list, we check the (root, left branch) at the
2181+
-- top of the stack.
2182+
-- If the tree there is not empty, we push the element with an empty left child
2183+
-- on the stack.
2184+
-- If the tree is empty, the root is packed into a singleton tree to act as a
2185+
-- right branch for trees higher up the stack. It is linked with left branches
2186+
-- in the stack, but only when they have equal size. This preserves the
2187+
-- perfectly balanced property. When there is a size mismatch, the tree is
2188+
-- too small to link. It is pushed on the stack as a left branch with the new
2189+
-- element as root, awaiting a right branch which will make it large enough to
2190+
-- be linked further.
21962191
--
21972192
-- When we are out of elements, we link the (root, left branch)s in the stack
21982193
-- top to bottom to get the final tree.
21992194
--
22002195
-- How long does this take? We do O(1) work per element excluding the links.
22012196
-- Over n elements, we build trees with at most n nodes total, and each link is
2202-
-- done in O(1) using `bin`. The final linking of the stack is done in O(log n)
2203-
-- using `link` (proof below). The total time is thus O(n).
2197+
-- done in O(1) using `Bin`. The final linking of the stack is done in O(log n)
2198+
-- using `link` (proof below). The total time is thus O(n).
22042199
--
22052200
-- Additionally, the implemention is written using foldl' over the input list,
22062201
-- which makes it participate as a good consumer in list fusion.

0 commit comments

Comments
 (0)