Skip to content

Commit d5cbecf

Browse files
authored
Inline the common case of balance functions (#1056)
For Set and Map, inline the common case of balance, balanceL, balanceR, as explained in the "Inlining balance" note. A decrease in running time of 10-30% is seen in benchmarks for insert, delete, union, and others.
1 parent 9cc03be commit d5cbecf

File tree

4 files changed

+71
-29
lines changed

4 files changed

+71
-29
lines changed

containers-tests/containers-tests.cabal

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ common benchmark-deps
5858
build-depends:
5959
containers-tests
6060
, deepseq >=1.1.0.0 && <1.6
61-
, tasty-bench >=0.3.1 && <0.4
61+
, tasty-bench >=0.3.1 && <0.5
6262

6363
-- Flags recommended by tasty-bench
6464
if impl(ghc >= 8.6)

containers/changelog.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,10 @@
6666
* Improved performance for `Data.Set`'s `fromList`, `map` and `Data.Map`'s
6767
`fromList`, `fromListWith`, `fromListWithKey`, `mapKeys`, `mapKeysWith`.
6868

69+
* Improved performance for many `Set` and `Map` modification operations,
70+
including `insert` and `delete`, by inlining part of the balancing
71+
routine. (Soumik Sarkar)
72+
6973
## Unreleased with `@since` annotation for 0.7.1:
7074

7175
### Additions

containers/src/Data/Map/Internal.hs

Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4190,7 +4190,14 @@ ratio = 2
41904190
-- It is only written in such a way that every node is pattern-matched only once.
41914191

41924192
balance :: k -> a -> Map k a -> Map k a -> Map k a
4193-
balance k x l r = case l of
4193+
balance k x l r = case (l, r) of
4194+
(Bin ls _ _ _ _, Bin rs _ _ _ _)
4195+
| rs <= delta*ls && ls <= delta*rs -> Bin (1+ls+rs) k x l r
4196+
_ -> balance_ k x l r
4197+
{-# INLINE balance #-} -- See Note [Inlining balance] in Data.Set.Internal
4198+
4199+
balance_ :: k -> a -> Map k a -> Map k a -> Map k a
4200+
balance_ k x l r = case l of
41944201
Tip -> case r of
41954202
Tip -> Bin 1 k x Tip Tip
41964203
(Bin _ _ _ Tip Tip) -> Bin 2 k x Tip r
@@ -4214,13 +4221,12 @@ balance k x l r = case l of
42144221
| rls < ratio*rrs -> Bin (1+ls+rs) rk rx (Bin (1+ls+rls) k x l rl) rr
42154222
| otherwise -> Bin (1+ls+rs) rlk rlx (Bin (1+ls+size rll) k x l rll) (Bin (1+rrs+size rlr) rk rx rlr rr)
42164223
(_, _) -> error "Failure in Data.Map.balance"
4217-
| ls > delta*rs -> case (ll, lr) of
4224+
| {- ls > delta*rs -} otherwise -> case (ll, lr) of
42184225
(Bin lls _ _ _ _, Bin lrs lrk lrx lrl lrr)
42194226
| lrs < ratio*lls -> Bin (1+ls+rs) lk lx ll (Bin (1+rs+lrs) k x lr r)
42204227
| otherwise -> Bin (1+ls+rs) lrk lrx (Bin (1+lls+size lrl) lk lx ll lrl) (Bin (1+rs+size lrr) k x lrr r)
42214228
(_, _) -> error "Failure in Data.Map.balance"
4222-
| otherwise -> Bin (1+ls+rs) k x l r
4223-
{-# NOINLINE balance #-}
4229+
{-# NOINLINE balance_ #-}
42244230

42254231
-- Functions balanceL and balanceR are specialised versions of balance.
42264232
-- balanceL only checks whether the left subtree is too big,
@@ -4229,7 +4235,14 @@ balance k x l r = case l of
42294235
-- balanceL is called when left subtree might have been inserted to or when
42304236
-- right subtree might have been deleted from.
42314237
balanceL :: k -> a -> Map k a -> Map k a -> Map k a
4232-
balanceL k x l r = case r of
4238+
balanceL k x l r = case (l, r) of
4239+
(Bin ls _ _ _ _, Bin rs _ _ _ _)
4240+
| ls <= delta*rs -> Bin (1+ls+rs) k x l r
4241+
_ -> balanceL_ k x l r
4242+
{-# INLINE balanceL #-} -- See Note [Inlining balance] in Data.Set.Internal
4243+
4244+
balanceL_ :: k -> a -> Map k a -> Map k a -> Map k a
4245+
balanceL_ k x l r = case r of
42334246
Tip -> case l of
42344247
Tip -> Bin 1 k x Tip Tip
42354248
(Bin _ _ _ Tip Tip) -> Bin 2 k x l Tip
@@ -4242,19 +4255,24 @@ balanceL k x l r = case r of
42424255
(Bin rs _ _ _ _) -> case l of
42434256
Tip -> Bin (1+rs) k x Tip r
42444257

4245-
(Bin ls lk lx ll lr)
4246-
| ls > delta*rs -> case (ll, lr) of
4258+
(Bin ls lk lx ll lr) -> case (ll, lr) of
42474259
(Bin lls _ _ _ _, Bin lrs lrk lrx lrl lrr)
42484260
| lrs < ratio*lls -> Bin (1+ls+rs) lk lx ll (Bin (1+rs+lrs) k x lr r)
42494261
| otherwise -> Bin (1+ls+rs) lrk lrx (Bin (1+lls+size lrl) lk lx ll lrl) (Bin (1+rs+size lrr) k x lrr r)
4250-
(_, _) -> error "Failure in Data.Map.balanceL"
4251-
| otherwise -> Bin (1+ls+rs) k x l r
4252-
{-# NOINLINE balanceL #-}
4262+
(_, _) -> error "Failure in Data.Map.balanceL_"
4263+
{-# NOINLINE balanceL_ #-}
42534264

42544265
-- balanceR is called when right subtree might have been inserted to or when
42554266
-- left subtree might have been deleted from.
42564267
balanceR :: k -> a -> Map k a -> Map k a -> Map k a
4257-
balanceR k x l r = case l of
4268+
balanceR k x l r = case (l, r) of
4269+
(Bin ls _ _ _ _, Bin rs _ _ _ _)
4270+
| rs <= delta*ls -> Bin (1+ls+rs) k x l r
4271+
_ -> balanceR_ k x l r
4272+
{-# INLINE balanceR #-} -- See Note [Inlining balance] in Data.Set.Internal
4273+
4274+
balanceR_ :: k -> a -> Map k a -> Map k a -> Map k a
4275+
balanceR_ k x l r = case l of
42584276
Tip -> case r of
42594277
Tip -> Bin 1 k x Tip Tip
42604278
(Bin _ _ _ Tip Tip) -> Bin 2 k x Tip r
@@ -4267,14 +4285,12 @@ balanceR k x l r = case l of
42674285
(Bin ls _ _ _ _) -> case r of
42684286
Tip -> Bin (1+ls) k x l Tip
42694287

4270-
(Bin rs rk rx rl rr)
4271-
| rs > delta*ls -> case (rl, rr) of
4288+
(Bin rs rk rx rl rr) -> case (rl, rr) of
42724289
(Bin rls rlk rlx rll rlr, Bin rrs _ _ _ _)
42734290
| rls < ratio*rrs -> Bin (1+ls+rs) rk rx (Bin (1+ls+rls) k x l rl) rr
42744291
| otherwise -> Bin (1+ls+rs) rlk rlx (Bin (1+ls+size rll) k x l rll) (Bin (1+rrs+size rlr) rk rx rlr rr)
4275-
(_, _) -> error "Failure in Data.Map.balanceR"
4276-
| otherwise -> Bin (1+ls+rs) k x l r
4277-
{-# NOINLINE balanceR #-}
4292+
(_, _) -> error "Failure in Data.Map.balanceR_"
4293+
{-# NOINLINE balanceR_ #-}
42784294

42794295

42804296
{--------------------------------------------------------------------

containers/src/Data/Set/Internal.hs

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1866,10 +1866,29 @@ ratio = 2
18661866
-- balanceL only checks whether the left subtree is too big,
18671867
-- balanceR only checks whether the right subtree is too big.
18681868

1869+
-- Note [Inlining balance]
1870+
-- ~~~~~~~~~~~~~~~~~~~~~~~
1871+
-- Benchmarks show that we benefit from inlining balanceL and balanceR, but
1872+
-- we don't want to cause code bloat from inlining these large functions.
1873+
-- As a compromise, we inline only one case: that of two Bins already balanced
1874+
-- with respect to each other.
1875+
--
1876+
-- This is the most common case for typical scenarios. For instance, for n
1877+
-- inserts there may be O(n log n) calls to balanceL/balanceR but at most O(n)
1878+
-- of them actually require rebalancing. So, inlining this common case provides
1879+
-- most of the potential benefits of inlining the full function.
1880+
18691881
-- balanceL is called when left subtree might have been inserted to or when
18701882
-- right subtree might have been deleted from.
18711883
balanceL :: a -> Set a -> Set a -> Set a
1872-
balanceL x l r = case r of
1884+
balanceL x l r = case (l, r) of
1885+
(Bin ls _ _ _, Bin rs _ _ _)
1886+
| ls <= delta*rs -> Bin (1+ls+rs) x l r
1887+
_ -> balanceL_ x l r
1888+
{-# INLINE balanceL #-} -- See Note [Inlining balance]
1889+
1890+
balanceL_ :: a -> Set a -> Set a -> Set a
1891+
balanceL_ x l r = case r of
18731892
Tip -> case l of
18741893
Tip -> Bin 1 x Tip Tip
18751894
(Bin _ _ Tip Tip) -> Bin 2 x l Tip
@@ -1882,19 +1901,24 @@ balanceL x l r = case r of
18821901
(Bin rs _ _ _) -> case l of
18831902
Tip -> Bin (1+rs) x Tip r
18841903

1885-
(Bin ls lx ll lr)
1886-
| ls > delta*rs -> case (ll, lr) of
1904+
(Bin ls lx ll lr) -> case (ll, lr) of
18871905
(Bin lls _ _ _, Bin lrs lrx lrl lrr)
18881906
| lrs < ratio*lls -> Bin (1+ls+rs) lx ll (Bin (1+rs+lrs) x lr r)
18891907
| otherwise -> Bin (1+ls+rs) lrx (Bin (1+lls+size lrl) lx ll lrl) (Bin (1+rs+size lrr) x lrr r)
1890-
(_, _) -> error "Failure in Data.Set.balanceL"
1891-
| otherwise -> Bin (1+ls+rs) x l r
1892-
{-# NOINLINE balanceL #-}
1908+
(_, _) -> error "Failure in Data.Set.balanceL_"
1909+
{-# NOINLINE balanceL_ #-}
18931910

18941911
-- balanceR is called when right subtree might have been inserted to or when
18951912
-- left subtree might have been deleted from.
18961913
balanceR :: a -> Set a -> Set a -> Set a
1897-
balanceR x l r = case l of
1914+
balanceR x l r = case (l, r) of
1915+
(Bin ls _ _ _, Bin rs _ _ _)
1916+
| rs <= delta*ls -> Bin (1+ls+rs) x l r
1917+
_ -> balanceR_ x l r
1918+
{-# INLINE balanceR #-} -- See Note [Inlining balance]
1919+
1920+
balanceR_ :: a -> Set a -> Set a -> Set a
1921+
balanceR_ x l r = case l of
18981922
Tip -> case r of
18991923
Tip -> Bin 1 x Tip Tip
19001924
(Bin _ _ Tip Tip) -> Bin 2 x Tip r
@@ -1907,14 +1931,12 @@ balanceR x l r = case l of
19071931
(Bin ls _ _ _) -> case r of
19081932
Tip -> Bin (1+ls) x l Tip
19091933

1910-
(Bin rs rx rl rr)
1911-
| rs > delta*ls -> case (rl, rr) of
1934+
(Bin rs rx rl rr) -> case (rl, rr) of
19121935
(Bin rls rlx rll rlr, Bin rrs _ _ _)
19131936
| rls < ratio*rrs -> Bin (1+ls+rs) rx (Bin (1+ls+rls) x l rl) rr
19141937
| otherwise -> Bin (1+ls+rs) rlx (Bin (1+ls+size rll) x l rll) (Bin (1+rrs+size rlr) rx rlr rr)
1915-
(_, _) -> error "Failure in Data.Set.balanceR"
1916-
| otherwise -> Bin (1+ls+rs) x l r
1917-
{-# NOINLINE balanceR #-}
1938+
(_, _) -> error "Failure in Data.Set.balanceR_"
1939+
{-# NOINLINE balanceR_ #-}
19181940

19191941
{--------------------------------------------------------------------
19201942
The bin constructor maintains the size of the tree

0 commit comments

Comments
 (0)