Skip to content

Commit 200d7ae

Browse files
authored
Improve isSubsetOf (#615)
* Add recursive size tests to `Data.Set.isSubsetOf`. * Add a special case for singleton subsets to avoid extra splits at all the leaves. * Do the same for `isSubmapOf`. * Add the singleton special case to `disjoint`. * Tighten advertised bounds and improve documentation. Closes #614
1 parent 880fa44 commit 200d7ae

File tree

2 files changed

+66
-11
lines changed

2 files changed

+66
-11
lines changed

Data/Map/Internal.hs

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2727,22 +2727,33 @@ isSubmapOf m1 m2 = isSubmapOfBy (==) m1 m2
27272727
> isSubmapOfBy (<) (fromList [('a',1)]) (fromList [('a',1),('b',2)])
27282728
> isSubmapOfBy (==) (fromList [('a',1),('b',2)]) (fromList [('a',1)])
27292729
2730+
Note that @isSubmapOfBy (\_ _ -> True) m1 m2@ tests whether all the keys
2731+
in @m1@ are also keys in @m2@.
27302732
27312733
-}
27322734
isSubmapOfBy :: Ord k => (a->b->Bool) -> Map k a -> Map k b -> Bool
27332735
isSubmapOfBy f t1 t2
2734-
= (size t1 <= size t2) && (submap' f t1 t2)
2736+
= size t1 <= size t2 && submap' f t1 t2
27352737
#if __GLASGOW_HASKELL__
27362738
{-# INLINABLE isSubmapOfBy #-}
27372739
#endif
27382740

2741+
-- Test whether a map is a submap of another without the *initial*
2742+
-- size test. See Data.Set.Internal.isSubsetOfX for notes on
2743+
-- implementation and analysis.
27392744
submap' :: Ord a => (b -> c -> Bool) -> Map a b -> Map a c -> Bool
27402745
submap' _ Tip _ = True
27412746
submap' _ _ Tip = False
2747+
submap' f (Bin 1 kx x _ _) t
2748+
= case lookup kx t of
2749+
Just y -> f x y
2750+
Nothing -> False
27422751
submap' f (Bin _ kx x l r) t
27432752
= case found of
27442753
Nothing -> False
2745-
Just y -> f x y && submap' f l lt && submap' f r gt
2754+
Just y -> f x y
2755+
&& size l <= size lt && size r <= size gt
2756+
&& submap' f l lt && submap' f r gt
27462757
where
27472758
(lt,found,gt) = splitLookup kx t
27482759
#if __GLASGOW_HASKELL__
@@ -2778,7 +2789,7 @@ isProperSubmapOf m1 m2
27782789
-}
27792790
isProperSubmapOfBy :: Ord k => (a -> b -> Bool) -> Map k a -> Map k b -> Bool
27802791
isProperSubmapOfBy f t1 t2
2781-
= (size t1 < size t2) && (submap' f t1 t2)
2792+
= size t1 < size t2 && submap' f t1 t2
27822793
#if __GLASGOW_HASKELL__
27832794
{-# INLINABLE isProperSubmapOfBy #-}
27842795
#endif

Data/Set/Internal.hs

Lines changed: 52 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -596,29 +596,67 @@ delete = go
596596
{--------------------------------------------------------------------
597597
Subset
598598
--------------------------------------------------------------------}
599-
-- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
599+
-- | /O(m*log(n\/m + 1)), m <= n/.
600+
-- @(s1 \`isProperSubsetOf\` s2)@ indicates whether @s1@ is a
601+
-- proper subset of @s2@.
602+
--
603+
-- @
604+
-- s1 \`isProperSubsetOf\` s2 = s1 ``isSubsetOf`` s2 && s1 /= s2
605+
-- @
600606
isProperSubsetOf :: Ord a => Set a -> Set a -> Bool
601607
isProperSubsetOf s1 s2
602-
= (size s1 < size s2) && (isSubsetOf s1 s2)
608+
= size s1 < size s2 && isSubsetOfX s1 s2
603609
#if __GLASGOW_HASKELL__
604610
{-# INLINABLE isProperSubsetOf #-}
605611
#endif
606612

607613

608-
-- | /O(n+m)/. Is this a subset?
609-
-- @(s1 \`isSubsetOf\` s2)@ tells whether @s1@ is a subset of @s2@.
614+
-- | /O(m*log(n\/m + 1)), m <= n/.
615+
-- @(s1 \`isSubsetOf\` s2)@ indicates whether @s1@ is a subset of @s2@.
616+
--
617+
-- @
618+
-- s1 \`isSubsetOf\` s2 = all (``member`` s2) s1
619+
-- s1 \`isSubsetOf\` s2 = null (s1 ``difference`` s2)
620+
-- s1 \`isSubsetOf\` s2 = s1 ``union`` s2 == s2
621+
-- s1 \`isSubsetOf\` s2 = s1 ``intersection`` s2 == s1
622+
-- @
610623
isSubsetOf :: Ord a => Set a -> Set a -> Bool
611624
isSubsetOf t1 t2
612-
= (size t1 <= size t2) && (isSubsetOfX t1 t2)
625+
= size t1 <= size t2 && isSubsetOfX t1 t2
613626
#if __GLASGOW_HASKELL__
614627
{-# INLINABLE isSubsetOf #-}
615628
#endif
616629

630+
-- Test whether a set is a subset of another without the *initial*
631+
-- size test.
632+
--
633+
-- This function is structured very much like `difference`, `union`,
634+
-- and `intersection`. Whereas the bounds proofs for those in Blelloch
635+
-- et al needed to accound for both "split work" and "merge work", we
636+
-- only have to worry about split work here, which is the same as in
637+
-- those functions.
617638
isSubsetOfX :: Ord a => Set a -> Set a -> Bool
618639
isSubsetOfX Tip _ = True
619640
isSubsetOfX _ Tip = False
641+
-- Skip the final split when we hit a singleton.
642+
isSubsetOfX (Bin 1 x _ _) t = member x t
620643
isSubsetOfX (Bin _ x l r) t
621-
= found && isSubsetOfX l lt && isSubsetOfX r gt
644+
= found &&
645+
-- Cheap size checks can sometimes save expensive recursive calls when the
646+
-- result will be False. Suppose we check whether [1..10] (with root 4) is
647+
-- a subset of [0..9]. After the first split, we have to check if [1..3] is
648+
-- a subset of [0..3] and if [5..10] is a subset of [5..9]. But we can bail
649+
-- immediately because size [5..10] > size [5..9].
650+
--
651+
-- Why not just call `isSubsetOf` on each side to do the size checks?
652+
-- Because that could make a recursive call on the left even though the
653+
-- size check would fail on the right. In principle, we could take this to
654+
-- extremes by maintaining a queue of pairs of sets to be checked, working
655+
-- through the tree level-wise. But that would impose higher administrative
656+
-- costs without obvious benefits. It might be worth considering if we find
657+
-- a way to use it to tighten the bounds in some useful/comprehensible way.
658+
size l <= size lt && size r <= size gt &&
659+
isSubsetOfX l lt && isSubsetOfX r gt
622660
where
623661
(lt,found,gt) = splitMember x t
624662
#if __GLASGOW_HASKELL__
@@ -628,19 +666,25 @@ isSubsetOfX (Bin _ x l r) t
628666
{--------------------------------------------------------------------
629667
Disjoint
630668
--------------------------------------------------------------------}
631-
-- | /O(n+m)/. Check whether two sets are disjoint (i.e. their intersection
632-
-- is empty).
669+
-- | /O(m*log(n\/m + 1)), m <= n/. Check whether two sets are disjoint
670+
-- (i.e., their intersection is empty).
633671
--
634672
-- > disjoint (fromList [2,4,6]) (fromList [1,3]) == True
635673
-- > disjoint (fromList [2,4,6,8]) (fromList [2,3,5,7]) == False
636674
-- > disjoint (fromList [1,2]) (fromList [1,2,3,4]) == False
637675
-- > disjoint (fromList []) (fromList []) == True
638676
--
677+
-- @
678+
-- xs ``disjoint`` ys = null (xs ``intersection`` ys)
679+
-- @
680+
--
639681
-- @since 0.5.11
640682

641683
disjoint :: Ord a => Set a -> Set a -> Bool
642684
disjoint Tip _ = True
643685
disjoint _ Tip = True
686+
-- Avoid a split for the singleton case.
687+
disjoint (Bin 1 x _ _) t = x `notMember` t
644688
disjoint (Bin _ x l r) t
645689
-- Analogous implementation to `subsetOfX`
646690
= not found && disjoint l lt && disjoint r gt

0 commit comments

Comments
 (0)