diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index e5b732cd..41f0d738 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -163,6 +163,7 @@ import Data.Functor.Identity (Identity (..)) import Data.Hashable (Hashable) import Data.Hashable.Lifted (Hashable1, Hashable2) import Data.HashMap.Internal.List (isPermutationBy, unorderedCompare) +import Data.Maybe (isNothing) import Data.Semigroup (Semigroup (..), stimesIdempotentMonoid) import GHC.Exts (Int (..), Int#, TYPE, (==#)) import GHC.Stack (HasCallStack) @@ -1102,56 +1103,60 @@ delete k m = delete' (hash k) k m {-# INLINABLE delete #-} delete' :: Eq k => Hash -> k -> HashMap k v -> HashMap k v -delete' h0 k0 m0 = go h0 k0 0 m0 - where - go !_ !_ !_ Empty = Empty - go h k _ t@(Leaf hy (L ky _)) - | hy == h && ky == k = Empty - | otherwise = t - go h k s t@(BitmapIndexed b ary) - | b .&. m == 0 = t - | otherwise = - let !st = A.index ary i - !st' = go h k (nextShift s) st - in if st' `ptrEq` st - then t - else case st' of - Empty | A.length ary == 1 -> Empty - | A.length ary == 2 -> - case (i, A.index ary 0, A.index ary 1) of - (0, _, l) | isLeafOrCollision l -> l - (1, l, _) | isLeafOrCollision l -> l - _ -> bIndexed - | otherwise -> bIndexed - where - bIndexed = BitmapIndexed (b .&. complement m) (A.delete ary i) - l | isLeafOrCollision l && A.length ary == 1 -> l - _ -> BitmapIndexed b (A.update ary i st') - where m = mask h s - i = sparseIndex b m - go h k s t@(Full ary) = - let !st = A.index ary i - !st' = go h k (nextShift s) st +delete' h0 k0 m0 = deleteSubTree h0 k0 0 m0 +{-# INLINABLE delete' #-} + +-- | This version of 'delete' can be used on subtrees when a the +-- corresponding 'Shift' argument is supplied. +deleteSubTree :: Eq k => Hash -> k -> Shift -> HashMap k v -> HashMap k v +deleteSubTree !_ !_ !_ Empty = Empty +deleteSubTree h k _ t@(Leaf hy (L ky _)) + | hy == h && ky == k = Empty + | otherwise = t +deleteSubTree h k s t@(BitmapIndexed b ary) + | b .&. m == 0 = t + | otherwise = + let !st = A.index ary i + !st' = deleteSubTree h k (nextShift s) st in if st' `ptrEq` st then t else case st' of - Empty -> - let ary' = A.delete ary i - bm = fullBitmap .&. complement (1 `unsafeShiftL` i) - in BitmapIndexed bm ary' - _ -> Full (A.update ary i st') - where i = index h s - go h k _ t@(Collision hy v) - | h == hy = case indexOf k v of - Just i - | A.length v == 2 -> - if i == 0 - then Leaf h (A.index v 1) - else Leaf h (A.index v 0) - | otherwise -> Collision h (A.delete v i) - Nothing -> t - | otherwise = t -{-# INLINABLE delete' #-} + Empty | A.length ary == 1 -> Empty + | A.length ary == 2 -> + case (i, A.index ary 0, A.index ary 1) of + (0, _, l) | isLeafOrCollision l -> l + (1, l, _) | isLeafOrCollision l -> l + _ -> bIndexed + | otherwise -> bIndexed + where + bIndexed = BitmapIndexed (b .&. complement m) (A.delete ary i) + l | isLeafOrCollision l && A.length ary == 1 -> l + _ -> BitmapIndexed b (A.update ary i st') + where m = mask h s + i = sparseIndex b m +deleteSubTree h k s t@(Full ary) = + let !st = A.index ary i + !st' = deleteSubTree h k (nextShift s) st + in if st' `ptrEq` st + then t + else case st' of + Empty -> + let ary' = A.delete ary i + bm = fullBitmap .&. complement (1 `unsafeShiftL` i) + in BitmapIndexed bm ary' + _ -> Full (updateFullArray ary i st') + where i = index h s +deleteSubTree h k _ t@(Collision hy v) + | h == hy = case indexOf k v of + Just i + | A.length v == 2 -> + if i == 0 + then Leaf h (A.index v 1) + else Leaf h (A.index v 0) + | otherwise -> Collision h (A.delete v i) + Nothing -> t + | otherwise = t +{-# INLINABLE deleteSubTree #-} -- | Delete optimized for the case when we know the key is in the map. -- @@ -1188,7 +1193,7 @@ deleteKeyExists !collPos0 !h0 !k0 !m0 = go collPos0 h0 k0 m0 let ary' = A.delete ary i bm = fullBitmap .&. complement (1 `unsafeShiftL` i) in BitmapIndexed bm ary' - _ -> Full (A.update ary i st') + _ -> Full (updateFullArray ary i st') where i = indexSH shiftedHash go collPos _shiftedHash _k (Collision h v) | A.length v == 2 @@ -1780,14 +1785,150 @@ mapKeys f = fromList . foldrWithKey (\k x xs -> (f k, x) : xs) [] -- | \(O(n \log m)\) Difference of two maps. Return elements of the first map -- not existing in the second. -difference :: (Eq k, Hashable k) => HashMap k v -> HashMap k w -> HashMap k v -difference a b = foldlWithKey' go empty a +difference :: Eq k => HashMap k v -> HashMap k w -> HashMap k v +difference = go 0 where - go m k v = case lookup k b of - Nothing -> unsafeInsert k v m - _ -> m +{- Somehow we get repeated "cases of" on the Hashmap arguments: + + $wgo1 + = \ (ww :: Int#) (ds :: HashMap k v) (ds1 :: HashMap k w) -> + case ds of wild { + __DEFAULT -> + case ds1 of wild1 { + __DEFAULT -> + case wild of wild2 { + BitmapIndexed bx bx1 -> + case wild1 of { + BitmapIndexed bx2 bx3 -> + +Maybe don't force the first !_?! + +Or maybe this helps avoid more evaluations later on? (Check Cmm) +-} + go !_s Empty !_ = Empty + go s t1@(Leaf h1 (L k1 _)) t2 + = lookupCont (\_ -> t1) (\_ _ -> Empty) h1 k1 s t2 + go _ t1 Empty = t1 + go s t1 (Leaf h2 (L k2 _)) = deleteSubTree h2 k2 s t1 + + go s t1@(BitmapIndexed b1 ary1) (BitmapIndexed b2 ary2) + = differenceArrays go s b1 ary1 t1 b2 ary2 + go s t1@(Full ary1) (BitmapIndexed b2 ary2) + = differenceArrays go s fullBitmap ary1 t1 b2 ary2 + go s t1@(BitmapIndexed b1 ary1) (Full ary2) + = differenceArrays go s b1 ary1 t1 fullBitmap ary2 + go s t1@(Full ary1) (Full ary2) + = differenceArrays go s fullBitmap ary1 t1 fullBitmap ary2 + + go s t1@(Collision h1 _) (BitmapIndexed b2 ary2) + | b2 .&. m == 0 = t1 + | otherwise = go (nextShift s) t1 (A.index ary2 (sparseIndex b2 m)) + where m = mask h1 s + go s t1@(Collision h1 _) (Full ary2) + = go (nextShift s) t1 (A.index ary2 (index h1 s)) + + go s t1@(BitmapIndexed b1 ary1) t2@(Collision h2 _) + | b1 .&. m == 0 = t1 + | otherwise = + let !st = A.index ary1 i1 + in case go (nextShift s) st t2 of + Empty {- | A.length ary1 == 1 -> Empty -- Impossible! -} + | A.length ary1 == 2 -> + case (i1, A.index ary1 0, A.index ary1 1) of + (0, _, l) | isLeafOrCollision l -> l + (1, l, _) | isLeafOrCollision l -> l + _ -> bIndexed + | otherwise -> bIndexed + where + bIndexed = BitmapIndexed (b1 .&. complement m) (A.delete ary1 i1) + st' | isLeafOrCollision st' && A.length ary1 == 1 -> st' + | st `ptrEq` st' -> t1 + | otherwise -> BitmapIndexed b1 (A.update ary1 i1 st') + where + m = mask h2 s + i1 = sparseIndex b1 m + go s t1@(Full ary1) t2@(Collision h2 _) + = let !st = A.index ary1 i + in case go (nextShift s) st t2 of + Empty -> + let ary1' = A.delete ary1 i + bm = fullBitmap .&. complement (1 `unsafeShiftL` i) + in BitmapIndexed bm ary1' + st' | st `ptrEq` st' -> t1 + | otherwise -> Full (updateFullArray ary1 i st') + where i = index h2 s + + -- TODO: Why does $wdifferenceCollisions appear three times in the Core + -- for difference, and not just once? + go _ t1@(Collision h1 ary1) (Collision h2 ary2) + = differenceCollisions h1 ary1 t1 h2 ary2 {-# INLINABLE difference #-} +differenceArrays :: (Shift -> HashMap k1 v1 -> HashMap k1 v2 -> HashMap k1 v1) -> Shift -> Bitmap -> A.Array (HashMap k1 v1) -> HashMap k1 v1 -> Bitmap -> A.Array (HashMap k1 v2) -> HashMap k1 v1 +differenceArrays diff !s !b1 !ary1 !t1 !b2 !ary2 + | b1 .&. b2 == 0 = t1 + | {- b1 == b2 && -} A.unsafeSameArray ary1 ary2 = Empty + | otherwise = runST $ do + mary <- A.new_ $ A.length ary1 + + -- TODO: i == popCount bResult. Not sure if that would be faster. + -- Also i1 is in some relation with b1' + -- + -- TODO: Depending on sameAs1 the Core contains jumps to either + -- $s$wgo or $s$wgo1. Maybe it would be better to keep track of + -- the "sameness" as an Int?! + let go !i !i1 !b1' !bResult !sameAs1 + | b1' == 0 = pure (bResult, sameAs1) + | otherwise = do + !st1 <- A.indexM ary1 i1 + case m .&. b2 of + 0 -> do + A.write mary i st1 + go (i + 1) (i1 + 1) nextB1' (bResult .|. m) sameAs1 + _ -> do + !st2 <- A.indexM ary2 (sparseIndex b2 m) + case diff (nextShift s) st1 st2 of + Empty -> go i (i1 + 1) nextB1' bResult False + st -> do + A.write mary i st + let same = st `ptrEq` st1 + go (i + 1) (i1 + 1) nextB1' (bResult .|. m) (sameAs1 && same) + where + m = b1' .&. negate b1' + nextB1' = b1' .&. complement m + + (bResult, sameAs1) <- go 0 0 b1 0 True -- FIXME: Does this allocate a tuple? + if sameAs1 + then pure t1 + else case popCount bResult of + 0 -> pure Empty + 1 -> do + l <- A.read mary 0 + if isLeafOrCollision l + then pure l + else BitmapIndexed bResult <$> (A.unsafeFreeze =<< A.shrink mary 1) + n -> bitmapIndexedOrFull bResult <$> (A.unsafeFreeze =<< A.shrink mary n) +{-# INLINABLE differenceArrays #-} + +-- TODO: This could be faster if we would keep track of which elements of ary2 +-- we've already matched. Those could be skipped when we check the following +-- elements of ary1. +-- +-- TODO: Get ary1 unboxed somehow?! The reboxing is quite weird. +-- Maybe try a different order of arguments?! +differenceCollisions :: Eq k => Hash -> A.Array (Leaf k v1) -> HashMap k v1 -> Hash -> A.Array (Leaf k v2) -> HashMap k v1 +differenceCollisions !h1 !ary1 t1 !h2 !ary2 + | h1 == h2 = + -- TODO: This actually allocates Maybes! + let ary = A.filter (\(L k1 _) -> isNothing (indexOf k1 ary2)) ary1 + in case A.length ary of + 0 -> Empty + 1 -> Leaf h1 (A.index ary 0) + n | A.length ary1 == n -> t1 + | otherwise -> Collision h1 ary + | otherwise = t1 +{-# INLINABLE differenceCollisions #-} + -- | \(O(n \log m)\) Difference with a combining function. When two equal keys are -- encountered, the combining function is applied to the values of these keys. -- If it returns 'Nothing', the element is discarded (proper set difference). If diff --git a/Data/HashMap/Internal/Array.hs b/Data/HashMap/Internal/Array.hs index 3e47b0c4..2c023638 100644 --- a/Data/HashMap/Internal/Array.hs +++ b/Data/HashMap/Internal/Array.hs @@ -72,6 +72,7 @@ module Data.HashMap.Internal.Array , thaw , map , map' + , filter , traverse , traverse' , toList @@ -113,12 +114,14 @@ import qualified Prelude if (_k_) < 0 || (_k_) >= (_len_) then error ("Data.HashMap.Internal.Array." ++ (_func_) ++ ": bounds error, offset " ++ show (_k_) ++ ", length " ++ show (_len_)) else # define CHECK_OP(_func_,_op_,_lhs_,_rhs_) \ if not ((_lhs_) _op_ (_rhs_)) then error ("Data.HashMap.Internal.Array." ++ (_func_) ++ ": Check failed: _lhs_ _op_ _rhs_ (" ++ show (_lhs_) ++ " vs. " ++ show (_rhs_) ++ ")") else +# define CHECK_GE(_func_,_lhs_,_rhs_) CHECK_OP(_func_,>=,_lhs_,_rhs_) # define CHECK_GT(_func_,_lhs_,_rhs_) CHECK_OP(_func_,>,_lhs_,_rhs_) # define CHECK_LE(_func_,_lhs_,_rhs_) CHECK_OP(_func_,<=,_lhs_,_rhs_) # define CHECK_EQ(_func_,_lhs_,_rhs_) CHECK_OP(_func_,==,_lhs_,_rhs_) #else # define CHECK_BOUNDS(_func_,_len_,_k_) # define CHECK_OP(_func_,_op_,_lhs_,_rhs_) +# define CHECK_GE(_func_,_lhs_,_rhs_) # define CHECK_GT(_func_,_lhs_,_rhs_) # define CHECK_LE(_func_,_lhs_,_rhs_) # define CHECK_EQ(_func_,_lhs_,_rhs_) @@ -221,7 +224,7 @@ new_ n = new n undefinedElem -- | The returned array is the same as the array given, as it is shrunk in place. shrink :: MArray s a -> Int -> ST s (MArray s a) shrink mary _n@(I# n#) = - CHECK_GT("shrink", _n, (0 :: Int)) + CHECK_GE("shrink", _n, (0 :: Int)) CHECK_LE("shrink", _n, (unsafeLengthM mary)) ST $ \s -> case Exts.shrinkSmallMutableArray# (unMArray mary) n# s of s' -> (# s', mary #) @@ -496,6 +499,25 @@ map' f = \ ary -> go ary mary (i+1) n {-# INLINE map' #-} +filter :: (a -> Bool) -> Array a -> Array a +filter f = \ ary -> + let !n = length ary + in run $ do + mary <- new_ n + len <- go ary mary 0 0 n + shrink mary len + where + go ary mary iAry iMary n + | iAry >= n = return iMary + | otherwise = do + x <- indexM ary iAry + if f x + then do + write mary iMary x + go ary mary (iAry + 1) (iMary + 1) n + else go ary mary (iAry + 1) iMary n +{-# INLINE filter #-} + fromList :: Int -> [a] -> Array a fromList n xs0 = CHECK_EQ("fromList", n, Prelude.length xs0)