Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 150 additions & 8 deletions Data/HashMap/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ import Data.Functor.Identity (Identity (..))
import Data.Hashable (Hashable)
import Data.Hashable.Lifted (Hashable1, Hashable2)
import Data.HashMap.Internal.List (isPermutationBy, unorderedCompare)
import Data.Maybe (isNothing)
import Data.Semigroup (Semigroup (..), stimesIdempotentMonoid)
import GHC.Exts (Int (..), Int#, TYPE, (==#))
import GHC.Stack (HasCallStack)
Expand Down Expand Up @@ -1102,7 +1103,12 @@ delete k m = delete' (hash k) k m
{-# INLINABLE delete #-}

delete' :: Eq k => Hash -> k -> HashMap k v -> HashMap k v
delete' h0 k0 m0 = go h0 k0 0 m0
delete' h0 k0 m0 = deleteSubTree h0 k0 0 m0

-- | This version of 'delete' can be used on subtrees when a the
-- corresponding 'Shift' argument is supplied.
deleteSubTree :: Eq k => Hash -> k -> Shift -> HashMap k v -> HashMap k v
deleteSubTree = go
where
go !_ !_ !_ Empty = Empty
go h k _ t@(Leaf hy (L ky _))
Expand Down Expand Up @@ -1139,7 +1145,7 @@ delete' h0 k0 m0 = go h0 k0 0 m0
let ary' = A.delete ary i
bm = fullBitmap .&. complement (1 `unsafeShiftL` i)
in BitmapIndexed bm ary'
_ -> Full (A.update ary i st')
_ -> Full (updateFullArray ary i st')
where i = index h s
go h k _ t@(Collision hy v)
| h == hy = case indexOf k v of
Expand Down Expand Up @@ -1188,7 +1194,7 @@ deleteKeyExists !collPos0 !h0 !k0 !m0 = go collPos0 h0 k0 m0
let ary' = A.delete ary i
bm = fullBitmap .&. complement (1 `unsafeShiftL` i)
in BitmapIndexed bm ary'
_ -> Full (A.update ary i st')
_ -> Full (updateFullArray ary i st')
where i = indexSH shiftedHash
go collPos _shiftedHash _k (Collision h v)
| A.length v == 2
Expand Down Expand Up @@ -1780,14 +1786,150 @@ mapKeys f = fromList . foldrWithKey (\k x xs -> (f k, x) : xs) []

-- | \(O(n \log m)\) Difference of two maps. Return elements of the first map
-- not existing in the second.
difference :: (Eq k, Hashable k) => HashMap k v -> HashMap k w -> HashMap k v
difference a b = foldlWithKey' go empty a
difference :: Eq k => HashMap k v -> HashMap k w -> HashMap k v
difference = go 0
where
go m k v = case lookup k b of
Nothing -> unsafeInsert k v m
_ -> m
{- Somehow we get repeated "cases of" on the Hashmap arguments:

$wgo1
= \ (ww :: Int#) (ds :: HashMap k v) (ds1 :: HashMap k w) ->
case ds of wild {
__DEFAULT ->
case ds1 of wild1 {
__DEFAULT ->
case wild of wild2 {
BitmapIndexed bx bx1 ->
case wild1 of {
BitmapIndexed bx2 bx3 ->

Maybe don't force the first !_?!

Or maybe this helps avoid more evaluations later on? (Check Cmm)
-}
go !_s Empty !_ = Empty
go s t1@(Leaf h1 (L k1 _)) t2
= lookupCont (\_ -> t1) (\_ _ -> Empty) h1 k1 s t2
go _ t1 Empty = t1
go s t1 (Leaf h2 (L k2 _)) = deleteSubTree h2 k2 s t1

go s t1@(BitmapIndexed b1 ary1) (BitmapIndexed b2 ary2)
= differenceArrays go s b1 ary1 t1 b2 ary2
go s t1@(Full ary1) (BitmapIndexed b2 ary2)
= differenceArrays go s fullBitmap ary1 t1 b2 ary2
go s t1@(BitmapIndexed b1 ary1) (Full ary2)
= differenceArrays go s b1 ary1 t1 fullBitmap ary2
go s t1@(Full ary1) (Full ary2)
= differenceArrays go s fullBitmap ary1 t1 fullBitmap ary2

go s t1@(Collision h1 _) (BitmapIndexed b2 ary2)
| b2 .&. m == 0 = t1
| otherwise = go (nextShift s) t1 (A.index ary2 (sparseIndex b2 m))
where m = mask h1 s
go s t1@(Collision h1 _) (Full ary2)
= go (nextShift s) t1 (A.index ary2 (index h1 s))

go s t1@(BitmapIndexed b1 ary1) t2@(Collision h2 _)
| b1 .&. m == 0 = t1
| otherwise =
let !st = A.index ary1 i1
in case go (nextShift s) st t2 of
Empty {- | A.length ary1 == 1 -> Empty -- Impossible! -}
| A.length ary1 == 2 ->
case (i1, A.index ary1 0, A.index ary1 1) of
(0, _, l) | isLeafOrCollision l -> l
(1, l, _) | isLeafOrCollision l -> l
_ -> bIndexed
| otherwise -> bIndexed
where
bIndexed = BitmapIndexed (b1 .&. complement m) (A.delete ary1 i1)
st' | isLeafOrCollision st' && A.length ary1 == 1 -> st'
| st `ptrEq` st' -> t1
| otherwise -> BitmapIndexed b1 (A.update ary1 i1 st')
where
m = mask h2 s
i1 = sparseIndex b1 m
go s t1@(Full ary1) t2@(Collision h2 _)
= let !st = A.index ary1 i
in case go (nextShift s) st t2 of
Empty ->
let ary1' = A.delete ary1 i
bm = fullBitmap .&. complement (1 `unsafeShiftL` i)
in BitmapIndexed bm ary1'
st' | st `ptrEq` st' -> t1
| otherwise -> Full (updateFullArray ary1 i st')
where i = index h2 s

-- TODO: Why does $wdifferenceCollisions appear three times in the Core
-- for difference, and not just once?
go _ t1@(Collision h1 ary1) (Collision h2 ary2)
= differenceCollisions h1 ary1 t1 h2 ary2
{-# INLINABLE difference #-}

differenceArrays :: (Shift -> HashMap k1 v1 -> HashMap k1 v2 -> HashMap k1 v1) -> Shift -> Bitmap -> A.Array (HashMap k1 v1) -> HashMap k1 v1 -> Bitmap -> A.Array (HashMap k1 v2) -> HashMap k1 v1
differenceArrays diff !s !b1 !ary1 !t1 !b2 !ary2
| b1 .&. b2 == 0 = t1
| {- b1 == b2 && -} A.unsafeSameArray ary1 ary2 = Empty
| otherwise = runST $ do
mary <- A.new_ $ A.length ary1

-- TODO: i == popCount bResult. Not sure if that would be faster.
-- Also i1 is in some relation with b1'
--
-- TODO: Depending on sameAs1 the Core contains jumps to either
-- $s$wgo or $s$wgo1. Maybe it would be better to keep track of
-- the "sameness" as an Int?!
let go !i !i1 !b1' !bResult !sameAs1
| b1' == 0 = pure (bResult, sameAs1)
| otherwise = do
!st1 <- A.indexM ary1 i1
case m .&. b2 of
0 -> do
A.write mary i st1
go (i + 1) (i1 + 1) nextB1' (bResult .|. m) sameAs1
_ -> do
!st2 <- A.indexM ary2 (sparseIndex b2 m)
case diff (nextShift s) st1 st2 of
Empty -> go i (i1 + 1) nextB1' bResult False
st -> do
A.write mary i st
let same = st `ptrEq` st1
go (i + 1) (i1 + 1) nextB1' (bResult .|. m) (sameAs1 && same)
where
m = b1' .&. negate b1'
nextB1' = b1' .&. complement m

(bResult, sameAs1) <- go 0 0 b1 0 True -- FIXME: Does this allocate a tuple?
if sameAs1
then pure t1
else case popCount bResult of
0 -> pure Empty
1 -> do
l <- A.read mary 0
if isLeafOrCollision l
then pure l
else BitmapIndexed bResult <$> (A.unsafeFreeze =<< A.shrink mary 1)
n -> bitmapIndexedOrFull bResult <$> (A.unsafeFreeze =<< A.shrink mary n)
{-# INLINABLE differenceArrays #-}

-- TODO: This could be faster if we would keep track of which elements of ary2
-- we've already matched. Those could be skipped when we check the following
-- elements of ary1.
--
-- TODO: Get ary1 unboxed somehow?! The reboxing is quite weird.
-- Maybe try a different order of arguments?!
differenceCollisions :: Eq k => Hash -> A.Array (Leaf k v1) -> HashMap k v1 -> Hash -> A.Array (Leaf k v2) -> HashMap k v1
differenceCollisions !h1 !ary1 t1 !h2 !ary2
| h1 == h2 =
-- TODO: This actually allocates Maybes!
let ary = A.filter (\(L k1 _) -> isNothing (indexOf k1 ary2)) ary1
in case A.length ary of
0 -> Empty
1 -> Leaf h1 (A.index ary 0)
n | A.length ary1 == n -> t1
| otherwise -> Collision h1 ary
| otherwise = t1
{-# INLINABLE differenceCollisions #-}

-- | \(O(n \log m)\) Difference with a combining function. When two equal keys are
-- encountered, the combining function is applied to the values of these keys.
-- If it returns 'Nothing', the element is discarded (proper set difference). If
Expand Down
24 changes: 23 additions & 1 deletion Data/HashMap/Internal/Array.hs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ module Data.HashMap.Internal.Array
, thaw
, map
, map'
, filter
, traverse
, traverse'
, toList
Expand Down Expand Up @@ -113,12 +114,14 @@ import qualified Prelude
if (_k_) < 0 || (_k_) >= (_len_) then error ("Data.HashMap.Internal.Array." ++ (_func_) ++ ": bounds error, offset " ++ show (_k_) ++ ", length " ++ show (_len_)) else
# define CHECK_OP(_func_,_op_,_lhs_,_rhs_) \
if not ((_lhs_) _op_ (_rhs_)) then error ("Data.HashMap.Internal.Array." ++ (_func_) ++ ": Check failed: _lhs_ _op_ _rhs_ (" ++ show (_lhs_) ++ " vs. " ++ show (_rhs_) ++ ")") else
# define CHECK_GE(_func_,_lhs_,_rhs_) CHECK_OP(_func_,>=,_lhs_,_rhs_)
# define CHECK_GT(_func_,_lhs_,_rhs_) CHECK_OP(_func_,>,_lhs_,_rhs_)
# define CHECK_LE(_func_,_lhs_,_rhs_) CHECK_OP(_func_,<=,_lhs_,_rhs_)
# define CHECK_EQ(_func_,_lhs_,_rhs_) CHECK_OP(_func_,==,_lhs_,_rhs_)
#else
# define CHECK_BOUNDS(_func_,_len_,_k_)
# define CHECK_OP(_func_,_op_,_lhs_,_rhs_)
# define CHECK_GE(_func_,_lhs_,_rhs_)
# define CHECK_GT(_func_,_lhs_,_rhs_)
# define CHECK_LE(_func_,_lhs_,_rhs_)
# define CHECK_EQ(_func_,_lhs_,_rhs_)
Expand Down Expand Up @@ -221,7 +224,7 @@ new_ n = new n undefinedElem
-- | The returned array is the same as the array given, as it is shrunk in place.
shrink :: MArray s a -> Int -> ST s (MArray s a)
shrink mary _n@(I# n#) =
CHECK_GT("shrink", _n, (0 :: Int))
CHECK_GE("shrink", _n, (0 :: Int))
CHECK_LE("shrink", _n, (unsafeLengthM mary))
ST $ \s -> case Exts.shrinkSmallMutableArray# (unMArray mary) n# s of
s' -> (# s', mary #)
Expand Down Expand Up @@ -496,6 +499,25 @@ map' f = \ ary ->
go ary mary (i+1) n
{-# INLINE map' #-}

filter :: (a -> Bool) -> Array a -> Array a
filter f = \ ary ->
let !n = length ary
in run $ do
mary <- new_ n
len <- go ary mary 0 0 n
shrink mary len
where
go ary mary iAry iMary n
| iAry >= n = return iMary
| otherwise = do
x <- indexM ary iAry
if f x
then do
write mary iMary x
go ary mary (iAry + 1) (iMary + 1) n
else go ary mary (iAry + 1) iMary n
{-# INLINE filter #-}

fromList :: Int -> [a] -> Array a
fromList n xs0 =
CHECK_EQ("fromList", n, Prelude.length xs0)
Expand Down