From 4c2f48c80266242748919e521f5f804c5cd2dd39 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Sun, 5 Oct 2025 19:12:03 +0200 Subject: [PATCH 1/7] two: Optimize computation of idx2 further This reduces the core size for the inner loop of two by 10 terms. --- Data/HashMap/Internal.hs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 6cad1028..29081931 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -164,7 +164,7 @@ import Data.Hashable (Hashable) import Data.Hashable.Lifted (Hashable1, Hashable2) import Data.HashMap.Internal.List (isPermutationBy, unorderedCompare) import Data.Semigroup (Semigroup (..), stimesIdempotentMonoid) -import GHC.Exts (Int (..), Int#, TYPE, (==#)) +import GHC.Exts (Int (..), Int#, TYPE, Word (..), (==#)) import GHC.Stack (HasCallStack) import Prelude hiding (Foldable (..), filter, lookup, map, pred) @@ -978,11 +978,9 @@ two = go ary <- A.unsafeFreeze mary return $ BitmapIndexed (bp1 .|. bp2) ary where - bp1 = mask h1 s - bp2 = mask h2 s - !(I# i1) = index h1 s - !(I# i2) = index h2 s - idx2 = I# (i1 Exts.<# i2) + !bp1@(W# bp1#) = mask h1 s + !bp2@(W# bp2#) = mask h2 s + idx2 = I# (bp1# `Exts.ltWord#` bp2#) -- This way of computing idx2 saves us a branch compared to the previous approach: -- -- idx2 | index h1 s < index h2 s = 1 From 19bf8b609dbb7044e7077e65a94243abe2090672 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Thu, 16 Oct 2025 16:58:18 +0200 Subject: [PATCH 2/7] Refactor `two` * Use the "shifted hash" approach. * Create a variant `two'` that can be used to address #468. --- Data/HashMap/Internal.hs | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 29081931..49787890 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -965,21 +965,25 @@ unsafeInsert k0 v0 m0 = runST (go h0 k0 v0 0 m0) -- because it's already in WHNF (having just been matched) and we -- just put it directly in an array. two :: Shift -> Hash -> k -> v -> Hash -> HashMap k v -> ST s (HashMap k v) -two = go +two s h1 k1 v1 = two' s h1 (Leaf h1 (L k1 v1)) +{-# INLINE two #-} + +two' :: Shift -> Hash -> HashMap k v -> Hash -> HashMap k v -> ST s (HashMap k v) +two' s h1 lc1 h2 lc2 = go (shiftHash h1 s) lc1 (shiftHash h2 s) lc2 where - go s h1 k1 v1 h2 t2 + go !sh1 t1 !sh2 t2 | bp1 == bp2 = do - st <- go (nextShift s) h1 k1 v1 h2 t2 + st <- go (shiftHash sh1 bitsPerSubkey) t1 (shiftHash sh2 bitsPerSubkey) t2 ary <- A.singletonM st return $ BitmapIndexed bp1 ary | otherwise = do - mary <- A.new 2 $! Leaf h1 (L k1 v1) + mary <- A.new 2 t1 A.write mary idx2 t2 ary <- A.unsafeFreeze mary return $ BitmapIndexed (bp1 .|. bp2) ary where - !bp1@(W# bp1#) = mask h1 s - !bp2@(W# bp2#) = mask h2 s + !bp1@(W# bp1#) = mask' sh1 + !bp2@(W# bp2#) = mask' sh2 idx2 = I# (bp1# `Exts.ltWord#` bp2#) -- This way of computing idx2 saves us a branch compared to the previous approach: -- @@ -987,7 +991,13 @@ two = go -- | otherwise = 0 -- -- See https://github.com/haskell-unordered-containers/unordered-containers/issues/75#issuecomment-1128419337 -{-# INLINE two #-} + + shiftHash :: Hash -> Shift -> Word -- type ShiftedHash + shiftHash h n = h `unsafeShiftR` n + + mask' :: Word -> Bitmap + mask' w = 1 `unsafeShiftL` fromIntegral (w .&. subkeyMask) +{-# INLINE two' #-} -- Really?! -- | \(O(\log n)\) Associate the value with the key in this map. If -- this map previously contained a mapping for the key, the old value From 4ae9dfa4867bd5aaf89adb1782fe3ab8d6756105 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Thu, 16 Oct 2025 17:26:02 +0200 Subject: [PATCH 3/7] Update docs and remove outdated note Closes #335. --- Data/HashMap/Internal.hs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 49787890..7f95590c 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -956,18 +956,20 @@ unsafeInsert k0 v0 m0 = runST (go h0 k0 v0 0 m0) | otherwise = go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) {-# INLINABLE unsafeInsert #-} --- | Create a map from two key-value pairs which hashes don't collide. To --- enhance sharing, the second key-value pair is represented by the hash of its --- key and a singleton HashMap pairing its key with its value. --- --- Note: to avoid silly thunks, this function must be strict in the --- key. See issue #232. We don't need to force the HashMap argument --- because it's already in WHNF (having just been matched) and we --- just put it directly in an array. +-- | Create a map from a key-value pair and a 'Leaf' or 'Collision' node with a +-- different hash. +-- +-- It is the caller's responsibility to ensure that the HashMap argument is in +-- WHNF. two :: Shift -> Hash -> k -> v -> Hash -> HashMap k v -> ST s (HashMap k v) two s h1 k1 v1 = two' s h1 (Leaf h1 (L k1 v1)) {-# INLINE two #-} +-- | Create a map from two 'Leaf' or 'Collision' nodes whose hashes are +-- distinct. +-- +-- It is the caller's responsibility to ensure that both HashMap arguments are +-- in WHNF. two' :: Shift -> Hash -> HashMap k v -> Hash -> HashMap k v -> ST s (HashMap k v) two' s h1 lc1 h2 lc2 = go (shiftHash h1 s) lc1 (shiftHash h2 s) lc2 where From f689f4954cfb264a3523e62c26ff07db8e70e7a5 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Sun, 19 Oct 2025 15:59:29 +0200 Subject: [PATCH 4/7] Introduce `ShiftedHash` --- Data/HashMap/Internal.hs | 87 ++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 47 deletions(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 7f95590c..a2f78cef 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -884,33 +884,19 @@ insertKeyExists !collPos0 !h0 !k0 x0 !m0 = go collPos0 h0 k0 x0 m0 = Leaf h (L k x) go collPos shiftedHash k x (BitmapIndexed b ary) = let !st = A.index ary i - !st' = go collPos (shiftHash shiftedHash) k x st + !st' = go collPos (nextSH shiftedHash) k x st in BitmapIndexed b (A.update ary i st') - where m = mask' shiftedHash + where m = maskSH shiftedHash i = sparseIndex b m go collPos shiftedHash k x (Full ary) = let !st = A.index ary i - !st' = go collPos (shiftHash shiftedHash) k x st + !st' = go collPos (nextSH shiftedHash) k x st in Full (updateFullArray ary i st') - where i = index' shiftedHash + where i = indexSH shiftedHash go collPos _shiftedHash k x (Collision h v) | collPos >= 0 = Collision h (setAtPosition collPos k x v) | otherwise = Empty -- error "Internal error: go {collPos negative}" go _ _ _ _ Empty = Empty -- error "Internal error: go Empty" - - -- Customized version of 'index' that doesn't require a 'Shift'. - index' :: Hash -> Int - index' w = fromIntegral $ w .&. subkeyMask - {-# INLINE index' #-} - - -- Customized version of 'mask' that doesn't require a 'Shift'. - mask' :: Word -> Bitmap - mask' w = 1 `unsafeShiftL` index' w - {-# INLINE mask' #-} - - shiftHash h = h `unsafeShiftR` bitsPerSubkey - {-# INLINE shiftHash #-} - {-# NOINLINE insertKeyExists #-} -- | Replace the ith Leaf with Leaf k v. @@ -971,11 +957,11 @@ two s h1 k1 v1 = two' s h1 (Leaf h1 (L k1 v1)) -- It is the caller's responsibility to ensure that both HashMap arguments are -- in WHNF. two' :: Shift -> Hash -> HashMap k v -> Hash -> HashMap k v -> ST s (HashMap k v) -two' s h1 lc1 h2 lc2 = go (shiftHash h1 s) lc1 (shiftHash h2 s) lc2 +two' s h1 lc1 h2 lc2 = go (shiftHash s h1) lc1 (shiftHash s h2) lc2 where go !sh1 t1 !sh2 t2 | bp1 == bp2 = do - st <- go (shiftHash sh1 bitsPerSubkey) t1 (shiftHash sh2 bitsPerSubkey) t2 + st <- go (nextSH sh1) t1 (nextSH sh2) t2 ary <- A.singletonM st return $ BitmapIndexed bp1 ary | otherwise = do @@ -984,8 +970,8 @@ two' s h1 lc1 h2 lc2 = go (shiftHash h1 s) lc1 (shiftHash h2 s) lc2 ary <- A.unsafeFreeze mary return $ BitmapIndexed (bp1 .|. bp2) ary where - !bp1@(W# bp1#) = mask' sh1 - !bp2@(W# bp2#) = mask' sh2 + !bp1@(W# bp1#) = maskSH sh1 + !bp2@(W# bp2#) = maskSH sh2 idx2 = I# (bp1# `Exts.ltWord#` bp2#) -- This way of computing idx2 saves us a branch compared to the previous approach: -- @@ -993,12 +979,6 @@ two' s h1 lc1 h2 lc2 = go (shiftHash h1 s) lc1 (shiftHash h2 s) lc2 -- | otherwise = 0 -- -- See https://github.com/haskell-unordered-containers/unordered-containers/issues/75#issuecomment-1128419337 - - shiftHash :: Hash -> Shift -> Word -- type ShiftedHash - shiftHash h n = h `unsafeShiftR` n - - mask' :: Word -> Bitmap - mask' w = 1 `unsafeShiftL` fromIntegral (w .&. subkeyMask) {-# INLINE two' #-} -- Really?! -- | \(O(\log n)\) Associate the value with the key in this map. If @@ -1188,11 +1168,11 @@ delete' h0 k0 m0 = go h0 k0 0 m0 deleteKeyExists :: Int -> Hash -> k -> HashMap k v -> HashMap k v deleteKeyExists !collPos0 !h0 !k0 !m0 = go collPos0 h0 k0 m0 where - go :: Int -> Word -> k -> HashMap k v -> HashMap k v + go :: Int -> ShiftedHash -> k -> HashMap k v -> HashMap k v go !_collPos !_shiftedHash !_k (Leaf _ _) = Empty go collPos shiftedHash k (BitmapIndexed b ary) = let !st = A.index ary i - !st' = go collPos (shiftHash shiftedHash) k st + !st' = go collPos (nextSH shiftedHash) k st in case st' of Empty | A.length ary == 1 -> Empty | A.length ary == 2 -> @@ -1205,18 +1185,18 @@ deleteKeyExists !collPos0 !h0 !k0 !m0 = go collPos0 h0 k0 m0 bIndexed = BitmapIndexed (b .&. complement m) (A.delete ary i) l | isLeafOrCollision l && A.length ary == 1 -> l _ -> BitmapIndexed b (A.update ary i st') - where m = mask' shiftedHash + where m = maskSH shiftedHash i = sparseIndex b m go collPos shiftedHash k (Full ary) = let !st = A.index ary i - !st' = go collPos (shiftHash shiftedHash) k st + !st' = go collPos (nextSH shiftedHash) k st in case st' of Empty -> let ary' = A.delete ary i bm = fullBitmap .&. complement (1 `unsafeShiftL` i) in BitmapIndexed bm ary' _ -> Full (A.update ary i st') - where i = index' shiftedHash + where i = indexSH shiftedHash go collPos _shiftedHash _k (Collision h v) | A.length v == 2 = if collPos == 0 @@ -1224,20 +1204,6 @@ deleteKeyExists !collPos0 !h0 !k0 !m0 = go collPos0 h0 k0 m0 else Leaf h (A.index v 0) | otherwise = Collision h (A.delete v collPos) go !_ !_ !_ Empty = Empty -- error "Internal error: deleteKeyExists empty" - - -- Customized version of 'index' that doesn't require a 'Shift'. - index' :: Hash -> Int - index' w = fromIntegral $ w .&. subkeyMask - {-# INLINE index' #-} - - -- Customized version of 'mask' that doesn't require a 'Shift'. - mask' :: Word -> Bitmap - mask' w = 1 `unsafeShiftL` index' w - {-# INLINE mask' #-} - - shiftHash h = h `unsafeShiftR` bitsPerSubkey - {-# INLINE shiftHash #-} - {-# NOINLINE deleteKeyExists #-} -- | \(O(\log n)\) Adjust the value tied to a given key in this map only @@ -2520,6 +2486,33 @@ nextShift :: Shift -> Shift nextShift s = s + bitsPerSubkey {-# INLINE nextShift #-} +------------------------------------------------------------------------ +-- ShiftedHash + +-- | Sometimes it's more efficient to right-shift the hashes directly instead +-- of keeping track of an additional 'Shift' value. +type ShiftedHash = Hash + +-- | Construct a 'ShiftedHash' from a 'Shift' and a 'Hash'. +shiftHash :: Shift -> Hash -> ShiftedHash +shiftHash s h = h `unsafeShiftR` s +{-# INLINE shiftHash #-} + +-- | Update a 'ShiftedHash' for the next level of the tree. +nextSH :: ShiftedHash -> ShiftedHash +nextSH sh = sh `unsafeShiftR` bitsPerSubkey +{-# INLINE nextSH #-} + +-- | Version of 'index' for use with @'ShiftedHash'es@. +indexSH :: ShiftedHash -> Int +indexSH sh = fromIntegral $ sh .&. subkeyMask +{-# INLINE indexSH #-} + +-- | Version of 'mask' for use with @'ShiftedHash'es@. +maskSH :: ShiftedHash -> Bitmap +maskSH sh = 1 `unsafeShiftL` indexSH sh +{-# INLINE maskSH #-} + ------------------------------------------------------------------------ -- Pointer equality From 189b26182f5961976cae3e0c6e63b6839ce88749 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Sun, 19 Oct 2025 18:31:42 +0200 Subject: [PATCH 5/7] Remove comment --- Data/HashMap/Internal.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index a2f78cef..d74edfda 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -979,7 +979,7 @@ two' s h1 lc1 h2 lc2 = go (shiftHash s h1) lc1 (shiftHash s h2) lc2 -- | otherwise = 0 -- -- See https://github.com/haskell-unordered-containers/unordered-containers/issues/75#issuecomment-1128419337 -{-# INLINE two' #-} -- Really?! +{-# INLINE two' #-} -- | \(O(\log n)\) Associate the value with the key in this map. If -- this map previously contained a mapping for the key, the old value From 0e0b74d6881fa3d100fee3d2914f4cab7d60c8fb Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Sun, 19 Oct 2025 18:31:51 +0200 Subject: [PATCH 6/7] two: Force the fresh Leaf Just to be safe. --- Data/HashMap/Internal.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index d74edfda..cbbb038a 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -948,7 +948,8 @@ unsafeInsert k0 v0 m0 = runST (go h0 k0 v0 0 m0) -- It is the caller's responsibility to ensure that the HashMap argument is in -- WHNF. two :: Shift -> Hash -> k -> v -> Hash -> HashMap k v -> ST s (HashMap k v) -two s h1 k1 v1 = two' s h1 (Leaf h1 (L k1 v1)) +two s h1 k1 v1 = two' s h1 l + where !l = Leaf h1 (L k1 v1) {-# INLINE two #-} -- | Create a map from two 'Leaf' or 'Collision' nodes whose hashes are From bd0b04f40135b306461447b5f0e39f31035b41ad Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Mon, 20 Oct 2025 10:22:20 +0200 Subject: [PATCH 7/7] s/map/subtree --- Data/HashMap/Internal.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index cbbb038a..d8fe88e3 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -942,8 +942,8 @@ unsafeInsert k0 v0 m0 = runST (go h0 k0 v0 0 m0) | otherwise = go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) {-# INLINABLE unsafeInsert #-} --- | Create a map from a key-value pair and a 'Leaf' or 'Collision' node with a --- different hash. +-- | Create a subtree from a key-value pair and a 'Leaf' or 'Collision' node +-- with a different hash. -- -- It is the caller's responsibility to ensure that the HashMap argument is in -- WHNF. @@ -952,7 +952,7 @@ two s h1 k1 v1 = two' s h1 l where !l = Leaf h1 (L k1 v1) {-# INLINE two #-} --- | Create a map from two 'Leaf' or 'Collision' nodes whose hashes are +-- | Create a subtree from two 'Leaf' or 'Collision' nodes whose hashes are -- distinct. -- -- It is the caller's responsibility to ensure that both HashMap arguments are