33module Database.LSMTree.Internal.Lookup (
44 ResolveSerialisedValue
55 , LookupAcc
6+ , lookupsIOWithWriteBuffer
67 , lookupsIO
7- , lookupsIOWithoutWriteBuffer
88 -- * Errors
99 , TableCorruptedError (.. )
1010 -- * Internal: exposed for tests and benchmarks
@@ -14,7 +14,8 @@ module Database.LSMTree.Internal.Lookup (
1414 , prepLookups
1515 , bloomQueries
1616 , indexSearches
17- , intraPageLookups
17+ , intraPageLookupsWithWriteBuffer
18+ , intraPageLookupsOn
1819 ) where
1920
2021import Data.Arena (Arena , ArenaManager , allocateFromArena , withArena )
@@ -114,7 +115,7 @@ type ResolveSerialisedValue = SerialisedValue -> SerialisedValue -> SerialisedVa
114115
115116type LookupAcc m h = V. Vector (Maybe (Entry SerialisedValue (WeakBlobRef m h )))
116117
117- {-# SPECIALIZE lookupsIO ::
118+ {-# SPECIALIZE lookupsIOWithWriteBuffer ::
118119 HasBlockIO IO h
119120 -> ArenaManager RealWorld
120121 -> ResolveSerialisedValue
@@ -127,13 +128,8 @@ type LookupAcc m h = V.Vector (Maybe (Entry SerialisedValue (WeakBlobRef m h)))
127128 -> V.Vector SerialisedKey
128129 -> IO (LookupAcc IO h)
129130 #-}
130- -- | Batched lookups in I\/O.
131- --
132- -- See Note [Batched lookups, buffer strategy and restrictions]
133- --
134- -- PRECONDITION: the vectors of bloom filters, indexes and file handles
135- -- should pointwise match with the vectors of runs.
136- lookupsIO ::
131+ -- | Like 'lookupsIO', but takes a write buffer into account.
132+ lookupsIOWithWriteBuffer ::
137133 forall m h . (MonadThrow m , MonadST m )
138134 => HasBlockIO m h
139135 -> ArenaManager (PrimState m )
@@ -146,12 +142,12 @@ lookupsIO ::
146142 -> V. Vector (Handle h ) -- ^ The file handles to the key\/value files inside @rs@
147143 -> V. Vector SerialisedKey
148144 -> m (LookupAcc m h )
149- lookupsIO ! hbio ! mgr ! resolveV ! wb ! wbblobs ! rs ! blooms ! indexes ! kopsFiles ! ks =
145+ lookupsIOWithWriteBuffer ! hbio ! mgr ! resolveV ! wb ! wbblobs ! rs ! blooms ! indexes ! kopsFiles ! ks =
150146 assert precondition $
151147 withArena mgr $ \ arena -> do
152148 (rkixs, ioops) <- ST. stToIO $ prepLookups arena blooms indexes kopsFiles ks
153149 ioress <- submitIO hbio ioops
154- intraPageLookups resolveV wb wbblobs rs ks rkixs ioops ioress
150+ intraPageLookupsWithWriteBuffer resolveV wb wbblobs rs ks rkixs ioops ioress
155151 where
156152 -- we check only that the lengths match, because checking the contents is
157153 -- too expensive.
@@ -161,7 +157,7 @@ lookupsIO !hbio !mgr !resolveV !wb !wbblobs !rs !blooms !indexes !kopsFiles !ks
161157 assert (V. length rs == V. length kopsFiles) $
162158 True
163159
164- {-# SPECIALIZE lookupsIOWithoutWriteBuffer ::
160+ {-# SPECIALIZE lookupsIO ::
165161 HasBlockIO IO h
166162 -> ArenaManager RealWorld
167163 -> ResolveSerialisedValue
@@ -174,11 +170,9 @@ lookupsIO !hbio !mgr !resolveV !wb !wbblobs !rs !blooms !indexes !kopsFiles !ks
174170 #-}
175171-- | Batched lookups in I\/O.
176172--
177- -- See Note [Batched lookups, buffer strategy and restrictions]
178- --
179173-- PRECONDITION: the vectors of bloom filters, indexes and file handles
180174-- should pointwise match with the vectors of runs.
181- lookupsIOWithoutWriteBuffer ::
175+ lookupsIO ::
182176 forall m h . (MonadThrow m , MonadST m )
183177 => HasBlockIO m h
184178 -> ArenaManager (PrimState m )
@@ -189,7 +183,7 @@ lookupsIOWithoutWriteBuffer ::
189183 -> V. Vector (Handle h ) -- ^ The file handles to the key\/value files inside @rs@
190184 -> V. Vector SerialisedKey
191185 -> m (LookupAcc m h )
192- lookupsIOWithoutWriteBuffer ! hbio ! mgr ! resolveV ! rs ! blooms ! indexes ! kopsFiles ! ks =
186+ lookupsIO ! hbio ! mgr ! resolveV ! rs ! blooms ! indexes ! kopsFiles ! ks =
193187 assert precondition $
194188 withArena mgr $ \ arena -> do
195189 (rkixs, ioops) <- ST. stToIO $ prepLookups arena blooms indexes kopsFiles ks
@@ -204,7 +198,7 @@ lookupsIOWithoutWriteBuffer !hbio !mgr !resolveV !rs !blooms !indexes !kopsFiles
204198 assert (V. length rs == V. length kopsFiles) $
205199 True
206200
207- {-# SPECIALIZE intraPageLookups ::
201+ {-# SPECIALIZE intraPageLookupsWithWriteBuffer ::
208202 ResolveSerialisedValue
209203 -> WB.WriteBuffer
210204 -> Ref (WBB.WriteBufferBlobs IO h)
@@ -215,14 +209,10 @@ lookupsIOWithoutWriteBuffer !hbio !mgr !resolveV !rs !blooms !indexes !kopsFiles
215209 -> VU.Vector IOResult
216210 -> IO (LookupAcc IO h)
217211 #-}
218- -- | Intra-page lookups, and combining lookup results from multiple runs and
219- -- the write buffer.
220- --
221- -- This function assumes that @rkixs@ is ordered such that newer runs are
222- -- handled first. The order matters for resolving cases where we find the same
223- -- key in multiple runs.
212+ -- | Like 'intraPageLookupsOn', but uses the write buffer as the initial
213+ -- accumulator.
224214--
225- intraPageLookups ::
215+ intraPageLookupsWithWriteBuffer ::
226216 forall m h . (PrimMonad m , MonadThrow m )
227217 => ResolveSerialisedValue
228218 -> WB. WriteBuffer
@@ -233,21 +223,9 @@ intraPageLookups ::
233223 -> V. Vector (IOOp (PrimState m ) h )
234224 -> VU. Vector IOResult
235225 -> m (LookupAcc m h )
236- intraPageLookups ! resolveV ! wb ! wbblobs ! rs ! ks ! rkixs ! ioops ! ioress = do
237- -- We accumulate results into the 'res' vector. When there are several
238- -- lookup hits for the same key then we combine the results. The combining
239- -- operator is associative but not commutative, so we must do this in the
240- -- right order. We start with the write buffer lookup results and then go
241- -- through the run lookup results in rkixs, which must be ordered by run.
242- --
243- -- TODO: reassess the representation of the result vector to try to reduce
244- -- intermediate allocations. For example use a less convenient
245- -- representation with several vectors (e.g. separate blob info) and
246- -- convert to the final convenient representation in a single pass near
247- -- the surface API so that all the conversions can be done in one pass
248- -- without intermediate allocations.
249- --
250-
226+ intraPageLookupsWithWriteBuffer ! resolveV ! wb ! wbblobs ! rs ! ks ! rkixs ! ioops ! ioress = do
227+ -- The most recent values are in the write buffer, so we use it to
228+ -- initialise the accumulator.
251229 acc0 <-
252230 V. generateM (V. length ks) $ \ ki ->
253231 case WB. lookup wb (V. unsafeIndex ks ki) of
@@ -278,7 +256,7 @@ data TableCorruptedError
278256 -> IO (LookupAcc IO h)
279257 #-}
280258-- | Intra-page lookups, and combining lookup results from multiple runs and
281- -- the write buffer.
259+ -- a potential initial accumulator (e.g. from the write buffer) .
282260--
283261-- This function assumes that @rkixs@ is ordered such that newer runs are
284262-- handled first. The order matters for resolving cases where we find the same
@@ -287,7 +265,7 @@ data TableCorruptedError
287265intraPageLookupsOn ::
288266 forall m h . (PrimMonad m , MonadThrow m )
289267 => ResolveSerialisedValue
290- -> LookupAcc m h
268+ -> LookupAcc m h -- initial acc
291269 -> V. Vector (Ref (Run m h ))
292270 -> V. Vector SerialisedKey
293271 -> VP. Vector RunIxKeyIx
0 commit comments