1- {-# LANGUAGE BangPatterns #-}
2- {-# LANGUAGE CPP #-}
3- {-# LANGUAGE DeriveAnyClass #-}
4- {-# LANGUAGE DeriveFunctor #-}
5- {-# LANGUAGE DerivingStrategies #-}
6- {-# LANGUAGE FlexibleContexts #-}
7- {-# LANGUAGE NamedFieldPuns #-}
8- {-# LANGUAGE ScopedTypeVariables #-}
9- {-# LANGUAGE TupleSections #-}
1+ {-# LANGUAGE CPP #-}
102
113module Database.LSMTree.Internal.Lookup (
124 ResolveSerialisedValue
135 , ByteCountDiscrepancy (.. )
6+ , LookupAcc
147 , lookupsIO
8+ , lookupsIOWithoutWriteBuffer
159 -- * Internal: exposed for tests and benchmarks
1610 , RunIx
1711 , KeyIx
@@ -33,7 +27,7 @@ import qualified Data.Vector.Unboxed as VU
3327
3428import Control.Exception (Exception , assert )
3529import Control.Monad
36- import Control.Monad.Class.MonadST as Class
30+ import Control.Monad.Class.MonadST as ST
3731import Control.Monad.Class.MonadThrow (MonadThrow (.. ))
3832import Control.Monad.Primitive
3933import Control.Monad.ST.Strict
@@ -154,6 +148,8 @@ data ByteCountDiscrepancy = ByteCountDiscrepancy {
154148 deriving stock (Show , Eq )
155149 deriving anyclass (Exception )
156150
151+ type LookupAcc m h = V. Vector (Maybe (Entry SerialisedValue (WeakBlobRef m h )))
152+
157153{-# SPECIALIZE lookupsIO ::
158154 HasBlockIO IO h
159155 -> ArenaManager RealWorld
@@ -165,7 +161,7 @@ data ByteCountDiscrepancy = ByteCountDiscrepancy {
165161 -> V.Vector Index
166162 -> V.Vector (Handle h)
167163 -> V.Vector SerialisedKey
168- -> IO (V.Vector (Maybe (Entry SerialisedValue (WeakBlobRef IO h))) )
164+ -> IO (LookupAcc IO h)
169165 #-}
170166-- | Batched lookups in I\/O.
171167--
@@ -185,11 +181,11 @@ lookupsIO ::
185181 -> V. Vector Index -- ^ The indexes inside @rs@
186182 -> V. Vector (Handle h ) -- ^ The file handles to the key\/value files inside @rs@
187183 -> V. Vector SerialisedKey
188- -> m (V. Vector ( Maybe ( Entry SerialisedValue ( WeakBlobRef m h ))) )
184+ -> m (LookupAcc m h )
189185lookupsIO ! hbio ! mgr ! resolveV ! wb ! wbblobs ! rs ! blooms ! indexes ! kopsFiles ! ks =
190186 assert precondition $
191187 withArena mgr $ \ arena -> do
192- (rkixs, ioops) <- Class . stToIO $ prepLookups arena blooms indexes kopsFiles ks
188+ (rkixs, ioops) <- ST . stToIO $ prepLookups arena blooms indexes kopsFiles ks
193189 ioress <- submitIO hbio ioops
194190 intraPageLookups resolveV wb wbblobs rs ks rkixs ioops ioress
195191 where
@@ -201,6 +197,49 @@ lookupsIO !hbio !mgr !resolveV !wb !wbblobs !rs !blooms !indexes !kopsFiles !ks
201197 assert (V. length rs == V. length kopsFiles) $
202198 True
203199
200+ {-# SPECIALIZE lookupsIOWithoutWriteBuffer ::
201+ HasBlockIO IO h
202+ -> ArenaManager RealWorld
203+ -> ResolveSerialisedValue
204+ -> V.Vector (Ref (Run IO h))
205+ -> V.Vector (Bloom SerialisedKey)
206+ -> V.Vector Index
207+ -> V.Vector (Handle h)
208+ -> V.Vector SerialisedKey
209+ -> IO (LookupAcc IO h)
210+ #-}
211+ -- | Batched lookups in I\/O.
212+ --
213+ -- See Note [Batched lookups, buffer strategy and restrictions]
214+ --
215+ -- PRECONDITION: the vectors of bloom filters, indexes and file handles
216+ -- should pointwise match with the vectors of runs.
217+ lookupsIOWithoutWriteBuffer ::
218+ forall m h . (MonadThrow m , MonadST m )
219+ => HasBlockIO m h
220+ -> ArenaManager (PrimState m )
221+ -> ResolveSerialisedValue
222+ -> V. Vector (Ref (Run m h )) -- ^ Runs @rs@
223+ -> V. Vector (Bloom SerialisedKey ) -- ^ The bloom filters inside @rs@
224+ -> V. Vector Index -- ^ The indexes inside @rs@
225+ -> V. Vector (Handle h ) -- ^ The file handles to the key\/value files inside @rs@
226+ -> V. Vector SerialisedKey
227+ -> m (LookupAcc m h )
228+ lookupsIOWithoutWriteBuffer ! hbio ! mgr ! resolveV ! rs ! blooms ! indexes ! kopsFiles ! ks =
229+ assert precondition $
230+ withArena mgr $ \ arena -> do
231+ (rkixs, ioops) <- ST. stToIO $ prepLookups arena blooms indexes kopsFiles ks
232+ ioress <- submitIO hbio ioops
233+ intraPageLookupsOn resolveV (V. map (const Nothing ) ks) rs ks rkixs ioops ioress
234+ where
235+ -- we check only that the lengths match, because checking the contents is
236+ -- too expensive.
237+ precondition =
238+ assert (V. length rs == V. length blooms) $
239+ assert (V. length rs == V. length indexes) $
240+ assert (V. length rs == V. length kopsFiles) $
241+ True
242+
204243{-# SPECIALIZE intraPageLookups ::
205244 ResolveSerialisedValue
206245 -> WB.WriteBuffer
@@ -210,7 +249,7 @@ lookupsIO !hbio !mgr !resolveV !wb !wbblobs !rs !blooms !indexes !kopsFiles !ks
210249 -> VP.Vector RunIxKeyIx
211250 -> V.Vector (IOOp RealWorld h)
212251 -> VU.Vector IOResult
213- -> IO (V.Vector (Maybe (Entry SerialisedValue (WeakBlobRef IO h))) )
252+ -> IO (LookupAcc IO h)
214253 #-}
215254-- | Intra-page lookups, and combining lookup results from multiple runs and
216255-- the write buffer.
@@ -229,7 +268,7 @@ intraPageLookups ::
229268 -> VP. Vector RunIxKeyIx
230269 -> V. Vector (IOOp (PrimState m ) h )
231270 -> VU. Vector IOResult
232- -> m (V. Vector ( Maybe ( Entry SerialisedValue ( WeakBlobRef m h ))) )
271+ -> m (LookupAcc m h )
233272intraPageLookups ! resolveV ! wb ! wbblobs ! rs ! ks ! rkixs ! ioops ! ioress = do
234273 -- We accumulate results into the 'res' vector. When there are several
235274 -- lookup hits for the same key then we combine the results. The combining
@@ -244,12 +283,59 @@ intraPageLookups !resolveV !wb !wbblobs !rs !ks !rkixs !ioops !ioress = do
244283 -- the surface API so that all the conversions can be done in one pass
245284 -- without intermediate allocations.
246285 --
247- res <- VM. generateM (V. length ks) $ \ ki ->
248- case WB. lookup wb (V. unsafeIndex ks ki) of
249- Nothing -> pure Nothing
250- Just e -> pure $! Just $! fmap (WBB. mkWeakBlobRef wbblobs) e
251- -- TODO: ^^ we should be able to avoid this allocation by
252- -- combining the conversion with other later conversions.
286+
287+ acc0 <-
288+ V. generateM (V. length ks) $ \ ki ->
289+ case WB. lookup wb (V. unsafeIndex ks ki) of
290+ Nothing -> pure Nothing
291+ Just e -> pure $! Just $! fmap (WBB. mkWeakBlobRef wbblobs) e
292+ -- TODO: ^^ we should be able to avoid this allocation by
293+ -- combining the conversion with other later conversions.
294+ intraPageLookupsOn resolveV acc0 rs ks rkixs ioops ioress
295+
296+ {-# SPECIALIZE intraPageLookupsOn ::
297+ ResolveSerialisedValue
298+ -> LookupAcc IO h
299+ -> V.Vector (Ref (Run IO h))
300+ -> V.Vector SerialisedKey
301+ -> VP.Vector RunIxKeyIx
302+ -> V.Vector (IOOp RealWorld h)
303+ -> VU.Vector IOResult
304+ -> IO (LookupAcc IO h)
305+ #-}
306+ -- | Intra-page lookups, and combining lookup results from multiple runs and
307+ -- the write buffer.
308+ --
309+ -- This function assumes that @rkixs@ is ordered such that newer runs are
310+ -- handled first. The order matters for resolving cases where we find the same
311+ -- key in multiple runs.
312+ --
313+ intraPageLookupsOn ::
314+ forall m h . (PrimMonad m , MonadThrow m )
315+ => ResolveSerialisedValue
316+ -> LookupAcc m h
317+ -> V. Vector (Ref (Run m h ))
318+ -> V. Vector SerialisedKey
319+ -> VP. Vector RunIxKeyIx
320+ -> V. Vector (IOOp (PrimState m ) h )
321+ -> VU. Vector IOResult
322+ -> m (LookupAcc m h )
323+ intraPageLookupsOn ! resolveV ! acc0 ! rs ! ks ! rkixs ! ioops ! ioress =
324+ assert (V. length acc0 == V. length ks) $ do
325+ -- We accumulate results into the 'res' vector. When there are several
326+ -- lookup hits for the same key then we combine the results. The combining
327+ -- operator is associative but not commutative, so we must do this in the
328+ -- right order. We start with the write buffer lookup results and then go
329+ -- through the run lookup results in rkixs, which must be ordered by run.
330+ --
331+ -- TODO: reassess the representation of the result vector to try to reduce
332+ -- intermediate allocations. For example use a less convenient
333+ -- representation with several vectors (e.g. separate blob info) and
334+ -- convert to the final convenient representation in a single pass near
335+ -- the surface API so that all the conversions can be done in one pass
336+ -- without intermediate allocations.
337+ --
338+ res <- V. unsafeThaw acc0
253339 loop res 0
254340 V. unsafeFreeze res
255341 where
0 commit comments