Skip to content

Commit 63e5e06

Browse files
authored
Merge pull request #460 from IntersectMBO/jeltsch/common-chunk-type
Make the different index types use a common chunk type
2 parents 48e22c4 + aa81901 commit 63e5e06

File tree

8 files changed

+72
-59
lines changed

8 files changed

+72
-59
lines changed

src/Database/LSMTree/Internal/Chunk.hs

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@ module Database.LSMTree.Internal.Chunk
55
(
66
-- * Chunks
77
Chunk (Chunk),
8-
fromChunk,
8+
toByteVector,
9+
toByteString,
910

1011
-- * Balers
1112
Baler,
@@ -19,23 +20,31 @@ import Prelude hiding (length)
1920

2021
import Control.Exception (assert)
2122
import Control.Monad.ST.Strict (ST)
23+
import Data.ByteString (ByteString)
2224
import Data.List (scanl')
2325
import Data.Primitive.PrimVar (PrimVar, newPrimVar, readPrimVar,
2426
writePrimVar)
25-
import Data.Vector.Primitive (Vector, length, unsafeCopy,
27+
import Data.Vector.Primitive (Vector (Vector), length, unsafeCopy,
2628
unsafeFreeze)
2729
import Data.Vector.Primitive.Mutable (MVector)
2830
import qualified Data.Vector.Primitive.Mutable as Mutable (drop, length, slice,
2931
take, unsafeCopy, unsafeNew)
3032
import Data.Word (Word8)
33+
import Database.LSMTree.Internal.ByteString (byteArrayToByteString)
3134

3235
-- * Chunks
3336

3437
-- | A chunk of bytes, typically output during incremental index serialisation.
35-
newtype Chunk = Chunk (Vector Word8)
38+
newtype Chunk = Chunk (Vector Word8) deriving stock (Eq, Show)
3639

37-
fromChunk :: Chunk -> Vector Word8
38-
fromChunk (Chunk content) = content
40+
-- | Yields the contents of a chunk as a byte vector.
41+
toByteVector :: Chunk -> Vector Word8
42+
toByteVector (Chunk byteVector) = byteVector
43+
44+
-- | Yields the contents of a chunk as a (strict) byte string.
45+
toByteString :: Chunk -> ByteString
46+
toByteString (Chunk (Vector vecOffset vecLength byteArray))
47+
= byteArrayToByteString vecOffset vecLength byteArray
3948

4049
-- * Balers
4150

src/Database/LSMTree/Internal/IndexCompact.hs

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,9 @@ module Database.LSMTree.Internal.IndexCompact (
1414
, toLBS
1515
-- * Incremental serialisation
1616
-- $incremental-serialisation
17-
, Chunk (..)
1817
, headerLBS
19-
, chunkToBS
2018
, finalLBS
19+
, word64VectorToChunk
2120
-- * Deserialisation
2221
, fromSBS
2322
) where
@@ -27,7 +26,6 @@ import Control.Monad (when)
2726
import Control.Monad.ST
2827
import Data.Bit hiding (flipBit)
2928
import Data.Bits (unsafeShiftR, (.&.))
30-
import qualified Data.ByteString as BS
3129
import qualified Data.ByteString.Builder as BB
3230
import qualified Data.ByteString.Builder.Extra as BB
3331
import qualified Data.ByteString.Lazy as LBS
@@ -46,8 +44,9 @@ import qualified Data.Vector.Unboxed as VU
4644
import qualified Data.Vector.Unboxed.Base as VU
4745
import Data.Word
4846
import Database.LSMTree.Internal.BitMath
49-
import Database.LSMTree.Internal.ByteString (byteArrayFromTo,
50-
byteArrayToByteString)
47+
import Database.LSMTree.Internal.ByteString (byteArrayFromTo)
48+
import Database.LSMTree.Internal.Chunk (Chunk (Chunk))
49+
import qualified Database.LSMTree.Internal.Chunk as Chunk (toByteString)
5150
import Database.LSMTree.Internal.Entry (NumEntries (..))
5251
import Database.LSMTree.Internal.Page
5352
import Database.LSMTree.Internal.Serialise
@@ -456,7 +455,7 @@ sizeInPages = NumPages . toEnum . VU.length . icPrimary
456455
toLBS :: NumEntries -> IndexCompact -> LBS.ByteString
457456
toLBS numEntries index =
458457
headerLBS
459-
<> LBS.fromStrict (chunkToBS (Chunk (icPrimary index)))
458+
<> LBS.fromStrict (Chunk.toByteString (word64VectorToChunk (icPrimary index)))
460459
<> finalLBS numEntries index
461460

462461
{-------------------------------------------------------------------------------
@@ -467,8 +466,8 @@ toLBS numEntries index =
467466
468467
To incrementally serialise a compact index as it is being constructed, start
469468
by using 'headerLBS'. Each yielded chunk can then be written using
470-
'chunkToBS'. Once construction is completed, 'finalLBS' will serialise
471-
the remaining parts of the compact index.
469+
'Chunk.toByteString'. Once construction is completed, 'finalLBS' will
470+
serialise the remaining parts of the compact index.
472471
Also see module "Database.LSMTree.Internal.IndexCompactAcc".
473472
-}
474473

@@ -485,17 +484,8 @@ headerLBS =
485484
BB.toLazyByteStringWith (BB.safeStrategy 4 BB.smallChunkSize) mempty $
486485
BB.word32Host supportedTypeAndVersion <> BB.word32Host 0
487486

488-
-- | A chunk of the primary array, which can be constructed incrementally.
489-
data Chunk = Chunk { cPrimary :: !(VU.Vector Word64) }
490-
deriving stock (Show, Eq)
491-
492-
-- | 64 bit aligned.
493-
chunkToBS :: Chunk -> BS.ByteString
494-
chunkToBS (Chunk (VU.V_Word64 (VP.Vector off len ba))) =
495-
byteArrayToByteString (mul8 off) (mul8 len) ba
496-
497487
-- | Writes everything after the primary array, which is assumed to have already
498-
-- been written using 'chunkToBS'.
488+
-- been written using 'Chunk.toByteString'.
499489
finalLBS :: NumEntries -> IndexCompact -> LBS.ByteString
500490
finalLBS (NumEntries numEntries) IndexCompact {..} =
501491
-- use a builder, since it is all relatively small
@@ -508,6 +498,11 @@ finalLBS (NumEntries numEntries) IndexCompact {..} =
508498
where
509499
numPages = VU.length icPrimary
510500

501+
-- | Constructs a chunk containing the contents of a vector of 64-bit words.
502+
word64VectorToChunk :: VU.Vector Word64 -> Chunk
503+
word64VectorToChunk (VU.V_Word64 (VP.Vector off len ba)) =
504+
Chunk (mkPrimVector (mul8 off) (mul8 len) ba)
505+
511506
-- | Padded to 64 bit.
512507
--
513508
-- Assumes that the bitvector has a byte-aligned offset.

src/Database/LSMTree/Internal/IndexCompactAcc.hs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ module Database.LSMTree.Internal.IndexCompactAcc (
1515
IndexCompactAcc (..)
1616
, new
1717
, Append (..)
18-
, Chunk (..)
1918
, append
2019
, appendSingle
2120
, appendMulti
@@ -49,6 +48,7 @@ import qualified Data.Vector.Unboxed as VU
4948
import qualified Data.Vector.Unboxed.Mutable as VUM
5049
import Data.Word
5150
import Database.LSMTree.Internal.BitMath
51+
import Database.LSMTree.Internal.Chunk (Chunk)
5252
import Database.LSMTree.Internal.IndexCompact
5353
import Database.LSMTree.Internal.Page
5454
import Database.LSMTree.Internal.Serialise
@@ -243,7 +243,7 @@ yield IndexCompactAcc{..} = do
243243
modifySTRef' icaPrimary . NE.cons =<< newPinnedMVec64 icaMaxChunkSize
244244
modifySTRef' icaClashes . NE.cons =<< VUM.new icaMaxChunkSize
245245
modifySTRef' icaLargerThanPage . NE.cons =<< VUM.new icaMaxChunkSize
246-
pure $ Just (Chunk primaryChunk)
246+
pure $ Just (word64VectorToChunk primaryChunk)
247247
else -- the current chunk is not yet full
248248
pure Nothing
249249

@@ -268,7 +268,7 @@ unsafeEnd IndexCompactAcc{..} = do
268268
-- Only slice out a chunk if there are entries in the chunk
269269
let mchunk = if ix == 0
270270
then Nothing
271-
else Just (Chunk (head chunksPrimary))
271+
else Just (word64VectorToChunk (head chunksPrimary))
272272

273273
let icPrimary = VU.concat . reverse $ chunksPrimary
274274
let icClashes = VU.concat . reverse $ chunksClashes

src/Database/LSMTree/Internal/RunAcc.hs

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ import Data.Primitive.PrimVar (PrimVar, modifyPrimVar, newPrimVar,
4242
import Data.Word (Word64)
4343
import Database.LSMTree.Internal.Assertions (fromIntegralChecked)
4444
import Database.LSMTree.Internal.BlobRef (BlobSpan (..))
45+
import Database.LSMTree.Internal.Chunk (Chunk)
4546
import Database.LSMTree.Internal.Entry (Entry (..), NumEntries (..))
4647
import Database.LSMTree.Internal.IndexCompact (IndexCompact)
47-
import qualified Database.LSMTree.Internal.IndexCompact as Index
4848
import Database.LSMTree.Internal.IndexCompactAcc (IndexCompactAcc)
4949
import qualified Database.LSMTree.Internal.IndexCompactAcc as Index
5050
import Database.LSMTree.Internal.PageAcc (PageAcc)
@@ -116,7 +116,7 @@ new (NumEntries nentries) alloc = do
116116
unsafeFinalise ::
117117
RunAcc s
118118
-> ST s ( Maybe RawPage
119-
, Maybe Index.Chunk
119+
, Maybe Chunk
120120
, Bloom SerialisedKey
121121
, IndexCompact
122122
, NumEntries
@@ -130,9 +130,9 @@ unsafeFinalise racc@RunAcc {..} = do
130130
!mchunk = selectChunk mpagemchunk mchunk'
131131
pure (mpage, mchunk, bloom, index, NumEntries numEntries)
132132
where
133-
selectChunk :: Maybe (RawPage, Maybe Index.Chunk)
134-
-> Maybe Index.Chunk
135-
-> Maybe Index.Chunk
133+
selectChunk :: Maybe (RawPage, Maybe Chunk)
134+
-> Maybe Chunk
135+
-> Maybe Chunk
136136
selectChunk (Just (_page, Just _chunk)) (Just _chunk') =
137137
-- If flushing the page accumulator gives us an index chunk then
138138
-- the index can't have any more chunks when we finalise the index.
@@ -154,13 +154,13 @@ addKeyOp
154154
:: RunAcc s
155155
-> SerialisedKey
156156
-> Entry SerialisedValue BlobSpan -- ^ the full value, not just a prefix
157-
-> ST s ([RawPage], [RawOverflowPage], [Index.Chunk])
157+
-> ST s ([RawPage], [RawOverflowPage], [Chunk])
158158
addKeyOp racc k e
159159
| PageAcc.entryWouldFitInPage k e = smallToLarge <$> addSmallKeyOp racc k e
160160
| otherwise = addLargeKeyOp racc k e
161161
where
162-
smallToLarge :: Maybe (RawPage, Maybe Index.Chunk)
163-
-> ([RawPage], [RawOverflowPage], [Index.Chunk])
162+
smallToLarge :: Maybe (RawPage, Maybe Chunk)
163+
-> ([RawPage], [RawOverflowPage], [Chunk])
164164
smallToLarge Nothing = ([], [], [])
165165
smallToLarge (Just (page, Nothing)) = ([page], [], [])
166166
smallToLarge (Just (page, Just chunk)) = ([page], [], [chunk])
@@ -179,7 +179,7 @@ addSmallKeyOp
179179
:: RunAcc s
180180
-> SerialisedKey
181181
-> Entry SerialisedValue BlobSpan
182-
-> ST s (Maybe (RawPage, Maybe Index.Chunk))
182+
-> ST s (Maybe (RawPage, Maybe Chunk))
183183
addSmallKeyOp racc@RunAcc{..} k e =
184184
assert (PageAcc.entryWouldFitInPage k e) $ do
185185
modifyPrimVar entryCount (+1)
@@ -225,7 +225,7 @@ addLargeKeyOp
225225
:: RunAcc s
226226
-> SerialisedKey
227227
-> Entry SerialisedValue BlobSpan -- ^ the full value, not just a prefix
228-
-> ST s ([RawPage], [RawOverflowPage], [Index.Chunk])
228+
-> ST s ([RawPage], [RawOverflowPage], [Chunk])
229229
addLargeKeyOp racc@RunAcc{..} k e =
230230
assert (not (PageAcc.entryWouldFitInPage k e)) $ do
231231
modifyPrimVar entryCount (+1)
@@ -276,7 +276,7 @@ addLargeSerialisedKeyOp
276276
-- first page of a multi-page representation of a single
277277
-- key\/op /without/ a 'BlobSpan'.
278278
-> [RawOverflowPage] -- ^ The overflow pages for this key\/op
279-
-> ST s ([RawPage], [RawOverflowPage], [Index.Chunk])
279+
-> ST s ([RawPage], [RawOverflowPage], [Chunk])
280280
addLargeSerialisedKeyOp racc@RunAcc{..} k page overflowPages =
281281
assert (RawPage.rawPageNumKeys page == 1) $
282282
assert (RawPage.rawPageHasBlobSpanAt page 0 == 0) $
@@ -299,7 +299,7 @@ addLargeSerialisedKeyOp racc@RunAcc{..} k page overflowPages =
299299
--
300300
-- Returns @Nothing@ if the page accumulator was empty.
301301
--
302-
flushPageIfNonEmpty :: RunAcc s -> ST s (Maybe (RawPage, Maybe Index.Chunk))
302+
flushPageIfNonEmpty :: RunAcc s -> ST s (Maybe (RawPage, Maybe Chunk))
303303
flushPageIfNonEmpty RunAcc{mpageacc, mindex} = do
304304
nkeys <- PageAcc.keysCountPageAcc mpageacc
305305
if nkeys > 0
@@ -320,10 +320,10 @@ flushPageIfNonEmpty RunAcc{mpageacc, mindex} = do
320320
-- Combine the result of 'flushPageIfNonEmpty' with extra pages and index
321321
-- chunks.
322322
--
323-
selectPagesAndChunks :: Maybe (RawPage, Maybe Index.Chunk)
323+
selectPagesAndChunks :: Maybe (RawPage, Maybe Chunk)
324324
-> RawPage
325-
-> [Index.Chunk]
326-
-> ([RawPage], [Index.Chunk])
325+
-> [Chunk]
326+
-> ([RawPage], [Chunk])
327327
selectPagesAndChunks mpagemchunkPre page chunks =
328328
case mpagemchunkPre of
329329
Nothing -> ( [page], chunks)

src/Database/LSMTree/Internal/RunBuilder.hs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ import Data.Word (Word64)
2525
import Database.LSMTree.Internal.BlobRef (BlobSpan (..), RawBlobRef)
2626
import qualified Database.LSMTree.Internal.BlobRef as BlobRef
2727
import Database.LSMTree.Internal.BloomFilter (bloomFilterToLBS)
28+
import Database.LSMTree.Internal.Chunk (Chunk)
29+
import qualified Database.LSMTree.Internal.Chunk as Chunk (toByteString)
2830
import Database.LSMTree.Internal.CRC32C (CRC32C)
2931
import qualified Database.LSMTree.Internal.CRC32C as CRC
3032
import Database.LSMTree.Internal.Entry
@@ -313,16 +315,16 @@ writeIndexHeader RunBuilder {..} =
313315

314316
{-# SPECIALISE writeIndexChunk ::
315317
RunBuilder IO h
316-
-> Index.Chunk
318+
-> Chunk
317319
-> IO () #-}
318320
writeIndexChunk ::
319321
(MonadSTM m, PrimMonad m)
320322
=> RunBuilder m h
321-
-> Index.Chunk
323+
-> Chunk
322324
-> m ()
323325
writeIndexChunk RunBuilder {..} chunk =
324326
writeToHandle runBuilderHasFS (forRunIndex runBuilderHandles) $
325-
BSL.fromStrict $ Index.chunkToBS chunk
327+
BSL.fromStrict $ Chunk.toByteString chunk
326328

327329
{-# SPECIALISE writeIndexFinal ::
328330
RunBuilder IO h

test/Test/Database/LSMTree/Internal/Chunk.hs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import Data.Word (Word8)
1111
import Database.LSMTree.Extras.Generators ()
1212
-- for @Arbitrary@ instantiation of @Vector@
1313
import Database.LSMTree.Internal.Chunk (Chunk, createBaler, feedBaler,
14-
fromChunk, unsafeEndBaler)
14+
toByteVector, unsafeEndBaler)
1515
import Test.QuickCheck (Arbitrary (arbitrary, shrink),
1616
NonEmptyList (NonEmpty), Positive (Positive, getPositive),
1717
Property, Small (Small, getSmall), Testable, scale,
@@ -94,7 +94,8 @@ prop_contentIsPreserved (MinChunkSize minChunkSize) food
9494
input = concat (List.concat food)
9595

9696
output :: Vector Word8
97-
output = concat (fromChunk <$> catMaybes (commonChunks ++ [remnant]))
97+
output = concat $
98+
toByteVector <$> catMaybes (commonChunks ++ [remnant])
9899

99100
in input === output
100101

@@ -108,12 +109,13 @@ prop_noRemnantAfterOutput (MinChunkSize minChunkSize) (NonEmpty food)
108109
prop_commonChunksAreLarge :: MinChunkSize -> [[Vector Word8]] -> Property
109110
prop_commonChunksAreLarge (MinChunkSize minChunkSize) food
110111
= withBalingOutput minChunkSize food $ \ commonChunks _ ->
111-
all (fromChunk >>> length >>> (>= minChunkSize)) (catMaybes commonChunks)
112+
all (toByteVector >>> length >>> (>= minChunkSize)) $
113+
catMaybes commonChunks
112114

113115
remnantChunkSizeIs :: (Int -> Bool) -> Int -> [[Vector Word8]] -> Property
114116
remnantChunkSizeIs constraint minChunkSize food
115117
= withBalingOutput minChunkSize food $ \ _ remnant ->
116-
isJust remnant ==> constraint (length (fromChunk (fromJust remnant)))
118+
isJust remnant ==> constraint (length (toByteVector (fromJust remnant)))
117119

118120
prop_remnantChunkIsNonEmpty :: MinChunkSize -> [[Vector Word8]] -> Property
119121
prop_remnantChunkIsNonEmpty (MinChunkSize minChunkSize)

0 commit comments

Comments
 (0)