Skip to content

Commit 59801ee

Browse files
authored
Merge pull request #483 from IntersectMBO/jeltsch/polymorphic-index-periphery
Make `headerLBS` and `finalLBS` index-type-polymorphic
2 parents 82df7e4 + 0d0eab5 commit 59801ee

File tree

6 files changed

+97
-33
lines changed

6 files changed

+97
-33
lines changed

src/Database/LSMTree/Internal/ChecksumHandle.hs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
{-# LANGUAGE MagicHash #-}
2+
13
module Database.LSMTree.Internal.ChecksumHandle
24
(
35
-- * Checksum handles
@@ -44,6 +46,7 @@ import qualified Database.LSMTree.Internal.RawOverflowPage as RawOverflowPage
4446
import Database.LSMTree.Internal.RawPage (RawPage)
4547
import qualified Database.LSMTree.Internal.RawPage as RawPage
4648
import Database.LSMTree.Internal.Serialise
49+
import GHC.Exts (Proxy#)
4750
import qualified System.FS.API as FS
4851
import System.FS.API
4952
import qualified System.FS.BlockIO.API as FS
@@ -204,15 +207,17 @@ writeFilter hfs filterHandle bf =
204207
{-# SPECIALISE writeIndexHeader ::
205208
HasFS IO h
206209
-> ForIndex (ChecksumHandle RealWorld h)
210+
-> Proxy# IndexCompact
207211
-> IO () #-}
208212
writeIndexHeader ::
209213
(MonadSTM m, PrimMonad m)
210214
=> HasFS m h
211215
-> ForIndex (ChecksumHandle (PrimState m) h)
216+
-> Proxy# IndexCompact
212217
-> m ()
213-
writeIndexHeader hfs indexHandle =
218+
writeIndexHeader hfs indexHandle indexTypeProxy =
214219
writeToHandle hfs (unForIndex indexHandle) $
215-
Index.headerLBS
220+
Index.headerLBS indexTypeProxy
216221

217222
{-# SPECIALISE writeIndexChunk ::
218223
HasFS IO h

src/Database/LSMTree/Internal/Index.hs

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
{-# LANGUAGE MagicHash #-}
12
{-# LANGUAGE TypeFamilies #-}
23

34
{-|
@@ -6,20 +7,35 @@
67
-}
78
module Database.LSMTree.Internal.Index
89
(
9-
Index (search, sizeInPages, fromSBS),
10+
Index (search, sizeInPages, headerLBS, finalLBS, fromSBS),
1011
IndexAcc (ResultingIndex, appendSingle, appendMulti, unsafeEnd)
1112
)
1213
where
1314

1415
import Control.Monad.ST.Strict (ST)
16+
import Data.ByteString.Lazy (LazyByteString)
1517
import Data.ByteString.Short (ShortByteString)
1618
import Data.Word (Word32)
1719
import Database.LSMTree.Internal.Chunk (Chunk)
1820
import Database.LSMTree.Internal.Entry (NumEntries)
1921
import Database.LSMTree.Internal.Page (NumPages, PageSpan)
2022
import Database.LSMTree.Internal.Serialise (SerialisedKey)
23+
import GHC.Exts (Proxy#)
2124

22-
-- | The class of index types.
25+
{-|
26+
The class of index types.
27+
28+
This class contains also methods for the non-incremental parts of otherwise
29+
incremental serialisation. To completely serialise an index interleaved with
30+
its construction, proceed as follows:
31+
32+
1. Use 'headerLBS' to generate the header of the serialised index.
33+
34+
2. Incrementally construct the index using the methods of 'IndexAcc', and
35+
assemble the body of the serialised index from the generated chunks.
36+
37+
3. Use 'finalLBS' to generate the footer of the serialised index.
38+
-}
2339
class Index i where
2440

2541
{-|
@@ -33,6 +49,21 @@ class Index i where
3349
-- | Yields the number of pages covered by an index.
3450
sizeInPages :: i -> NumPages
3551

52+
{-|
53+
Yields the header of the serialised form of an index.
54+
55+
See the documentation of the 'Index' class for how to generate a
56+
complete serialised index.
57+
-}
58+
headerLBS :: Proxy# i -> LazyByteString
59+
60+
{-|
61+
Yields the footer of the serialised form of an index.
62+
63+
See the documentation of the 'Index' class for how to generate a
64+
complete serialised index.
65+
-}
66+
finalLBS :: NumEntries -> i -> LazyByteString
3667
{-|
3768
Reads an index along with the number of entries of the respective run
3869
from a byte string.

src/Database/LSMTree/Internal/Index/Compact.hs

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
{-# LANGUAGE MagicHash #-}
2+
13
-- | A compact fence-pointer index for uniformly distributed keys.
24
--
35
-- TODO: add utility functions for clash probability calculations
@@ -13,9 +15,8 @@ module Database.LSMTree.Internal.Index.Compact (
1315
-- * Non-incremental serialisation
1416
, toLBS
1517
-- * Incremental serialisation
16-
-- $incremental-serialisation
17-
, headerLBS
18-
, finalLBS
18+
, Index.headerLBS
19+
, Index.finalLBS
1920
, word64VectorToChunk
2021
-- * Deserialisation
2122
, Index.fromSBS
@@ -49,12 +50,13 @@ import Database.LSMTree.Internal.Chunk (Chunk (Chunk))
4950
import qualified Database.LSMTree.Internal.Chunk as Chunk (toByteString)
5051
import Database.LSMTree.Internal.Entry (NumEntries (..))
5152
import Database.LSMTree.Internal.Index (Index)
52-
import qualified Database.LSMTree.Internal.Index as Index (fromSBS, search,
53-
sizeInPages)
53+
import qualified Database.LSMTree.Internal.Index as Index (finalLBS, fromSBS,
54+
headerLBS, search, sizeInPages)
5455
import Database.LSMTree.Internal.Page
5556
import Database.LSMTree.Internal.Serialise
5657
import Database.LSMTree.Internal.Unsliced
5758
import Database.LSMTree.Internal.Vector
59+
import GHC.Exts (Proxy#, proxy#)
5860

5961
{- $compact
6062
@@ -460,38 +462,34 @@ sizeInPages = NumPages . toEnum . VU.length . icPrimary
460462
-- | Serialises a compact index in one go.
461463
toLBS :: NumEntries -> IndexCompact -> LBS.ByteString
462464
toLBS numEntries index =
463-
headerLBS
465+
headerLBS (proxy# @IndexCompact)
464466
<> LBS.fromStrict (Chunk.toByteString (word64VectorToChunk (icPrimary index)))
465467
<> finalLBS numEntries index
466468

467469
{-------------------------------------------------------------------------------
468470
Incremental serialisation
469471
-------------------------------------------------------------------------------}
470472

471-
{- $incremental-serialisation
472-
473-
To incrementally serialise a compact index as it is being constructed, start
474-
by using 'headerLBS'. Each yielded chunk can then be written using
475-
'Chunk.toByteString'. Once construction is completed, 'finalLBS' will
476-
serialise the remaining parts of the compact index.
477-
Also see module "Database.LSMTree.Internal.Index.CompactAcc".
478-
-}
479-
480473
-- | By writing out the type–version indicator in host endianness, we also
481474
-- indicate endianness. During deserialisation, we would discover an endianness
482475
-- mismatch.
483476
supportedTypeAndVersion :: Word32
484477
supportedTypeAndVersion = 0x0001
485478

486-
-- | 64 bits, to be used before writing any other parts of the serialised file!
487-
headerLBS :: LBS.ByteString
488-
headerLBS =
479+
{-|
480+
For a specification of this operation, see the documentation of [its
481+
polymorphic version]('Index.headerLBS').
482+
-}
483+
headerLBS :: Proxy# IndexCompact -> LBS.ByteString
484+
headerLBS _ =
489485
-- create a single 4 byte chunk
490486
BB.toLazyByteStringWith (BB.safeStrategy 4 BB.smallChunkSize) mempty $
491487
BB.word32Host supportedTypeAndVersion <> BB.word32Host 0
492488

493-
-- | Writes everything after the primary array, which is assumed to have already
494-
-- been written using 'Chunk.toByteString'.
489+
{-|
490+
For a specification of this operation, see the documentation of [its
491+
polymorphic version]('Index.finalLBS').
492+
-}
495493
finalLBS :: NumEntries -> IndexCompact -> LBS.ByteString
496494
finalLBS (NumEntries numEntries) IndexCompact {..} =
497495
-- use a builder, since it is all relatively small
@@ -690,6 +688,12 @@ instance Index IndexCompact where
690688
sizeInPages :: IndexCompact -> NumPages
691689
sizeInPages = sizeInPages
692690

691+
headerLBS :: Proxy# IndexCompact -> LBS.ByteString
692+
headerLBS = headerLBS
693+
694+
finalLBS :: NumEntries -> IndexCompact -> LBS.ByteString
695+
finalLBS = finalLBS
696+
693697
fromSBS :: ShortByteString -> Either String (NumEntries, IndexCompact)
694698
fromSBS = fromSBS
695699

src/Database/LSMTree/Internal/Index/Ordinary.hs

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
{-# LANGUAGE MagicHash #-}
2+
13
{- HLINT ignore "Avoid restricted alias" -}
24

35
-- | A general-purpose fence pointer index.
@@ -15,6 +17,9 @@ import Prelude hiding (drop, last, length)
1517

1618
import Control.Exception (assert)
1719
import Control.Monad (when)
20+
import Data.ByteString.Builder (toLazyByteString)
21+
import Data.ByteString.Builder.Extra (word32Host, word64Host)
22+
import Data.ByteString.Lazy (LazyByteString)
1823
import Data.ByteString.Short (ShortByteString (SBS))
1924
import qualified Data.ByteString.Short as ShortByteString (length)
2025
import Data.Primitive.ByteArray (ByteArray (ByteArray),
@@ -24,14 +29,16 @@ import Data.Vector (Vector, drop, findIndex, findIndexR, fromList,
2429
import qualified Data.Vector.Primitive as Primitive (Vector (Vector), drop,
2530
force, length, null, splitAt, take)
2631
import Data.Word (Word16, Word32, Word64, Word8, byteSwap32)
27-
import Database.LSMTree.Internal.Entry (NumEntries (NumEntries))
32+
import Database.LSMTree.Internal.Entry (NumEntries (NumEntries),
33+
unNumEntries)
2834
import Database.LSMTree.Internal.Index
29-
(Index (fromSBS, search, sizeInPages))
35+
(Index (finalLBS, fromSBS, headerLBS, search, sizeInPages))
3036
import Database.LSMTree.Internal.Page (NumPages (NumPages),
3137
PageNo (PageNo), PageSpan (PageSpan))
3238
import Database.LSMTree.Internal.Serialise
3339
(SerialisedKey (SerialisedKey'))
3440
import Database.LSMTree.Internal.Vector (binarySearchL, mkPrimVector)
41+
import GHC.Exts (Proxy#)
3542

3643
{-|
3744
The type–version indicator for the ordinary index and its serialisation
@@ -99,6 +106,18 @@ instance Index IndexOrdinary where
99106
sizeInPages (IndexOrdinary lastKeys)
100107
= NumPages $ fromIntegral (length lastKeys)
101108

109+
headerLBS :: Proxy# IndexOrdinary -> LazyByteString
110+
headerLBS _ = toLazyByteString $
111+
word32Host $
112+
supportedTypeAndVersion
113+
114+
finalLBS :: NumEntries -> IndexOrdinary -> LazyByteString
115+
finalLBS entryCount _ = toLazyByteString $
116+
word64Host $
117+
fromIntegral $
118+
unNumEntries $
119+
entryCount
120+
102121
fromSBS :: ShortByteString -> Either String (NumEntries, IndexOrdinary)
103122
fromSBS shortByteString@(SBS unliftedByteArray)
104123
| fullSize < 12
@@ -123,12 +142,12 @@ instance Index IndexOrdinary where
123142
typeAndVersion :: Word32
124143
typeAndVersion = indexByteArray byteArray 0
125144

126-
postVersionBytes :: Primitive.Vector Word8
127-
postVersionBytes = Primitive.drop 4 fullBytes
145+
postTypeAndVersionBytes :: Primitive.Vector Word8
146+
postTypeAndVersionBytes = Primitive.drop 4 fullBytes
128147

129148
lastKeysBytes, entryCountBytes :: Primitive.Vector Word8
130149
(lastKeysBytes, entryCountBytes)
131-
= Primitive.splitAt (fullSize - 12) postVersionBytes
150+
= Primitive.splitAt (fullSize - 12) postTypeAndVersionBytes
132151

133152
entryCount :: Either String NumEntries
134153
entryCount

src/Database/LSMTree/Internal/RunBuilder.hs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
{-# LANGUAGE MagicHash #-}
2+
13
-- | A mutable run ('RunBuilder') that is under construction.
24
--
35
module Database.LSMTree.Internal.RunBuilder (
@@ -30,6 +32,7 @@ import Database.LSMTree.Internal.RawPage (RawPage)
3032
import Database.LSMTree.Internal.RunAcc (RunAcc, RunBloomFilterAlloc)
3133
import qualified Database.LSMTree.Internal.RunAcc as RunAcc
3234
import Database.LSMTree.Internal.Serialise
35+
import GHC.Exts (proxy#)
3336
import qualified System.FS.API as FS
3437
import System.FS.API (HasFS)
3538
import qualified System.FS.BlockIO.API as FS
@@ -90,7 +93,7 @@ new hfs hbio runBuilderFsPaths numEntries alloc = do
9093
runBuilderHandles <- traverse (makeHandle hfs) (pathsForRunFiles runBuilderFsPaths)
9194

9295
let builder = RunBuilder { runBuilderHasFS = hfs, runBuilderHasBlockIO = hbio, .. }
93-
writeIndexHeader hfs (forRunIndex runBuilderHandles)
96+
writeIndexHeader hfs (forRunIndex runBuilderHandles) (proxy# @IndexCompact)
9497
return builder
9598

9699
{-# SPECIALISE addKeyOp ::

test/Test/Database/LSMTree/Internal/Index/Compact.hs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
{-# LANGUAGE LambdaCase #-}
2+
{-# LANGUAGE MagicHash #-}
23
{-# LANGUAGE RecordWildCards #-}
34
{-# OPTIONS_GHC -Wno-orphans #-}
45
{- HLINT ignore "Eta reduce" -}
@@ -37,6 +38,7 @@ import Database.LSMTree.Internal.Index.CompactAcc as IndexCompact
3738
import Database.LSMTree.Internal.Page (PageNo (PageNo), PageSpan,
3839
multiPage, singlePage)
3940
import Database.LSMTree.Internal.Serialise
41+
import GHC.Exts (proxy#)
4042
import Numeric (showHex)
4143
import Prelude hiding (max, min, pi)
4244
import qualified Test.QuickCheck as QC
@@ -99,7 +101,7 @@ tests = testGroup "Test.Database.LSMTree.Internal.Index.Compact" [
99101
, 7, 0
100102
]
101103

102-
let header = LBS.unpack headerLBS
104+
let header = LBS.unpack (headerLBS (proxy# @IndexCompact))
103105
let primary = LBS.unpack $
104106
LBS.fromChunks (map Chunk.toByteString chunks)
105107
let rest = LBS.unpack (finalLBS (NumEntries 7) index)
@@ -231,7 +233,7 @@ prop_roundtrip_chunks (Chunks chunks index) numEntries =
231233
counterexample ("rest:\n" <> showBS bsRest) $
232234
Right (numEntries, index) === fromSBS sbs
233235
where
234-
bsVersion = headerLBS
236+
bsVersion = headerLBS (proxy# @IndexCompact)
235237
bsPrimary = LBS.fromChunks $
236238
map (Chunk.toByteString . word64VectorToChunk) chunks
237239
bsRest = finalLBS numEntries index
@@ -293,7 +295,7 @@ writeIndexCompact numEntries (ChunkSize csize) ps = runST $ do
293295
cs <- mapM (`append` ica) (toAppends ps)
294296
(c, index) <- unsafeEnd ica
295297
return
296-
( headerLBS
298+
( headerLBS (proxy# @IndexCompact)
297299
, LBS.fromChunks $
298300
foldMap (map Chunk.toByteString) $ cs <> pure (toList c)
299301
, finalLBS numEntries index

0 commit comments

Comments
 (0)