Skip to content

Commit c361efb

Browse files
authored
Merge pull request #419 from IntersectMBO/jeltsch/growing-vector
Enable ordinary-index building without a page count bound
2 parents 2e76991 + cce6276 commit c361efb

File tree

4 files changed

+144
-45
lines changed

4 files changed

+144
-45
lines changed

lsm-tree.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ library
157157
Database.LSMTree.Internal.UniqCounter
158158
Database.LSMTree.Internal.Unsliced
159159
Database.LSMTree.Internal.Vector
160+
Database.LSMTree.Internal.Vector.Growing
160161
Database.LSMTree.Internal.WriteBuffer
161162
Database.LSMTree.Internal.WriteBufferBlobs
162163
Database.LSMTree.Monoidal

src/Database/LSMTree/Internal/IndexOrdinaryAcc.hs

Lines changed: 16 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,6 @@ import Prelude hiding (take)
1717

1818
import Control.Exception (assert)
1919
import Control.Monad.ST.Strict (ST)
20-
import Data.Primitive.PrimVar (PrimVar, newPrimVar, readPrimVar,
21-
writePrimVar)
22-
import Data.Vector (force, take, unsafeFreeze)
23-
import Data.Vector.Mutable (MVector)
24-
import qualified Data.Vector.Mutable as Mutable (unsafeNew, write)
2520
import qualified Data.Vector.Primitive as Primitive (Vector, length)
2621
import Data.Word (Word16, Word8)
2722
import Database.LSMTree.Internal.Chunk (Baler, Chunk, createBaler,
@@ -33,33 +28,28 @@ import Database.LSMTree.Internal.IndexOrdinary
3328
import Database.LSMTree.Internal.Serialise
3429
(SerialisedKey (SerialisedKey'))
3530
import Database.LSMTree.Internal.Vector (byteVectorFromPrim)
31+
import Database.LSMTree.Internal.Vector.Growing (GrowingVector)
32+
import qualified Database.LSMTree.Internal.Vector.Growing as Growing (append,
33+
freeze, new)
3634

3735
{-|
3836
A general-purpose fence pointer index under incremental construction.
3937
40-
A value @IndexOrdinaryAcc buffer keyCountRef baler@ denotes a partially
41-
constructed index with the following properties:
42-
43-
* The keys that the index assigns to pages are stored as a prefix of the
44-
mutable vector @buffer@.
45-
* The reference @keyCountRef@ points to the number of those keys.
46-
* The @baler@ object is used by the index for incremental output of the
47-
serialised key list.
38+
A value @IndexOrdinaryAcc lastKeys baler@ denotes a partially constructed
39+
index that assigns keys to pages according to @lastKeys@ and uses @baler@
40+
for incremental output of the serialised key list.
4841
-}
4942
data IndexOrdinaryAcc s = IndexOrdinaryAcc
50-
!(MVector s SerialisedKey)
51-
!(PrimVar s Int)
43+
!(GrowingVector s SerialisedKey)
5244
!(Baler s)
5345

5446
-- | Creates a new, initially empty, index.
55-
new :: Int -- ^ Maximum number of keys
47+
new :: Int -- ^ Initial size of the key buffer
5648
-> Int -- ^ Minimum chunk size in bytes
5749
-> ST s (IndexOrdinaryAcc s) -- ^ Construction of the index
58-
new maxKeyCount minChunkSize = assert (maxKeyCount >= 0) $
59-
IndexOrdinaryAcc <$>
60-
Mutable.unsafeNew maxKeyCount <*>
61-
newPrimVar 0 <*>
62-
createBaler minChunkSize
50+
new initialKeyBufferSize minChunkSize = IndexOrdinaryAcc <$>
51+
Growing.new initialKeyBufferSize <*>
52+
createBaler minChunkSize
6353

6454
{-|
6555
Appends keys to the key list of an index and outputs newly available chunks
@@ -69,26 +59,18 @@ new maxKeyCount minChunkSize = assert (maxKeyCount >= 0) $
6959
word may result in a corrupted serialised key list.
7060
-}
7161
append :: Append -> IndexOrdinaryAcc s -> ST s (Maybe Chunk)
72-
append instruction (IndexOrdinaryAcc buffer keyCountRef baler)
62+
append instruction (IndexOrdinaryAcc lastKeys baler)
7363
= case instruction of
7464
AppendSinglePage _ key -> do
75-
keyCount <- readPrimVar keyCountRef
76-
Mutable.write buffer keyCount key
77-
writePrimVar keyCountRef (succ keyCount)
65+
Growing.append lastKeys 1 key
7866
feedBaler (keyListElem key) baler
7967
AppendMultiPage key overflowPageCount -> do
80-
keyCount <- readPrimVar keyCountRef
8168
let
8269

8370
pageCount :: Int
8471
!pageCount = succ (fromIntegral overflowPageCount)
8572

86-
keyCount' :: Int
87-
!keyCount' = keyCount + pageCount
88-
89-
mapM_ (flip (Mutable.write buffer) key)
90-
[keyCount .. pred keyCount']
91-
writePrimVar keyCountRef keyCount'
73+
Growing.append lastKeys pageCount key
9274
feedBaler (concat (replicate pageCount (keyListElem key))) baler
9375
where
9476

@@ -112,8 +94,7 @@ append instruction (IndexOrdinaryAcc buffer keyCountRef baler)
11294
@index@ is not used afterwards.
11395
-}
11496
unsafeEnd :: IndexOrdinaryAcc s -> ST s (Maybe Chunk, IndexOrdinary)
115-
unsafeEnd (IndexOrdinaryAcc buffer keyCountRef baler) = do
116-
keyCount <- readPrimVar keyCountRef
117-
keys <- force <$> take keyCount <$> unsafeFreeze buffer
97+
unsafeEnd (IndexOrdinaryAcc lastKeys baler) = do
98+
keys <- Growing.freeze lastKeys
11899
remnant <- unsafeEndBaler baler
119100
return (remnant, IndexOrdinary keys)
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
2+
3+
{- HLINT ignore "Avoid restricted alias" -}
4+
5+
-- | Vectors with support for appending elements.
6+
module Database.LSMTree.Internal.Vector.Growing
7+
(
8+
GrowingVector,
9+
new,
10+
append,
11+
freeze
12+
)
13+
where
14+
15+
import Prelude hiding (init, last, length)
16+
17+
import Control.Monad (when)
18+
import Control.Monad.ST.Strict (ST)
19+
import Data.Primitive.PrimVar (PrimVar, newPrimVar, readPrimVar,
20+
writePrimVar)
21+
import Data.STRef.Strict (STRef, newSTRef, readSTRef, writeSTRef)
22+
import Data.Vector (Vector)
23+
import qualified Data.Vector as Mutable (freeze)
24+
import Data.Vector.Mutable (MVector)
25+
import qualified Data.Vector.Mutable as Mutable (grow, length, new, set, slice,
26+
take)
27+
28+
{-|
29+
A vector with support for appending elements.
30+
31+
Internally, the elements of a growing vector are stored in a buffer. This
32+
buffer is automatically enlarged whenever this is needed for storing
33+
additional elements. On each such enlargement, the size of the buffer is
34+
multiplied by a power of 2, whose exponent is chosen just big enough to make
35+
the final buffer size at least as high as the new vector length.
36+
37+
Note that, while buffer sizes and vector lengths are represented as 'Int'
38+
values internally, the above-described buffer enlargement scheme has the
39+
consequence that the largest possible buffer size and thus the largest
40+
possible vector length may not be the maximum 'Int' value. That said, they
41+
are always greater than half the maximum 'Int' value, which should be enough
42+
for all practical purposes.
43+
-}
44+
data GrowingVector s a = GrowingVector
45+
!(STRef s (MVector s a)) -- Reference to the buffer
46+
!(PrimVar s Int) -- Reference to the length
47+
48+
-- | Creates a new, initially empty, vector.
49+
new :: Int -- ^ Initial buffer size
50+
-> ST s (GrowingVector s a) -- ^ Construction of the vector
51+
new illegalInitialBufferSize | illegalInitialBufferSize <= 0
52+
= error "Initial buffer size not positive"
53+
new initialBufferSize
54+
= do
55+
buffer <- Mutable.new initialBufferSize
56+
bufferRef <- newSTRef $! buffer
57+
lengthRef <- newPrimVar 0
58+
return (GrowingVector bufferRef lengthRef)
59+
60+
{-|
61+
Appends a value a certain number of times to a vector. If a negative number
62+
is provided as the count, the vector is not changed.
63+
-}
64+
append :: forall s a . GrowingVector s a -> Int -> a -> ST s ()
65+
append _ pseudoCount _ | pseudoCount <= 0
66+
= return ()
67+
append (GrowingVector bufferRef lengthRef) count val
68+
= do
69+
length <- readPrimVar lengthRef
70+
makeRoom
71+
buffer' <- readSTRef bufferRef
72+
Mutable.set (Mutable.slice length count buffer') val
73+
where
74+
75+
makeRoom :: ST s ()
76+
makeRoom = do
77+
length <- readPrimVar lengthRef
78+
when (count > maxBound - length) (error "New length too large")
79+
buffer <- readSTRef bufferRef
80+
let
81+
82+
bufferSize :: Int
83+
!bufferSize = Mutable.length buffer
84+
85+
length' :: Int
86+
!length' = length + count
87+
88+
when (bufferSize < length') $ do
89+
let
90+
91+
higherBufferSizes :: [Int]
92+
higherBufferSizes = tail (init ++ [last]) where
93+
94+
init :: [Int]
95+
last :: Int
96+
(init, last : _) = span (<= maxBound `div` 2) $
97+
iterate (* 2) bufferSize
98+
{-NOTE:
99+
In order to prevent overflow, we have to start with the
100+
current buffer size here, although we know that it is
101+
not sufficient.
102+
-}
103+
104+
sufficientBufferSizes :: [Int]
105+
sufficientBufferSizes = dropWhile (< length') higherBufferSizes
106+
107+
case sufficientBufferSizes of
108+
[]
109+
-> error "No sufficient buffer size available"
110+
bufferSize' : _
111+
-> Mutable.grow buffer (bufferSize' - bufferSize) >>=
112+
(writeSTRef bufferRef $!)
113+
writePrimVar lengthRef length'
114+
115+
-- | Turns a growing vector into an ordinary vector.
116+
freeze :: GrowingVector s a -> ST s (Vector a)
117+
freeze (GrowingVector bufferRef lengthRef) = do
118+
buffer <- readSTRef bufferRef
119+
length <- readPrimVar lengthRef
120+
Mutable.freeze (Mutable.take length buffer)

test/Test/Database/LSMTree/Internal/IndexOrdinary.hs

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -225,13 +225,6 @@ serialisedIndex entryCount lastKeys
225225

226226
-- ** Construction via appending
227227

228-
-- Yields the number of keys that an append operation adds to an index.
229-
appendedKeysCount :: Append -> Int
230-
appendedKeysCount (AppendSinglePage _ _)
231-
= 1
232-
appendedKeysCount (AppendMultiPage _ overflowPageCount)
233-
= succ (fromIntegral overflowPageCount)
234-
235228
-- Yields the keys that an append operation adds to an index.
236229
appendedKeys :: Append -> [SerialisedKey]
237230
appendedKeys (AppendSinglePage _ lastKey)
@@ -265,7 +258,7 @@ lastKeysBlockFromAppends appends = lastKeysBlock where
265258
-}
266259
incrementalConstruction :: [Append] -> (IndexOrdinary, Primitive.Vector Word8)
267260
incrementalConstruction appends = runST $ do
268-
acc <- new keyCount minChunkSize
261+
acc <- new initialKeyBufferSize minChunkSize
269262
commonChunks <- mapM (flip append acc) appends
270263
(remnant, unserialised) <- unsafeEnd acc
271264
let
@@ -277,8 +270,12 @@ incrementalConstruction appends = runST $ do
277270
return (unserialised, serialised)
278271
where
279272

280-
keyCount :: Int
281-
keyCount = sum (map appendedKeysCount appends)
273+
{-
274+
We do not need to vary the initial key buffer size, since we are not
275+
testing growing vectors here.
276+
-}
277+
initialKeyBufferSize :: Int
278+
initialKeyBufferSize = 0x100
282279

283280
{-
284281
We do not need to vary the minimum chunk size, since we are not testing

0 commit comments

Comments
 (0)