Skip to content

Commit 2918e32

Browse files
committed
Lift the 8-byte key restriction for the compact index
Now that `topBits64` is fully safe, remove the 8-byte key constraint when using the compact index. Instead, the config option for the index type includes a hint not to use the compact index if their keys are too small, because that will lead to bad performance.
1 parent b0e251b commit 2918e32

File tree

11 files changed

+25
-76
lines changed

11 files changed

+25
-76
lines changed

bench/micro/Bench/Database/LSMTree/Internal/Index.hs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,13 @@ import Criterion.Main (Benchmark, Benchmarkable, bench, bgroup, env,
1111
import Data.List (foldl')
1212
-- foldl' is included in the Prelude from base 4.20 onwards
1313
#endif
14-
import Database.LSMTree.Extras.Generators (getKeyForIndexCompact,
15-
mkPages, toAppends)
14+
import Database.LSMTree.Extras.Generators (mkPages, toAppends)
1615
-- also for @Arbitrary@ instantiation of @SerialisedKey@
1716
import Database.LSMTree.Extras.Index (Append, append)
1817
import Database.LSMTree.Internal.Index (Index,
1918
IndexType (Compact, Ordinary), newWithDefaults, search,
2019
unsafeEnd)
21-
import Database.LSMTree.Internal.Serialise
22-
(SerialisedKey (SerialisedKey))
20+
import Database.LSMTree.Internal.Serialise (SerialisedKey)
2321
import Test.QuickCheck (choose, vector)
2422
import Test.QuickCheck.Gen (Gen (MkGen))
2523
import Test.QuickCheck.Random (mkQCGen)
@@ -61,8 +59,7 @@ generated (MkGen exec) = exec (mkQCGen 411) 30
6159
keysForIndexCompact :: Int -- ^ Number of keys
6260
-> [SerialisedKey] -- ^ Constructed keys
6361
keysForIndexCompact = vector >>>
64-
generated >>>
65-
map (getKeyForIndexCompact >>> SerialisedKey)
62+
generated
6663

6764
{-|
6865
Constructs append operations whose serialised keys conform to the key size

doc/format-run.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -198,8 +198,9 @@ big-endian.
198198
The compact index type is designed to work with keys that are large
199199
cryptographic hashes, e.g. 32 bytes. In particular it requires:
200200

201-
* keys must be uniformly distributed
202-
* keys must be at least 8 bytes (64bits), but can otherwise be variable length
201+
* keys must be uniformly distributed;
202+
* keys can be of variable length;
203+
* keys less than 8 bytes (64bits) are padded with zeros (in LSB position).
203204

204205
For this important special case, we can do significantly better than storing a
205206
whole key per page: we can typically store just 8 bytes (64bits) per page. This

src-extras/Database/LSMTree/Extras/Generators.hs

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,6 @@ module Database.LSMTree.Extras.Generators (
3333
, genRawBytesSized
3434
, packRawBytesPinnedOrUnpinned
3535
, LargeRawBytes (..)
36-
, isKeyForIndexCompact
37-
, KeyForIndexCompact (..)
3836
, BiasedKey (..)
3937
-- * helpers
4038
, shrinkVec
@@ -510,28 +508,6 @@ instance Arbitrary LargeRawBytes where
510508

511509
deriving newtype instance SerialiseValue LargeRawBytes
512510

513-
-- Serialised keys for the compact index must be at least 8 bytes long.
514-
515-
genKeyForIndexCompact :: Gen RawBytes
516-
genKeyForIndexCompact =
517-
genRawBytesN =<< QC.sized (\s -> QC.chooseInt (8, s + 8))
518-
519-
isKeyForIndexCompact :: RawBytes -> Bool
520-
isKeyForIndexCompact rb = RB.size rb >= 8
521-
522-
newtype KeyForIndexCompact =
523-
KeyForIndexCompact { getKeyForIndexCompact :: RawBytes }
524-
deriving stock (Eq, Ord, Show)
525-
526-
instance Arbitrary KeyForIndexCompact where
527-
arbitrary =
528-
KeyForIndexCompact <$> genKeyForIndexCompact
529-
shrink (KeyForIndexCompact rawBytes) =
530-
[KeyForIndexCompact rawBytes' | rawBytes' <- shrink rawBytes,
531-
isKeyForIndexCompact rawBytes']
532-
533-
deriving newtype instance SerialiseKey KeyForIndexCompact
534-
535511
-- we try to make collisions and close keys more likely (very crudely)
536512
arbitraryBiasedKey :: (RawBytes -> k) -> Gen RawBytes -> Gen k
537513
arbitraryBiasedKey fromRB genUnbiased = fromRB <$> frequency

src/Database/LSMTree.hs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,6 @@ module Database.LSMTree (
149149
serialiseKeyIdentity,
150150
serialiseKeyIdentityUpToSlicing,
151151
serialiseKeyPreservesOrdering,
152-
serialiseKeyMinimalSize,
153152
serialiseValueIdentity,
154153
serialiseValueIdentityUpToSlicing,
155154
packSlice,
@@ -227,8 +226,7 @@ import Database.LSMTree.Internal.Config
227226
DiskCachePolicy (..), FencePointerIndexType (..),
228227
LevelNo (..), MergeBatchSize (..), MergePolicy (..),
229228
MergeSchedule (..), SizeRatio (..), TableConfig (..),
230-
WriteBufferAlloc (..), defaultTableConfig,
231-
serialiseKeyMinimalSize)
229+
WriteBufferAlloc (..), defaultTableConfig)
232230
import Database.LSMTree.Internal.Config.Override
233231
(TableConfigOverride (..), noTableConfigOverride)
234232
import Database.LSMTree.Internal.Entry (NumEntries (..))

src/Database/LSMTree/Internal/Config.hs

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ module Database.LSMTree.Internal.Config (
2020
-- * Fence pointer index
2121
, FencePointerIndexType (..)
2222
, indexTypeForRun
23-
, serialiseKeyMinimalSize
2423
-- * Disk cache policy
2524
, DiskCachePolicy (..)
2625
, diskCachePolicyForLevel
@@ -36,11 +35,9 @@ import Database.LSMTree.Internal.Index (IndexType)
3635
import qualified Database.LSMTree.Internal.Index as Index
3736
(IndexType (Compact, Ordinary))
3837
import qualified Database.LSMTree.Internal.MergingRun as MR
39-
import qualified Database.LSMTree.Internal.RawBytes as RB
4038
import Database.LSMTree.Internal.Run (RunDataCaching (..))
4139
import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc (..))
4240
import Database.LSMTree.Internal.RunBuilder (RunParams (..))
43-
import Database.LSMTree.Internal.Serialise.Class (SerialiseKey (..))
4441

4542
newtype LevelNo = LevelNo Int
4643
deriving stock (Show, Eq, Ord)
@@ -321,12 +318,11 @@ data FencePointerIndexType =
321318
| {- |
322319
Compact indexes are designed for the case where the keys in the database are uniformly distributed, e.g., when the keys are hashes.
323320
324-
When using a compact index, the 'Database.LSMTree.Internal.Serialise.Class.serialiseKey' function must satisfy the following additional law:
321+
When using a compact index, some requirements apply to serialised keys:
325322
326-
[Minimal size]
327-
@'Database.LSMTree.Internal.RawBytes.size' ('Database.LSMTree.Internal.Serialise.Class.serialiseKey' x) >= 8@
328-
329-
Use 'serialiseKeyMinimalSize' to test this law.
323+
* keys must be uniformly distributed;
324+
* keys can be of variable length;
325+
* keys less than 8 bytes (64bits) are padded with zeros (in LSB position).
330326
-}
331327
CompactIndex
332328
deriving stock (Eq, Show)
@@ -339,10 +335,6 @@ indexTypeForRun :: FencePointerIndexType -> IndexType
339335
indexTypeForRun CompactIndex = Index.Compact
340336
indexTypeForRun OrdinaryIndex = Index.Ordinary
341337

342-
-- | Test the __Minimal size__ law for the 'CompactIndex' option.
343-
serialiseKeyMinimalSize :: SerialiseKey k => k -> Bool
344-
serialiseKeyMinimalSize x = RB.size (serialiseKey x) >= 8
345-
346338
{-------------------------------------------------------------------------------
347339
Disk cache policy
348340
-------------------------------------------------------------------------------}

src/Database/LSMTree/Simple.hs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,6 @@ module Database.LSMTree.Simple (
134134
serialiseKeyIdentity,
135135
serialiseKeyIdentityUpToSlicing,
136136
serialiseKeyPreservesOrdering,
137-
serialiseKeyMinimalSize,
138137
serialiseValueIdentity,
139138
serialiseValueIdentityUpToSlicing,
140139
packSlice,
@@ -182,9 +181,8 @@ import Database.LSMTree (BloomFilterAlloc, CursorClosedError (..),
182181
UnionCredits (..), UnionDebt (..), WriteBufferAlloc,
183182
isValidSnapshotName, noTableConfigOverride, packSlice,
184183
serialiseKeyIdentity, serialiseKeyIdentityUpToSlicing,
185-
serialiseKeyMinimalSize, serialiseKeyPreservesOrdering,
186-
serialiseValueIdentity, serialiseValueIdentityUpToSlicing,
187-
toSnapshotName)
184+
serialiseKeyPreservesOrdering, serialiseValueIdentity,
185+
serialiseValueIdentityUpToSlicing, toSnapshotName)
188186
import qualified Database.LSMTree as LSMT
189187
import qualified Database.LSMTree.Internal.Types as LSMT
190188
import qualified Database.LSMTree.Internal.Unsafe as Internal

test/Test/Database/LSMTree/Generators.hs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,6 @@ tests = testGroup "Test.Database.LSMTree.Generators" [
6464
prop_arbitraryAndShrinkPreserveInvariant
6565
(\(LargeRawBytes rb) -> labelRawBytes rb)
6666
(deepseqInvariant @LargeRawBytes)
67-
, testGroup "KeyForIndexCompact" $
68-
prop_arbitraryAndShrinkPreserveInvariant noTags $
69-
isKeyForIndexCompact . getKeyForIndexCompact
7067
, testGroup "BiasedKey" $
7168
prop_arbitraryAndShrinkPreserveInvariant
7269
(labelTestKOps @BiasedKey)

test/Test/Database/LSMTree/Internal/Index/Compact.hs

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ import Data.Word
2828
import Database.LSMTree.Extras
2929
import Database.LSMTree.Extras.Generators (ChunkSize (..),
3030
LogicalPageSummaries, LogicalPageSummary (..), Pages (..),
31-
genRawBytes, isKeyForIndexCompact, labelPages, toAppends)
31+
genRawBytes, labelPages, toAppends)
3232
import Database.LSMTree.Extras.Index (Append (..), appendToCompact)
3333
import Database.LSMTree.Internal.BitMath
3434
import Database.LSMTree.Internal.Chunk as Chunk (toByteString)
@@ -54,9 +54,7 @@ import Text.Printf (printf)
5454

5555
tests :: TestTree
5656
tests = testGroup "Test.Database.LSMTree.Internal.Index.Compact" [
57-
testGroup "TestKey" $
58-
prop_arbitraryAndShrinkPreserveInvariant @TestKey noTags isTestKey
59-
, testProperty "prop_distribution @TestKey" $
57+
testProperty "prop_distribution @TestKey" $
6058
prop_distribution @TestKey
6159
, testProperty "prop_searchMinMaxKeysAfterConstruction" $
6260
prop_searchMinMaxKeysAfterConstruction @TestKey 100
@@ -173,15 +171,12 @@ instance Arbitrary TestKey where
173171
-- Shrink keys extensively: most failures will occur in small counterexamples,
174172
-- so we don't have to limit the number of shrinks as much.
175173
shrink (TestKey bytes) = [
176-
TestKey bytes'
174+
testkey'
177175
| let RawBytes vec = bytes
178176
, vec' <- VP.fromList <$> shrink (VP.toList vec)
179-
, let bytes' = RawBytes vec'
180-
, isKeyForIndexCompact bytes'
177+
, let testkey' = TestKey $ RawBytes vec'
181178
]
182179

183-
isTestKey :: TestKey -> Bool
184-
isTestKey (TestKey bytes) = isKeyForIndexCompact bytes
185180

186181
{-------------------------------------------------------------------------------
187182
Properties

test/Test/Database/LSMTree/Internal/Lookup.hs

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ import qualified Database.LSMTree.Internal.Run as Run
6060
import Database.LSMTree.Internal.RunAcc as Run
6161
import Database.LSMTree.Internal.RunBuilder
6262
(RunDataCaching (CacheRunData), RunParams (RunParams))
63-
import Database.LSMTree.Internal.Serialise
63+
import Database.LSMTree.Internal.Serialise as Serialise
6464
import Database.LSMTree.Internal.Serialise.Class
6565
import Database.LSMTree.Internal.UniqCounter
6666
import qualified Database.LSMTree.Internal.WriteBuffer as WB
@@ -569,14 +569,10 @@ liftShrink3InMemLookupData shrinkKey shrinkValue shrinkBlob InMemLookupData{ run
569569
shrinkEntry = liftShrink2 shrinkValue shrinkBlob
570570

571571
genSerialisedKey :: Gen SerialisedKey
572-
genSerialisedKey = frequency [
573-
(9, arbitrary `suchThat` (\k -> sizeofKey k >= 8))
574-
, (1, do x <- getSmall <$> arbitrary
575-
pure $ SerialisedKey (RB.pack [0,0,0,0,0,0,0, x]))
576-
]
572+
genSerialisedKey = Serialise.serialiseKey <$> arbitraryBoundedIntegral @Word64
577573

578574
shrinkSerialisedKey :: SerialisedKey -> [SerialisedKey]
579-
shrinkSerialisedKey k = [k' | k' <- shrink k, sizeofKey k' >= 8]
575+
shrinkSerialisedKey k = Serialise.serialiseKey <$> shrink (Serialise.deserialiseKey k :: Word64)
580576

581577
genSerialisedValue :: Gen SerialisedValue
582578
genSerialisedValue = frequency [ (50, arbitrary), (1, genLongValue) ]

test/Test/Database/LSMTree/Internal/RunAcc.hs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,7 @@ fromProtoValue (Proto.Value bs) = SerialisedValue . RB.fromShortByteString $ SBS
143143
fromProtoBlobRef :: Proto.BlobRef -> BlobSpan
144144
fromProtoBlobRef (Proto.BlobRef x y) = BlobSpan x y
145145

146-
-- | Wrapper around 'PageLogical' that generates nearly-full pages, and
147-
-- keys that are always large enough (>= 8 bytes) for the compact index.
146+
-- | Wrapper around 'PageLogical' that generates nearly-full pages.
148147
newtype PageLogical' = PageLogical' { getPrototypeKOps :: [(Proto.Key, Proto.Operation)] }
149148
deriving stock Show
150149

@@ -153,7 +152,7 @@ getRealKOps = fmap fromProtoKOp . getPrototypeKOps
153152

154153
instance Arbitrary PageLogical' where
155154
arbitrary = PageLogical' <$>
156-
Proto.genPageContentFits Proto.DiskPage4k (Proto.MinKeySize 8)
155+
Proto.genPageContentFits Proto.DiskPage4k Proto.noMinKeySize
157156
shrink (PageLogical' page) =
158157
[ PageLogical' page' | page' <- shrink page ]
159158

0 commit comments

Comments
 (0)