Skip to content

Commit 3b3add1

Browse files
authored
Merge pull request #336 from IntersectMBO/jeltsch/general-purpose-chunks/tests
Add tests for general-purpose chunk handling
2 parents 5fc92eb + ad562cb commit 3b3add1

File tree

3 files changed

+176
-0
lines changed

3 files changed

+176
-0
lines changed

lsm-tree.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,7 @@ test-suite lsm-tree-test
295295
Test.Database.LSMTree.Generators
296296
Test.Database.LSMTree.Internal
297297
Test.Database.LSMTree.Internal.BloomFilter
298+
Test.Database.LSMTree.Internal.Chunk
298299
Test.Database.LSMTree.Internal.CRC32C
299300
Test.Database.LSMTree.Internal.Entry
300301
Test.Database.LSMTree.Internal.IndexCompact

test/Main.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import qualified Test.Database.LSMTree.Class.Normal
99
import qualified Test.Database.LSMTree.Generators
1010
import qualified Test.Database.LSMTree.Internal
1111
import qualified Test.Database.LSMTree.Internal.BloomFilter
12+
import qualified Test.Database.LSMTree.Internal.Chunk
1213
import qualified Test.Database.LSMTree.Internal.CRC32C
1314
import qualified Test.Database.LSMTree.Internal.Entry
1415
import qualified Test.Database.LSMTree.Internal.IndexCompact
@@ -41,6 +42,7 @@ main = defaultMain $ testGroup "lsm-tree"
4142
, Test.Database.LSMTree.Generators.tests
4243
, Test.Database.LSMTree.Internal.tests
4344
, Test.Database.LSMTree.Internal.BloomFilter.tests
45+
, Test.Database.LSMTree.Internal.Chunk.tests
4446
, Test.Database.LSMTree.Internal.CRC32C.tests
4547
, Test.Database.LSMTree.Internal.Entry.tests
4648
, Test.Database.LSMTree.Internal.Lookup.tests
Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
{-# OPTIONS_GHC -Wno-orphans #-}
2+
3+
module Test.Database.LSMTree.Internal.Chunk (tests) where
4+
5+
import Prelude hiding (concat, drop, length)
6+
7+
import Control.Arrow ((>>>))
8+
import Control.Monad.ST.Strict (runST)
9+
import qualified Data.List as List (concat, drop, length)
10+
import Data.Maybe (catMaybes, fromJust, isJust, isNothing)
11+
import Data.Vector.Primitive (Vector, concat, fromList, length,
12+
toList)
13+
import Data.Word (Word8)
14+
import Database.LSMTree.Internal.Chunk (Chunk, createBaler, feedBaler,
15+
fromChunk, unsafeEndBaler)
16+
import Test.QuickCheck (Arbitrary (arbitrary, shrink),
17+
NonEmptyList (NonEmpty), Positive (Positive, getPositive),
18+
Property, Small (Small, getSmall), Testable, scale,
19+
shrinkMap, tabulate, (===), (==>))
20+
import Test.Tasty (TestTree, testGroup)
21+
import Test.Tasty.QuickCheck (testProperty)
22+
23+
-- * Tests
24+
25+
tests :: TestTree
26+
tests = testGroup "Test.Database.LSMTree.Internal.Chunk" $
27+
[
28+
testProperty "Content is preserved"
29+
prop_contentIsPreserved,
30+
testProperty "No remnant after output"
31+
prop_noRemnantAfterOutput,
32+
testProperty "Common chunks are large"
33+
prop_commonChunksAreLarge,
34+
testProperty "Remnant chunk is non-empty"
35+
prop_remnantChunkIsNonEmpty,
36+
testProperty "Remnant chunk is small"
37+
prop_remnantChunkIsSmall
38+
]
39+
40+
-- * Properties to test
41+
42+
{-
43+
Feeds a freshly created baler a sequence of data portions and ends it
44+
afterwards, yielding all output.
45+
-}
46+
balingOutput :: Int -- Minimum chunk size
47+
-> [[Vector Word8]] -- Data portions to be fed
48+
-> ([Maybe Chunk], Maybe Chunk) -- Feeding output and remnant
49+
balingOutput minChunkSize food = runST $ do
50+
baler <- createBaler minChunkSize
51+
commonChunks <- mapM (flip feedBaler baler) food
52+
remnant <- unsafeEndBaler baler
53+
return (commonChunks, remnant)
54+
55+
{-
56+
Supplies the output of a complete baler run for constructing a property.
57+
58+
The resulting property additionally provides statistics about the lengths of
59+
buildup phases, where a buildup phase is a sequence of feedings that does
60+
not result in chunks and is followed by an ultimate chunk production, which
61+
happens either due to another feeding or due to the baler run ending and
62+
producing a remnant chunk.
63+
-}
64+
withBalingOutput
65+
:: Testable prop
66+
=> Int -- Minimum chunk size
67+
-> [[Vector Word8]] -- Data portions to be fed
68+
-> ([Maybe Chunk] -> Maybe Chunk -> prop) -- Property from baler output
69+
-> Property -- Resulting property
70+
withBalingOutput minChunkSize food consumer
71+
= tabulate "Lengths of buildup phases"
72+
(map show (buildupPhasesLengths commonChunks))
73+
(consumer commonChunks remnant)
74+
where
75+
76+
commonChunks :: [Maybe Chunk]
77+
remnant :: Maybe Chunk
78+
(commonChunks, remnant) = balingOutput minChunkSize food
79+
80+
buildupPhasesLengths :: [Maybe Chunk] -> [Int]
81+
buildupPhasesLengths [] = []
82+
buildupPhasesLengths chunks = List.length buildupOutput :
83+
buildupPhasesLengths (List.drop 1 followUp)
84+
where
85+
86+
buildupOutput, followUp :: [Maybe Chunk]
87+
(buildupOutput, followUp) = span isNothing chunks
88+
89+
prop_contentIsPreserved :: MinChunkSize -> [[Vector Word8]] -> Property
90+
prop_contentIsPreserved (MinChunkSize minChunkSize) food
91+
= withBalingOutput minChunkSize food $ \ commonChunks remnant ->
92+
let
93+
94+
input :: Vector Word8
95+
input = concat (List.concat food)
96+
97+
output :: Vector Word8
98+
output = concat (fromChunk <$> catMaybes (commonChunks ++ [remnant]))
99+
100+
in input === output
101+
102+
prop_noRemnantAfterOutput :: MinChunkSize
103+
-> NonEmptyList [Vector Word8]
104+
-> Property
105+
prop_noRemnantAfterOutput (MinChunkSize minChunkSize) (NonEmpty food)
106+
= withBalingOutput minChunkSize food $ \ commonChunks remnant ->
107+
isJust (last commonChunks) ==> isNothing remnant
108+
109+
prop_commonChunksAreLarge :: MinChunkSize -> [[Vector Word8]] -> Property
110+
prop_commonChunksAreLarge (MinChunkSize minChunkSize) food
111+
= withBalingOutput minChunkSize food $ \ commonChunks _ ->
112+
all (fromChunk >>> length >>> (>= minChunkSize)) (catMaybes commonChunks)
113+
114+
remnantChunkSizeIs :: (Int -> Bool) -> Int -> [[Vector Word8]] -> Property
115+
remnantChunkSizeIs constraint minChunkSize food
116+
= withBalingOutput minChunkSize food $ \ _ remnant ->
117+
isJust remnant ==> constraint (length (fromChunk (fromJust remnant)))
118+
119+
prop_remnantChunkIsNonEmpty :: MinChunkSize -> [[Vector Word8]] -> Property
120+
prop_remnantChunkIsNonEmpty (MinChunkSize minChunkSize)
121+
= remnantChunkSizeIs (> 0) minChunkSize
122+
123+
prop_remnantChunkIsSmall :: MinChunkSize -> [[Vector Word8]] -> Property
124+
prop_remnantChunkIsSmall (MinChunkSize minChunkSize)
125+
= remnantChunkSizeIs (< minChunkSize) minChunkSize
126+
127+
-- * Test case generation and shrinking
128+
129+
instance Arbitrary (Vector Word8) where
130+
131+
arbitrary = fromList <$> arbitrary
132+
133+
shrink = shrinkMap fromList toList
134+
135+
{-
136+
The type of minimum chunk sizes.
137+
138+
This type is isomorphic to 'Int' but has a different way of generating test
139+
cases. Only small, positive integers are generated, and they are generated
140+
using \(2 \cdot s^{2}\) as the size parameter, where \(s\) refers to the
141+
original size parameter.
142+
143+
The reasons for the modification of the size parameter in the
144+
above-mentioned way are somewhat subtle.
145+
146+
First, we want the ratio between the average minimum chunk size and the
147+
average size of a data portion that we feed to a baler to be independent of
148+
the size parameter. Each data portion is a list of primitive vectors of
149+
bytes, and arbitrarily generated lists and byte vectors have lengths that
150+
are small, positive integers. Such integers are \(s/2\) on average. As a
151+
result, the average size of data fed to a baler is \(s^{2}/4\). By
152+
generating minimum chunk sizes with \(a \cdot s^{2}\) as the size parameter
153+
for some constant \(a\), the average minimum chunk size is \(a/2 \cdot
154+
s^{2}\) and therefore $2a$ times the average size of a data portion fed,
155+
independently of \(s\).
156+
157+
Second, we want prompt chunk generation as well as chunk generation after
158+
only two or more feedings to occur reasonably often. To achieve this to some
159+
degree, we can tune the parameter \(a\). It appears that \(a\) being \(2\)
160+
leads to reasonable results.
161+
-}
162+
newtype MinChunkSize = MinChunkSize Int deriving stock Show
163+
164+
fromMinChunkSize :: MinChunkSize -> Int
165+
fromMinChunkSize (MinChunkSize minChunkSize) = minChunkSize
166+
167+
instance Arbitrary MinChunkSize where
168+
169+
arbitrary = scale (\ size -> 2 * size ^ (2 :: Int)) $
170+
MinChunkSize <$> getSmall <$> getPositive <$> arbitrary
171+
172+
shrink = shrinkMap (MinChunkSize . getSmall . getPositive)
173+
(Positive . Small . fromMinChunkSize)

0 commit comments

Comments
 (0)