Skip to content

Commit 0dd182f

Browse files
mheinzeljorisdral
andcommitted
prototype: carefully bound size of arbitrary trees
Previously, occasionally a tree would get very large, leading to test timeouts. Now, the number of runs in a tree (as well as the number of tree constructors!) is bounded by the size parameter. Also tests that generator and shrinker respect the invariants and prints information about the distribution of generated values. Co-authored-by: Joris Dral <[email protected]>
1 parent 50f2ae8 commit 0dd182f

File tree

1 file changed

+137
-21
lines changed

1 file changed

+137
-21
lines changed

prototypes/ScheduledMergesTest.hs

Lines changed: 137 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,10 @@ import Control.Monad (replicateM_, when)
55
import Control.Monad.ST
66
import Control.Tracer (Tracer (Tracer))
77
import qualified Control.Tracer as Tracer
8-
import Data.Foldable (traverse_)
9-
import qualified Data.Map as Map
8+
import Data.Foldable (find, traverse_)
9+
import Data.Maybe (fromJust)
1010
import Data.STRef
11+
import Text.Printf (printf)
1112

1213
import ScheduledMerges as LSM
1314

@@ -16,13 +17,20 @@ import Test.QuickCheck (Arbitrary (arbitrary, shrink), Property)
1617
import Test.QuickCheck.Exception (isDiscard)
1718
import Test.Tasty
1819
import Test.Tasty.HUnit (HasCallStack, testCase)
19-
import Test.Tasty.QuickCheck (testProperty, (=/=), (===))
20+
import Test.Tasty.QuickCheck (QuickCheckMaxSize (..),
21+
QuickCheckTests (..), testProperty, (=/=), (===))
2022

2123
tests :: TestTree
2224
tests = testGroup "Unit and property tests"
2325
[ testCase "test_regression_empty_run" test_regression_empty_run
2426
, testCase "test_merge_again_with_incoming" test_merge_again_with_incoming
2527
, testProperty "prop_union" prop_union
28+
, testGroup "T"
29+
[ localOption (QuickCheckTests 1000) $ -- super quick, run more
30+
testProperty "Arbitrary satisfies invariant" prop_arbitrarySatisfiesInvariant
31+
, localOption (QuickCheckMaxSize 60) $ -- many shrinks for huge trees
32+
testProperty "Shrinking satisfies invariant" prop_shrinkSatisfiesInvariant
33+
]
2634
, testProperty "prop_MergingTree" prop_MergingTree
2735
]
2836

@@ -237,7 +245,7 @@ data T = TCompleted Run
237245
deriving stock Show
238246

239247
-- simplified non-ST version of PreExistingRun
240-
data P = PRun Run
248+
data P = PRun Run
241249
| PMergingRun (M LevelMergeType)
242250
deriving stock Show
243251

@@ -253,6 +261,41 @@ data M t = MCompleted t MergeDebt Run
253261
newtype NonEmptyRun = NonEmptyRun { getNonEmptyRun :: Run }
254262
deriving stock Show
255263

264+
invariantT :: T -> Either String ()
265+
invariantT t = runST $ do
266+
tree <- fromT t
267+
evalInvariant (treeInvariant tree)
268+
269+
-- | Size is the number of T and P constructors.
270+
sizeT :: T -> Int
271+
sizeT (TCompleted _) = 1
272+
sizeT (TOngoing _) = 1
273+
sizeT (TPendingLevel ps mt) = sum (fmap sizeP ps) + maybe 0 sizeT mt
274+
sizeT (TPendingUnion ts) = sum (fmap sizeT ts)
275+
276+
sizeP :: P -> Int
277+
sizeP (PRun _) = 1
278+
sizeP (PMergingRun _) = 1
279+
280+
-- | Depth is the longest path through the tree from the root to a leaf using T
281+
-- and P constructors.
282+
depthT :: T -> Int
283+
depthT (TCompleted _) = 0
284+
depthT (TOngoing _) = 0
285+
depthT (TPendingLevel ps mt) =
286+
let depthPs = case ps of
287+
[] -> 0
288+
_ -> maximum (fmap depthP ps)
289+
depthMt = maybe 0 depthT mt
290+
in 1 + max depthPs depthMt
291+
depthT (TPendingUnion ts) = case ts of
292+
[] -> 0
293+
_ -> 1 + maximum (fmap depthT ts)
294+
295+
depthP :: P -> Int
296+
depthP (PRun _) = 0
297+
depthP (PMergingRun _) = 0
298+
256299
fromT :: T -> ST s (MergingTree s)
257300
fromT t = do
258301
state <- case t of
@@ -293,21 +336,55 @@ completeM :: IsMergeType t => M t -> Run
293336
completeM (MCompleted _ _ r) = r
294337
completeM (MOngoing mt _ _ rs) = mergek mt (map getNonEmptyRun rs)
295338

339+
-------------------------------------------------------------------------------
340+
-- Generators
341+
--
342+
296343
instance Arbitrary T where
297-
arbitrary = QC.frequency
298-
[ (1, TCompleted <$> arbitrary)
299-
, (1, TOngoing <$> arbitrary)
300-
, (1, do
301-
(incoming, tree) <- arbitrary
302-
`QC.suchThat` (\(i, t) -> length i + length t > 0)
303-
return (TPendingLevel incoming tree))
304-
, (1, do
305-
n <- QC.frequency
306-
[ (3, pure 2)
307-
, (1, QC.chooseInt (3, 8))
344+
arbitrary = QC.sized $ \s -> do
345+
n <- QC.chooseInt (1, max 1 s)
346+
go n
347+
where
348+
-- n is the number of constructors of T and P
349+
go n | n < 1 = error ("arbitrary T: n == " <> show n)
350+
go n | n == 1 =
351+
QC.frequency
352+
[ (1, TCompleted <$> arbitrary)
353+
, (1, TOngoing <$> arbitrary)
354+
]
355+
go n =
356+
QC.frequency
357+
[ (1, do
358+
-- pending level merge without child
359+
preExisting <- QC.vector (n - 1) -- 1 for constructor itself
360+
return (TPendingLevel preExisting Nothing))
361+
, (1, do
362+
-- pending level merge with child
363+
numPreExisting <- QC.chooseInt (0, min 20 (n - 2))
364+
preExisting <- QC.vector numPreExisting
365+
tree <- go (n - numPreExisting - 1)
366+
return (TPendingLevel preExisting (Just tree)))
367+
, (2, do
368+
-- pending union merge
369+
ns <- QC.shuffle =<< arbitraryPartition2 n
370+
TPendingUnion <$> traverse go ns)
308371
]
309-
TPendingUnion <$> QC.vectorOf n (QC.scale (`div` n) arbitrary))
310-
]
372+
373+
-- Split into at least two smaller positive numbers. The input needs to be
374+
-- greater than or equal to 2.
375+
arbitraryPartition2 :: Int -> QC.Gen [Int]
376+
arbitraryPartition2 n = assert (n >= 2) $ do
377+
first <- QC.chooseInt (1, n-1)
378+
(first :) <$> arbitraryPartition (n - first)
379+
380+
-- Split into smaller positive numbers.
381+
arbitraryPartition :: Int -> QC.Gen [Int]
382+
arbitraryPartition n
383+
| n < 1 = return []
384+
| n == 1 = return [1]
385+
| otherwise = do
386+
first <- QC.chooseInt (1, n)
387+
(first :) <$> arbitraryPartition (n - first)
311388

312389
shrink (TCompleted r) =
313390
[ TCompleted r'
@@ -410,12 +487,36 @@ shrinkMergeCreditForRuns rs' MergeCredit {spentCredits, unspentCredits} =
410487
]
411488

412489
instance Arbitrary NonEmptyRun where
413-
arbitrary = do
414-
s <- QC.getSize
415-
n <- QC.chooseInt (1, min s 40 + 1)
416-
NonEmptyRun . Map.fromList <$> QC.vector n
490+
arbitrary = NonEmptyRun <$> (arbitrary `QC.suchThat` (not . null))
417491
shrink (NonEmptyRun r) = [NonEmptyRun r' | r' <- shrink r, not (null r')]
418492

493+
prop_arbitrarySatisfiesInvariant :: T -> Property
494+
prop_arbitrarySatisfiesInvariant t =
495+
QC.tabulate "Tree size" [showPowersOf 2 $ sizeT t] $
496+
QC.tabulate "Tree depth" [showPowersOf 2 $ depthT t] $
497+
Right () === invariantT t
498+
499+
prop_shrinkSatisfiesInvariant :: T -> Property
500+
prop_shrinkSatisfiesInvariant t =
501+
QC.forAll (genShrinkTrace 4 t) $ \trace ->
502+
QC.tabulate "Trace length" [showPowersOf 2 $ length trace] $
503+
QC.conjoin $ flip map trace $ \(numAlternatives, t') ->
504+
QC.tabulate "Shrink alternatives" [showPowersOf 2 numAlternatives] $
505+
Right () === invariantT t'
506+
507+
-- | Iterative shrinks, and how many alternatives were possible at each point.
508+
genShrinkTrace :: Arbitrary a => Int -> a -> QC.Gen [(Int, a)]
509+
genShrinkTrace !n x
510+
| n <= 0 = return []
511+
| otherwise =
512+
case shrink x of
513+
[] -> return []
514+
xs -> do
515+
-- like QC.elements, but we want access to the length
516+
let len = length xs
517+
x' <- (xs !!) <$> QC.chooseInt (0, len - 1)
518+
((len, x') :) <$> genShrinkTrace (n - 1) x'
519+
419520
-------------------------------------------------------------------------------
420521
-- tracing and expectations on LSM shape
421522
--
@@ -458,3 +559,18 @@ hasUnionWith p rep = do
458559
case shape of
459560
Nothing -> False
460561
Just t -> p t
562+
563+
-------------------------------------------------------------------------------
564+
-- Printing utils
565+
--
566+
567+
-- | Copied from @lsm-tree:extras.Database.LSMTree.Extras@
568+
showPowersOf :: Int -> Int -> String
569+
showPowersOf factor n
570+
| factor <= 1 = error "showPowersOf: factor must be larger than 1"
571+
| n < 0 = "n < 0"
572+
| n == 0 = "n == 0"
573+
| otherwise = printf "%d <= n < %d" lb ub
574+
where
575+
ub = fromJust (find (n <) (iterate (* factor) factor))
576+
lb = ub `div` factor

0 commit comments

Comments
 (0)