|
| 1 | +{-# OPTIONS_GHC -Wno-orphans #-} |
| 2 | + |
| 3 | +module Test.Database.LSMTree.Internal.Chunk (tests) where |
| 4 | + |
| 5 | +import Prelude hiding (concat, drop, length) |
| 6 | + |
| 7 | +import Control.Arrow ((>>>)) |
| 8 | +import Control.Monad.ST.Strict (runST) |
| 9 | +import qualified Data.List as List (concat, drop, length) |
| 10 | +import Data.Maybe (catMaybes, fromJust, isJust, isNothing) |
| 11 | +import Data.Vector.Primitive (Vector, concat, fromList, length, |
| 12 | + toList) |
| 13 | +import Data.Word (Word8) |
| 14 | +import Database.LSMTree.Internal.Chunk (Chunk, createBaler, feedBaler, |
| 15 | + fromChunk, unsafeEndBaler) |
| 16 | +import Test.QuickCheck (Arbitrary (arbitrary, shrink), |
| 17 | + NonEmptyList (NonEmpty), Positive (Positive, getPositive), |
| 18 | + Property, Small (Small, getSmall), Testable, scale, |
| 19 | + shrinkMap, tabulate, (===), (==>)) |
| 20 | +import Test.Tasty (TestTree, testGroup) |
| 21 | +import Test.Tasty.QuickCheck (testProperty) |
| 22 | + |
| 23 | +-- * Tests |
| 24 | + |
| 25 | +tests :: TestTree |
| 26 | +tests = testGroup "Test.Database.LSMTree.Internal.Chunk" $ |
| 27 | + [ |
| 28 | + testProperty "Content is preserved" |
| 29 | + prop_contentIsPreserved, |
| 30 | + testProperty "No remnant after output" |
| 31 | + prop_noRemnantAfterOutput, |
| 32 | + testProperty "Common chunks are large" |
| 33 | + prop_commonChunksAreLarge, |
| 34 | + testProperty "Remnant chunk is non-empty" |
| 35 | + prop_remnantChunkIsNonEmpty, |
| 36 | + testProperty "Remnant chunk is small" |
| 37 | + prop_remnantChunkIsSmall |
| 38 | + ] |
| 39 | + |
| 40 | +-- * Properties to test |
| 41 | + |
| 42 | +{- |
| 43 | + Feeds a freshly created baler a sequence of data portions and ends it |
| 44 | + afterwards, yielding all output. |
| 45 | +-} |
| 46 | +balingOutput :: Int -- Minimum chunk size |
| 47 | + -> [[Vector Word8]] -- Data portions to be fed |
| 48 | + -> ([Maybe Chunk], Maybe Chunk) -- Feeding output and remnant |
| 49 | +balingOutput minChunkSize food = runST $ do |
| 50 | + baler <- createBaler minChunkSize |
| 51 | + commonChunks <- mapM (flip feedBaler baler) food |
| 52 | + remnant <- unsafeEndBaler baler |
| 53 | + return (commonChunks, remnant) |
| 54 | + |
| 55 | +{- |
| 56 | + Supplies the output of a complete baler run for constructing a property. |
| 57 | +
|
| 58 | + The resulting property additionally provides statistics about the lengths of |
| 59 | + buildup phases, where a buildup phase is a sequence of feedings that does |
| 60 | + not result in chunks and is followed by an ultimate chunk production, which |
| 61 | + happens either due to another feeding or due to the baler run ending and |
| 62 | + producing a remnant chunk. |
| 63 | +-} |
| 64 | +withBalingOutput |
| 65 | + :: Testable prop |
| 66 | + => Int -- Minimum chunk size |
| 67 | + -> [[Vector Word8]] -- Data portions to be fed |
| 68 | + -> ([Maybe Chunk] -> Maybe Chunk -> prop) -- Property from baler output |
| 69 | + -> Property -- Resulting property |
| 70 | +withBalingOutput minChunkSize food consumer |
| 71 | + = tabulate "Lengths of buildup phases" |
| 72 | + (map show (buildupPhasesLengths commonChunks)) |
| 73 | + (consumer commonChunks remnant) |
| 74 | + where |
| 75 | + |
| 76 | + commonChunks :: [Maybe Chunk] |
| 77 | + remnant :: Maybe Chunk |
| 78 | + (commonChunks, remnant) = balingOutput minChunkSize food |
| 79 | + |
| 80 | + buildupPhasesLengths :: [Maybe Chunk] -> [Int] |
| 81 | + buildupPhasesLengths [] = [] |
| 82 | + buildupPhasesLengths chunks = List.length buildupOutput : |
| 83 | + buildupPhasesLengths (List.drop 1 followUp) |
| 84 | + where |
| 85 | + |
| 86 | + buildupOutput, followUp :: [Maybe Chunk] |
| 87 | + (buildupOutput, followUp) = span isNothing chunks |
| 88 | + |
| 89 | +prop_contentIsPreserved :: MinChunkSize -> [[Vector Word8]] -> Property |
| 90 | +prop_contentIsPreserved (MinChunkSize minChunkSize) food |
| 91 | + = withBalingOutput minChunkSize food $ \ commonChunks remnant -> |
| 92 | + let |
| 93 | + |
| 94 | + input :: Vector Word8 |
| 95 | + input = concat (List.concat food) |
| 96 | + |
| 97 | + output :: Vector Word8 |
| 98 | + output = concat (fromChunk <$> catMaybes (commonChunks ++ [remnant])) |
| 99 | + |
| 100 | + in input === output |
| 101 | + |
| 102 | +prop_noRemnantAfterOutput :: MinChunkSize |
| 103 | + -> NonEmptyList [Vector Word8] |
| 104 | + -> Property |
| 105 | +prop_noRemnantAfterOutput (MinChunkSize minChunkSize) (NonEmpty food) |
| 106 | + = withBalingOutput minChunkSize food $ \ commonChunks remnant -> |
| 107 | + isJust (last commonChunks) ==> isNothing remnant |
| 108 | + |
| 109 | +prop_commonChunksAreLarge :: MinChunkSize -> [[Vector Word8]] -> Property |
| 110 | +prop_commonChunksAreLarge (MinChunkSize minChunkSize) food |
| 111 | + = withBalingOutput minChunkSize food $ \ commonChunks _ -> |
| 112 | + all (fromChunk >>> length >>> (>= minChunkSize)) (catMaybes commonChunks) |
| 113 | + |
| 114 | +remnantChunkSizeIs :: (Int -> Bool) -> Int -> [[Vector Word8]] -> Property |
| 115 | +remnantChunkSizeIs constraint minChunkSize food |
| 116 | + = withBalingOutput minChunkSize food $ \ _ remnant -> |
| 117 | + isJust remnant ==> constraint (length (fromChunk (fromJust remnant))) |
| 118 | + |
| 119 | +prop_remnantChunkIsNonEmpty :: MinChunkSize -> [[Vector Word8]] -> Property |
| 120 | +prop_remnantChunkIsNonEmpty (MinChunkSize minChunkSize) |
| 121 | + = remnantChunkSizeIs (> 0) minChunkSize |
| 122 | + |
| 123 | +prop_remnantChunkIsSmall :: MinChunkSize -> [[Vector Word8]] -> Property |
| 124 | +prop_remnantChunkIsSmall (MinChunkSize minChunkSize) |
| 125 | + = remnantChunkSizeIs (< minChunkSize) minChunkSize |
| 126 | + |
| 127 | +-- * Test case generation and shrinking |
| 128 | + |
| 129 | +instance Arbitrary (Vector Word8) where |
| 130 | + |
| 131 | + arbitrary = fromList <$> arbitrary |
| 132 | + |
| 133 | + shrink = shrinkMap fromList toList |
| 134 | + |
| 135 | +{- |
| 136 | + The type of minimum chunk sizes. |
| 137 | +
|
| 138 | + This type is isomorphic to 'Int' but has a different way of generating test |
| 139 | + cases. Only small, positive integers are generated, and they are generated |
| 140 | + using \(2 \cdot s^{2}\) as the size parameter, where \(s\) refers to the |
| 141 | + original size parameter. |
| 142 | +
|
| 143 | + The reasons for the modification of the size parameter in the |
| 144 | + above-mentioned way are somewhat subtle. |
| 145 | +
|
| 146 | + First, we want the ratio between the average minimum chunk size and the |
| 147 | + average size of a data portion that we feed to a baler to be independent of |
| 148 | + the size parameter. Each data portion is a list of primitive vectors of |
| 149 | + bytes, and arbitrarily generated lists and byte vectors have lengths that |
| 150 | + are small, positive integers. Such integers are \(s/2\) on average. As a |
| 151 | + result, the average size of data fed to a baler is \(s^{2}/4\). By |
| 152 | + generating minimum chunk sizes with \(a \cdot s^{2}\) as the size parameter |
| 153 | + for some constant \(a\), the average minimum chunk size is \(a/2 \cdot |
| 154 | + s^{2}\) and therefore $2a$ times the average size of a data portion fed, |
| 155 | + independently of \(s\). |
| 156 | +
|
| 157 | + Second, we want prompt chunk generation as well as chunk generation after |
| 158 | + only two or more feedings to occur reasonably often. To achieve this to some |
| 159 | + degree, we can tune the parameter \(a\). It appears that \(a\) being \(2\) |
| 160 | + leads to reasonable results. |
| 161 | +-} |
| 162 | +newtype MinChunkSize = MinChunkSize Int deriving stock Show |
| 163 | + |
| 164 | +fromMinChunkSize :: MinChunkSize -> Int |
| 165 | +fromMinChunkSize (MinChunkSize minChunkSize) = minChunkSize |
| 166 | + |
| 167 | +instance Arbitrary MinChunkSize where |
| 168 | + |
| 169 | + arbitrary = scale (\ size -> 2 * size ^ (2 :: Int)) $ |
| 170 | + MinChunkSize <$> getSmall <$> getPositive <$> arbitrary |
| 171 | + |
| 172 | + shrink = shrinkMap (MinChunkSize . getSmall . getPositive) |
| 173 | + (Positive . Small . fromMinChunkSize) |
0 commit comments