Skip to content

Commit da3efd0

Browse files
authored
Merge pull request #739 from IntersectMBO/wenkokke/dump-diskio-complexities
feat(scripts): add lint-diskio-complexities
2 parents a3efabc + 09f8d16 commit da3efd0

File tree

6 files changed

+347
-12
lines changed

6 files changed

+347
-12
lines changed

Makefile

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
SOURCE=lsm-tree.cabal $(shell find src -type f -name '*.hs')
2+
3+
.PHONY: watch
4+
watch:
5+
fswatch -o $(SOURCE) | xargs -n1 -I{} make build
6+
7+
.PHONY: build
8+
build: $(SOURCE)
9+
time cabal haddock lsm-tree:lib:lsm-tree --builddir="dist-newstyle/haddock"
10+
11+
.PHONY: serve
12+
serve:
13+
python -m http.server -d "dist-newstyle/haddock/build/"*"/ghc-"*"/lsm-tree-"*"/doc/html/lsm-tree/"

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ schedule are determined by the `TableConfig` parameters
151151
<tbody>
152152
<tr>
153153
<td>Session</td>
154-
<td>Create/Open</td>
154+
<td>Open</td>
155155
<td>N/A</td>
156156
<td>N/A</td>
157157
<td><span class="math inline"><em>O</em>(1)</span></td>
@@ -166,7 +166,7 @@ schedule are determined by the `TableConfig` parameters
166166
</tr>
167167
<tr>
168168
<td>Table</td>
169-
<td>Create</td>
169+
<td>New</td>
170170
<td>N/A</td>
171171
<td>N/A</td>
172172
<td><span class="math inline"><em>O</em>(1)</span></td>
@@ -253,7 +253,7 @@ schedule are determined by the `TableConfig` parameters
253253
</tr>
254254
<tr>
255255
<td>Cursor</td>
256-
<td>Create</td>
256+
<td>New</td>
257257
<td><code>LazyLevelling</code></td>
258258
<td>N/A</td>
259259
<td><span class="math inline">$O(T \: \log_T \frac{n}{B})$</span></td>
@@ -267,7 +267,7 @@ schedule are determined by the `TableConfig` parameters
267267
</tr>
268268
<tr>
269269
<td></td>
270-
<td>Read next entry</td>
270+
<td>Next</td>
271271
<td>N/A</td>
272272
<td>N/A</td>
273273
<td><span class="math inline">$O(\frac{1}{P})$</span></td>

lsm-tree.cabal

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,11 +87,11 @@ description:
8787
+----------+------------------------+-----------------+-----------------+------------------------------------------------+
8888
| Resource | Operation | Merge policy | Merge schedule | Worst-case disk I\/O complexity |
8989
+==========+========================+=================+=================+================================================+
90-
| Session | Create\/Open | N\/A | N\/A | \(O(1)\) |
90+
| Session | Open | N\/A | N\/A | \(O(1)\) |
9191
+----------+------------------------+-----------------+-----------------+------------------------------------------------+
9292
| | Close | @LazyLevelling@ | N\/A | \(O(o \: T \: \log_T \frac{n}{B})\) |
9393
+----------+------------------------+-----------------+-----------------+------------------------------------------------+
94-
| Table | Create | N\/A | N\/A | \(O(1)\) |
94+
| Table | New | N\/A | N\/A | \(O(1)\) |
9595
+----------+------------------------+-----------------+-----------------+------------------------------------------------+
9696
| | Close | @LazyLevelling@ | N\/A | \(O(T \: \log_T \frac{n}{B})\) |
9797
+----------+------------------------+-----------------+-----------------+------------------------------------------------+
@@ -115,11 +115,11 @@ description:
115115
+----------+------------------------+-----------------+-----------------+------------------------------------------------+
116116
| | List | N\/A | N\/A | \(O(s)\) |
117117
+----------+------------------------+-----------------+-----------------+------------------------------------------------+
118-
| Cursor | Create | @LazyLevelling@ | N\/A | \(O(T \: \log_T \frac{n}{B})\) |
118+
| Cursor | New | @LazyLevelling@ | N\/A | \(O(T \: \log_T \frac{n}{B})\) |
119119
+----------+------------------------+-----------------+-----------------+------------------------------------------------+
120120
| | Close | @LazyLevelling@ | N\/A | \(O(T \: \log_T \frac{n}{B})\) |
121121
+----------+------------------------+-----------------+-----------------+------------------------------------------------+
122-
| | Read next entry | N\/A | N\/A | \(O(\frac{1}{P})\) |
122+
| | Next | N\/A | N\/A | \(O(\frac{1}{P})\) |
123123
+----------+------------------------+-----------------+-----------------+------------------------------------------------+
124124

125125
(*The variable \(b\) refers to the number of entries retrieved by the range lookup.)
Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
#!/usr/bin/env cabal
2+
{- cabal:
3+
build-depends:
4+
, base >=4.16 && <5
5+
, bytestring ^>=0.11
6+
, cassava ^>=0.5
7+
, containers ^>=0.6 || ^>=0.7 || ^>=0.8
8+
, process ^>=1.6
9+
, text ^>=2.1
10+
, vector ^>=0.12 || ^>=0.13
11+
-}
12+
{-# LANGUAGE InstanceSigs #-}
13+
{-# LANGUAGE OverloadedRecordDot #-}
14+
{-# LANGUAGE OverloadedStrings #-}
15+
{-# LANGUAGE RecordWildCards #-}
16+
{-# LANGUAGE TupleSections #-}
17+
{-# LANGUAGE TypeApplications #-}
18+
19+
import Control.Applicative (Alternative (..))
20+
import Control.Monad (unless)
21+
import qualified Data.ByteString.Lazy as BSL
22+
import Data.Csv
23+
import Data.List (zip4)
24+
import Data.Map (Map)
25+
import qualified Data.Map as M
26+
import Data.Maybe (fromMaybe, isNothing)
27+
import Data.Set (Set)
28+
import qualified Data.Set as S
29+
import Data.Text (Text)
30+
import qualified Data.Text as T
31+
import qualified Data.Text.Encoding as TE
32+
import qualified Data.Text.IO as TIO
33+
import Data.Traversable (for)
34+
import Data.Vector (Vector)
35+
import qualified Data.Vector as V
36+
import System.Exit (ExitCode (..), exitWith)
37+
import System.Process (readProcess)
38+
39+
main :: IO ()
40+
main = do
41+
-- Get the disk I/O complexities from the package description
42+
mapForPackageDescription <-
43+
buildDiskIOComplexityMap . decodeDiskIOComplexities
44+
<$> readProcess "./scripts/lint-diskio-complexities/dump-from-package-description.hs" [] ""
45+
46+
-- Get the disk I/O complexities from Database.LSMTree
47+
mapForFullApi <-
48+
buildDiskIOComplexityMap . decodeDiskIOComplexities
49+
<$> readProcess "./scripts/lint-diskio-complexities/dump-from-source.sh" ["./src/Database/LSMTree.hs"] ""
50+
51+
-- Get the disk I/O complexities from Database.LSMTree.Simple
52+
mapForSimpleApi <-
53+
buildDiskIOComplexityMap . decodeDiskIOComplexities
54+
<$> readProcess "./scripts/lint-diskio-complexities/dump-from-source.sh" ["./src/Database/LSMTree/Simple.hs"] ""
55+
56+
-- Comparing Database.LSMTree.Simple to Database.LSMTree
57+
putStrLn "Comparing Database.LSMTree.Simple to Database.LSMTree:"
58+
comparisonSimpleToFull <-
59+
fmap concat . for (concat . M.elems $ mapForSimpleApi) $ \simpleEntry@DiskIOComplexity{..} -> do
60+
case M.lookup function mapForFullApi of
61+
Nothing ->
62+
pure [("Database.LSMTree.Simple", simpleEntry)]
63+
Just fullEntries
64+
| simpleEntry `elem` fullEntries -> pure []
65+
| otherwise -> pure (("Database.LSMTree.Simple", simpleEntry) : (("Database.LSMTree",) <$> fullEntries))
66+
TIO.putStrLn (prettyDiskIOComplexityTable comparisonSimpleToFull)
67+
68+
-- Comparing lsm-tree.cabal to Database.LSMTree
69+
putStrLn "Comparing lsm-tree.cabal to Database.LSMTree:"
70+
comparisonPackageDescriptionToFull <-
71+
fmap concat . for (concat . M.elems $ mapForPackageDescription) $ \simpleEntry@DiskIOComplexity{..} -> do
72+
case M.lookup function mapForFullApi of
73+
Nothing ->
74+
pure [("lsm-tree.cabal", simpleEntry)]
75+
Just fullEntries
76+
| any (looseEq simpleEntry) fullEntries -> pure []
77+
| otherwise -> pure (("lsm-tree.cabal", simpleEntry) : (("Database.LSMTree",) <$> fullEntries))
78+
TIO.putStrLn (prettyDiskIOComplexityTable comparisonPackageDescriptionToFull)
79+
80+
-- Set the exit code based on whether any differences were found
81+
unless (null comparisonSimpleToFull && null comparisonPackageDescriptionToFull) $
82+
exitWith (ExitFailure 1)
83+
84+
--------------------------------------------------------------------------------
85+
-- Helper functions
86+
--------------------------------------------------------------------------------
87+
88+
type Function = Text
89+
type MergePolicy = Text
90+
type MergeSchedule = Text
91+
type WorstCaseDiskIOComplexity = Text
92+
type Condition = Text
93+
94+
data DiskIOComplexity = DiskIOComplexity
95+
{ function :: Function
96+
, mergePolicy :: Maybe MergePolicy
97+
, mergeSchedule :: Maybe MergeSchedule
98+
, worstCaseDiskIOComplexity :: WorstCaseDiskIOComplexity
99+
, condition :: Maybe Condition
100+
}
101+
deriving (Eq, Show)
102+
103+
-- | Loose equality which is used when comparing the disk I/O complexities
104+
-- listed in the package description to those in the modules. Those in the
105+
-- package description do not list complex side conditions, such as all
106+
-- tables having been closed beforehand, or all tables having the same merge
107+
-- policy. Therefore, this equality disregards mismatches when the first
108+
-- entry does not list a condition.
109+
looseEq :: DiskIOComplexity -> DiskIOComplexity -> Bool
110+
entry1 `looseEq` entry2 =
111+
and
112+
[ entry1.function == entry2.function
113+
, entry1.mergePolicy == entry2.mergePolicy
114+
, entry1.mergeSchedule == entry2.mergeSchedule
115+
, entry1.worstCaseDiskIOComplexity == entry2.worstCaseDiskIOComplexity
116+
, isNothing entry1.condition || entry1.condition == entry2.condition
117+
]
118+
119+
-- | Typeset a tagged list of 'DiskIOComplexity' records as an aligned table.
120+
prettyDiskIOComplexityTable :: [(Text, DiskIOComplexity)] -> Text
121+
prettyDiskIOComplexityTable [] = "No differences found.\n"
122+
prettyDiskIOComplexityTable entries =
123+
T.unlines
124+
[ T.unwords
125+
[ prettyCellForColumn tag tags
126+
, prettyCellForColumn function functions
127+
, prettyCellForColumn fullCondition fullConditions
128+
, prettyCellForColumn worstCaseDiskIOComplexity worstCaseDiskIOComplexities
129+
]
130+
| (tag, function, fullCondition, worstCaseDiskIOComplexity) <-
131+
zip4 tags functions fullConditions worstCaseDiskIOComplexities
132+
]
133+
where
134+
tags = fst <$> entries
135+
functions = ((.function) . snd) <$> entries
136+
fullConditions = (prettyFullCondition . snd) <$> entries
137+
worstCaseDiskIOComplexities = ((.worstCaseDiskIOComplexity) . snd) <$> entries
138+
139+
prettyCellForColumn :: Text -> [Text] -> Text
140+
prettyCellForColumn cell column = cell <> T.replicate (maximum (T.length <$> column) - T.length cell) " "
141+
142+
prettyFullCondition :: DiskIOComplexity -> Text
143+
prettyFullCondition DiskIOComplexity{..} =
144+
fromMaybe "*" mergePolicy `slashWith` mergeSchedule `slashWith` condition
145+
where
146+
slashWith :: Text -> Maybe Text -> Text
147+
slashWith x my = maybe x (\y -> x <> "/" <> y) my
148+
149+
-- | Structure vector of 'DiskIOComplexity' records into lookup table by function name.
150+
buildDiskIOComplexityMap :: Vector DiskIOComplexity -> Map Function [DiskIOComplexity]
151+
buildDiskIOComplexityMap = M.unionsWith (<>) . fmap toSingletonMap . V.toList
152+
where
153+
toSingletonMap :: DiskIOComplexity -> Map Function [DiskIOComplexity]
154+
toSingletonMap simpleEntry = M.singleton simpleEntry.function [simpleEntry]
155+
156+
-- | Parse CSV file into vector of 'DiskIOComplexity' records.
157+
decodeDiskIOComplexities :: String -> Vector DiskIOComplexity
158+
decodeDiskIOComplexities =
159+
either error snd . decodeByName . BSL.fromStrict . TE.encodeUtf8 . T.pack
160+
161+
normaliseWorstCaseDiskIOComplexity :: WorstCaseDiskIOComplexity -> WorstCaseDiskIOComplexity
162+
normaliseWorstCaseDiskIOComplexity =
163+
T.replace "+ " "+" . T.replace " +" "+" . T.replace " \\" "\\" . T.replace " " " " . T.replace "\\:" ""
164+
165+
-- | Parse CSV row into 'DiskIOComplexity' record.
166+
instance FromNamedRecord DiskIOComplexity where
167+
parseNamedRecord :: NamedRecord -> Parser DiskIOComplexity
168+
parseNamedRecord m = do
169+
function <- m .: "Function"
170+
mergePolicy <- orNotApplicable (m .: "Merge policy")
171+
mergeSchedule <- orNotApplicable (m .: "Merge schedule")
172+
worstCaseDiskIOComplexity <- normaliseWorstCaseDiskIOComplexity <$> (m .: "Worst-case disk I/O complexity")
173+
condition <- orNotApplicable (m .: "Condition")
174+
pure DiskIOComplexity{..}
175+
where
176+
orNotApplicable :: Parser Text -> Parser (Maybe Text)
177+
orNotApplicable pText = (emptyTextToNothing <$> pText) <|> pure Nothing
178+
where
179+
emptyTextToNothing :: Text -> Maybe Text
180+
emptyTextToNothing txt =
181+
if T.null txt then Nothing else Just txt

0 commit comments

Comments
 (0)