Skip to content

Commit 0a98ed2

Browse files
committed
finished readmeScore
1 parent c0d4cb6 commit 0a98ed2

File tree

2 files changed

+112
-55
lines changed

2 files changed

+112
-55
lines changed

src/Distribution/Server/Features/PackageRank.hs

Lines changed: 61 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@ module Distribution.Server.Features.PackageRank
66
( rankPackage
77
) where
88

9-
import Distribution.Server.Features.PackageRank.Parser
9+
import Distribution.Server.Features.PackageRank.Parser
1010

11+
import Data.TarIndex ( TarEntryOffset )
1112
import Distribution.Package
1213
import Distribution.PackageDescription
1314
import Distribution.Server.Features.Documentation
@@ -17,9 +18,14 @@ import Distribution.Server.Features.PreferredVersions.State
1718
import Distribution.Server.Features.TarIndexCache
1819
import qualified Distribution.Server.Framework.BlobStorage
1920
as BlobStorage
21+
import Distribution.Server.Framework.CacheControl
2022
import Distribution.Server.Framework.ServerEnv
2123
( ServerEnv(..) )
2224
import Distribution.Server.Packages.Types
25+
import Distribution.Server.Util.Markdown
26+
( supposedToBeMarkdown )
27+
import Distribution.Server.Util.ServeTarball
28+
( loadTarEntry )
2329
import Distribution.Simple.Utils ( safeHead
2430
, safeLast
2531
)
@@ -38,8 +44,6 @@ import Distribution.Server.Packages.Readme
3844
import GHC.Float ( int2Float )
3945
import System.FilePath ( isExtensionOf )
4046

41-
-- import Debug.Trace (trace)
42-
4347
data Scorer = Scorer
4448
{ maximumS :: !Float
4549
, score :: !Float
@@ -54,7 +58,7 @@ scorer maxim scr =
5458
if maxim >= scr then Scorer maxim scr else Scorer maxim maxim
5559

5660
fracScor :: Float -> Float -> Scorer
57-
fracScor maxim frac = scorer maxim (maxim * frac)
61+
fracScor maxim frac = scorer maxim (min (maxim * frac) maxim)
5862

5963
boolScor :: Float -> Bool -> Scorer
6064
boolScor k True = Scorer k k
@@ -102,10 +106,9 @@ freshness (x : xs) lastUpd app =
102106
age = flip numDays (Just lastUpd) . Just <$> CL.getCurrentTime
103107
decayDays = expectedUpdateInterval / 2 + (if app then 300 else 200)
104108

105-
cabalScore :: PackageDescription -> IO Bool -> IO Scorer
109+
cabalScore :: PackageDescription -> Bool -> Scorer
106110
cabalScore p docum =
107-
(<>) (tests <> benchs <> desc <> homeP <> sourceRp <> cats)
108-
<$> (boolScor 30 <$> docum)
111+
tests <> benchs <> desc <> homeP <> sourceRp <> cats <> boolScor 30 docum
109112
where
110113
tests = boolScor 50 (hasTests p)
111114
benchs = boolScor 10 (hasBenchmarks p)
@@ -115,9 +118,38 @@ cabalScore p docum =
115118
sourceRp = boolScor 8 (not $ null $ sourceRepos p)
116119
cats = boolScor 5 (not $ S.null $ category p)
117120

118-
readmeScore _ = Scorer 0 0
121+
readmeScore
122+
:: Maybe (FilePath, ETag, Data.TarIndex.TarEntryOffset, FilePath)
123+
-> Bool
124+
-> IO Scorer
125+
readmeScore Nothing _ = return $ Scorer 1 0 -- readmeScore is scaled so it does not need correct max
126+
readmeScore (Just (tarfile, _, offset, name)) app = do
127+
entr <- loadTarEntry tarfile offset
128+
case entr of
129+
(Right (size, str)) -> return $ calcScore str size name
130+
_ -> return $ Scorer 1 0
131+
where
132+
calcScore str size filename =
133+
scorer 75 (min 1 (fromInteger (toInteger size) / 3000))
134+
<> if supposedToBeMarkdown filename
135+
then case parseM str filename of
136+
Left _ -> Scorer 0 0
137+
Right mdStats -> format mdStats
138+
else Scorer 0 0
139+
format stats =
140+
fracScor (if app then 25 else 100) (min 1 $ int2Float hlength / 2000)
141+
<> scorer (if app then 15 else 27) (int2Float blocks * 3)
142+
<> boolScor (if app then 10 else 30) (clength > 150)
143+
<> scorer 35 (int2Float images * 10)
144+
<> scorer 30 (int2Float sections * 4)
145+
<> scorer 25 (int2Float rows * 2)
146+
where
147+
(blocks, clength) = getCode stats
148+
(_ , hlength) = getHCode stats
149+
MStats _ images = sumMStat stats
150+
rows = getListsTables stats
151+
sections = getSections stats
119152

120-
-- queryHasDocumentation
121153
baseScore
122154
:: VersionsFeature
123155
-> Int
@@ -130,18 +162,25 @@ baseScore
130162
-> IO Scorer
131163

132164
baseScore vers maintainers docs env tarCache versionList lastUploads pkgI = do
133-
versS <- versionScore versionList vers lastUploads pkg
134-
codeS <- codeScore documSize srcLines
135-
cabalS <- cabalScore pkg documHas
165+
166+
readM <- readme
167+
hasDocum <- documHas
168+
documS <- documSize
169+
srcL <- srcLines
170+
171+
versS <- versionScore versionList vers lastUploads pkg
172+
readmeS <- readmeScore readM isApp
173+
136174
return
137175
$ scale 5 versS
138-
<> scale 2 codeS
176+
<> scale 2 (codeScore documS srcL)
139177
<> scale 3 (authorScore maintainers pkg)
140-
<> scale 2 cabalS
141-
<> scale 5 (readmeScore readme)
178+
<> scale 2 (cabalScore pkg hasDocum)
179+
<> scale 5 readmeS
142180
where
143181
pkg = packageDescription $ pkgDesc pkgI
144182
pkgId = package pkg
183+
isApp = (isNothing . library) pkg && (not . null . executables) pkg
145184
srcLines = do
146185
Right (path, _, _) <- packageTarball tarCache pkgI
147186
filterLines (isExtensionOf ".hs") countLines
@@ -165,6 +204,7 @@ baseScore vers maintainers docs env tarCache versionList lastUploads pkgI = do
165204
!lns = case Tar.entryContent entry of
166205
(Tar.NormalFile str _) -> l + (int2Float . length $ BSL.split 10 str)
167206
_ -> l
207+
-- TODO might need to decode/add the other separator
168208
countSize :: (FilePath -> Bool) -> Tar.Entry -> Float -> Float
169209
countSize f entry l = if not . f . Tar.entryPath $ entry then l else s
170210
where
@@ -185,15 +225,12 @@ authorScore maintainers desc =
185225
where
186226
maintScore = boolScor 3 (maintainers > 1) <> scorer 5 (int2Float maintainers)
187227

188-
codeScore :: IO Float -> IO Float -> IO Scorer
189-
codeScore documentS haskellL = do
190-
docum <- documentS
191-
haskell <- haskellL
192-
return
193-
$ boolScor 1 (haskell > 700)
194-
<> boolScor 1 (haskell < 80000)
195-
<> fracScor 2 (min 1 (haskell / 5000))
196-
<> fracScor 2 (min 1 docum / ((3000 + haskell) * 200))
228+
codeScore :: Float -> Float -> Scorer
229+
codeScore documentS haskellL =
230+
boolScor 1 (haskellL > 700)
231+
<> boolScor 1 (haskellL < 80000)
232+
<> fracScor 2 (min 1 (haskellL / 5000))
233+
<> fracScor 2 (min 1 documentS / ((3000 + haskellL) * 200))
197234

198235
versionScore
199236
:: [Version]
Lines changed: 51 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
11
{-# LANGUAGE ScopedTypeVariables, FlexibleInstances, MultiParamTypeClasses, ConstraintKinds #-}
22
module Distribution.Server.Features.PackageRank.Parser
33
( parseM
4+
, sumMStat
5+
, getListsTables
6+
, getCode
7+
, getHCode
8+
, getSections
9+
, MStats(..)
410
) where
511

612

713
import Commonmark
814
import Commonmark.Extensions
9-
import Control.Monad
1015
import Control.Monad.Identity
1116
import qualified Data.ByteString.Lazy as BS
1217
( ByteString
@@ -16,13 +21,6 @@ import qualified Data.Text as T
1621
import qualified Data.Text.Encoding as T
1722
import qualified Data.Text.Encoding.Error as T
1823
( lenientDecode )
19-
import qualified Data.Text.IO as TIO
20-
import qualified Data.Text.Lazy.IO as TLIO
21-
import Data.Typeable ( Typeable )
22-
import System.FilePath
23-
24-
type MarkdownRenderable a b
25-
= (Typeable a, HasPipeTable a b, IsBlock a b, IsInline a)
2624

2725
parseM :: BS.ByteString -> FilePath -> Either ParseError [MarkdownStats]
2826
parseM md name = runIdentity
@@ -44,24 +42,51 @@ instance HasAttributes MStats where
4442
instance Semigroup MStats where
4543
(MStats a b) <> (MStats c d) = MStats (a + c) (b + d)
4644

47-
data MarkdownStats = NotImportant |
45+
data MarkdownStats = NotImportant MStats |
4846
HCode MStats |
4947
Code MStats |
50-
Section | -- Int?
51-
Table Int |
48+
Section MStats |
49+
Table Int MStats | -- Int of rows
5250
PText MStats |
53-
List Int
51+
List Int MStats -- Int of elements
5452
deriving (Show)
5553

54+
getCode :: [MarkdownStats] -> (Int, Int) -- number of code blocks, size of code
55+
getCode [] = (0, 0)
56+
getCode (Code (MStats code _) : xs) = (1, code) >< getCode xs
57+
getCode (HCode (MStats code _) : xs) = (1, code) >< getCode xs
58+
getCode (_ : xs) = getCode xs
59+
60+
getHCode :: [MarkdownStats] -> (Int, Int) -- number of code blocks, size of code
61+
getHCode [] = (0, 0)
62+
getHCode (HCode (MStats code _) : xs) = (1, code) >< getHCode xs
63+
getHCode (_ : xs) = getHCode xs
64+
65+
getSections :: [MarkdownStats] -> Int -- number of code blocks, size of code
66+
getSections [] = 0
67+
getSections (Section _ : xs) = 1 + getSections xs
68+
getSections (_ : xs) = getSections xs
69+
70+
(><) :: (Int, Int) -> (Int, Int) -> (Int, Int)
71+
(><) (a, b) (c, d) = (a + c, b + d)
72+
73+
74+
sumMStat :: [MarkdownStats] -> MStats
5675
sumMStat [] = mempty
5776
sumMStat (x : xs) = case x of
58-
NotImportant -> sumMStat xs
59-
Section -> sumMStat xs
60-
(List a) -> sumMStat xs
61-
(Table a) -> sumMStat xs
62-
(HCode a) -> a <> sumMStat xs
63-
(Code a) -> a <> sumMStat xs
64-
(PText a) -> a <> sumMStat xs
77+
(NotImportant a) -> a <> sumMStat xs
78+
(Section a) -> a <> sumMStat xs
79+
(List _ a ) -> a <> sumMStat xs
80+
(Table _ a ) -> a <> sumMStat xs
81+
(HCode a ) -> a <> sumMStat xs
82+
(Code a ) -> a <> sumMStat xs
83+
(PText a ) -> a <> sumMStat xs
84+
85+
getListsTables :: [MarkdownStats] -> Int
86+
getListsTables [] = 0
87+
getListsTables ((List a _) : ys) = a + getListsTables ys
88+
getListsTables ((Table a _) : ys) = a + getListsTables ys
89+
getListsTables (_ : ys) = getListsTables ys
6590

6691
instance Rangeable [MarkdownStats] where
6792
ranged = const id
@@ -70,7 +95,7 @@ instance HasAttributes [MarkdownStats] where
7095
addAttributes = const id
7196

7297
instance HasPipeTable MStats [MarkdownStats] where
73-
pipeTable _ _ rows = [Table $ length rows]
98+
pipeTable _ _ rows = [Table (length rows) (mconcat $ mconcat <$> rows)]
7499

75100
instance IsInline MStats where
76101
lineBreak = MStats 0 1
@@ -88,17 +113,12 @@ instance IsInline MStats where
88113
instance IsBlock MStats [MarkdownStats] where
89114
paragraph a = [PText a]
90115
plain a = [PText a]
91-
thematicBreak = [NotImportant]
116+
thematicBreak = [NotImportant mempty]
92117
blockQuote = id
93118
codeBlock language codeT | language == T.pack "haskell" = [HCode (code codeT)]
94119
| otherwise = [Code (code codeT)]
95-
heading _ _ = [Section]
96-
rawBlock _ r = [NotImportant]
97-
referenceLinkDefinition _ _ = [NotImportant]
98-
list _ _ l = [List (length l + depSum l)]
99-
100-
depSum [] = 0
101-
depSum ([] : xs) = depSum xs
102-
depSum ((List a : ys) : xs) = a + depSum (ys : xs)
103-
depSum ((_ : ys) : xs) = depSum (ys : xs)
104-
120+
heading _ a = [Section a]
121+
rawBlock _ _ = [NotImportant mempty]
122+
referenceLinkDefinition _ _ = [NotImportant mempty]
123+
list _ _ l = [List (length l + sumLT l) (mconcat $ sumMStat <$> l)]
124+
where sumLT a = sum (getListsTables <$> a)

0 commit comments

Comments
 (0)