Skip to content

Commit 137d09d

Browse files
committed
fixed documentation retrieval
1 parent ff667de commit 137d09d

File tree

1 file changed

+22
-37
lines changed

1 file changed

+22
-37
lines changed

src/Distribution/Server/Features/PackageRank.hs

Lines changed: 22 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -25,21 +25,17 @@ import Distribution.Types.Version
2525
import qualified Distribution.Utils.ShortText as S
2626

2727
import qualified Codec.Archive.Tar as Tar
28-
import qualified Codec.Archive.Tar.Entry as Tar
29-
import Control.Monad ( join
30-
, liftM2
31-
)
3228
import qualified Data.ByteString.Lazy as BSL
3329
import Data.List ( maximumBy
3430
, sortBy
3531
)
3632
import Data.Maybe ( isNothing )
3733
import Data.Ord ( comparing )
38-
import qualified Data.TarIndex as T
3934
import qualified Data.Time.Clock as CL
4035
import GHC.Float ( int2Float )
4136
import System.FilePath ( isExtensionOf )
42-
import qualified System.IO as SIO
37+
38+
-- import Debug.Trace (trace)
4339

4440
data Scorer = Scorer
4541
{ maximumS :: !Float
@@ -119,7 +115,7 @@ rankIO _ _ _ _ _ _ _ Nothing = return (Scorer (118 + 16 + 4 + 1) 0)
119115
rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do
120116
temp <- temporalScore pkg lastUploads versionList recentDownloads
121117
versS <- versionScore versionList vers lastUploads pkg
122-
codeS <- codeScore documentLines srcLines
118+
codeS <- codeScore documSize srcLines
123119
return $ temp <> versS <> codeS <> authorScore maintainers pkg
124120

125121
where
@@ -134,45 +130,34 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do
134130
$ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs)
135131
srcLines = do
136132
Right (path, _, _) <- packageTarball tarCache pkgI
137-
filterLines (isExtensionOf ".hs") . Tar.read <$> BSL.readFile path
133+
filterLines (isExtensionOf ".hs") countLines
134+
. Tar.read
135+
<$> BSL.readFile path
136+
documSize = do
137+
path <- documentPath
138+
case path of
139+
Nothing -> return 0
140+
Just pth ->
141+
filterLines (isExtensionOf ".html") countSize
142+
. Tar.read
143+
<$> BSL.readFile pth
138144

139-
filterLines f = Tar.foldEntries (countLines f) 0 (const 0)
145+
filterLines f g = Tar.foldEntries (g f) 0 (const 0)
140146
countLines :: (FilePath -> Bool) -> Tar.Entry -> Float -> Float
141147
countLines f entry l = if not . f . Tar.entryPath $ entry then l else lns
142148
where
143149
!lns = case Tar.entryContent entry of
144150
(Tar.NormalFile str _) -> l + (int2Float . length $ BSL.split 10 str)
145151
_ -> l
152+
countSize :: (FilePath -> Bool) -> Tar.Entry -> Float -> Float
153+
countSize f entry l = if not . f . Tar.entryPath $ entry then l else s
154+
where
155+
!s = case Tar.entryContent entry of
156+
(Tar.NormalFile _ siz) -> l + fromInteger (toInteger siz)
157+
_ -> l
146158

147159
documentBlob :: IO (Maybe BlobStorage.BlobId)
148-
documentBlob = queryDocumentation docs pkgId
149-
documentIndex = documentBlob >>= mapM (cachedTarIndex tarCache)
150-
documentationEntr = do
151-
index <- documentIndex
152-
path <- documentPath
153-
return $ liftM2 (,) path (join $ liftM2 T.lookup index path)
154-
documentLines :: IO Float
155-
documentLines = documentationEntr >>= filterLinesTar (const True)
156-
157-
filterLinesTar
158-
:: (FilePath -> Bool) -> Maybe (FilePath, T.TarIndexEntry) -> IO Float
159-
filterLinesTar f (Just (path, T.TarFileEntry offset)) =
160-
if f path then getLines path offset else return 0
161-
filterLinesTar f (Just (_, T.TarDir dir)) =
162-
sum <$> mapM (filterLinesTar f . Just) dir
163-
filterLinesTar _ _ = return 0
164-
165-
-- TODO if size is too big give it a good score and do not read the file
166-
getLines path offset = do
167-
handle <- SIO.openFile path SIO.ReadMode
168-
SIO.hSeek handle SIO.AbsoluteSeek (fromIntegral $ offset * 512)
169-
header <- BSL.hGet handle 512
170-
case Tar.read header of
171-
(Tar.Next Tar.Entry { Tar.entryContent = Tar.NormalFile _ siz } _) -> do
172-
body <- BSL.hGet handle (fromIntegral siz)
173-
return $ int2Float . length . BSL.split 10 $ body
174-
_ -> return 0
175-
160+
documentBlob = queryDocumentation docs pkgId
176161
documentPath = do
177162
blob <- documentBlob
178163
return $ BlobStorage.filepath (serverBlobStore env) <$> blob

0 commit comments

Comments
 (0)