@@ -25,21 +25,17 @@ import Distribution.Types.Version
25
25
import qualified Distribution.Utils.ShortText as S
26
26
27
27
import qualified Codec.Archive.Tar as Tar
28
- import qualified Codec.Archive.Tar.Entry as Tar
29
- import Control.Monad ( join
30
- , liftM2
31
- )
32
28
import qualified Data.ByteString.Lazy as BSL
33
29
import Data.List ( maximumBy
34
30
, sortBy
35
31
)
36
32
import Data.Maybe ( isNothing )
37
33
import Data.Ord ( comparing )
38
- import qualified Data.TarIndex as T
39
34
import qualified Data.Time.Clock as CL
40
35
import GHC.Float ( int2Float )
41
36
import System.FilePath ( isExtensionOf )
42
- import qualified System.IO as SIO
37
+
38
+ -- import Debug.Trace (trace)
43
39
44
40
data Scorer = Scorer
45
41
{ maximumS :: ! Float
@@ -119,7 +115,7 @@ rankIO _ _ _ _ _ _ _ Nothing = return (Scorer (118 + 16 + 4 + 1) 0)
119
115
rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do
120
116
temp <- temporalScore pkg lastUploads versionList recentDownloads
121
117
versS <- versionScore versionList vers lastUploads pkg
122
- codeS <- codeScore documentLines srcLines
118
+ codeS <- codeScore documSize srcLines
123
119
return $ temp <> versS <> codeS <> authorScore maintainers pkg
124
120
125
121
where
@@ -134,45 +130,34 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do
134
130
$ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs)
135
131
srcLines = do
136
132
Right (path, _, _) <- packageTarball tarCache pkgI
137
- filterLines (isExtensionOf " .hs" ) . Tar. read <$> BSL. readFile path
133
+ filterLines (isExtensionOf " .hs" ) countLines
134
+ . Tar. read
135
+ <$> BSL. readFile path
136
+ documSize = do
137
+ path <- documentPath
138
+ case path of
139
+ Nothing -> return 0
140
+ Just pth ->
141
+ filterLines (isExtensionOf " .html" ) countSize
142
+ . Tar. read
143
+ <$> BSL. readFile pth
138
144
139
- filterLines f = Tar. foldEntries (countLines f) 0 (const 0 )
145
+ filterLines f g = Tar. foldEntries (g f) 0 (const 0 )
140
146
countLines :: (FilePath -> Bool ) -> Tar. Entry -> Float -> Float
141
147
countLines f entry l = if not . f . Tar. entryPath $ entry then l else lns
142
148
where
143
149
! lns = case Tar. entryContent entry of
144
150
(Tar. NormalFile str _) -> l + (int2Float . length $ BSL. split 10 str)
145
151
_ -> l
152
+ countSize :: (FilePath -> Bool ) -> Tar. Entry -> Float -> Float
153
+ countSize f entry l = if not . f . Tar. entryPath $ entry then l else s
154
+ where
155
+ ! s = case Tar. entryContent entry of
156
+ (Tar. NormalFile _ siz) -> l + fromInteger (toInteger siz)
157
+ _ -> l
146
158
147
159
documentBlob :: IO (Maybe BlobStorage. BlobId )
148
- documentBlob = queryDocumentation docs pkgId
149
- documentIndex = documentBlob >>= mapM (cachedTarIndex tarCache)
150
- documentationEntr = do
151
- index <- documentIndex
152
- path <- documentPath
153
- return $ liftM2 (,) path (join $ liftM2 T. lookup index path)
154
- documentLines :: IO Float
155
- documentLines = documentationEntr >>= filterLinesTar (const True )
156
-
157
- filterLinesTar
158
- :: (FilePath -> Bool ) -> Maybe (FilePath , T. TarIndexEntry ) -> IO Float
159
- filterLinesTar f (Just (path, T. TarFileEntry offset)) =
160
- if f path then getLines path offset else return 0
161
- filterLinesTar f (Just (_, T. TarDir dir)) =
162
- sum <$> mapM (filterLinesTar f . Just ) dir
163
- filterLinesTar _ _ = return 0
164
-
165
- -- TODO if size is too big give it a good score and do not read the file
166
- getLines path offset = do
167
- handle <- SIO. openFile path SIO. ReadMode
168
- SIO. hSeek handle SIO. AbsoluteSeek (fromIntegral $ offset * 512 )
169
- header <- BSL. hGet handle 512
170
- case Tar. read header of
171
- (Tar. Next Tar. Entry { Tar. entryContent = Tar. NormalFile _ siz } _) -> do
172
- body <- BSL. hGet handle (fromIntegral siz)
173
- return $ int2Float . length . BSL. split 10 $ body
174
- _ -> return 0
175
-
160
+ documentBlob = queryDocumentation docs pkgId
176
161
documentPath = do
177
162
blob <- documentBlob
178
163
return $ BlobStorage. filepath (serverBlobStore env) <$> blob
0 commit comments