Skip to content

Commit 7ad8804

Browse files
committed
Fix parsing of licenses in Cabal files
At the moment Hoogle just reads the license field as is, which leads to suboptimal situation when `hoogle search license:BSD3` and `hoogle search license:BSD-3-Clause` return different (and disjoint!) results. We should do better and normalise licenses as Cabal does. By this point it's easier to migrate entire readCabal to reading fields of GenericPackageDescription instead of rolling out our own parser.
1 parent 210df22 commit 7ad8804

File tree

1 file changed

+32
-27
lines changed

1 file changed

+32
-27
lines changed

src/Input/Cabal.hs

Lines changed: 32 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import System.Exit
2121
import qualified System.Process.ByteString as BS
2222
import qualified Data.ByteString.UTF8 as UTF8
2323
import System.Directory
24-
import Data.Char
2524
import Data.Maybe
2625
import Data.Tuple.Extra
2726
import qualified Data.Map.Strict as Map
@@ -32,10 +31,15 @@ import Prelude
3231

3332
import Distribution.Compat.Lens (toListOf)
3433
import qualified Distribution.PackageDescription as PD
34+
import qualified Distribution.PackageDescription.Configuration as PD
3535
import qualified Distribution.PackageDescription.Parsec as PD
36+
import qualified Distribution.Pretty
3637
import qualified Distribution.Types.BuildInfo.Lens as Lens
3738
import Distribution.Types.PackageName (mkPackageName, unPackageName)
39+
import Distribution.Types.Version (versionNumbers)
40+
import Distribution.Utils.ShortText (fromShortText)
3841
import Hackage.RevDeps (lastVersionsOfPackages)
42+
import qualified Distribution.SPDX as SPDX
3943

4044
---------------------------------------------------------------------
4145
-- DATA TYPE
@@ -136,40 +140,41 @@ readCabal settings src = case PD.parseGenericPackageDescriptionMaybe src of
136140
, packageDepends = []
137141
, packageDocs = Nothing
138142
}
139-
Just gpd -> readCabal' settings gpd (bstrUnpack src)
143+
Just gpd -> readCabal' settings gpd
140144

141-
readCabal' :: Settings -> PD.GenericPackageDescription -> String -> Package
142-
readCabal' Settings{..} gpd src = Package{..}
145+
readCabal' :: Settings -> PD.GenericPackageDescription -> Package
146+
readCabal' Settings{..} gpd = Package{..}
143147
where
144-
packageDepends = nubOrd $ foldMap (map (\(PD.Dependency pkg _ _) -> pkg) . PD.targetBuildDepends) $ toListOf Lens.traverseBuildInfos gpd
145-
146-
mp = Map.fromListWith (++) $ lexCabal src
147-
ask x = Map.findWithDefault [] x mp
148+
pd = PD.flattenPackageDescription gpd
149+
pkgId = PD.package pd
148150

149-
packageVersion = strPack $ headDef "0.0" $ dropWhile null (ask "version")
150-
packageSynopsis = strPack $ unwords $ words $ unwords $ ask "synopsis"
151-
packageLibrary = "library" `elem` map (lower . trim) (lines src)
152-
packageDocs = find (not . null) $ ask "haddock-html"
151+
packageDepends = nubOrd $ foldMap (map (\(PD.Dependency pkg _ _) -> pkg) . PD.targetBuildDepends) $ toListOf Lens.traverseBuildInfos gpd
152+
packageVersion = strPack $ intercalate "." $ map show $ versionNumbers $ PD.pkgVersion pkgId
153+
packageSynopsis = strPack $ fromShortText $ PD.synopsis pd
154+
packageLibrary = PD.hasPublicLib pd
155+
packageDocs = Nothing
156+
157+
unpackLicenseExpression (SPDX.EOr x y) = unpackLicenseExpression x ++ unpackLicenseExpression y
158+
unpackLicenseExpression x = [x]
159+
160+
packageLicenses = case PD.license pd of
161+
SPDX.NONE -> []
162+
SPDX.License licExpr -> map (show . Distribution.Pretty.pretty) $
163+
unpackLicenseExpression licExpr
164+
packageCategories =
165+
filter (not . null) $ split (`elem` " ,") $
166+
fromShortText $ PD.category pd
167+
packageAuthor = fromShortText $ PD.author pd
168+
packageMaintainer = fromShortText $ PD.maintainer pd
153169

154170
packageTags = map (both strPack) $ nubOrd $ concat
155-
[ map (x,) $ concatMap cleanup $ concatMap ask xs
156-
| xs@(x:_) <- [["license"],["category"],["author","maintainer"]]]
171+
[ map ("license",) packageLicenses
172+
, map ("category",) packageCategories
173+
, map ("author",) (concatMap cleanup [packageAuthor, packageMaintainer])
174+
]
157175

158176
-- split on things like "," "&" "and", then throw away email addresses, replace spaces with "-" and rename
159177
cleanup =
160178
filter (/= "") .
161179
map (renameTag . intercalate "-" . filter ('@' `notElem`) . words . takeWhile (`notElem` "<(")) .
162180
concatMap (map unwords . split (== "and") . words) . split (`elem` ",&")
163-
164-
165-
-- Ignores nesting beacuse it's not interesting for any of the fields I care about
166-
lexCabal :: String -> [(String, [String])]
167-
lexCabal = f . lines
168-
where
169-
f (x:xs) | (white,x) <- span isSpace x
170-
, (name@(_:_),x) <- span (\c -> isAlpha c || c == '-') x
171-
, ':':x <- trim x
172-
, (xs1,xs2) <- span (\s -> length (takeWhile isSpace s) > length white) xs
173-
= (lower name, trim x : replace ["."] [""] (map (trim . fst . breakOn "--") xs1)) : f xs2
174-
f (_:xs) = f xs
175-
f [] = []

0 commit comments

Comments
 (0)