Skip to content

Commit 6c43e29

Browse files
authored
Merge pull request #2475 from nexB/readme-parser-updates
Improve README.* package parsing
2 parents 3547aaa + 6894fc8 commit 6c43e29

File tree

24 files changed

+456
-11
lines changed

24 files changed

+456
-11
lines changed

src/packagedcode/readme.py

Lines changed: 26 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,15 @@
2929
logger.setLevel(logging.DEBUG)
3030

3131

32+
README_MAPPING = {
33+
'name': ['name', 'project'],
34+
'version': ['version'],
35+
'homepage_url': ['project url', 'repo', 'source', 'upstream', 'url', 'website'],
36+
'download_url': ['download link', 'downloaded from'],
37+
'declared_license': ['license'],
38+
}
39+
40+
3241
@attr.s()
3342
class ReadmePackage(models.Package):
3443
metafiles = (
@@ -73,7 +82,16 @@ def parse(location):
7382
with open(location, encoding='utf-8') as loc:
7483
readme_manifest = loc.read()
7584

76-
return build_package(readme_manifest)
85+
package = build_package(readme_manifest)
86+
87+
if not package.name:
88+
# If no name was detected for the Package, then we use the basename of
89+
# the parent directory as the Package name
90+
parent_dir = fileutils.parent_directory(location)
91+
parent_dir_basename = fileutils.file_base_name(parent_dir)
92+
package.name = parent_dir_basename
93+
94+
return package
7795

7896

7997
def build_package(readme_manifest):
@@ -88,16 +106,18 @@ def build_package(readme_manifest):
88106

89107
if not key or not value:
90108
continue
91-
109+
92110
# Map the key, value pairs to the Package
93111
key, value = key.lower(), value.strip()
94-
if key == 'name':
112+
if key in README_MAPPING['name']:
95113
package.name = value
96-
if key == 'version':
114+
if key in README_MAPPING['version']:
97115
package.version = value
98-
if key == 'url' or key == 'project url':
116+
if key in README_MAPPING['homepage_url']:
99117
package.homepage_url = value
100-
if key == 'license':
118+
if key in README_MAPPING['download_url']:
119+
package.download_url = value
120+
if key in README_MAPPING['declared_license']:
101121
package.declared_license = value
102122

103123
return package
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
Project URL: https://pypi.python.org/packages/source/s/setuptools/setuptools-18.5.tar.gz#md5=533c868f01169a3085177dffe5e768bb
2+
Project: setuptools
23
Version: 18.5
34
License: PSF or ZPL
45
Local modifications: kept only pkg_resources & _markerlib modules

tests/packagedcode/data/readme/facebook/basic/README.facebook.expected

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
{
22
"type": "readme",
33
"namespace": null,
4-
"name": null,
4+
"name": "setuptools",
55
"version": "18.5",
66
"qualifiers": {},
77
"subpath": null,
@@ -28,7 +28,7 @@
2828
"dependencies": [],
2929
"contains_source_code": null,
3030
"source_packages": [],
31-
"purl": null,
31+
"purl": "pkg:readme/setuptools@18.5",
3232
"repository_homepage_url": null,
3333
"repository_download_url": null,
3434
"api_data_url": null
Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
Project URL: https://pypi.python.org/packages/source/s/setuptools/setuptools-18.5.tar.gz#md5=533c868f01169a3085177dffe5e768bb
2+
Project: setuptools
3+
Version: 18.5
4+
License: PSF or ZPL
5+
Local modifications: kept only pkg_resources & _markerlib modules
Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,35 @@
1+
{
2+
"type": "readme",
3+
"namespace": null,
4+
"name": "setuptools",
5+
"version": "18.5",
6+
"qualifiers": {},
7+
"subpath": null,
8+
"primary_language": null,
9+
"description": null,
10+
"release_date": null,
11+
"parties": [],
12+
"keywords": [],
13+
"homepage_url": "https://pypi.python.org/packages/source/s/setuptools/setuptools-18.5.tar.gz#md5=533c868f01169a3085177dffe5e768bb",
14+
"download_url": null,
15+
"size": null,
16+
"sha1": null,
17+
"md5": null,
18+
"sha256": null,
19+
"sha512": null,
20+
"bug_tracking_url": null,
21+
"code_view_url": null,
22+
"vcs_url": null,
23+
"copyright": null,
24+
"license_expression": "unknown",
25+
"declared_license": "PSF or ZPL",
26+
"notice_text": null,
27+
"root_path": null,
28+
"dependencies": [],
29+
"contains_source_code": null,
30+
"source_packages": [],
31+
"purl": "pkg:readme/setuptools@18.5",
32+
"repository_homepage_url": null,
33+
"repository_download_url": null,
34+
"api_data_url": null
35+
}
Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
Project: mbed TLS
2+
Version: 2.16.4
3+
Release date: 2017-03-08
4+
Source: https://github.com/ARMmbed/mbedtls
5+
License: Apache 2.0
6+
Download link: https://github.com/ARMmbed/mbedtls/archive/refs/tags/v2.16.7.tar.gz
Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,35 @@
1+
{
2+
"type": "readme",
3+
"namespace": null,
4+
"name": "mbed TLS",
5+
"version": "2.16.4",
6+
"qualifiers": {},
7+
"subpath": null,
8+
"primary_language": null,
9+
"description": null,
10+
"release_date": null,
11+
"parties": [],
12+
"keywords": [],
13+
"homepage_url": "https://github.com/ARMmbed/mbedtls",
14+
"download_url": "https://github.com/ARMmbed/mbedtls/archive/refs/tags/v2.16.7.tar.gz",
15+
"size": null,
16+
"sha1": null,
17+
"md5": null,
18+
"sha256": null,
19+
"sha512": null,
20+
"bug_tracking_url": null,
21+
"code_view_url": null,
22+
"vcs_url": null,
23+
"copyright": null,
24+
"license_expression": "apache-2.0",
25+
"declared_license": "Apache 2.0",
26+
"notice_text": null,
27+
"root_path": null,
28+
"dependencies": [],
29+
"contains_source_code": null,
30+
"source_packages": [],
31+
"purl": "pkg:readme/mbed%20TLS@2.16.4",
32+
"repository_homepage_url": null,
33+
"repository_download_url": null,
34+
"api_data_url": null
35+
}
Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
Project: mbed TLS
2+
Version: 2.16.4
3+
Release date: 2017-03-08
4+
Source: https://github.com/ARMmbed/mbedtls
5+
License: Apache 2.0
6+
Downloaded from: https://github.com/ARMmbed/mbedtls/archive/refs/tags/v2.16.7.tar.gz
Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,35 @@
1+
{
2+
"type": "readme",
3+
"namespace": null,
4+
"name": "mbed TLS",
5+
"version": "2.16.4",
6+
"qualifiers": {},
7+
"subpath": null,
8+
"primary_language": null,
9+
"description": null,
10+
"release_date": null,
11+
"parties": [],
12+
"keywords": [],
13+
"homepage_url": "https://github.com/ARMmbed/mbedtls",
14+
"download_url": "https://github.com/ARMmbed/mbedtls/archive/refs/tags/v2.16.7.tar.gz",
15+
"size": null,
16+
"sha1": null,
17+
"md5": null,
18+
"sha256": null,
19+
"sha512": null,
20+
"bug_tracking_url": null,
21+
"code_view_url": null,
22+
"vcs_url": null,
23+
"copyright": null,
24+
"license_expression": "apache-2.0",
25+
"declared_license": "Apache 2.0",
26+
"notice_text": null,
27+
"root_path": null,
28+
"dependencies": [],
29+
"contains_source_code": null,
30+
"source_packages": [],
31+
"purl": "pkg:readme/mbed%20TLS@2.16.4",
32+
"repository_homepage_url": null,
33+
"repository_download_url": null,
34+
"api_data_url": null
35+
}
Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,13 @@
1+
This directory contains a heavily modified
2+
stuff
3+
stuff
4+
stuff
5+
6+
stuff
7+
8+
Differences from the original:
9+
10+
- one
11+
- two
12+
- three
13+
- four

0 commit comments

Comments
 (0)