Skip to content

Commit a086737

Browse files
committed
Improve README.* package parsing
Add a README_MAPPING dictionary, used to map various README.* metadata key-value pairs to scancode Package model values. A number of test cases have been added as well, to reflect different mappings. Addresses: #942 Signed-off-by: Steven Esser <[email protected]>
1 parent 96c73a2 commit a086737

File tree

18 files changed

+392
-4
lines changed

18 files changed

+392
-4
lines changed

src/packagedcode/readme.py

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,15 @@
2929
logger.setLevel(logging.DEBUG)
3030

3131

32+
README_MAPPING = {
33+
'name': ['name', 'project'],
34+
'version': ['version'],
35+
'homepage_url': ['project url', 'repo', 'source', 'upstream', 'url', 'website'],
36+
'download_url': ['download link', 'downloaded from'],
37+
'declared_license': ['license'],
38+
}
39+
40+
3241
@attr.s()
3342
class ReadmePackage(models.Package):
3443
metafiles = (
@@ -91,13 +100,15 @@ def build_package(readme_manifest):
91100

92101
# Map the key, value pairs to the Package
93102
key, value = key.lower(), value.strip()
94-
if key == 'name':
103+
if key in README_MAPPING['name']:
95104
package.name = value
96-
if key == 'version':
105+
if key in README_MAPPING['version']:
97106
package.version = value
98-
if key == 'url' or key == 'project url':
107+
if key in README_MAPPING['homepage_url']:
99108
package.homepage_url = value
100-
if key == 'license':
109+
if key in README_MAPPING['download_url']:
110+
package.download_url = value
111+
if key in README_MAPPING['declared_license']:
101112
package.declared_license = value
102113

103114
return package
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Project URL: https://pypi.python.org/packages/source/s/setuptools/setuptools-18.5.tar.gz#md5=533c868f01169a3085177dffe5e768bb
2+
Version: 18.5
3+
License: PSF or ZPL
4+
Local modifications: kept only pkg_resources & _markerlib modules
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"type": "readme",
3+
"namespace": null,
4+
"name": null,
5+
"version": "18.5",
6+
"qualifiers": {},
7+
"subpath": null,
8+
"primary_language": null,
9+
"description": null,
10+
"release_date": null,
11+
"parties": [],
12+
"keywords": [],
13+
"homepage_url": "https://pypi.python.org/packages/source/s/setuptools/setuptools-18.5.tar.gz#md5=533c868f01169a3085177dffe5e768bb",
14+
"download_url": null,
15+
"size": null,
16+
"sha1": null,
17+
"md5": null,
18+
"sha256": null,
19+
"sha512": null,
20+
"bug_tracking_url": null,
21+
"code_view_url": null,
22+
"vcs_url": null,
23+
"copyright": null,
24+
"license_expression": "unknown",
25+
"declared_license": "PSF or ZPL",
26+
"notice_text": null,
27+
"root_path": null,
28+
"dependencies": [],
29+
"contains_source_code": null,
30+
"source_packages": [],
31+
"purl": null,
32+
"repository_homepage_url": null,
33+
"repository_download_url": null,
34+
"api_data_url": null
35+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Project: mbed TLS
2+
Version: 2.16.4
3+
Release date: 2017-03-08
4+
Source: https://github.com/ARMmbed/mbedtls
5+
License: Apache 2.0
6+
Download link: https://github.com/ARMmbed/mbedtls/archive/refs/tags/v2.16.7.tar.gz
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"type": "readme",
3+
"namespace": null,
4+
"name": "mbed TLS",
5+
"version": "2.16.4",
6+
"qualifiers": {},
7+
"subpath": null,
8+
"primary_language": null,
9+
"description": null,
10+
"release_date": null,
11+
"parties": [],
12+
"keywords": [],
13+
"homepage_url": "https://github.com/ARMmbed/mbedtls",
14+
"download_url": "https://github.com/ARMmbed/mbedtls/archive/refs/tags/v2.16.7.tar.gz",
15+
"size": null,
16+
"sha1": null,
17+
"md5": null,
18+
"sha256": null,
19+
"sha512": null,
20+
"bug_tracking_url": null,
21+
"code_view_url": null,
22+
"vcs_url": null,
23+
"copyright": null,
24+
"license_expression": "apache-2.0",
25+
"declared_license": "Apache 2.0",
26+
"notice_text": null,
27+
"root_path": null,
28+
"dependencies": [],
29+
"contains_source_code": null,
30+
"source_packages": [],
31+
"purl": "pkg:readme/mbed%20TLS@2.16.4",
32+
"repository_homepage_url": null,
33+
"repository_download_url": null,
34+
"api_data_url": null
35+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Project: mbed TLS
2+
Version: 2.16.4
3+
Release date: 2017-03-08
4+
Source: https://github.com/ARMmbed/mbedtls
5+
License: Apache 2.0
6+
Downloaded from: https://github.com/ARMmbed/mbedtls/archive/refs/tags/v2.16.7.tar.gz
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"type": "readme",
3+
"namespace": null,
4+
"name": "mbed TLS",
5+
"version": "2.16.4",
6+
"qualifiers": {},
7+
"subpath": null,
8+
"primary_language": null,
9+
"description": null,
10+
"release_date": null,
11+
"parties": [],
12+
"keywords": [],
13+
"homepage_url": "https://github.com/ARMmbed/mbedtls",
14+
"download_url": "https://github.com/ARMmbed/mbedtls/archive/refs/tags/v2.16.7.tar.gz",
15+
"size": null,
16+
"sha1": null,
17+
"md5": null,
18+
"sha256": null,
19+
"sha512": null,
20+
"bug_tracking_url": null,
21+
"code_view_url": null,
22+
"vcs_url": null,
23+
"copyright": null,
24+
"license_expression": "apache-2.0",
25+
"declared_license": "Apache 2.0",
26+
"notice_text": null,
27+
"root_path": null,
28+
"dependencies": [],
29+
"contains_source_code": null,
30+
"source_packages": [],
31+
"purl": "pkg:readme/mbed%20TLS@2.16.4",
32+
"repository_homepage_url": null,
33+
"repository_download_url": null,
34+
"api_data_url": null
35+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
This directory contains a heavily modified
2+
stuff
3+
stuff
4+
stuff
5+
6+
stuff
7+
8+
Differences from the original:
9+
10+
- one
11+
- two
12+
- three
13+
- four
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"type": "readme",
3+
"namespace": null,
4+
"name": null,
5+
"version": null,
6+
"qualifiers": {},
7+
"subpath": null,
8+
"primary_language": null,
9+
"description": null,
10+
"release_date": null,
11+
"parties": [],
12+
"keywords": [],
13+
"homepage_url": null,
14+
"download_url": null,
15+
"size": null,
16+
"sha1": null,
17+
"md5": null,
18+
"sha256": null,
19+
"sha512": null,
20+
"bug_tracking_url": null,
21+
"code_view_url": null,
22+
"vcs_url": null,
23+
"copyright": null,
24+
"license_expression": null,
25+
"declared_license": null,
26+
"notice_text": null,
27+
"root_path": null,
28+
"dependencies": [],
29+
"contains_source_code": null,
30+
"source_packages": [],
31+
"purl": null,
32+
"repository_homepage_url": null,
33+
"repository_download_url": null,
34+
"api_data_url": null
35+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Project: mbed TLS
2+
Version: 2.16.4
3+
Release date: 2017-03-08
4+
Website: https://github.com/ARMmbed/mbedtls
5+
License: Apache 2.0

0 commit comments

Comments
 (0)