Skip to content

Commit 45762c7

Browse files
Add new --purl option to only get purls
Adds a new --purl option to the package scan so that only the purl fields are parsed and returned in the package data. Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent 4ac517e commit 45762c7

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

52 files changed

+1323
-836
lines changed

src/packagedcode/about.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ class AboutFileHandler(models.DatafileHandler):
4747
documentation_url = 'https://aboutcode-toolkit.readthedocs.io/en/latest/specification.html'
4848

4949
@classmethod
50-
def parse(cls, location):
50+
def parse(cls, location, purl_only=False):
5151
"""
5252
Yield one or more Package manifest objects given a file ``location`` pointing to a
5353
package archive, manifest or similar.
@@ -71,6 +71,15 @@ def parse(cls, location):
7171

7272
name = package_data.get('name')
7373
version = package_data.get('version')
74+
if purl_only:
75+
yield models.PackageData(
76+
datasource_id=cls.datasource_id,
77+
type=package_type,
78+
namespace=package_ns,
79+
name=name,
80+
version=version,
81+
)
82+
return
7483

7584
homepage_url = package_data.get('home_url') or package_data.get('homepage_url')
7685
download_url = package_data.get('download_url')

src/packagedcode/alpine.py

Lines changed: 59 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,12 @@ class AlpineInstalledDatabaseHandler(models.DatafileHandler):
6363
description = 'Alpine Linux installed package database'
6464

6565
@classmethod
66-
def parse(cls, location):
66+
def parse(cls, location, purl_only=False):
6767
yield from parse_alpine_installed_db(
6868
location=location,
6969
datasource_id=cls.datasource_id,
7070
package_type=cls.default_package_type,
71+
purl_only=purl_only,
7172
)
7273

7374
@classmethod
@@ -134,9 +135,10 @@ class AlpineApkbuildHandler(models.DatafileHandler):
134135
documentation_url = 'https://wiki.alpinelinux.org/wiki/APKBUILD_Reference'
135136

136137
@classmethod
137-
def parse(cls, location):
138-
package_data = parse_apkbuild(location, strict=True)
139-
cls.populate_license_fields(package_data)
138+
def parse(cls, location, purl_only=False):
139+
package_data = parse_apkbuild(location, strict=True, purl_only=purl_only)
140+
if not purl_only:
141+
cls.populate_license_fields(package_data)
140142
if package_data:
141143
yield package_data
142144

@@ -165,7 +167,12 @@ def assign_package_to_resources(cls, package, resource, codebase, package_adder)
165167
)
166168

167169

168-
def parse_alpine_installed_db(location, datasource_id, package_type):
170+
def parse_alpine_installed_db(
171+
location,
172+
datasource_id,
173+
package_type,
174+
purl_only=False,
175+
):
169176
"""
170177
Yield PackageData objects from an installed database file at `location`
171178
or None. Typically found at '/lib/apk/db/installed' in an Alpine
@@ -179,6 +186,7 @@ def parse_alpine_installed_db(location, datasource_id, package_type):
179186
package_fields=package_fields,
180187
datasource_id=datasource_id,
181188
package_type=package_type,
189+
purl_only=purl_only,
182190
)
183191

184192

@@ -241,7 +249,7 @@ def get_alpine_installed_db_fields(location):
241249
])
242250

243251

244-
def parse_apkbuild(location, strict=False):
252+
def parse_apkbuild(location, strict=False, purl_only=False):
245253
"""
246254
Return a PackageData object from an APKBUILD file at ``location`` or None.
247255
@@ -256,6 +264,7 @@ def parse_apkbuild(location, strict=False):
256264
datasource_id=AlpineApkbuildHandler.datasource_id,
257265
package_type=AlpineApkbuildHandler.default_package_type,
258266
strict=strict,
267+
purl_only=purl_only,
259268
)
260269

261270

@@ -732,7 +741,13 @@ def fix_apkbuild(text):
732741
return text
733742

734743

735-
def parse_apkbuild_text(text, datasource_id, package_type, strict=False):
744+
def parse_apkbuild_text(
745+
text,
746+
datasource_id,
747+
package_type,
748+
strict=False,
749+
purl_only=False
750+
):
736751
"""
737752
Return a PackageData object from an APKBUILD text context or None. Only
738753
consider variables with a name listed in the ``names`` set.
@@ -761,7 +776,8 @@ def parse_apkbuild_text(text, datasource_id, package_type, strict=False):
761776
package = build_package_data(
762777
variables,
763778
datasource_id=datasource_id,
764-
package_type=package_type
779+
package_type=package_type,
780+
purl_only=purl_only,
765781
)
766782

767783
if package and unresolved:
@@ -800,7 +816,12 @@ def parse_pkginfo(location):
800816
raise NotImplementedError
801817

802818

803-
def build_package_data(package_fields, datasource_id, package_type):
819+
def build_package_data(
820+
package_fields,
821+
datasource_id,
822+
package_type,
823+
purl_only=False
824+
):
804825
"""
805826
Return a PackageData object from a ``package_fields`` iterable of (name,
806827
value) tuples.
@@ -832,10 +853,17 @@ def build_package_data(package_fields, datasource_id, package_type):
832853
'type': package_type,
833854
}
834855
for name, value in package_fields:
835-
handler = package_handlers_by_field_name.get(name)
856+
handler = package_handlers_by_field_name_purl_only.get(name)
857+
if not purl_only and not handler:
858+
handler = package_handlers_by_field_name_others.get(name)
859+
836860
if handler:
837861
try:
838-
converted = handler(value, all_fields=all_fields, **converted_fields)
862+
converted = handler(
863+
value,
864+
all_fields=all_fields,
865+
**converted_fields
866+
)
839867
except:
840868
raise Exception(*list(package_fields))
841869

@@ -1199,11 +1227,11 @@ def source_handler(value, **kwargs):
11991227
# mapping of:
12001228
# - the package field one letter name in the installed db,
12011229
# - a handler for that field
1202-
package_handlers_by_field_name = {
1230+
package_handlers_by_field_name_purl_only = {
12031231

1204-
############################################################################
1205-
# per-package fields
1206-
############################################################################
1232+
###########################################################################
1233+
# per-package fields (only purl fields)
1234+
###########################################################################
12071235

12081236
# name of the package
12091237
# For example: P:busybox
@@ -1218,6 +1246,22 @@ def source_handler(value, **kwargs):
12181246
'V': build_name_value_str_handler('version'),
12191247
'pkgver': apkbuild_version_handler,
12201248

1249+
# For example: D:scanelf so:libc.musl-x86_64.so.1
1250+
# For example: D:so:libc.musl-x86_64.so.1 so:libcrypto.so.1.1 so:libssl.so.1.1 so:libz.so.1
1251+
# Can occur more than once
1252+
# 'depend' in .PKGINFO and APKBUILD
1253+
# TODO: add other dependencies (e.g. makedepends)
1254+
'D': D_dependencies_handler,
1255+
'depend': D_dependencies_handler,
1256+
}
1257+
1258+
1259+
package_handlers_by_field_name_others = {
1260+
1261+
###########################################################################
1262+
# per-package fields (other than purls)
1263+
###########################################################################
1264+
12211265
# For example: T:Size optimized toolbox of many common UNIX utilities
12221266
# 'pkgdesc' in .PKGINFO and APKBUILD
12231267
'T': build_name_value_str_handler('description'),
@@ -1272,14 +1316,6 @@ def source_handler(value, **kwargs):
12721316
'c': c_git_commit_handler,
12731317
'commit': c_git_commit_handler,
12741318

1275-
# For example: D:scanelf so:libc.musl-x86_64.so.1
1276-
# For example: D:so:libc.musl-x86_64.so.1 so:libcrypto.so.1.1 so:libssl.so.1.1 so:libz.so.1
1277-
# Can occur more than once
1278-
# 'depend' in .PKGINFO and APKBUILD
1279-
# TODO: add other dependencies (e.g. makedepends)
1280-
'D': D_dependencies_handler,
1281-
'depend': D_dependencies_handler,
1282-
12831319
# For example: source="http://liba52.sourceforge.net/files/$pkgname-$pkgver.tar.gz
12841320
# automake.patch
12851321
# fix-globals-test-x86-pie.patch"

src/packagedcode/bower.py

Lines changed: 46 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -25,42 +25,13 @@ class BowerJsonHandler(models.DatafileHandler):
2525
documentation_url = 'https://bower.io'
2626

2727
@classmethod
28-
def parse(cls, location):
28+
def parse(cls, location, purl_only=False):
2929
with io.open(location, encoding='utf-8') as loc:
3030
package_data = json.load(loc)
3131

3232
# note: having no name is not a problem for private packages. See #1514
3333
name = package_data.get('name')
34-
35-
description = package_data.get('description')
3634
version = package_data.get('version')
37-
extracted_license_statement = package_data.get('license')
38-
keywords = package_data.get('keywords') or []
39-
40-
parties = []
41-
42-
authors = package_data.get('authors') or []
43-
for author in authors:
44-
if isinstance(author, dict):
45-
name = author.get('name')
46-
email = author.get('email')
47-
url = author.get('homepage')
48-
party = models.Party(name=name, role='author', email=email, url=url)
49-
parties.append(party)
50-
elif isinstance(author, str):
51-
parties.append(models.Party(name=author, role='author'))
52-
else:
53-
parties.append(models.Party(name=repr(author), role='author'))
54-
55-
homepage_url = package_data.get('homepage')
56-
57-
repository = package_data.get('repository') or {}
58-
repo_type = repository.get('type')
59-
repo_url = repository.get('url')
60-
61-
vcs_url = None
62-
if repo_type and repo_url:
63-
vcs_url = f'{repo_type}+{repo_url}'
6435

6536
deps = package_data.get('dependencies') or {}
6637
dependencies = []
@@ -86,17 +57,53 @@ def parse(cls, location):
8657
is_optional=True,
8758
)
8859
)
89-
90-
yield models.PackageData(
60+
61+
pkg = models.PackageData(
9162
datasource_id=cls.datasource_id,
9263
type=cls.default_package_type,
9364
name=name,
94-
description=description,
9565
version=version,
96-
extracted_license_statement=extracted_license_statement,
97-
keywords=keywords,
98-
parties=parties,
99-
homepage_url=homepage_url,
100-
vcs_url=vcs_url,
101-
dependencies=dependencies
66+
dependencies=dependencies,
10267
)
68+
if purl_only:
69+
yield pkg
70+
return
71+
72+
description = package_data.get('description')
73+
extracted_license_statement = package_data.get('license')
74+
keywords = package_data.get('keywords') or []
75+
76+
parties = []
77+
78+
authors = package_data.get('authors') or []
79+
for author in authors:
80+
if isinstance(author, dict):
81+
name = author.get('name')
82+
email = author.get('email')
83+
url = author.get('homepage')
84+
party = models.Party(name=name, role='author', email=email, url=url)
85+
parties.append(party)
86+
elif isinstance(author, str):
87+
parties.append(models.Party(name=author, role='author'))
88+
else:
89+
parties.append(models.Party(name=repr(author), role='author'))
90+
91+
homepage_url = package_data.get('homepage')
92+
93+
repository = package_data.get('repository') or {}
94+
repo_type = repository.get('type')
95+
repo_url = repository.get('url')
96+
97+
vcs_url = None
98+
if repo_type and repo_url:
99+
vcs_url = f'{repo_type}+{repo_url}'
100+
101+
pkg.description = description
102+
pkg.primary_language = BowerJsonHandler.default_primary_language
103+
pkg.extracted_license_statement = extracted_license_statement
104+
pkg.keywords = keywords
105+
pkg.parties = parties
106+
pkg.homepage_url = homepage_url
107+
pkg.vcs_url = vcs_url
108+
pkg.populate_license_fields()
109+
yield pkg

0 commit comments

Comments
 (0)