Skip to content

Commit e561e41

Browse files
authored
Merge pull request #2331 from nexB/1532-add-package-extra-data-field
Add extra_data field to Package model #1532
2 parents 5d33b05 + 48a927b commit e561e41

File tree

826 files changed

+3796
-140
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

826 files changed

+3796
-140
lines changed

CHANGELOG.rst

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
Changelog
22
=========
33

4-
v21.x.x (next)
5-
--------------
4+
v21.x.x (next, future)
5+
-----------------------
66

77
Breaking API changes:
88
~~~~~~~~~~~~~~~~~~~~~
@@ -24,7 +24,7 @@ Breaking API changes:
2424
multiple manifests for a single package instance.
2525

2626

27-
v21.6.2
27+
v21.6.6
2828
--------
2929

3030
Breaking API changes:
@@ -36,6 +36,7 @@ Breaking API changes:
3636
has been removed. Use the PYTHON_EXECUTABLE environment variable to point to
3737
alternative non-default Python executable and this on all OSes.
3838

39+
3940
Security updates:
4041
~~~~~~~~~~~~~~~~~
4142

@@ -47,14 +48,18 @@ Security updates:
4748
- pkg:pypi/lxml: (low severity, likely no impact) CVE-2021-28957
4849
- pkg:pypi/nltk: (low severity, likely no impact) CVE-2019-14751
4950
- pkg:pypi/jinja2: (low severity, likely no impact) CVE-2020-28493, CVE-2019-10906
50-
- pkg:pypi/pycryptodome: (high severity) CVE-2018-15560 (dropped since no longer used)
51+
- pkg:pypi/pycryptodome: (high severity) CVE-2018-15560 (dropped since no
52+
longer used by pdfminer)
5153

5254

5355
Outputs:
5456
~~~~~~~~
5557

5658
- Add new YAML-formatted output. This is exactly the same data structure as for
5759
the JSON output
60+
- The JSON output packages section has a new "extra_data" attribute which is
61+
a JSON object that can contain arbitrary data that are specific to a package
62+
type.
5863

5964

6065
License scanning:

src/packagedcode/__init__.py

Lines changed: 106 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
# See https://aboutcode.org for more information about nexB OSS projects.
88
#
99

10+
import attr
11+
1012
from packagedcode import about
1113
from packagedcode import bower
1214
from packagedcode import build
@@ -126,20 +128,111 @@ def get_package_class(scan_data, default=models.Package):
126128
return ptype_class or default
127129

128130

129-
_props = frozenset([
130-
'api_data_url',
131-
'repository_download_url',
132-
'purl',
133-
'repository_homepage_url']
134-
)
131+
def get_package_instance(scan_data):
    """
    Return a Package instance re-built from a mapping of ``scan_data`` native
    Python data that has the structure of a scan. Known attributes that store a
    list of objects are also "rehydrated" (such as models.Party).

    The Package instance will use the Package subclass that supports the
    provided package "type" when possible or the base Package class otherwise.

    Unknown attributes provided in ``scan_data`` that do not exist as fields in
    the Package class are kept as items in the Package.extra_data mapping.
    An Exception is raised if an "unknown attribute" name already exists as
    a Package.extra_data key.

    Attributes with an empty value and computed attributes (such as "purl")
    are ignored. The ``scan_data`` mapping and its "extra_data" are not
    modified.

    An Exception is also raised if a known list-of-objects attribute (such as
    "parties") does not contain a list.
    """
    # TODO: consider using a proper library for this such as cattrs,
    # marshmallow, etc. or use the field type that we declare.

    # Each of these are lists of class instances tracked here, which are stored
    # as a list of mappings in scan_data
    list_field_types_by_name = {
        'parties': models.Party,
        'dependencies': models.DependentPackage,
        'installed_files': models.PackageFile,
    }

    # these are computed attributes serialized on a package
    # that should not be passed back when re-creating a package
    computed_attributes = frozenset([
        'purl',
        'repository_homepage_url',
        'repository_download_url',
        'api_data_url',
    ])

    klas = get_package_class(scan_data)
    existing_fields = attr.fields_dict(klas)

    # Work on a copy: a missing or None "extra_data" is treated as empty and
    # the caller-provided mapping is never mutated.
    extra_data = dict(scan_data.get('extra_data') or {})
    package_data = {}

    for key, value in scan_data.items():
        # "extra_data" is handled on its own once all the unknown attributes
        # have been collected
        if key == 'extra_data' or not value or key in computed_attributes:
            continue

        if key not in existing_fields:
            if key in extra_data:
                raise Exception(
                    f'Invalid scan_data with duplicated key: {key}={value!r} '
                    f'present both as attribute AND as extra_data: '
                    f'{key}={extra_data[key]!r}'
                )
            # keep unknown field as extra data
            extra_data[key] = value
            continue

        list_field_type = list_field_types_by_name.get(key)
        if not list_field_type:
            # this is a plain known field
            package_data[key] = value
            continue

        # Since we have a list_field_type, value must be a list of mappings:
        # we transform it in a list of objects.
        if not isinstance(value, list):
            raise Exception(
                f'Invalid scan_data with unknown data structure. '
                f'Expected the value to be a list of dicts and not a '
                f'{type(value)!r} for {key}={value!r}'
            )

        package_data[key] = list(
            _build_objects_list(values=value, klass=list_field_type)
        )

    # Pass extra_data explicitly so that unknown attributes collected above
    # are kept even when scan_data had no (or an empty) "extra_data".
    if extra_data:
        package_data['extra_data'] = extra_data

    return klas(**package_data)
210+
211+
212+
def _build_objects_list(values, klass):
213+
"""
214+
Yield ``klass`` objects built from a ``values`` list of mappings.
215+
"""
216+
# Since we have a list_field_type, value must be a list of mappings:
217+
# we transform it in a list of objects.
218+
219+
if not isinstance(values, list):
220+
raise Exception(
221+
f'Invalid scan_data with unknown data structure. '
222+
f'Expected the value to be a list of dicts and not a '
223+
f'{type(values)!r} for {values!r}'
224+
)
225+
226+
for val in values:
227+
if not val:
228+
continue
229+
230+
if not isinstance(val, dict):
231+
raise Exception(
232+
f'Invalid scan_data with unknown data structure. '
233+
f'Expected the value to be a mapping for and not a '
234+
f'{type(val)!r} for {values!r}'
235+
)
236+
237+
yield klass.create(**val)
238+

src/packagedcode/about.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,8 @@ class AboutPackage(models.Package):
3737
def recognize(cls, location):
3838
yield parse(location)
3939

40-
@classmethod
41-
def get_package_root(cls, manifest_resource, codebase):
42-
# FIXME: this should have been already stored with the Package itself as extra_data
43-
with io.open(manifest_resource.location, encoding='utf-8') as loc:
44-
package_data = saneyaml.load(loc.read())
45-
about_resource = package_data.get('about_resource')
40+
def get_package_root(self, manifest_resource, codebase):
41+
about_resource = self.extra_data.get('about_resource')
4642
if about_resource:
4743
manifest_resource_parent = manifest_resource.parent(codebase)
4844
for child in manifest_resource_parent.children(codebase):
@@ -89,7 +85,7 @@ def build_package(package_data):
8985
owner = repr(owner)
9086
parties = [models.Party(type=models.party_person, name=owner, role='owner')]
9187

92-
return AboutPackage(
88+
about_package = AboutPackage(
9389
type='about',
9490
name=name,
9591
version=version,
@@ -99,3 +95,5 @@ def build_package(package_data):
9995
homepage_url=homepage_url,
10096
download_url=download_url,
10197
)
98+
about_package.extra_data['about_resource'] = package_data.get('about_resource')
99+
return about_package

src/packagedcode/alpine.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ def build_package(package_fields):
106106
converted_fields.update(converted)
107107

108108
# construct the package: we ignore unknown as we added a few technical fields
109-
package = AlpinePackage.create(ignore_unknown=True, **converted_fields)
109+
package = AlpinePackage.create(**converted_fields)
110110
return package
111111

112112
# Note handlers MUST accept **kwargs as they also receive the current data

src/packagedcode/debian.py

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
from packageurl import PackageURL
1616

1717
from commoncode import filetype
18-
from commoncode.datautils import String
1918
from packagedcode import models
2019

2120
"""
@@ -40,24 +39,16 @@ class DebianPackage(models.Package):
4039
mimetypes = ('application/x-archive', 'application/vnd.debian.binary-package',)
4140
default_type = 'deb'
4241

43-
multi_arch = String(
44-
label='Multi-Arch',
45-
help='Multi-Arch value from status file')
46-
47-
def to_dict(self, _detailed=False, **kwargs):
48-
data = super().to_dict(_detailed=_detailed, **kwargs)
49-
if _detailed:
50-
#################################################
51-
# populate temporary fields
52-
data['multi_arch'] = self.multi_arch
53-
#################################################
54-
else:
55-
#################################################
56-
# remove temporary fields
57-
data.pop('multi_arch', None)
58-
#################################################
42+
@property
43+
def multi_arch(self):
44+
"""
45+
Multi-Arch value from a status or spec file.
46+
"""
47+
return self.extra_data.get('multi_arch')
5948

60-
return data
49+
def set_multi_arch(self, value):
50+
if value:
51+
self.extra_data['multi_arch'] = value
6152

6253
def populate_installed_files(self, var_lib_dpkg_info_dir):
6354
"""
@@ -235,6 +226,8 @@ def build_package(package_data, distro='debian'):
235226
('arch', package_data.get('architecture')),
236227
])
237228

229+
package.set_multi_arch(package_data.get('multi-arch'))
230+
238231
# mapping of top level `status` file items to the Package object field name
239232
plain_fields = [
240233
('description', 'description'),
@@ -243,7 +236,6 @@ def build_package(package_data, distro='debian'):
243236
('package', 'name'),
244237
('version', 'version'),
245238
('maintainer', 'maintainer'),
246-
('multi-arch', 'multi_arch'),
247239
]
248240

249241
for source, target in plain_fields:

0 commit comments

Comments
 (0)