Skip to content

Commit 753c492

Browse files
committed
Merge remote-tracking branch 'upstream/develop' into misc-copyrights
2 parents ab6699f + 4f49985 commit 753c492

File tree

34 files changed, with 10627 additions and 239 deletions (+10627 -239 lines changed)

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -108,7 +108,6 @@ TAGS
108108
Procfile
109109
local.cfg
110110
geckodriver.log
111-
var
112111
.metaflow
113112
selenium
114113
/dist/

src/packagedcode/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -216,7 +216,10 @@
216216

217217
debian.DebianInstalledFilelistHandler,
218218
debian.DebianInstalledMd5sumFilelistHandler,
219-
debian.DebianInstalledStatusDatabaseHandler
219+
debian.DebianInstalledStatusDatabaseHandler,
220+
221+
rpm.RpmLicenseFilesHandler,
222+
rpm.RpmMarinerContainerManifestHandler
220223
]
221224

222225
if on_linux:

src/packagedcode/debian.py

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -127,7 +127,7 @@ def assign_package_to_resources(cls, package, resource, codebase, package_adder)
127127
class DebianControlFileInExtractedDebHandler(models.DatafileHandler):
128128
datasource_id = 'debian_control_extracted_deb'
129129
default_package_type = 'deb'
130-
path_patterns = ('*/control.tar.gz-extract/control',)
130+
path_patterns = ('*/control.tar.gz-extract/control','*/control.tar.xz-extract/control')
131131
description = 'Debian control file - extracted layout'
132132
documentation_url = 'https://www.debian.org/doc/debian-policy/ch-controlfields.html'
133133

@@ -590,7 +590,7 @@ def parse_debian_files_list(location, datasource_id, package_type):
590590
else:
591591
name = None
592592
# For DebianMd5sumFilelistInPackageHandler we cannot infer name
593-
if not name == "md5sums":
593+
if not filename == "md5sums":
594594
name = filename
595595

596596
file_references = []
@@ -663,6 +663,19 @@ def build_package_data(debian_data, datasource_id, package_type='deb', distro=No
663663
party = models.Party(role='maintainer', name=maintainer_name, email=maintainer_email)
664664
parties.append(party)
665665

666+
uploaders = debian_data.get('uploaders')
667+
if uploaders:
668+
for uploader in uploaders.split(", "):
669+
uploader_name, uploader_email = parse_debian_maintainers(uploader)
670+
party = models.Party(role='uploader', name=uploader_name, email=uploader_email)
671+
parties.append(party)
672+
673+
vcs_url = debian_data.get('vcs-git')
674+
if vcs_url and ' ' in vcs_url:
675+
vcs_url = vcs_url.split(' ')[0]
676+
677+
code_view_url = debian_data.get('vcs-browser')
678+
666679
keywords = []
667680
keyword = debian_data.get('section')
668681
if keyword:
@@ -712,6 +725,8 @@ def build_package_data(debian_data, datasource_id, package_type='deb', distro=No
712725
qualifiers=qualifiers,
713726
description=description,
714727
homepage_url=homepage_url,
728+
vcs_url=vcs_url,
729+
code_view_url=code_view_url,
715730
size=size,
716731
source_packages=source_packages,
717732
keywords=keywords,

src/packagedcode/licensing.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,13 @@ def logger_debug(*args):
6060
return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args))
6161

6262

63+
RESOURCE_TO_PACKAGE_LICENSE_FIELDS = {
64+
'detected_license_expression': 'declared_license_expression',
65+
'detected_license_expression_spdx': 'declared_license_expression_spdx',
66+
'license_detections': 'license_detections',
67+
}
68+
69+
6370
def add_referenced_license_matches_for_package(resource, codebase):
6471
"""
6572
Return an updated ``resource`` saving it in place, after adding new license

src/packagedcode/models.py

Lines changed: 51 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,9 @@
2828
from commoncode.datautils import String
2929
from commoncode.fileutils import as_posixpath
3030
from commoncode.resource import Resource
31+
from license_expression import combine_expressions
32+
from license_expression import Licensing
33+
3134
try:
3235
from typecode import contenttype
3336
except ImportError:
@@ -118,11 +121,8 @@
118121
- IdentifiablePackageData: a base class for a Package-like class with a Package URL.
119122
"""
120123

121-
SCANCODE_DEBUG_PACKAGE = os.environ.get('SCANCODE_DEBUG_PACKAGE', False)
122-
SCANCODE_DEBUG_PACKAGE_ASSEMBLY = os.environ.get('SCANCODE_DEBUG_PACKAGE_ASSEMBLY', False)
123-
124-
TRACE = SCANCODE_DEBUG_PACKAGE
125-
TRACE_UPDATE = SCANCODE_DEBUG_PACKAGE_ASSEMBLY
124+
TRACE = os.environ.get('SCANCODE_DEBUG_PACKAGE', False)
125+
TRACE_UPDATE = os.environ.get('SCANCODE_DEBUG_PACKAGE_ASSEMBLY', False)
126126

127127

128128
def logger_debug(*args):
@@ -1618,6 +1618,8 @@ def update(
16181618
include_qualifiers=False,
16191619
include_subpath=False,
16201620
ignore_name_check=False,
1621+
default_relation='AND',
1622+
licensing=Licensing(),
16211623
):
16221624
"""
16231625
Update this Package with data from the ``package_data`` PackageData.
@@ -1670,16 +1672,16 @@ def update(
16701672
'file_references',
16711673
])
16721674

1675+
license_modified = False
16731676
for name, value in existing.items():
16741677
new_value = new_package_data.get(name)
1678+
if not new_value:
1679+
if TRACE_UPDATE: logger_debug(f' No new value: {name!r}: skipping')
1680+
continue
16751681

16761682
if TRACE_UPDATE:
16771683
logger_debug(f'update: {name!r}={value!r} with new_value: {new_value!r}')
16781684

1679-
if not new_value:
1680-
if TRACE_UPDATE: logger_debug(' No new value: skipping')
1681-
continue
1682-
16831685
if not value:
16841686
if TRACE_UPDATE: logger_debug(' set existing value to new')
16851687
setattr(self, name, new_value)
@@ -1694,6 +1696,18 @@ def update(
16941696
if name == 'extra_data':
16951697
value.update(new_value)
16961698

1699+
if 'license_detections' in name:
1700+
license_modified = True
1701+
license_keys = licensing.license_keys(
1702+
expression=new_package_data.get("declared_license_expression"),
1703+
unique=True,
1704+
)
1705+
if name == 'license_detections' and len(license_keys) > 1:
1706+
setattr(self, 'other_license_detections', new_value)
1707+
else:
1708+
merged = value + new_value
1709+
setattr(self, name, merged)
1710+
16971711
if name in list_fields:
16981712
if TRACE_UPDATE: logger_debug(' merge lists of values')
16991713
merged = merge_sequences(list1=value, list2=new_value)
@@ -1702,8 +1716,36 @@ def update(
17021716
elif TRACE_UPDATE and value != new_value:
17031717
if TRACE_UPDATE: logger_debug(' skipping update: no replace')
17041718

1719+
if license_modified:
1720+
self.refresh_license_expressions(default_relation=default_relation)
1721+
17051722
return True
17061723

1724+
def refresh_license_expressions(self, default_relation='AND'):
1725+
if self.license_detections:
1726+
self.declared_license_expression = str(combine_expressions(
1727+
expressions=[
1728+
detection["license_expression"]
1729+
for detection in self.license_detections
1730+
],
1731+
relation=default_relation,
1732+
))
1733+
self.declared_license_expression_spdx = get_declared_license_expression_spdx(
1734+
declared_license_expression=self.declared_license_expression,
1735+
)
1736+
1737+
if self.other_license_detections:
1738+
self.other_license_expression = str(combine_expressions(
1739+
expressions=[
1740+
detection["license_expression"]
1741+
for detection in self.other_license_detections
1742+
],
1743+
relation=default_relation,
1744+
))
1745+
self.other_license_expression_spdx = get_declared_license_expression_spdx(
1746+
declared_license_expression=self.other_license_expression,
1747+
)
1748+
17071749
def get_packages_files(self, codebase):
17081750
"""
17091751
Yield all the Resource of this package found in codebase.

src/packagedcode/npm.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
import logging
1313
import json
1414
import re
15+
import sys
1516
import urllib.parse
1617
from functools import partial
1718
from itertools import islice
@@ -35,8 +36,10 @@
3536

3637

3738
SCANCODE_DEBUG_PACKAGE = os.environ.get('SCANCODE_DEBUG_PACKAGE', False)
39+
SCANCODE_DEBUG_PACKAGE_NPM = os.environ.get('SCANCODE_DEBUG_PACKAGE_NPM', False)
3840

3941
TRACE = SCANCODE_DEBUG_PACKAGE
42+
TRACE_NPM = SCANCODE_DEBUG_PACKAGE_NPM
4043

4144

4245
def logger_debug(*args):
@@ -45,8 +48,7 @@ def logger_debug(*args):
4548

4649
logger = logging.getLogger(__name__)
4750

48-
if TRACE:
49-
import sys
51+
if TRACE or TRACE_NPM:
5052
logging.basicConfig(stream=sys.stdout)
5153
logger.setLevel(logging.DEBUG)
5254

@@ -643,11 +645,16 @@ def parse(cls, location, package_only=False):
643645
# <alias-package>@npm:<package>
644646
if "@npm:" in ns:
645647
ns = ns.split(':')[1]
648+
if "@npm:" in name:
649+
name = name.split(':')[1]
646650
top_requirements.append((ns, name, constraint,))
647651

648652
else:
649653
raise Exception('Inconsistent content')
650654

655+
if TRACE_NPM:
656+
logger_debug(f'YarnLockV1Handler: parse: top_requirements: {top_requirements}')
657+
651658
# top_requirements should be all for the same package
652659
ns_names = set([(ns, name) for ns, name, _constraint in top_requirements])
653660
assert len(ns_names) == 1, f'Different names for same dependency is not supported: {ns_names!r}'

0 commit comments

Comments
 (0)