Merged
Changes from 4 commits
125 changes: 98 additions & 27 deletions src/packagedcode/cargo.py
@@ -7,6 +7,7 @@
# See https://aboutcode.org for more information about nexB OSS projects.
#

import os
import re

import saneyaml
@@ -20,7 +21,81 @@
"""


class CargoTomlHandler(models.DatafileHandler):
class CargoBaseHandler(models.DatafileHandler):
@classmethod
def assemble(cls, package_data, resource, codebase, package_adder):
"""
Assemble Cargo.toml and possible Cargo.lock datafiles. Also
support Cargo workspaces, where a repository contains multiple
member packages and shares some package information at the top level.
"""
workspace = package_data.extra_data.get("workspace", {})
workspace_members = workspace.get("members", [])
workspace_package_data = workspace.get("package", {})
attributes_to_copy = [
"license_detections",
"declared_license_expression",
"declared_license_expression_spdx"
]
if "license" in workspace_package_data:
for attribute in attributes_to_copy:
workspace_package_data[attribute] = getattr(package_data, attribute)

workspace_root_path = resource.parent(codebase).path
if workspace_package_data and workspace_members:
for workspace_member_path in workspace_members:
workspace_directory_path = os.path.join(workspace_root_path, workspace_member_path)
workspace_directory = codebase.get_resource(path=workspace_directory_path)
if not workspace_directory:
continue

# Update the package data for all members with the
# workspace package data
for resource in workspace_directory.children(codebase):
if cls.is_datafile(location=resource.location):
if not resource.package_data:
continue

updated_package_data = cls.update_resource_package_data(
package_data=workspace_package_data,
old_package_data=resource.package_data.pop(),
mapping=CARGO_ATTRIBUTE_MAPPING,
)
resource.package_data.append(updated_package_data)
resource.save(codebase)

yield from cls.assemble_from_many_datafiles(
datafile_name_patterns=('Cargo.toml', 'cargo.toml', 'Cargo.lock', 'cargo.lock'),
directory=workspace_directory,
codebase=codebase,
package_adder=package_adder,
)
else:
yield from cls.assemble_from_many_datafiles(
datafile_name_patterns=('Cargo.toml', 'cargo.toml', 'Cargo.lock', 'cargo.lock'),
directory=resource.parent(codebase),
codebase=codebase,
package_adder=package_adder,
)

@classmethod
def update_resource_package_data(cls, package_data, old_package_data, mapping=None):

for attribute in old_package_data.keys():
if attribute in mapping:
replace_by_attribute = mapping.get(attribute)
old_package_data[attribute] = package_data.get(replace_by_attribute)
elif attribute == "parties":
old_package_data[attribute] = list(get_parties(
person_names=package_data.get("authors"),
party_role='author',
))

return old_package_data



class CargoTomlHandler(CargoBaseHandler):
datasource_id = 'cargo_toml'
path_patterns = ('*/Cargo.toml', '*/cargo.toml',)
default_package_type = 'cargo'
@@ -31,11 +106,14 @@ class CargoTomlHandler(models.DatafileHandler):
@classmethod
def parse(cls, location):
package_data = toml.load(location, _dict=dict)

core_package_data = package_data.get('package', {})
workspace = package_data.get('workspace', {})

name = core_package_data.get('name')
version = core_package_data.get('version')
if isinstance(version, dict) and "workspace" in version:
version = "workspace"

description = core_package_data.get('description') or ''
description = description.strip()

@@ -66,6 +144,9 @@ def parse(cls, location):
repository_homepage_url = name and f'https://crates.io/crates/{name}'
repository_download_url = name and version and f'https://crates.io/api/v1/crates/{name}/{version}/download'
api_data_url = name and f'https://crates.io/api/v1/crates/{name}'
extra_data = {}
if workspace:
extra_data["workspace"] = workspace

yield models.PackageData(
datasource_id=cls.datasource_id,
@@ -82,22 +163,24 @@ def parse(cls, location):
repository_download_url=repository_download_url,
api_data_url=api_data_url,
dependencies=dependencies,
extra_data=extra_data,
)

@classmethod
def assemble(cls, package_data, resource, codebase, package_adder):
"""
Assemble Cargo.toml and possible Cargo.lock datafiles
"""
yield from cls.assemble_from_many_datafiles(
datafile_name_patterns=('Cargo.toml', 'cargo.toml', 'Cargo.lock', 'cargo.lock'),
directory=resource.parent(codebase),
codebase=codebase,
package_adder=package_adder,
)

CARGO_ATTRIBUTE_MAPPING = {
# Fields in PackageData model: Fields in cargo
"homepage_url": "homepage",
"vcs_url": "repository",
"keywords": "categories",
"extracted_license_statement": "license",
# These are fields carried over to avoid re-detection of licenses
"license_detections": "license_detections",
"declared_license_expression": "declared_license_expression",
"declared_license_expression_spdx": "declared_license_expression_spdx",
}


class CargoLockHandler(models.DatafileHandler):
class CargoLockHandler(CargoBaseHandler):
datasource_id = 'cargo_lock'
path_patterns = ('*/Cargo.lock', '*/cargo.lock',)
default_package_type = 'cargo'
@@ -144,18 +227,6 @@ def parse(cls, location):
dependencies=dependencies,
)

@classmethod
def assemble(cls, package_data, resource, codebase, package_adder):
"""
Assemble Cargo.toml and possible Cargo.lock datafiles
"""
yield from cls.assemble_from_many_datafiles(
datafile_name_patterns=('Cargo.toml', 'Cargo.lock',),
directory=resource.parent(codebase),
codebase=codebase,
package_adder=package_adder,
)


def dependency_mapper(dependencies, scope='dependencies'):
"""
@@ -197,7 +268,7 @@ def get_parties(person_names, party_role):
name=name,
role=party_role,
email=email,
)
).to_dict()


person_parser = re.compile(
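To make the workspace support above concrete, the sketch below shows the inheritance step in isolation: values from a shared `[workspace.package]` table are copied into each member's package data through an attribute mapping. The `apply_workspace_data` helper and the plain-dict inputs are illustrative only; the real `CargoBaseHandler.assemble` works on `PackageData` objects and codebase resources, and then re-runs `assemble_from_many_datafiles` in each member directory so a neighboring Cargo.lock is merged into the same package.

```python
# Illustrative sketch (not the handler itself): applying [workspace.package]
# values to a member crate's data through an attribute mapping.
CARGO_ATTRIBUTE_MAPPING = {
    # PackageData field -> Cargo.toml [workspace.package] key
    "homepage_url": "homepage",
    "vcs_url": "repository",
    "keywords": "categories",
    "extracted_license_statement": "license",
}


def apply_workspace_data(workspace_package, member_package, mapping=CARGO_ATTRIBUTE_MAPPING):
    """Return a copy of member_package with fields filled from workspace_package."""
    updated = dict(member_package)
    for field, cargo_key in mapping.items():
        if cargo_key in workspace_package:
            updated[field] = workspace_package[cargo_key]
    return updated


workspace_package = {"homepage": "https://example.org", "license": "MIT OR Apache-2.0"}
member_package = {"name": "member-crate", "version": "workspace"}
print(apply_workspace_data(workspace_package, member_package))
# {'name': 'member-crate', 'version': 'workspace',
#  'homepage_url': 'https://example.org',
#  'extracted_license_statement': 'MIT OR Apache-2.0'}
```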
8 changes: 7 additions & 1 deletion src/packagedcode/licensing.py
@@ -684,6 +684,12 @@ def get_normalized_license_detections(
if detections:
license_detections.extend(detections)

if not license_detections:
unknown_dict_object = repr(dict(extracted_license.items()))
unknown_detection = get_unknown_license_detection(query_string=unknown_dict_object)
license_detections.append(unknown_detection)
if TRACE:
logger_debug(f'get_normalized_license_detections: dict: unknown_dict_object: {unknown_dict_object}, unknown_detection: {saneyaml.dump(unknown_detection.to_dict())}')
else:
extracted_license_statement = saneyaml.dump(extracted_license)
license_detections = get_license_detections_for_extracted_license_statement(
@@ -728,7 +734,6 @@ def get_normalized_license_detections(

else:
extracted_license_statement = saneyaml.dump(extracted_license_item)

detections = get_license_detections_for_extracted_license_statement(
extracted_license_statement=extracted_license_statement,
try_as_expression=try_as_expression,
@@ -794,6 +799,7 @@ def get_license_detections_and_expression(
if not license_detections:
if not isinstance(extracted_license_statement, str):
extracted_license_statement = saneyaml.dump(extracted_license_statement)

license_detection = get_unknown_license_detection(query_string=extracted_license_statement)
license_detections = [license_detection]

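The licensing.py change adds a fallback for mapping-shaped extracted licenses that produce no detection: instead of dropping the statement, its repr() becomes the query string of an "unknown" license detection. A rough, standalone approximation of that conversion (the input dict is invented for illustration; the real code then calls get_unknown_license_detection):

```python
# Hypothetical dict-shaped extracted license that matched no known license.
extracted_license = {"type": "other", "text": "All rights reserved by Example Corp."}

# Keep the original data visible by serializing it as the detection query string.
query_string = repr(dict(extracted_license.items()))
print(query_string)
# {'type': 'other', 'text': 'All rights reserved by Example Corp.'}

# In licensing.py this string is then passed to:
#   get_unknown_license_detection(query_string=query_string)
```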
5 changes: 4 additions & 1 deletion src/packagedcode/models.py
@@ -781,7 +781,10 @@ def populate_license_fields(self):
)

if self.extracted_license_statement and not isinstance(self.extracted_license_statement, str):
self.extracted_license_statement = saneyaml.dump(self.extracted_license_statement)
if isinstance(self.extracted_license_statement, dict):
self.extracted_license_statement = saneyaml.dump(dict(self.extracted_license_statement.items()))
else:
self.extracted_license_statement = saneyaml.dump(self.extracted_license_statement)

def to_dict(self, with_details=True, **kwargs):
mapping = super().to_dict(with_details=with_details, **kwargs)
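The models.py change is the same idea on the PackageData side: a mapping-like extracted_license_statement is converted to a plain dict before saneyaml serializes it, so ordered or attr-style mappings dump the same way as regular dicts. A minimal standalone version, with an invented field value:

```python
import saneyaml

extracted_license_statement = {"license": "MIT", "exceptions": ["LLVM-exception"]}

if extracted_license_statement and not isinstance(extracted_license_statement, str):
    if isinstance(extracted_license_statement, dict):
        # dict(...items()) normalizes mapping subclasses into a plain dict
        extracted_license_statement = saneyaml.dump(dict(extracted_license_statement.items()))
    else:
        extracted_license_statement = saneyaml.dump(extracted_license_statement)

# extracted_license_statement is now a small YAML document, roughly:
#   license: MIT
#   exceptions:
#       - LLVM-exception
print(extracted_license_statement)
```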
24 changes: 14 additions & 10 deletions src/packagedcode/plugin_package.py
@@ -38,7 +38,8 @@
from packagedcode.models import PackageWithResources

TRACE = os.environ.get('SCANCODE_DEBUG_PACKAGE_API', False)

TRACE_DEEP = os.environ.get('SCANCODE_DEBUG_PACKAGE_API_DEEP', False)
TRACE_LICENSE = os.environ.get('SCANCODE_DEBUG_PACKAGE_LICENSE', False)

def logger_debug(*args):
pass
@@ -207,7 +208,7 @@ def process_codebase(self, codebase, strip_root=False, **kwargs):
# If we don't detect a license in package_data but there is a license detected in the file,
# we add the license expression from the file to the package
modified = add_license_from_file(resource, codebase)
if TRACE and modified:
if TRACE_LICENSE and modified:
logger_debug(f'packagedcode: process_codebase: add_license_from_file: modified: {modified}')

if codebase.has_single_resource:
@@ -216,7 +217,7 @@ def process_codebase(self, codebase, strip_root=False, **kwargs):
# If there are referenced files in an extracted license statement, we follow
# the references, look for license detections and add them back
modified = list(add_referenced_license_matches_for_package(resource, codebase))
if TRACE and modified:
if TRACE_LICENSE and modified:
logger_debug(f'packagedcode: process_codebase: add_referenced_license_matches_for_package: modified: {modified}')

# If there is a LICENSE file on the same level as the manifest, and no license
Expand All @@ -234,7 +235,7 @@ def process_codebase(self, codebase, strip_root=False, **kwargs):
# If there is an unknown reference to a package, we add the license
# from the package license detection
modified = list(add_referenced_license_detection_from_package(resource, codebase))
if TRACE and modified:
if TRACE_LICENSE and modified:
logger_debug(f'packagedcode: process_codebase: add_referenced_license_matches_from_package: modified: {modified}')


@@ -244,15 +245,15 @@ def add_license_from_file(resource, codebase):
and the file has license detections, and if so, populate the package_data license
expression and detection fields from the file license.
"""
if TRACE:
if TRACE_LICENSE:
logger_debug(f'packagedcode.plugin_package: add_license_from_file: resource: {resource.path}')

if not resource.is_file:
return

license_detections_file = resource.license_detections

if TRACE:
if TRACE_LICENSE:
logger_debug(f'add_license_from_file: license_detections_file: {license_detections_file}')
if not license_detections_file:
return
@@ -263,7 +264,7 @@ def add_license_from_file(resource, codebase):

for pkg in package_data:
license_detections_pkg = pkg["license_detections"]
if TRACE:
if TRACE_LICENSE:
logger_debug(f'add_license_from_file: license_detections_pkg: {license_detections_pkg}')

if not license_detections_pkg:
@@ -359,7 +360,7 @@ def get_package_and_deps(codebase, package_adder=add_to_package, strip_root=False
package_data = PackageData.from_dict(mapping=package_data)

if TRACE:
logger_debug(' get_package_and_deps: package_data:', package_data)
logger_debug(' get_package_and_deps: package_data.purl:', package_data.purl)

# Find a handler for this package datasource to assemble and collect
# packages and deps
@@ -375,8 +376,6 @@ def get_package_and_deps(codebase, package_adder=add_to_package, strip_root=False
)

for item in items:
if TRACE:
logger_debug(' get_package_and_deps: item:', item)

if isinstance(item, Package):
if strip_root and not has_single_resource:
@@ -385,6 +384,8 @@ def get_package_and_deps(codebase, package_adder=add_to_package, strip_root=False
for dfp in item.datafile_paths
]
packages.append(item)
if TRACE:
logger_debug(' get_package_and_deps: Package:', item.purl)

elif isinstance(item, Dependency):
if strip_root and not has_single_resource:
@@ -395,6 +396,9 @@ def get_package_and_deps(codebase, package_adder=add_to_package, strip_root=False
seen_resource_paths.add(item.path)

if TRACE:
logger_debug(' get_package_and_deps: Resource:', item.path)

if TRACE_DEEP:
logger_debug(
' get_package_and_deps: seen_resource_path:',
seen_resource_paths,
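The plugin_package.py changes split tracing into separate switches: SCANCODE_DEBUG_PACKAGE_LICENSE gates the license-propagation messages and SCANCODE_DEBUG_PACKAGE_API_DEEP gates the noisiest per-resource output, while the existing SCANCODE_DEBUG_PACKAGE_API flag keeps the rest. Since these are read from the environment at import time, they are typically exported in the shell before running a scan; in Python they must be set before the module is imported, for example:

```python
import os

# Set the flags before packagedcode.plugin_package is imported;
# the variable names are the ones added in the diff above.
os.environ["SCANCODE_DEBUG_PACKAGE_LICENSE"] = "1"   # license propagation tracing
os.environ["SCANCODE_DEBUG_PACKAGE_API_DEEP"] = "1"  # verbose per-resource tracing

from packagedcode import plugin_package  # noqa: E402 - import after setting the flags
```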