Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ v33.0.0 (next next, roadmap)
v32.1.0 (next, roadmap)
----------------------------

New CLI options:

- A new CLI option ``--package-only`` has been added which performs
a faster package scan by skipping the package assembly step and
also skipping license/copyright detection on package metadata.

Major API/other changes:

- Output Format Version updated to 3.1.0 (minor version bump)
Expand Down
4 changes: 4 additions & 0 deletions docs/source/rst_snippets/basic_options.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ documenting a program's options. For example:
--system-package Scan ``<input>`` for installed system package
databases.

--package-only Scan ``<input>`` for system and application
package metadata only, without license/copyright
detection and package assembly.

-e, --email Scan ``<input>`` for emails.

Sub-Options:
Expand Down
5 changes: 3 additions & 2 deletions src/packagedcode/about.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class AboutFileHandler(models.DatafileHandler):
documentation_url = 'https://aboutcode-toolkit.readthedocs.io/en/latest/specification.html'

@classmethod
def parse(cls, location):
def parse(cls, location, package_only=False):
"""
Yield one or more Package manifest objects given a file ``location`` pointing to a
package archive, manifest or similar.
Expand Down Expand Up @@ -90,7 +90,7 @@ def parse(cls, location):
file_references.append(models.FileReference(path=about_resource))

# FIXME: we should put the unprocessed attributes in extra data
yield models.PackageData(
package_data = dict(
datasource_id=cls.datasource_id,
type=package_type,
namespace=package_ns,
Expand All @@ -103,6 +103,7 @@ def parse(cls, location):
download_url=download_url,
file_references=file_references,
)
yield models.PackageData.from_data(package_data, package_only)

@classmethod
def assemble(cls, package_data, resource, codebase, package_adder):
Expand Down
38 changes: 28 additions & 10 deletions src/packagedcode/alpine.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,12 @@ class AlpineInstalledDatabaseHandler(models.DatafileHandler):
description = 'Alpine Linux installed package database'

@classmethod
def parse(cls, location, package_only=False):
    """
    Yield the PackageData objects found in the Alpine installed package
    database file at ``location``.

    ``package_only`` is forwarded unchanged to ``parse_alpine_installed_db``.
    """
    installed_packages = parse_alpine_installed_db(
        location=location,
        datasource_id=cls.datasource_id,
        package_type=cls.default_package_type,
        package_only=package_only,
    )
    for installed_package in installed_packages:
        yield installed_package

@classmethod
Expand Down Expand Up @@ -134,9 +135,14 @@ class AlpineApkbuildHandler(models.DatafileHandler):
documentation_url = 'https://wiki.alpinelinux.org/wiki/APKBUILD_Reference'

@classmethod
def parse(cls, location, package_only=False):
    """
    Yield a PackageData object built from the APKBUILD file at ``location``.

    Yield nothing when ``parse_apkbuild`` returns no package data.
    License fields are populated only when ``package_only`` is False.
    """
    package_data = parse_apkbuild(
        location=location,
        strict=True,
        package_only=package_only,
    )
    # parse_apkbuild may return None: bail out before touching the result so
    # that populate_license_fields is never handed an empty value.
    if not package_data:
        return

    if not package_only:
        cls.populate_license_fields(package_data)

    yield package_data

Expand Down Expand Up @@ -165,7 +171,7 @@ def assign_package_to_resources(cls, package, resource, codebase, package_adder)
)


def parse_alpine_installed_db(location, datasource_id, package_type):
def parse_alpine_installed_db(location, datasource_id, package_type, package_only=False):
"""
Yield PackageData objects from an installed database file at `location`
or None. Typically found at '/lib/apk/db/installed' in an Alpine
Expand All @@ -179,6 +185,7 @@ def parse_alpine_installed_db(location, datasource_id, package_type):
package_fields=package_fields,
datasource_id=datasource_id,
package_type=package_type,
package_only=package_only,
)


Expand Down Expand Up @@ -241,7 +248,7 @@ def get_alpine_installed_db_fields(location):
])


def parse_apkbuild(location, strict=False):
def parse_apkbuild(location, strict=False, package_only=False):
"""
Return a PackageData object from an APKBUILD file at ``location`` or None.

Expand All @@ -256,6 +263,7 @@ def parse_apkbuild(location, strict=False):
datasource_id=AlpineApkbuildHandler.datasource_id,
package_type=AlpineApkbuildHandler.default_package_type,
strict=strict,
package_only=package_only,
)


Expand Down Expand Up @@ -732,7 +740,7 @@ def fix_apkbuild(text):
return text


def parse_apkbuild_text(text, datasource_id, package_type, strict=False):
def parse_apkbuild_text(text, datasource_id, package_type, strict=False, package_only=False):
"""
Return a PackageData object from an APKBUILD text context or None. Only
consider variables with a name listed in the ``names`` set.
Expand Down Expand Up @@ -761,7 +769,8 @@ def parse_apkbuild_text(text, datasource_id, package_type, strict=False):
package = build_package_data(
variables,
datasource_id=datasource_id,
package_type=package_type
package_type=package_type,
package_only=package_only,
)

if package and unresolved:
Expand Down Expand Up @@ -800,7 +809,7 @@ def parse_pkginfo(location):
raise NotImplementedError


def build_package_data(package_fields, datasource_id, package_type):
def build_package_data(package_fields, datasource_id, package_type, package_only=False):
"""
Return a PackageData object from a ``package_fields`` iterable of (name,
value) tuples.
Expand Down Expand Up @@ -850,7 +859,16 @@ def build_package_data(package_fields, datasource_id, package_type):

converted_fields.update(converted)

return models.PackageData.from_dict(converted_fields)
fields_not_required = ["current_file", "current_dir"]
for field in fields_not_required:
value = converted_fields.get(field)
if value:
converted_fields.pop(field)

return models.PackageData.from_data(
package_data=converted_fields,
package_only=package_only,
)

#####################################
# Note: all handlers MUST accept **kwargs as they also receive the current data
Expand Down
7 changes: 4 additions & 3 deletions src/packagedcode/bower.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class BowerJsonHandler(models.DatafileHandler):
documentation_url = 'https://bower.io'

@classmethod
def parse(cls, location):
def parse(cls, location, package_only=False):
with io.open(location, encoding='utf-8') as loc:
package_data = json.load(loc)

Expand Down Expand Up @@ -87,7 +87,7 @@ def parse(cls, location):
)
)

yield models.PackageData(
package_data = dict(
datasource_id=cls.datasource_id,
type=cls.default_package_type,
name=name,
Expand All @@ -98,5 +98,6 @@ def parse(cls, location):
parties=parties,
homepage_url=homepage_url,
vcs_url=vcs_url,
dependencies=dependencies
dependencies=dependencies,
)
yield models.PackageData.from_data(package_data, package_only)
32 changes: 20 additions & 12 deletions src/packagedcode/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class AutotoolsConfigureHandler(models.NonAssemblableDatafileHandler):
documentation_url = 'https://www.gnu.org/software/automake/'

@classmethod
def parse(cls, location):
def parse(cls, location, package_only=False):
# we use the parent directory as a package name
name = fileutils.file_name(fileutils.parent_directory(location))
# we could use checksums as version in the future
Expand All @@ -67,12 +67,13 @@ def parse(cls, location):
# there are dependencies we could use
# dependencies = []

yield models.PackageData(
package_data = dict(
datasource_id=cls.datasource_id,
type=cls.default_package_type,
name=name,
version=version,
)
yield models.PackageData.from_data(package_data, package_only)



Expand Down Expand Up @@ -104,6 +105,7 @@ def assemble(cls, package_data, resource, codebase, package_adder):
package = models.Package.from_package_data(
package_data=package_data,
datafile_path=resource.path,
package_only=True,
)

if TRACE:
Expand Down Expand Up @@ -135,8 +137,7 @@ def assemble(cls, package_data, resource, codebase, package_adder):
yield resource

@classmethod
def parse(cls, location):

def parse(cls, location, package_only=False):
# Thanks to Starlark being a Python dialect, we can use `ast` to parse it
with open(location, 'rb') as f:
tree = ast.parse(f.read())
Expand Down Expand Up @@ -188,23 +189,28 @@ def parse(cls, location):
if TRACE:
logger_debug(f"build: parse: license_files: {license_files}")

package_data = models.PackageData(
package_data = dict(
datasource_id=cls.datasource_id,
type=cls.default_package_type,
name=name,
extracted_license_statement=license_files,
)
# `package_only` is True as we do the license detection
# on assembly
yield models.PackageData.from_data(
package_data=package_data,
package_only=True,
)

package_data.extracted_license_statement = license_files
yield package_data

else:
# If we don't find anything in the pkgdata file, we yield a Package
# with the parent directory as the name
yield models.PackageData(
package_data = dict(
datasource_id=cls.datasource_id,
type=cls.default_package_type,
name=fileutils.file_name(fileutils.parent_directory(location))
)
yield models.PackageData.from_data(package_data, package_only)

@classmethod
def assign_package_to_resources(cls, package, resource, codebase, package_adder, skip_name=None):
Expand Down Expand Up @@ -326,7 +332,7 @@ class BuckMetadataBzlHandler(BaseStarlarkManifestHandler):
documentation_url = 'https://buck.build/'

@classmethod
def parse(cls, location):
def parse(cls, location, package_only=True):

with open(location, 'rb') as f:
tree = ast.parse(f.read())
Expand Down Expand Up @@ -378,7 +384,7 @@ def parse(cls, location):
):
# TODO: Create function that determines package type from download URL,
# then create a package of that package type from the metadata info
yield models.PackageData(
package_data = dict(
datasource_id=cls.datasource_id,
type=metadata_fields.get('upstream_type', cls.default_package_type),
name=metadata_fields.get('name'),
Expand All @@ -388,6 +394,7 @@ def parse(cls, location):
homepage_url=metadata_fields.get('upstream_address', ''),
# TODO: Store 'upstream_hash` somewhere
)
yield models.PackageData.from_data(package_data, package_only=True)

if (
'package_type'
Expand All @@ -401,7 +408,7 @@ def parse(cls, location):
and 'vcs_commit_hash'
in metadata_fields
):
yield models.PackageData(
package_data = dict(
datasource_id=cls.datasource_id,
type=metadata_fields.get('package_type', cls.default_package_type),
name=metadata_fields.get('name'),
Expand All @@ -414,6 +421,7 @@ def parse(cls, location):
sha1=metadata_fields.get('download_archive_sha1', ''),
extra_data=dict(vcs_commit_hash=metadata_fields.get('vcs_commit_hash', ''))
)
yield models.PackageData.from_data(package_data, package_only=True)

@classmethod
def assign_package_to_resources(cls, package, resource, codebase, package_adder):
Expand Down
9 changes: 5 additions & 4 deletions src/packagedcode/build_gradle.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ class BuildGradleHandler(models.DatafileHandler):
description = 'Gradle build script'

@classmethod
def parse(cls, location, package_only=False):
    """
    Return the PackageData generator built from the dependencies collected
    from the Gradle build script at ``location``.

    ``package_only`` is forwarded unchanged to ``build_package``.
    """
    return build_package(
        cls,
        dependencies=get_dependencies(location),
        package_only=package_only,
    )

# TODO: handle complex cases of nested builds with many packages
@classmethod
Expand Down Expand Up @@ -328,7 +328,7 @@ def get_dependencies(build_gradle_location):
return list(get_dependencies_from_parse_tree(parse_tree))


def build_package(cls, dependencies):
def build_package(cls, dependencies, package_only=False):
"""
Yield PackageData from a ``dependencies`` list of mappings.
"""
Expand Down Expand Up @@ -364,10 +364,11 @@ def build_package(cls, dependencies):
)
)

yield models.PackageData(
package_data = dict(
datasource_id=cls.datasource_id,
type=cls.default_package_type,
primary_language=BuildGradleHandler.default_primary_language,
dependencies=package_dependencies,
)
yield models.PackageData.from_data(package_data, package_only)

10 changes: 6 additions & 4 deletions src/packagedcode/cargo.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ class CargoTomlHandler(CargoBaseHandler):
documentation_url = 'https://doc.rust-lang.org/cargo/reference/manifest.html'

@classmethod
def parse(cls, location):
def parse(cls, location, package_only=False):
package_data = toml.load(location, _dict=dict)
core_package_data = package_data.get('package', {})
workspace = package_data.get('workspace', {})
Expand Down Expand Up @@ -149,7 +149,7 @@ def parse(cls, location):
if workspace:
extra_data["workspace"] = workspace

yield models.PackageData(
package_data = dict(
datasource_id=cls.datasource_id,
type=cls.default_package_type,
name=name,
Expand All @@ -166,6 +166,7 @@ def parse(cls, location):
dependencies=dependencies,
extra_data=extra_data,
)
yield models.PackageData.from_data(package_data, package_only)


CARGO_ATTRIBUTE_MAPPING = {
Expand Down Expand Up @@ -200,7 +201,7 @@ class CargoLockHandler(CargoBaseHandler):
# ]

@classmethod
def parse(cls, location):
def parse(cls, location, package_only=False):
cargo_lock = toml.load(location, _dict=dict)
dependencies = []
package = cargo_lock.get('package', [])
Expand All @@ -221,12 +222,13 @@ def parse(cls, location):
)
)

yield models.PackageData(
package_data = dict(
datasource_id=cls.datasource_id,
type=cls.default_package_type,
primary_language=cls.default_primary_language,
dependencies=dependencies,
)
yield models.PackageData.from_data(package_data, package_only)


def dependency_mapper(dependencies, scope='dependencies'):
Expand Down
Loading