Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 80 additions & 37 deletions scanpipe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3772,6 +3772,18 @@ class DiscoveredDependencyQuerySet(
VulnerabilityQuerySetMixin,
ProjectRelatedQuerySet,
):
def project_dependencies(self):
    """Return the dependencies whose ``for_package`` relation is null."""
    lookup = {"for_package__isnull": True}
    return self.filter(**lookup)

def package_dependencies(self):
    """Return the dependencies whose ``for_package`` relation is set."""
    lookup = {"for_package__isnull": False}
    return self.filter(**lookup)

def resolved(self):
    """Return the dependencies whose ``resolved_to_package`` relation is set."""
    lookup = {"resolved_to_package__isnull": False}
    return self.filter(**lookup)

def unresolved(self):
    """Return the dependencies whose ``resolved_to_package`` relation is null."""
    lookup = {"resolved_to_package__isnull": True}
    return self.filter(**lookup)

def prefetch_for_serializer(self):
"""
Optimized prefetching for a QuerySet to be consumed by the
Expand Down Expand Up @@ -3816,6 +3828,26 @@ class DiscoveredDependency(
system and application packages discovered in the code under analysis.
Dependencies are usually collected from parsed package data such as a package
manifest or lockfile.

This class manages dependencies with the following considerations:

1. A dependency can be associated with a Package via the ``for_package`` field.
In this case, it is termed a "Package's dependency".
If there is no such association, the dependency is considered a
"Project's dependency".

2. A dependency can also be linked to a Package through the ``resolved_to_package``
field. When this link exists, the dependency is considered "resolved".

3. Dependencies can be either direct or transitive:
- A **direct dependency** is explicitly declared in a package manifest or
lockfile.
- A **transitive dependency** is not declared directly, but is required by one
of the project's direct dependencies.

Understanding the distinction between direct and transitive dependencies is
important for analyzing dependency trees, resolving version conflicts, and
assessing potential security risks.
"""

# Overrides the `project` field to set the proper `related_name`.
Expand Down Expand Up @@ -3966,6 +3998,24 @@ def datafile_path(self):
if self.datafile_resource:
return self.datafile_resource.path

@property
def is_project_dependency(self):
    """
    Return True when ``for_package_id`` is empty, meaning the dependency is
    attached to the project rather than to a specific package.
    """
    return not self.for_package_id

@property
def is_package_dependency(self):
    """Return True when ``for_package_id`` is set on this dependency."""
    return True if self.for_package_id else False

@property
def is_resolved_to_package(self):
    """Return True when ``resolved_to_package_id`` is set on this dependency."""
    if self.resolved_to_package_id:
        return True
    return False

@classmethod
def create_from_data(
cls,
Expand All @@ -3981,6 +4031,14 @@ def create_from_data(
Create and return a DiscoveredDependency for a `project` from the
`dependency_data`.

The `for_package` and `resolved_to_package` FKs can be provided as args,
or looked up from the `dependency_data` using its `for_package_uid` and
`resolve_to_package_uid` values.

Note that a dependency:
- without a `for_package` FK is a "Project's dependency"
- without a `resolved_to_package` FK is "unresolved".

If `strip_datafile_path_root` is True, then `create_from_data()` will
strip the root path segment from the `datafile_path` of
`dependency_data` before looking up the corresponding CodebaseResource
Expand All @@ -3989,51 +4047,36 @@ def create_from_data(
not stripped for `datafile_path`.
"""
dependency_data = dependency_data.copy()
required_fields = ["purl", "dependency_uid"]
missing_values = [
field_name
for field_name in required_fields
if not dependency_data.get(field_name)
]
project_packages_qs = project.discoveredpackages

if missing_values:
message = (
f"No values for the following required fields: "
f"{', '.join(missing_values)}"
)
if not dependency_data.get("dependency_uid"):
dependency_data["dependency_uid"] = str(uuid.uuid4())

project.add_warning(description=message, model=cls, details=dependency_data)
return

if not for_package:
for_package_uid = dependency_data.get("for_package_uid")
if for_package_uid:
for_package = project.discoveredpackages.get(
package_uid=for_package_uid
)
for_package_uid = dependency_data.get("for_package_uid")
if not for_package and for_package_uid:
for_package = project_packages_qs.get_or_none(package_uid=for_package_uid)

if not resolved_to_package:
resolved_to_uid = dependency_data.get("resolved_to_uid")
if resolved_to_uid:
resolved_to_package = project.discoveredpackages.get(
package_uid=resolved_to_uid
)
resolve_to_package_uid = dependency_data.get("resolve_to_package_uid")
if not resolved_to_package and resolve_to_package_uid:
resolved_to_package = project_packages_qs.get_or_none(
package_uid=resolve_to_package_uid
)

if not datafile_resource:
datafile_path = dependency_data.get("datafile_path")
if datafile_path:
if strip_datafile_path_root:
segments = datafile_path.split("/")
datafile_path = "/".join(segments[1:])
datafile_resource = project.codebaseresources.get(path=datafile_path)
datafile_path = dependency_data.get("datafile_path")
if not datafile_resource and datafile_path:
if strip_datafile_path_root:
segments = datafile_path.split("/")
datafile_path = "/".join(segments[1:])
datafile_resource = project.codebaseresources.get(path=datafile_path)

if datasource_id:
dependency_data["datasource_id"] = datasource_id

# Set purl fields from `purl`
# Set package_url fields from the ``purl`` string.
purl = dependency_data.get("purl")
purl_mapping = PackageURL.from_string(purl).to_dict()
dependency_data.update(**purl_mapping)
if purl:
purl_data_dict = PackageURL.from_string(purl).to_dict()
dependency_data.update(**purl_data_dict)

cleaned_data = {
field_name: value
Expand Down Expand Up @@ -4072,7 +4115,7 @@ def spdx_id(self):
# "SPDXID is a unique string containing letters, numbers, ., and/or -"
return f"SPDXRef-scancodeio-{self._meta.model_name}-{self.uuid}"

def as_spdx(self):
def as_spdx_package(self):
"""Return this Dependency as an SPDX Package entry."""
from scanpipe.pipes import spdx

Expand Down
1 change: 0 additions & 1 deletion scanpipe/pipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,6 @@ def get_dependencies(project, dependency_data):
Given a `dependency_data` mapping, get a list of DiscoveredDependency objects
for that `project` with similar dependency data.
"""
dependency = None
dependency_uid = dependency_data.get("dependency_uid")
extracted_requirement = dependency_data.get("extracted_requirement") or ""

Expand Down
2 changes: 1 addition & 1 deletion scanpipe/pipes/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,7 @@ def to_spdx(project, include_files=False):
license_expressions.append(license_expression)

for dependency in discovereddependency_qs:
packages_as_spdx.append(dependency.as_spdx())
packages_as_spdx.append(dependency.as_spdx_package())
if dependency.for_package:
relationships.append(
spdx.Relationship(
Expand Down
51 changes: 33 additions & 18 deletions scanpipe/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2041,15 +2041,35 @@ def test_scanpipe_discovered_package_queryset_dependency_methods(self):
z = make_package(project, "pkg:type/z")
# Project -> A -> B -> C
# Project -> Z
make_dependency(project, for_package=a, resolved_to_package=b)
make_dependency(project, for_package=b, resolved_to_package=c)
a_to_b = make_dependency(
project, for_package=a, resolved_to_package=b, dependency_uid="a_to_b"
)
b_to_c = make_dependency(
project, for_package=b, resolved_to_package=c, dependency_uid="b_to_c"
)
unresolved_dependency = make_dependency(project, dependency_uid="unresolved")

self.assertFalse(a_to_b.is_project_dependency)
self.assertTrue(a_to_b.is_package_dependency)
self.assertTrue(a_to_b.is_resolved_to_package)
self.assertTrue(unresolved_dependency.is_project_dependency)
self.assertFalse(unresolved_dependency.is_package_dependency)
self.assertFalse(unresolved_dependency.is_resolved_to_package)

project_packages_qs = project.discoveredpackages.order_by("name")
root_packages = project_packages_qs.root_packages()
self.assertEqual([a, z], list(root_packages))
non_root_packages = project_packages_qs.non_root_packages()
self.assertEqual([b, c], list(non_root_packages))

dependency_qs = project.discovereddependencies
self.assertEqual(
[unresolved_dependency], list(dependency_qs.project_dependencies())
)
self.assertEqual([a_to_b, b_to_c], list(dependency_qs.package_dependencies()))
self.assertEqual([a_to_b, b_to_c], list(dependency_qs.resolved()))
self.assertEqual([unresolved_dependency], list(dependency_qs.unresolved()))

@skipIf(sys.platform != "linux", "Ordering differs on macOS.")
def test_scanpipe_codebase_resource_model_walk_method(self):
fixtures = self.data / "asgiref" / "asgiref-3.3.0_walk_test_fixtures.json"
Expand Down Expand Up @@ -2955,10 +2975,11 @@ def test_scanpipe_discovered_package_model_create_from_data_missing_type(self):
def test_scanpipe_discovered_dependency_model_create_from_data(self):
project1 = make_project("Analysis")

DiscoveredPackage.create_from_data(project1, package_data1)
package1 = DiscoveredPackage.create_from_data(project1, package_data1)
CodebaseResource.objects.create(
project=project1, path="daglib-0.3.2.tar.gz-extract/daglib-0.3.2/PKG-INFO"
)
# Unresolved dependency
dependency = DiscoveredDependency.create_from_data(
project1, dependency_data1, strip_datafile_path_root=False
)
Expand All @@ -2982,23 +3003,17 @@ def test_scanpipe_discovered_dependency_model_create_from_data(self):
dependency.datafile_path,
)
self.assertEqual("pypi_sdist_pkginfo", dependency.datasource_id)
self.assertFalse(dependency.is_project_dependency)
self.assertTrue(dependency.is_package_dependency)
self.assertFalse(dependency.is_resolved_to_package)

# Test field validation when using create_from_data
dependency_count = DiscoveredDependency.objects.count()
incomplete_data = dict(dependency_data1)
incomplete_data["dependency_uid"] = ""
self.assertIsNone(
DiscoveredDependency.create_from_data(project1, incomplete_data)
# Resolved project dependency, resolved_to_package provided as arg
dependency2 = DiscoveredDependency.create_from_data(
project1, dependency_data={}, resolved_to_package=package1
)
self.assertEqual(dependency_count, DiscoveredDependency.objects.count())
message = project1.projectmessages.latest("created_date")
self.assertEqual("DiscoveredDependency", message.model)
self.assertEqual(ProjectMessage.Severity.WARNING, message.severity)
expected_message = "No values for the following required fields: dependency_uid"
self.assertEqual(expected_message, message.description)
self.assertEqual(dependency_data1["purl"], message.details["purl"])
self.assertEqual("", message.details["dependency_uid"])
self.assertEqual("", message.traceback)
self.assertTrue(dependency2.is_project_dependency)
self.assertFalse(dependency2.is_package_dependency)
self.assertTrue(dependency2.is_resolved_to_package)

def test_scanpipe_discovered_package_model_unique_package_uid_in_project(self):
project1 = make_project("Analysis")
Expand Down