Skip to content

Commit 80c090f

Browse files
committed
Use Project inputs as root elements that the SPDX document describes
Signed-off-by: tdruez <[email protected]>
1 parent 1179b18 commit 80c090f

File tree

4 files changed

+154
-18
lines changed

4 files changed

+154
-18
lines changed

scanpipe/models.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1248,7 +1248,12 @@ def add_upload(self, uploaded_file, tag=""):
12481248
adds the `input_source`.
12491249
"""
12501250
self.write_input_file(uploaded_file)
1251-
self.add_input_source(filename=uploaded_file.name, is_uploaded=True, tag=tag)
1251+
input_source = self.add_input_source(
1252+
filename=uploaded_file.name,
1253+
is_uploaded=True,
1254+
tag=tag,
1255+
)
1256+
return input_source
12521257

12531258
def add_uploads(self, uploads):
12541259
"""

scanpipe/pipes/output.py

Lines changed: 45 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import io
2626
import json
2727
import re
28+
import uuid
2829
from operator import attrgetter
2930
from pathlib import Path
3031

@@ -693,6 +694,25 @@ def get_dependency_as_spdx_relationship(dependency, document_spdx_id, packages_a
693694
return spdx_relationship
694695

695696

697+
def get_inputs_as_spdx_packages(project):
    """
    Build one SPDX package for each input of the provided ``project``.

    The returned packages are meant to be used as the root elements that the
    SPDX document describes.
    """

    def as_spdx_package(source):
        # Reuse the uuid stored on the input when there is one, otherwise
        # generate a new one for this package.
        identifier = source.get("uuid") or uuid.uuid4()
        filename = source.get("filename")
        return spdx.Package(
            spdx_id=f"SPDXRef-scancodeio-input-{identifier}",
            name=filename,
            filename=filename,
            download_location=source.get("download_url"),
            files_analyzed=True,
        )

    return [as_spdx_package(source) for source in project.get_inputs_with_source()]
714+
715+
696716
def to_spdx(project, include_files=False):
697717
"""
698718
Generate output for the provided ``project`` in SPDX document format.
@@ -705,13 +725,31 @@ def to_spdx(project, include_files=False):
705725
discoveredpackage_qs = get_queryset(project, "discoveredpackage")
706726
discovereddependency_qs = get_queryset(project, "discovereddependency")
707727

708-
project_as_root_package = spdx.Package(
709-
spdx_id=f"SPDXRef-scancodeio-project-{project.uuid}",
710-
name=project.name,
711-
files_analyzed=True,
712-
)
728+
project_inputs_as_spdx_packages = get_inputs_as_spdx_packages(project)
729+
730+
# Use the Project's input(s) as the root element(s) that the SPDX document
731+
# describes.
732+
# This ensures "documentDescribes" points only to the main subject of the SBOM,
733+
# not to every dependency or file in the project.
734+
# See https://github.com/spdx/spdx-spec/issues/395 and
735+
# https://github.com/aboutcode-org/scancode.io/issues/564#issuecomment-3269296563
736+
# for detailed context.
737+
describes = [
738+
input_as_spdx_package.spdx_id
739+
for input_as_spdx_package in project_inputs_as_spdx_packages
740+
]
741+
packages_as_spdx = project_inputs_as_spdx_packages
742+
743+
# Fallback to the Project as the SPDX root element for the "documentDescribes"
744+
if not project_inputs_as_spdx_packages:
745+
project_as_root_package = spdx.Package(
746+
spdx_id=f"SPDXRef-scancodeio-project-{project.uuid}",
747+
name=project.name,
748+
files_analyzed=True,
749+
)
750+
packages_as_spdx = [project_as_root_package]
751+
describes = [project_as_root_package.spdx_id]
713752

714-
packages_as_spdx = [project_as_root_package]
715753
license_expressions = []
716754
relationships = []
717755

@@ -723,7 +761,7 @@ def to_spdx(project, include_files=False):
723761
license_expressions.append(license_expression)
724762

725763
spdx_relationship = spdx.Relationship(
726-
spdx_id=project_as_root_package.spdx_id,
764+
spdx_id=describes[0],
727765
related_spdx_id=spdx_package.spdx_id,
728766
relationship="DEPENDS_ON",
729767
)
@@ -744,15 +782,6 @@ def to_spdx(project, include_files=False):
744782
for resource in get_queryset(project, "codebaseresource").files()
745783
]
746784

747-
# Use the Project (top-level package) as the root element that the SPDX document
748-
# describes.
749-
# This ensures "documentDescribes" points only to the main subject of the SBOM,
750-
# not to every dependency or file in the project.
751-
# See https://github.com/spdx/spdx-spec/issues/395 and
752-
# https://github.com/aboutcode-org/scancode.io/issues/564#issuecomment-3269296563
753-
# for detailed context.
754-
describes = [project_as_root_package.spdx_id]
755-
756785
document = spdx.Document(
757786
spdx_id=document_spdx_id,
758787
name=f"scancodeio_{project.name}",

scanpipe/pipes/spdx.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,7 @@ def as_dict(self):
362362

363363
optional_data = {
364364
"versionInfo": self.version,
365+
"packageFileName": self.filename,
365366
"licenseDeclared": self.license_declared,
366367
"supplier": self.supplier,
367368
"originator": self.originator,

scanpipe/tests/pipes/test_output.py

Lines changed: 102 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from unittest import mock
3232

3333
from django.conf import settings
34+
from django.core.files.uploadedfile import SimpleUploadedFile
3435
from django.core.management import call_command
3536
from django.test import TestCase
3637

@@ -417,7 +418,7 @@ def test_scanpipe_pipes_outputs_to_spdx(self):
417418
call_command("loaddata", fixtures, **{"verbosity": 0})
418419
project = Project.objects.get(name="asgiref")
419420

420-
with self.assertNumQueries(8):
421+
with self.assertNumQueries(9):
421422
output_file = output.to_spdx(project=project, include_files=True)
422423
self.assertIn(output_file.name, project.output_root)
423424

@@ -520,6 +521,106 @@ def test_scanpipe_pipes_outputs_to_spdx_dependencies(self, mock_uuid4):
520521
expected_file = self.data / "spdx" / "dependencies.spdx.json"
521522
self.assertResultsEqual(expected_file, results)
522523

524+
@mock.patch("uuid.uuid4")
def test_scanpipe_pipes_outputs_to_spdx_get_inputs_as_spdx_packages(
    self, mock_uuid4
):
    """
    Check the SPDX packages built from the Project inputs.

    Three kinds of inputs are covered: a file copied manually into the input
    directory, an uploaded file, and inputs declared with a download URL.
    """
    # Show complete diffs when one of the list comparisons below fails.
    self.maxDiff = None

    forced_uuid = "b74fe5df-e965-415e-ba65-f38421a0695d"
    mock_uuid4.return_value = forced_uuid

    def get_inputs_as_dicts(project):
        """Return the SPDX packages of the ``project`` inputs as dicts."""
        packages = output.get_inputs_as_spdx_packages(project)
        return [package.as_dict() for package in packages]

    def expected_package(filename, input_uuid, download_location="NOASSERTION"):
        """Return the expected SPDX package data for a single input."""
        return {
            "name": filename,
            "SPDXID": f"SPDXRef-scancodeio-input-{input_uuid}",
            "packageFileName": filename,
            "licenseConcluded": "NOASSERTION",
            "copyrightText": "NOASSERTION",
            "downloadLocation": download_location,
            "filesAnalyzed": True,
            "licenseDeclared": "NOASSERTION",
        }

    # 1. Input manually copied to Project's inputs.
    # The expected SPDXID uses the value returned by the mocked ``uuid4``.
    project = make_project(name="Copied")
    copied_input = project.input_path / "input_filename"
    copied_input.touch()
    expected = [expected_package("input_filename", forced_uuid)]
    self.assertEqual(expected, get_inputs_as_dicts(project))

    # 2. Input uploaded to Project's inputs
    project = make_project(name="Uploaded")
    uploaded_file = SimpleUploadedFile("filename.ext", content=b"content")
    input_source = project.add_upload(uploaded_file=uploaded_file)
    expected = [expected_package("filename.ext", input_source.uuid)]
    self.assertEqual(expected, get_inputs_as_dicts(project))

    # 3. Fetched (download_url, purl, docker, git, ...)
    project = make_project(name="Fetched")
    input_from_download_url = project.add_input_source(
        download_url="https://download.url/archive.zip",
        filename="archive.zip",
    )
    input_from_purl = project.add_input_source(
        download_url="pkg:npm/dnd-core@7.0.2",
        filename="dnd-core-7.0.2.tgz",
    )
    input_from_docker = project.add_input_source(
        download_url="docker://registry.com/debian:10.9",
        filename="debian_10.9.tar",
    )
    expected = [
        expected_package(
            "archive.zip",
            input_from_download_url.uuid,
            download_location="https://download.url/archive.zip",
        ),
        expected_package(
            "debian_10.9.tar",
            input_from_docker.uuid,
            download_location="docker://registry.com/debian:10.9",
        ),
        expected_package(
            "dnd-core-7.0.2.tgz",
            input_from_purl.uuid,
            download_location="pkg:npm/dnd-core@7.0.2",
        ),
    ]
    self.assertEqual(expected, get_inputs_as_dicts(project))
623+
523624
def test_scanpipe_pipes_outputs_make_unknown_license_object(self):
524625
licensing = get_licensing()
525626
parsed_expression = licensing.parse("some-unknown-license")

0 commit comments

Comments
 (0)