Skip to content

Commit 31ba3be

Browse files
committed
Add support for SPDX as YAML in load_sbom pipeline
Signed-off-by: tdruez <[email protected]>
1 parent 45ef995 commit 31ba3be

File tree

6 files changed

+98
-6
lines changed

6 files changed

+98
-6
lines changed

CHANGELOG.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ v35.4.0 (unreleased)
3838
- Add ORT ``package-list.yml`` as new downloadable output format.
3939
https://github.com/aboutcode-org/scancode.io/pull/1852
4040

41+
- Add support for SPDX as YAML in ``load_sbom`` pipeline.
42+
4143
v35.3.0 (2025-08-20)
4244
--------------------
4345

scanpipe/pipes/resolve.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from django.core.exceptions import ObjectDoesNotExist
3131

3232
import python_inspector.api as python_inspector
33+
import saneyaml
3334
from attributecode.model import About
3435
from packagedcode import APPLICATION_PACKAGE_DATAFILE_HANDLERS
3536
from packagedcode.licensing import get_license_detections_and_expression
@@ -378,7 +379,11 @@ def spdx_relationship_to_dependency_data(spdx_relationship):
378379
def get_spdx_document_from_file(input_location):
379380
"""Return the loaded SPDX document from the `input_location` file."""
380381
input_path = Path(input_location)
381-
spdx_document = json.loads(input_path.read_text())
382+
383+
if str(input_path).endswith((".yml", ".yaml")):
384+
spdx_document = saneyaml.load(input_path.read_text())
385+
else:
386+
spdx_document = json.loads(input_path.read_text())
382387

383388
try:
384389
spdx.validate_document(spdx_document)
@@ -425,13 +430,13 @@ def get_default_package_type(input_location):
425430
if handler.is_datafile(input_location):
426431
return handler.default_package_type
427432

428-
if input_location.endswith((".spdx", ".spdx.json")):
433+
if input_location.endswith((".spdx", ".spdx.json", ".spdx.yml")):
429434
return "spdx"
430435

431436
if input_location.endswith(("bom.json", ".cdx.json", "bom.xml", ".cdx.xml")):
432437
return "cyclonedx"
433438

434-
if input_location.endswith((".json", ".xml")):
439+
if input_location.endswith((".json", ".xml", ".yml", ".yaml")):
435440
if cyclonedx.is_cyclonedx_bom(input_location):
436441
return "cyclonedx"
437442
if spdx.is_spdx_document(input_location):

scanpipe/pipes/spdx.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
from datetime import timezone
3030
from pathlib import Path
3131

32+
import saneyaml
33+
3234
SPDX_SPEC_VERSION = "2.3"
3335
SPDX_LICENSE_LIST_VERSION = "3.20"
3436
SPDX_SCHEMA_NAME = "spdx-schema-2.3.json"
@@ -653,8 +655,15 @@ def validate_document(document, schema=SPDX_SCHEMA_PATH):
653655

654656
def is_spdx_document(input_location):
    """
    Return True if the file at `input_location` is an SPDX document.

    The file is parsed as JSON when its name ends with ``.json`` and as YAML
    when it ends with ``.yml`` or ``.yaml``; files with any other extension
    are not read at all. A document is recognized by a non-empty top-level
    ``SPDXID`` entry. Unreadable or unparsable files yield False instead of
    raising.
    """
    input_location = str(input_location)

    if input_location.endswith(".json"):
        loader = json.loads
    elif input_location.endswith((".yml", ".yaml")):
        loader = saneyaml.load
    else:
        return False

    data = None
    # Detection is best-effort: a missing file, a decoding error, or a parser
    # failure only means "not an SPDX document", so nothing is propagated.
    with suppress(Exception):
        data = loader(Path(input_location).read_text())

    # A valid JSON/YAML payload may be a list or a scalar, which has no `.get`;
    # only a mapping can carry the top-level SPDXID marker.
    return isinstance(data, dict) and bool(data.get("SPDXID"))
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
SPDXID: "SPDXRef-DOCUMENT"
2+
spdxVersion: "SPDX-2.2"
3+
creationInfo:
4+
created: "2020-07-23T18:30:22Z"
5+
creators:
6+
- "Organization: Example Inc."
7+
- "Person: Thomas Steenbergen"
8+
licenseListVersion: "3.9"
9+
name: "curl-7.70.0"
10+
dataLicense: "CC0-1.0"
11+
documentNamespace: "http://spdx.org/spdxdocs/spdx-document-curl"
12+
documentDescribes:
13+
- "SPDXRef-Package-curl"
14+
packages:
15+
- SPDXID: "SPDXRef-Package-curl"
16+
description: "A command line tool and library for transferring data with URL syntax, supporting \
17+
HTTP, HTTPS, FTP, FTPS, GOPHER, TFTP, SCP, SFTP, SMB, TELNET, DICT, LDAP, LDAPS, MQTT, FILE, \
18+
IMAP, SMTP, POP3, RTSP and RTMP. libcurl offers a myriad of powerful features."
19+
copyrightText: "Copyright (c) 1996 - 2020, Daniel Stenberg, <[email protected]>, and many
20+
contributors, see the THANKS file."
21+
downloadLocation: "https://github.com/curl/curl/releases/download/curl-7_70_0/curl-7.70.0.tar.gz"
22+
externalRefs:
23+
- referenceCategory: "SECURITY"
24+
referenceLocator: "cpe:2.3:a:http:curl:7.70.0:*:*:*:*:*:*:*"
25+
referenceType: "cpe23Type"
26+
filesAnalyzed: false
27+
homepage: "https://curl.haxx.se/"
28+
licenseConcluded: "NOASSERTION"
29+
licenseDeclared: "curl"
30+
name: "curl"
31+
versionInfo: "7.70.0"
32+
originator: "Person: Daniel Stenberg ([email protected])"
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
SPDXID: "SPDXRef-DOCUMENT"
2+
spdxVersion: "SPDX-2.2"
3+
creationInfo:
4+
created: "2020-07-23T18:30:22Z"
5+
creators:
6+
- "Organization: Example Inc."
7+
- "Person: Thomas Steenbergen"
8+
licenseListVersion: "3.9"
9+
name: "curl-7.70.0"
10+
dataLicense: "CC0-1.0"
11+
documentNamespace: "http://spdx.org/spdxdocs/spdx-document-curl"
12+
documentDescribes:
13+
- "SPDXRef-Package-curl"
14+
packages:
15+
- SPDXID: "SPDXRef-Package-curl"
16+
description: "A command line tool and library for transferring data with URL syntax, supporting \
17+
HTTP, HTTPS, FTP, FTPS, GOPHER, TFTP, SCP, SFTP, SMB, TELNET, DICT, LDAP, LDAPS, MQTT, FILE, \
18+
IMAP, SMTP, POP3, RTSP and RTMP. libcurl offers a myriad of powerful features."
19+
copyrightText: "Copyright (c) 1996 - 2020, Daniel Stenberg, <[email protected]>, and many
20+
contributors, see the THANKS file."
21+
downloadLocation: "https://github.com/curl/curl/releases/download/curl-7_70_0/curl-7.70.0.tar.gz"
22+
externalRefs:
23+
- referenceCategory: "SECURITY"
24+
referenceLocator: "cpe:2.3:a:http:curl:7.70.0:*:*:*:*:*:*:*"
25+
referenceType: "cpe23Type"
26+
filesAnalyzed: false
27+
homepage: "https://curl.haxx.se/"
28+
licenseConcluded: "NOASSERTION"
29+
licenseDeclared: "curl"
30+
name: "curl"
31+
versionInfo: "7.70.0"
32+
originator: "Person: Daniel Stenberg ([email protected])"

scanpipe/tests/pipes/test_resolve.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,15 @@ def test_scanpipe_pipes_resolve_get_default_package_type(self):
5050
input_location = self.manifest_location / "toml.spdx.json"
5151
self.assertEqual("spdx", resolve.get_default_package_type(input_location))
5252

53+
input_location = self.manifest_location / "curl-7.70.0-v2.2.spdx.yml"
54+
self.assertEqual("spdx", resolve.get_default_package_type(input_location))
55+
5356
input_location = self.manifest_location / "toml.json"
5457
self.assertEqual("spdx", resolve.get_default_package_type(input_location))
5558

59+
input_location = self.manifest_location / "curl-7.70.0.yaml"
60+
self.assertEqual("spdx", resolve.get_default_package_type(input_location))
61+
5662
input_location = self.data / "cyclonedx/nested.cdx.json"
5763
self.assertEqual("cyclonedx", resolve.get_default_package_type(input_location))
5864

@@ -181,6 +187,12 @@ def test_scanpipe_pipes_resolve_get_spdx_document_from_file(self):
181187
self.assertEqual("SPDXRef-DOCUMENT", spdx_document["SPDXID"])
182188
self.assertEqual("SPDX-2.3", spdx_document["spdxVersion"])
183189

190+
input_location = self.data / "manifests" / "curl-7.70.0-v2.2.spdx.yml"
191+
spdx_document = resolve.get_spdx_document_from_file(input_location)
192+
self.assertIsInstance(spdx_document, dict)
193+
self.assertEqual("SPDXRef-DOCUMENT", spdx_document["SPDXID"])
194+
self.assertEqual("SPDX-2.2", spdx_document["spdxVersion"])
195+
184196
def test_scanpipe_pipes_resolve_spdx_package_to_package_data(self):
185197
p1 = Project.objects.create(name="Analysis")
186198
package = pipes.update_or_create_package(p1, package_data1)

0 commit comments

Comments
 (0)