Skip to content

Commit 09ad0cc

Browse files
feat: Add XML schema validation (Fixes #1507) (#1544)
1 parent c22936a commit 09ad0cc

21 files changed

+14365
-76
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,8 @@ Usage:
109109
-V, --version show program's version number and exit
110110
--disable-version-check
111111
skips checking for a new version
112+
--disable-validation-check
113+
skips checking xml files against schema
112114
--offline operate in offline mode
113115

114116
CVE Data Download:

cve_bin_tool/checkers/xml2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def guess_xml2_version(lines):
6060
new_guess2 = match.group(1).strip()
6161
if len(new_guess2) > len(new_guess):
6262
new_guess = new_guess2
63-
# If no version guessed, set version to UNKNOWN
63+
# If no version guessed, set version to "UNKNOWN"
6464
return new_guess or "UNKNOWN"
6565

6666
def get_version(self, lines, filename):

cve_bin_tool/cli.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,11 @@ def main(argv=None):
261261
help="skips checking for a new version",
262262
default=False,
263263
)
264+
parser.add_argument(
265+
"--disable-validation-check",
266+
action="store_true",
267+
help="skips checking xml files against schema",
268+
)
264269
parser.add_argument(
265270
"--offline",
266271
action="store_true",
@@ -556,6 +561,7 @@ def main(argv=None):
556561
should_extract=args["extract"],
557562
exclude_folders=args["exclude"],
558563
error_mode=error_mode,
564+
validate=not args["disable_validation_check"],
559565
)
560566
version_scanner.remove_skiplist(skips)
561567
LOGGER.info(f"Number of checkers: {version_scanner.number_of_checkers()}")
@@ -576,7 +582,10 @@ def main(argv=None):
576582
if args["sbom_file"]:
577583
# Process SBOM file
578584
sbom_list = SBOMManager(
579-
args["sbom_file"], sbom_type=args["sbom"], logger=LOGGER
585+
args["sbom_file"],
586+
sbom_type=args["sbom"],
587+
logger=LOGGER,
588+
validate=not args["disable_validation_check"],
580589
)
581590
parsed_data = sbom_list.scan_file()
582591
LOGGER.info(

cve_bin_tool/sbom_manager/__init__.py

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Copyright (C) 2021 Anthony Harrison
22
# SPDX-License-Identifier: GPL-3.0-or-later
33

4+
import os
45
from collections import defaultdict
56
from logging import Logger
67
from typing import DefaultDict, Dict, List, Optional
@@ -24,35 +25,39 @@ class SBOMManager:
2425
sbom_data: DefaultDict[ProductInfo, TriageData]
2526

2627
def __init__(
27-
self, filename: str, sbom_type: str = "spdx", logger: Optional[Logger] = None
28+
self,
29+
filename: str,
30+
sbom_type: str = "spdx",
31+
logger: Optional[Logger] = None,
32+
validate: bool = True,
2833
):
2934
self.filename = filename
3035
self.sbom_data = defaultdict(dict)
3136
self.type = "unknown"
3237
if sbom_type in self.SBOMtype:
3338
self.type = sbom_type
3439
self.logger = logger or LOGGER.getChild(self.__class__.__name__)
40+
self.validate = validate
3541

3642
# Connect to the database
3743
self.cvedb = CVEDB(version_check=False)
3844

3945
def scan_file(self) -> Dict[ProductInfo, TriageData]:
4046
LOGGER.info(f"Processing SBOM {self.filename} of type {self.type.upper()}")
47+
modules = []
4148
try:
42-
if self.type == "spdx":
43-
spdx = SPDXParser()
44-
modules = spdx.parse(self.filename)
45-
elif self.type == "cyclonedx":
46-
cyclone = CycloneParser()
47-
modules = cyclone.parse(self.filename)
48-
elif self.type == "swid":
49-
swid = SWIDParser()
50-
modules = swid.parse(self.filename)
51-
else:
52-
modules = []
49+
if os.path.exists(self.filename):
50+
if self.type == "spdx":
51+
spdx = SPDXParser(self.validate)
52+
modules = spdx.parse(self.filename)
53+
elif self.type == "cyclonedx":
54+
cyclone = CycloneParser(self.validate)
55+
modules = cyclone.parse(self.filename)
56+
elif self.type == "swid":
57+
swid = SWIDParser(self.validate)
58+
modules = swid.parse(self.filename)
5359
except (KeyError, FileNotFoundError, ET.ParseError) as e:
5460
LOGGER.debug(e, exc_info=True)
55-
modules = []
5661

5762
LOGGER.debug(
5863
f"The number of modules identified in SBOM - {len(modules)}\n{modules}"

cve_bin_tool/sbom_manager/cyclonedx_parser.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,12 @@
66

77
import defusedxml.ElementTree as ET
88

9+
from cve_bin_tool.validator import validate_cyclonedx
10+
911

1012
class CycloneParser:
11-
def __init__(self):
12-
pass
13+
def __init__(self, validate: bool = True):
14+
self.validate = validate
1315

1416
def parse(self, sbom_file: str) -> List[List[str]]:
1517
"""parses CycloneDX BOM file extracting package name and version"""
@@ -35,15 +37,16 @@ def parse_cyclonedx_json(self, sbom_file: str) -> List[List[str]]:
3537

3638
def parse_cyclonedx_xml(self, sbom_file: str) -> List[List[str]]:
3739
"""parses CycloneDX XML BOM file extracting package name and version"""
38-
40+
modules: List[List[str]] = []
41+
if self.validate and not validate_cyclonedx(sbom_file):
42+
return modules
3943
tree = ET.parse(sbom_file)
4044
# Find root element
4145
root = tree.getroot()
4246
# Extract schema
4347
schema = root.tag[: root.tag.find("}") + 1]
4448
# schema = '{http://cyclonedx.org/schema/bom/1.3}'
45-
46-
modules: List[List[str]] = []
49+
print("Schema", schema)
4750
for components in root.findall(schema + "components"):
4851
for component in components.findall(schema + "component"):
4952
# Only if library....
@@ -58,10 +61,8 @@ def parse_cyclonedx_xml(self, sbom_file: str) -> List[List[str]]:
5861
if component_version is None:
5962
raise KeyError(f"Could not find version in {component}")
6063
version = component_version.text
61-
if version is None:
62-
raise KeyError(f"Could not find version in {component}")
63-
modules.append([package, version])
64-
64+
if version is not None:
65+
modules.append([package, version])
6566
return modules
6667

6768

cve_bin_tool/sbom_manager/spdx_parser.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,12 @@
99
import yaml
1010

1111
from cve_bin_tool.log import LOGGER
12+
from cve_bin_tool.validator import validate_spdx
1213

1314

1415
class SPDXParser:
15-
def __init__(self):
16-
pass
16+
def __init__(self, validate: bool = True):
17+
self.validate = validate
1718

1819
def parse(self, sbom_file: str) -> List[List[str]]:
1920
"""parses SPDX BOM file extracting package name and version"""
@@ -113,12 +114,15 @@ def parse_spdx_yaml(self, sbom_file: str) -> List[List[str]]:
113114
def parse_spdx_xml(self, sbom_file: str) -> List[List[str]]:
114115
"""parses SPDX XML BOM file extracting package name and version"""
115116
# XML is experimental in SPDX 2.2
117+
modules: List[List[str]] = []
118+
if self.validate and not validate_spdx(sbom_file):
119+
return modules
116120
tree = ET.parse(sbom_file)
117121
# Find root element
118122
root = tree.getroot()
119123
# Extract schema
120124
schema = root.tag[: root.tag.find("}") + 1]
121-
modules: List[List[str]] = []
125+
122126
for component in root.findall(schema + "packages"):
123127
try:
124128
package_match = component.find(schema + "name")

cve_bin_tool/sbom_manager/swid_parser.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,24 @@
55

66
import defusedxml.ElementTree as ET
77

8+
from cve_bin_tool.validator import validate_swid
9+
810

911
class SWIDParser:
10-
def __init__(self):
11-
pass
12+
def __init__(self, validate: bool = True):
13+
self.validate = validate
1214

1315
def parse(self, sbom_file: str) -> List[List[str]]:
1416
"""parses SWID XML BOM file extracting package name and version"""
15-
17+
modules: List[List[str]] = []
18+
if self.validate and not validate_swid(sbom_file):
19+
return modules
1620
tree = ET.parse(sbom_file)
1721
# Find root element
1822
root = tree.getroot()
1923
# Extract schema
2024
schema = root.tag[: root.tag.find("}") + 1]
2125
# schema = '{http://standards.iso.org/iso/19770/-2/2015/schema.xsd}'
22-
23-
modules: List[List[str]] = []
2426
for component in root.findall(schema + "Link"):
2527
# Only if a component ....
2628
if component.get("rel") == "component":

cve_bin_tool/schemas/README.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
The cyclonedx_gen.xsd is an amalgamation of cyclonedx.xsd and cyclonedx_spdx.xsd. References
2+
to the spdx namespace in cyclonedx.xsd have been changed to bom.
3+
4+
The spdx.xsd has been generated from the test XML files as there is no official XSD schema.
5+
6+
The swid_gen.xsd has been generated from the test XML files as the official XSD schema (swid.xsd)
7+
contains entities which are unsafe when parsed.
8+
9+
The pom.xsd file has been modified to ensure that all HTML tags have matching closing tags.
10+

0 commit comments

Comments
 (0)