Skip to content

Commit 812e8de

Browse files
authored
feat: Add ability to read CPE identifiers from CycloneDX triage data (#3990)
* feat: Prefer vendor from CPE before PURL * feat: Support CPE 2.2 strings when decoding product information * test: Improve SBOM manager test coverage * test: Improved coverage of SBOM manager * test: Enabled tests on test_bad_ext_ref_cyclonedx_file * feat: Rolled back changes to decode PURL product name * fix: Cleanup merge conflicts
1 parent 1fe4be7 commit 812e8de

14 files changed

+597
-42
lines changed

cve_bin_tool/sbom_manager/__init__.py

Lines changed: 181 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,15 @@
2323

2424
class SBOMManager:
2525
"""
26-
SBOMManager is a class that manages the Software Bill of Materials (SBOM) data.
26+
Class: SBOMManager
27+
28+
This class is responsible for parsing various SBOM file formats (SPDX, CycloneDX, SWID) in the CVE Bin Tool.
29+
2730
It provides methods for scanning SBOM files, parsing them, and retrieving vendor information.
31+
32+
Attributes:
33+
- sbom_data (DefaultDict[ProductInfo, TriageData]): Dictionary containing parsed SBOM data.
34+
2835
"""
2936

3037
SBOMtype = ["spdx", "cyclonedx", "swid"]
@@ -95,6 +102,14 @@ def common_prefix_split(self, product, version) -> list[ProductInfo]:
95102
return parsed_data
96103

97104
def scan_file(self) -> dict[ProductInfo, TriageData]:
105+
"""
106+
Parses the SBOM input file and returns the product information and
107+
corresponding triage data.
108+
109+
Returns:
110+
- dict[ProductInfo, TriageData]: Parsed SBOM data.
111+
112+
"""
98113
self.logger.debug(
99114
f"Processing SBOM {self.filename} of type {self.type.upper()}"
100115
)
@@ -115,16 +130,18 @@ def scan_file(self) -> dict[ProductInfo, TriageData]:
115130

116131
# Now process list of modules to create [vendor, product, version] tuples
117132
parsed_data: list[ProductInfo] = []
118-
for m in modules:
119-
if m and m[0]:
120-
# Using lower to normalize product names across databases
121-
product, version = m[0].lower(), m[1]
122-
if version != "":
123-
# Now add vendor to create product record....
124-
vendor_set = self.get_vendor(product)
125-
for vendor in vendor_set:
126-
# if vendor is not None:
127-
parsed_data.append(ProductInfo(vendor, product, version))
133+
for module_vendor, product, version in modules:
134+
# Using lower to normalize product names across databases
135+
product = product.lower()
136+
137+
if module_vendor is None:
138+
# Now add vendor to create product record....
139+
vendor_set = self.get_vendor(product)
140+
for vendor in vendor_set:
141+
# if vendor is not None:
142+
parsed_data.append(ProductInfo(vendor, product, version))
143+
else:
144+
parsed_data.append(ProductInfo(module_vendor, product, version))
128145

129146
for row in parsed_data:
130147
self.sbom_data[row]["default"] = {
@@ -138,9 +155,22 @@ def scan_file(self) -> dict[ProductInfo, TriageData]:
138155
return self.sbom_data
139156

140157
def get_vendor(self, product: str) -> list:
158+
"""
159+
Get the list of vendors for the product name.
160+
161+
There may be more than one vendor for a given product name and all
162+
matches are returned.
163+
164+
Args:
165+
- product (str): Product name.
166+
167+
Returns:
168+
- list: The list of vendors for the product
169+
170+
"""
141171
vendorlist: list[str] = []
142172
vendor_package_pair = self.cvedb.get_vendor_product_pairs(product)
143-
if vendor_package_pair != []:
173+
if vendor_package_pair:
144174
# To handle multiple vendors, return all combinations of product/vendor mappings
145175
for v in vendor_package_pair:
146176
vendor = v["vendor"]
@@ -149,13 +179,34 @@ def get_vendor(self, product: str) -> list:
149179
vendorlist.append("UNKNOWN")
150180
return vendorlist
151181

152-
def is_valid_purl(self, purl_string):
153-
"""Returns true if give purl_string is a valid purl string"""
154-
purl_pattern = r"^\w+://[\w\-.]+/[\w\-.]+(?:/[\w\-.]+)*(?:\?[\w\-.]+=[\w\-.]+(?:&[\w\-.]+=[\w\-.]+)*)?$"
182+
def is_valid_purl(self, purl_string: str) -> bool:
    """
    Validate the PURL string is the correct form.

    The expected shape is ``scheme:type/namespace/name@version?qualifiers#subpath``.
    The namespace segment is optional (e.g. ``pkg:pypi/django@1.11.1``), but a
    version is required because the version is the only field consumed from a purl.

    Args:
    - purl_string (str): Package URL string

    Returns:
    - bool: True if the purl_string parameter is a valid purl string, False otherwise.

    """
    # The namespace group is optional so that purls for ecosystems without a
    # namespace are accepted; every string matched by the stricter
    # "namespace required" form is still matched by this pattern.
    purl_pattern = (
        r"^(?P<scheme>.+):(?P<type>.+)/(?:(?P<namespace>.+)/)?"
        r"(?P<name>.+)@(?P<version>.+)\??(?P<qualifiers>.*)#?(?P<subpath>.*)$"
    )
    return re.match(purl_pattern, purl_string) is not None
156195

157-
def parse_sbom(self):
158-
"""parse SBOM, using PURL identifiers preferentially if found"""
196+
def parse_sbom(self) -> [(str, str, str)]:
197+
"""
198+
Parse the SBOM to extract a list of modules, including vendor, product, and version information.
199+
200+
The parsed product information can be retrieved from different components of the SBOM, with the following order of preference:
201+
1. CPE 2.3 Identifiers
202+
2. CPE 2.2 Identifiers
203+
3. Package URLs (purl)
204+
4. Name and Version from the SBOM (Vendor will be unspecified)
205+
206+
Returns:
207+
- List[(str, str, str)]: A list of tuples, each containing vendor, product, and version information for a module.
208+
209+
"""
159210

160211
# Set up SBOM parser
161212
sbom_parser = SBOMParser(sbom_type=self.type)
@@ -173,28 +224,124 @@ def parse_sbom(self):
173224
packages = [x for x in sbom_parser.get_sbom()["packages"].values()]
174225
LOGGER.debug(f"Parsed SBOM {self.filename} {packages}")
175226
for package in packages:
176-
purl_found = False
177-
# If PURL record found, use this data in preference to package data
227+
vendor = None
228+
package_name = None
229+
version = None
230+
231+
# If Package URL or CPE record found, use this data in preference to package data
178232
ext_ref = package.get("externalreference")
179233
if ext_ref is not None:
180-
for ref in ext_ref:
181-
if ref[1] == "purl":
182-
if self.is_valid_purl(ref[2]):
183-
# Process purl identifier
184-
purl_info = PackageURL.from_string(ref[2]).to_dict()
185-
if purl_info["name"] and purl_info["version"]:
186-
modules.append(
187-
[purl_info["name"], purl_info["version"]]
188-
)
189-
purl_found = True
190-
if not purl_found:
191-
if package.get("version") is not None:
192-
modules.append([package["name"], package["version"]])
193-
else:
194-
LOGGER.debug(f"No version found in {package}")
234+
vendor, package_name, version = self.parse_ext_ref(ext_ref=ext_ref)
235+
236+
# For any data not found in CPE or the Package URL get from package data
237+
if not vendor:
238+
pass # Because no vendor was detected then all vendors with this named package
239+
# will be included in the output.
240+
241+
if not package_name:
242+
package_name = package["name"]
243+
244+
if (not version) and (package.get("version") is not None):
245+
version = package["version"]
246+
else:
247+
LOGGER.debug(f"No version found in {package}")
248+
249+
if version:
250+
# Found at least package and version, save the results
251+
modules.append([vendor, package_name, version])
252+
195253
LOGGER.debug(f"Parsed SBOM {self.filename} {modules}")
196254
return modules
197255

256+
def parse_ext_ref(self, ext_ref) -> (str | None, str | None, str | None):
    """
    Parse external references in an SBOM to extract module information.

    Every reference is decoded in a single pass; the result is then chosen by
    type, in this order of preference: CPE 2.3, CPE 2.2, Package URL (purl).
    If several references share a type, the last one wins.

    Args:
    - ext_ref (List[List[str]]): List of lists representing external references.
      Each inner list contains [category, type, locator].

    Returns:
    - A three-item sequence (vendor, product, version) taken from the preferred
      reference. Individual items may be None; (None, None, None) is returned
      when no CPE or purl reference is present.

    """
    # NOTE(review): the decoders return lists while the "nothing found" default
    # is a tuple; callers should only rely on unpacking three items.
    decoded = {}
    for ref in ext_ref or ():
        # Skip malformed entries that lack the [category, type, locator] shape
        if len(ref) < 3:
            continue

        ref_type, locator = ref[1], ref[2]
        if ref_type == "cpe23Type":
            decoded[ref_type] = self.decode_cpe23(locator)
        elif ref_type == "cpe22Type":
            decoded[ref_type] = self.decode_cpe22(locator)
        elif ref_type == "purl":
            decoded[ref_type] = self.decode_purl(locator)

    # Most preferred identifier type first
    for ref_type in ("cpe23Type", "cpe22Type", "purl"):
        if ref_type in decoded:
            return decoded[ref_type]

    # No ext-ref matches, return none
    return (None, None, None)
288+
289+
def decode_cpe22(self, cpe22) -> list[str | None]:
    """
    Decode a CPE 2.2 formatted string to extract vendor, product, and version information.

    The string is split on ":"; vendor, product and version are read from
    fields 2, 3 and 4 (``cpe:/<part>:<vendor>:<product>:<version>``).

    Args:
    - cpe22 (str): CPE 2.2 formatted string.

    Returns:
    - list[str | None]: [vendor, product, version]. A field that is empty or
      missing from the string is returned as None.

    """
    fields = cpe22.split(":")
    # Pad truncated strings (e.g. "cpe:/a:gnu:ncurses") so that missing trailing
    # fields decode to None instead of raising IndexError.
    fields += [""] * (5 - len(fields))
    vendor, product, version = fields[2:5]
    # Return available data, convert empty fields to None
    return [vendor or None, product or None, version or None]
305+
306+
def decode_cpe23(self, cpe23) -> list[str | None]:
    """
    Decode a CPE 2.3 formatted string to extract vendor, product, and version information.

    The string is split on ":"; vendor, product and version are read from
    fields 3, 4 and 5 (``cpe:2.3:<part>:<vendor>:<product>:<version>:...``).

    Args:
    - cpe23 (str): CPE 2.3 formatted string.

    Returns:
    - list[str | None]: [vendor, product, version]. A field that is empty or
      missing from the string is returned as None.

    """
    fields = cpe23.split(":")
    # Pad truncated strings (e.g. "cpe:2.3:a:gnu:ncurses") so that missing
    # trailing fields decode to None instead of raising IndexError.
    fields += [""] * (6 - len(fields))
    vendor, product, version = fields[3:6]
    # NOTE(review): the "*" and "-" placeholder values are passed through
    # unchanged, as before - confirm whether they should map to None.
    # Return available data, convert empty fields to None
    return [vendor or None, product or None, version or None]
322+
323+
def decode_purl(self, purl) -> list[str | None]:
    """
    Decode a Package URL (purl) to extract version information.

    Because the vendor and product identifiers in the purl don't always align
    with the CVE DB, only the version is parsed.

    Args:
    - purl (str): Package URL (purl) string.

    Returns:
    - list[str | None]: [vendor, product, version]. Vendor and product are
      always None; version is None if the purl is invalid or carries no version.

    """
    version = None
    if self.is_valid_purl(purl):
        # Process purl identifier
        try:
            version = PackageURL.from_string(purl).to_dict().get("version")
        except ValueError:
            # The validity check above is only a loose shape test; a purl the
            # library rejects should not abort the whole SBOM parse.
            LOGGER.debug(f"Unable to decode purl {purl}")

    return [None, None, version or None]
344+
198345

199346
if __name__ == "__main__":
200347
import sys

cve_bin_tool/sbom_manager/swid_parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def extract(self, swid: str) -> list[str]:
5050
# Format of swid is "URI: <vendor>-<product>-<version>"
5151
item = swid[swid.find(":") + 1 :].split("-")
5252
# As some version numbers have leading 'v', it is removed
53-
return [item[1], item[2].upper().replace("V", "")]
53+
return [item[0].strip(" "), item[1], item[2].upper().replace("V", "")]
5454

5555

5656
if __name__ == "__main__":

test/sbom/cyclonedx_bad_cpe22.json

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
{
2+
"$schema": "http://cyclonedx.org/schema/bom-1.5.schema.json",
3+
"bomFormat": "CycloneDX",
4+
"specVersion": "1.5",
5+
"serialNumber": "urn:uuid:80c1b198-5175-4fda-86c8-1cc725b6c532",
6+
"version": 1,
7+
"metadata": {
8+
"timestamp": "2024-03-30T18:21:29Z",
9+
"tools": {
10+
"components": [
11+
{
12+
"name": "cve-bin-tool",
13+
"version": "3.3rc2",
14+
"type": "application"
15+
}
16+
]
17+
},
18+
"component": {
19+
"type": "application",
20+
"bom-ref": "CDXRef-DOCUMENT",
21+
"name": "SBOM_CVEBINTOOL-product_1-0-0-66_all-deb"
22+
}
23+
},
24+
"components": [
25+
{
26+
"type": "application",
27+
"bom-ref": "1-CVEBINTOOL-product_1-0-0-66_all-deb",
28+
"name": "CVEBINTOOL-product_1-0-0-66_all-deb"
29+
},
30+
{
31+
"type": "library",
32+
"bom-ref": "2-libjpeg",
33+
"name": "libjpeg-novendor",
34+
"version": "8b",
35+
"supplier": {
36+
"name": "ijg"
37+
},
38+
"cpe": "cpe:/a::libjpeg:8b"
39+
},
40+
{
41+
"type": "library",
42+
"bom-ref": "3-libexpat",
43+
"name": "libexpat",
44+
"version": "2.0.1",
45+
"supplier": {
46+
"name": "libexpat project"
47+
},
48+
"cpe": "cpe:/a:libexpat_project::2.0.1"
49+
},
50+
{
51+
"type": "library",
52+
"bom-ref": "4-ncurses",
53+
"name": "ncurses-noversion",
54+
"version": "5.9.noversion",
55+
"supplier": {
56+
"name": "gnu"
57+
},
58+
"cpe": "cpe:/a:gnu:ncurses:"
59+
},
60+
{
61+
"type": "library",
62+
"bom-ref": "5-zlib",
63+
"name": "zlib",
64+
"version": "1.2.3",
65+
"supplier": {
66+
"name": "zlib"
67+
},
68+
"cpe": "cpe:/a:zlib:zlib:1.2.3"
69+
}
70+
],
71+
"dependencies": [
72+
{
73+
"ref": "CDXRef-DOCUMENT",
74+
"dependsOn": [
75+
"1-CVEBINTOOL-product_1-0-0-66_all-deb"
76+
]
77+
},
78+
{
79+
"ref": "1-CVEBINTOOL-product_1-0-0-66_all-deb",
80+
"dependsOn": [
81+
"2-libjpeg",
82+
"3-libexpat",
83+
"4-ncurses",
84+
"5-zlib"
85+
]
86+
}
87+
]
88+
}

0 commit comments

Comments
 (0)