23
23
24
24
class SBOMManager :
25
25
"""
26
- SBOMManager is a class that manages the Software Bill of Materials (SBOM) data.
26
+ Class: SBOMManager
27
+
28
+ This class is responsible for parsing various SBOM file formats (SPDX, CycloneDX, SWID) in the CVE Bin Tool.
29
+
27
30
It provides methods for scanning SBOM files, parsing them, and retrieving vendor information.
31
+
32
+ Attributes:
33
+ - sbom_data (DefaultDict[ProductInfo, TriageData]): Dictionary containing parsed SBOM data.
34
+
28
35
"""
29
36
30
37
SBOMtype = ["spdx" , "cyclonedx" , "swid" ]
@@ -95,6 +102,14 @@ def common_prefix_split(self, product, version) -> list[ProductInfo]:
95
102
return parsed_data
96
103
97
104
def scan_file (self ) -> dict [ProductInfo , TriageData ]:
105
+ """
106
+ Parses the SBOM input file and returns the product information and
107
+ corresponding triage data.
108
+
109
+ Returns:
110
+ - dict[ProductInfo, TriageData]: Parsed SBOM data.
111
+
112
+ """
98
113
self .logger .debug (
99
114
f"Processing SBOM { self .filename } of type { self .type .upper ()} "
100
115
)
@@ -115,16 +130,18 @@ def scan_file(self) -> dict[ProductInfo, TriageData]:
115
130
116
131
# Now process list of modules to create [vendor, product, version] tuples
117
132
parsed_data : list [ProductInfo ] = []
118
- for m in modules :
119
- if m and m [0 ]:
120
- # Using lower to normalize product names across databases
121
- product , version = m [0 ].lower (), m [1 ]
122
- if version != "" :
123
- # Now add vendor to create product record....
124
- vendor_set = self .get_vendor (product )
125
- for vendor in vendor_set :
126
- # if vendor is not None:
127
- parsed_data .append (ProductInfo (vendor , product , version ))
133
+ for module_vendor , product , version in modules :
134
+ # Using lower to normalize product names across databases
135
+ product = product .lower ()
136
+
137
+ if module_vendor is None :
138
+ # Now add vendor to create product record....
139
+ vendor_set = self .get_vendor (product )
140
+ for vendor in vendor_set :
141
+ # if vendor is not None:
142
+ parsed_data .append (ProductInfo (vendor , product , version ))
143
+ else :
144
+ parsed_data .append (ProductInfo (module_vendor , product , version ))
128
145
129
146
for row in parsed_data :
130
147
self .sbom_data [row ]["default" ] = {
@@ -138,9 +155,22 @@ def scan_file(self) -> dict[ProductInfo, TriageData]:
138
155
return self .sbom_data
139
156
140
157
def get_vendor (self , product : str ) -> list :
158
+ """
159
+ Get the list of vendors for the product name.
160
+
161
+ There may be more than one vendor for a given product name and all
162
+ matches are returned.
163
+
164
+ Args:
165
+ - product (str): Product name.
166
+
167
+ Returns:
168
+ - list: The list of vendors for the product
169
+
170
+ """
141
171
vendorlist : list [str ] = []
142
172
vendor_package_pair = self .cvedb .get_vendor_product_pairs (product )
143
- if vendor_package_pair != [] :
173
+ if vendor_package_pair :
144
174
# To handle multiple vendors, return all combinations of product/vendor mappings
145
175
for v in vendor_package_pair :
146
176
vendor = v ["vendor" ]
@@ -149,13 +179,34 @@ def get_vendor(self, product: str) -> list:
149
179
vendorlist .append ("UNKNOWN" )
150
180
return vendorlist
151
181
152
- def is_valid_purl (self , purl_string ):
153
- """Returns true if give purl_string is a valid purl string"""
154
- purl_pattern = r"^\w+://[\w\-.]+/[\w\-.]+(?:/[\w\-.]+)*(?:\?[\w\-.]+=[\w\-.]+(?:&[\w\-.]+=[\w\-.]+)*)?$"
182
def is_valid_purl(self, purl_string: str) -> bool:
    """
    Validate the PURL string is the correct form.

    The expected shape is ``scheme:type/namespace/name@version?qualifiers#subpath``.
    The namespace segment is optional, so purls such as ``pkg:pypi/django@1.11.1``
    are accepted. A version is still required because the version is the only
    component consumed from a purl by this class.

    Args:
    - purl_string (str): Package URL string

    Returns:
    - bool: True if the purl_string parameter is a valid purl string, False otherwise.

    """
    # The namespace group is wrapped in an optional non-capturing group; every
    # other part of the pattern is unchanged, so anything accepted before is
    # still accepted.
    purl_pattern = (
        r"^(?P<scheme>.+):(?P<type>.+)/(?:(?P<namespace>.+)/)?(?P<name>.+)"
        r"@(?P<version>.+)\??(?P<qualifiers>.*)#?(?P<subpath>.*)$"
    )
    return re.match(purl_pattern, purl_string) is not None
156
195
157
- def parse_sbom (self ):
158
- """parse SBOM, using PURL identifiers preferentially if found"""
196
+ def parse_sbom (self ) -> [(str , str , str )]:
197
+ """
198
+ Parse the SBOM to extract a list of modules, including vendor, product, and version information.
199
+
200
+ The parsed product information can be retrieved from different components of the SBOM, with the following order of preference:
201
+ 1. CPE 2.3 Identifiers
202
+ 2. CPE 2.2 Identifiers
203
+ 3. Package URLs (purl)
204
+ 4. Name and Version from the SBOM (Vendor will be unspecified)
205
+
206
+ Returns:
207
+ - List[(str, str, str)]: A list of tuples, each containing vendor, product, and version information for a module.
208
+
209
+ """
159
210
160
211
# Set up SBOM parser
161
212
sbom_parser = SBOMParser (sbom_type = self .type )
@@ -173,28 +224,124 @@ def parse_sbom(self):
173
224
packages = [x for x in sbom_parser .get_sbom ()["packages" ].values ()]
174
225
LOGGER .debug (f"Parsed SBOM { self .filename } { packages } " )
175
226
for package in packages :
176
- purl_found = False
177
- # If PURL record found, use this data in preference to package data
227
+ vendor = None
228
+ package_name = None
229
+ version = None
230
+
231
+ # If Package URL or CPE record found, use this data in preference to package data
178
232
ext_ref = package .get ("externalreference" )
179
233
if ext_ref is not None :
180
- for ref in ext_ref :
181
- if ref [1 ] == "purl" :
182
- if self .is_valid_purl (ref [2 ]):
183
- # Process purl identifier
184
- purl_info = PackageURL .from_string (ref [2 ]).to_dict ()
185
- if purl_info ["name" ] and purl_info ["version" ]:
186
- modules .append (
187
- [purl_info ["name" ], purl_info ["version" ]]
188
- )
189
- purl_found = True
190
- if not purl_found :
191
- if package .get ("version" ) is not None :
192
- modules .append ([package ["name" ], package ["version" ]])
193
- else :
194
- LOGGER .debug (f"No version found in { package } " )
234
+ vendor , package_name , version = self .parse_ext_ref (ext_ref = ext_ref )
235
+
236
+ # For any data not found in CPE or the Package URL get from package data
237
+ if not vendor :
238
+ pass # Because no vendor was detected then all vendors with this named package
239
+ # will be included in the output.
240
+
241
+ if not package_name :
242
+ package_name = package ["name" ]
243
+
244
+ if (not version ) and (package .get ("version" ) is not None ):
245
+ version = package ["version" ]
246
+ else :
247
+ LOGGER .debug (f"No version found in { package } " )
248
+
249
+ if version :
250
+ # Found at least package and version, save the results
251
+ modules .append ([vendor , package_name , version ])
252
+
195
253
LOGGER .debug (f"Parsed SBOM { self .filename } { modules } " )
196
254
return modules
197
255
256
+ def parse_ext_ref (self , ext_ref ) -> (str | None , str | None , str | None ):
257
+ """
258
+ Parse external references in an SBOM to extract module information.
259
+
260
+ Two passes are made through the external references, giving priority to CPE types,
261
+ which will always match the CVE database.
262
+
263
+ Args:
264
+ - ext_ref (List[List[str]]): List of lists representing external references.
265
+ Each inner list contains [category, type, locator].
266
+
267
+ Returns:
268
+ - Optional[Tuple[str | None, str | None, str | None]]: A tuple containing the vendor, product, and version
269
+ information extracted from the external references, or None if not found.
270
+
271
+ """
272
+ decoded = {}
273
+ for ref in ext_ref :
274
+ if ref [1 ] == "cpe23Type" :
275
+ decoded ["cpe23Type" ] = self .decode_cpe23 (ref [2 ])
276
+
277
+ elif ref [1 ] == "cpe22Type" :
278
+ decoded ["cpe22Type" ] = self .decode_cpe22 (ref [2 ])
279
+
280
+ elif ref [1 ] == "purl" :
281
+ decoded ["purl" ] = self .decode_purl (ref [2 ])
282
+
283
+ # No ext-ref matches, return none
284
+ return decoded .get (
285
+ "cpe23Type" ,
286
+ decoded .get ("cpe22Type" , decoded .get ("purl" , (None , None , None ))),
287
+ )
288
+
289
def decode_cpe22(self, cpe22) -> "list[str | None]":
    """
    Decode a CPE 2.2 formatted string to extract vendor, product, and version information.

    A CPE 2.2 URI has the form ``cpe:/part:vendor:product:version:...`` and
    trailing components may be omitted. Missing or empty components are
    reported as None rather than raising IndexError.

    Args:
    - cpe22 (str): CPE 2.2 formatted string.

    Returns:
    - list[str | None]: ``[vendor, product, version]``; each entry is None when
      the corresponding field is empty or absent.

    """
    # Pad with empty fields so truncated identifiers such as
    # "cpe:/a:vendor:product" can still be indexed up to the version slot.
    fields = cpe22.split(":") + [""] * 4
    vendor, product, version = fields[2:5]
    # Return available data, convert empty fields to None
    return [vendor or None, product or None, version or None]
305
+
306
def decode_cpe23(self, cpe23) -> "list[str | None]":
    """
    Decode a CPE 2.3 formatted string to extract vendor, product, and version information.

    A CPE 2.3 formatted string has the form
    ``cpe:2.3:part:vendor:product:version:...``. A colon inside a value is
    escaped as ``\\:`` and is not treated as a field separator; the escape is
    left in the returned value. Missing or empty components are reported as
    None rather than raising IndexError.

    Args:
    - cpe23 (str): CPE 2.3 formatted string.

    Returns:
    - list[str | None]: ``[vendor, product, version]``; each entry is None when
      the corresponding field is empty or absent.

    """
    # Split on ":" only when it is not preceded by a backslash, then pad so
    # truncated identifiers can still be indexed up to the version slot.
    fields = re.split(r"(?<!\\):", cpe23) + [""] * 5
    vendor, product, version = fields[3:6]
    # Return available data, convert empty fields to None
    return [vendor or None, product or None, version or None]
322
+
323
def decode_purl(self, purl) -> "list[str | None]":
    """
    Decode a Package URL (purl) to extract version information.

    Only the version is taken from the purl: its vendor and product
    identifiers do not always align with the CVE DB, so both are always
    returned as None.

    Args:
    - purl (str): Package URL (purl) string.

    Returns:
    - list[str | None]: ``[None, None, version]``. The version is None when the
      purl fails validation, cannot be parsed, or carries no version.

    """
    version = None
    if self.is_valid_purl(purl):
        try:
            # Process purl identifier
            purl_info = PackageURL.from_string(purl).to_dict()
        except ValueError as error:
            # The validity check is a loose pattern match, so the strict
            # parser can still reject the string (e.g. wrong scheme). Treat
            # that as "no data" instead of aborting the whole SBOM parse.
            LOGGER.debug(f"Unable to parse purl {purl}: {error}")
        else:
            version = purl_info.get("version")

    # Vendor and product are deliberately left unspecified (see docstring).
    return [None, None, version or None]
344
+
198
345
199
346
if __name__ == "__main__" :
200
347
import sys
0 commit comments