2323
2424class SBOMManager :
2525 """
26- SBOMManager is a class that manages the Software Bill of Materials (SBOM) data.
26+ Class: SBOMManager
27+
28+ This class is responsible for parsing various SBOM file formats (SPDX, CycloneDX, SWID) in the CVE Bin Tool.
29+
2730 It provides methods for scanning SBOM files, parsing them, and retrieving vendor information.
31+
32+ Attributes:
33+ - sbom_data (DefaultDict[ProductInfo, TriageData]): Dictionary containing parsed SBOM data.
34+
2835 """
2936
3037 SBOMtype = ["spdx" , "cyclonedx" , "swid" ]
@@ -95,6 +102,14 @@ def common_prefix_split(self, product, version) -> list[ProductInfo]:
95102 return parsed_data
96103
97104 def scan_file (self ) -> dict [ProductInfo , TriageData ]:
105+ """
106+ Parses the SBOM input file and returns the product information and
107+ corresponding triage data.
108+
109+ Returns:
110+ - dict[ProductInfo, TriageData]: Parsed SBOM data.
111+
112+ """
98113 self .logger .debug (
99114 f"Processing SBOM { self .filename } of type { self .type .upper ()} "
100115 )
@@ -115,16 +130,18 @@ def scan_file(self) -> dict[ProductInfo, TriageData]:
115130
116131 # Now process list of modules to create [vendor, product, version] tuples
117132 parsed_data : list [ProductInfo ] = []
118- for m in modules :
119- if m and m [0 ]:
120- # Using lower to normalize product names across databases
121- product , version = m [0 ].lower (), m [1 ]
122- if version != "" :
123- # Now add vendor to create product record....
124- vendor_set = self .get_vendor (product )
125- for vendor in vendor_set :
126- # if vendor is not None:
127- parsed_data .append (ProductInfo (vendor , product , version ))
133+ for module_vendor , product , version in modules :
134+ # Using lower to normalize product names across databases
135+ product = product .lower ()
136+
137+ if module_vendor is None :
138+ # Now add vendor to create product record....
139+ vendor_set = self .get_vendor (product )
140+ for vendor in vendor_set :
141+ # if vendor is not None:
142+ parsed_data .append (ProductInfo (vendor , product , version ))
143+ else :
144+ parsed_data .append (ProductInfo (module_vendor , product , version ))
128145
129146 for row in parsed_data :
130147 self .sbom_data [row ]["default" ] = {
@@ -138,9 +155,22 @@ def scan_file(self) -> dict[ProductInfo, TriageData]:
138155 return self .sbom_data
139156
140157 def get_vendor (self , product : str ) -> list :
158+ """
159+ Get the list of vendors for the product name.
160+
161+ There may be more than one vendor for a given product name and all
162+ matches are returned.
163+
164+ Args:
165+ - product (str): Product name.
166+
167+ Returns:
168+ - list: The list of vendors for the product
169+
170+ """
141171 vendorlist : list [str ] = []
142172 vendor_package_pair = self .cvedb .get_vendor_product_pairs (product )
143- if vendor_package_pair != [] :
173+ if vendor_package_pair :
144174 # To handle multiple vendors, return all combinations of product/vendor mappings
145175 for v in vendor_package_pair :
146176 vendor = v ["vendor" ]
@@ -149,13 +179,34 @@ def get_vendor(self, product: str) -> list:
149179 vendorlist .append ("UNKNOWN" )
150180 return vendorlist
151181
152- def is_valid_purl (self , purl_string ):
153- """Returns true if give purl_string is a valid purl string"""
154- purl_pattern = r"^\w+://[\w\-.]+/[\w\-.]+(?:/[\w\-.]+)*(?:\?[\w\-.]+=[\w\-.]+(?:&[\w\-.]+=[\w\-.]+)*)?$"
def is_valid_purl(self, purl_string: str) -> bool:
    """
    Validate that the PURL string has the form expected by the SBOM parser.

    The accepted shape is ``scheme:type/namespace/name@version`` optionally
    followed by ``?qualifiers`` and ``#subpath``. The namespace segment is
    optional, so ``pkg:pypi/django@1.11.1`` is accepted; a version is
    always required.

    Args:
    - purl_string (str): Package URL string

    Returns:
    - bool: True if the purl_string parameter is a valid purl string, False otherwise.

    """
    # Anything that is not a string cannot be a purl; report it as invalid
    # instead of letting re.match raise TypeError.
    if not isinstance(purl_string, str):
        return False

    # The namespace group is optional: the original pattern demanded two
    # "/"-separated segments before the name and so rejected every
    # namespace-less purl. Every string the old pattern matched still
    # matches this one.
    purl_pattern = (
        r"^(?P<scheme>.+):(?P<type>.+)/(?:(?P<namespace>.+)/)?"
        r"(?P<name>.+)@(?P<version>.+)\??(?P<qualifiers>.*)#?(?P<subpath>.*)$"
    )
    return re.match(purl_pattern, purl_string) is not None
156195
157- def parse_sbom (self ):
158- """parse SBOM, using PURL identifiers preferentially if found"""
196+ def parse_sbom (self ) -> [(str , str , str )]:
197+ """
198+ Parse the SBOM to extract a list of modules, including vendor, product, and version information.
199+
200+ The parsed product information can be retrieved from different components of the SBOM, with the following order of preference:
201+ 1. CPE 2.3 Identifiers
202+ 2. CPE 2.2 Identifiers
203+ 3. Package URLs (purl)
204+ 4. Name and Version from the SBOM (Vendor will be unspecified)
205+
206+ Returns:
207+ - List[(str, str, str)]: A list of tuples, each containing vendor, product, and version information for a module.
208+
209+ """
159210
160211 # Set up SBOM parser
161212 sbom_parser = SBOMParser (sbom_type = self .type )
@@ -173,28 +224,124 @@ def parse_sbom(self):
173224 packages = [x for x in sbom_parser .get_sbom ()["packages" ].values ()]
174225 LOGGER .debug (f"Parsed SBOM { self .filename } { packages } " )
175226 for package in packages :
176- purl_found = False
177- # If PURL record found, use this data in preference to package data
227+ vendor = None
228+ package_name = None
229+ version = None
230+
231+ # If Package URL or CPE record found, use this data in preference to package data
178232 ext_ref = package .get ("externalreference" )
179233 if ext_ref is not None :
180- for ref in ext_ref :
181- if ref [1 ] == "purl" :
182- if self .is_valid_purl (ref [2 ]):
183- # Process purl identifier
184- purl_info = PackageURL .from_string (ref [2 ]).to_dict ()
185- if purl_info ["name" ] and purl_info ["version" ]:
186- modules .append (
187- [purl_info ["name" ], purl_info ["version" ]]
188- )
189- purl_found = True
190- if not purl_found :
191- if package .get ("version" ) is not None :
192- modules .append ([package ["name" ], package ["version" ]])
193- else :
194- LOGGER .debug (f"No version found in { package } " )
234+ vendor , package_name , version = self .parse_ext_ref (ext_ref = ext_ref )
235+
236+ # For any data not found in CPE or the Package URL get from package data
237+ if not vendor :
238+ pass # Because no vendor was detected then all vendors with this named package
239+ # will be included in the output.
240+
241+ if not package_name :
242+ package_name = package ["name" ]
243+
244+ if (not version ) and (package .get ("version" ) is not None ):
245+ version = package ["version" ]
246+ else :
247+ LOGGER .debug (f"No version found in { package } " )
248+
249+ if version :
250+ # Found at least package and version, save the results
251+ modules .append ([vendor , package_name , version ])
252+
195253 LOGGER .debug (f"Parsed SBOM { self .filename } { modules } " )
196254 return modules
197255
256+ def parse_ext_ref (self , ext_ref ) -> (str | None , str | None , str | None ):
257+ """
258+ Parse external references in an SBOM to extract module information.
259+
260+ Two passes are made through the external references, giving priority to CPE types,
261+ which will always match the CVE database.
262+
263+ Args:
264+ - ext_ref (List[List[str]]): List of lists representing external references.
265+ Each inner list contains [category, type, locator].
266+
267+ Returns:
268+ - Optional[Tuple[str | None, str | None, str | None]]: A tuple containing the vendor, product, and version
269+ information extracted from the external references, or None if not found.
270+
271+ """
272+ decoded = {}
273+ for ref in ext_ref :
274+ if ref [1 ] == "cpe23Type" :
275+ decoded ["cpe23Type" ] = self .decode_cpe23 (ref [2 ])
276+
277+ elif ref [1 ] == "cpe22Type" :
278+ decoded ["cpe22Type" ] = self .decode_cpe22 (ref [2 ])
279+
280+ elif ref [1 ] == "purl" :
281+ decoded ["purl" ] = self .decode_purl (ref [2 ])
282+
283+ # No ext-ref matches, return none
284+ return decoded .get (
285+ "cpe23Type" ,
286+ decoded .get ("cpe22Type" , decoded .get ("purl" , (None , None , None ))),
287+ )
288+
289+ def decode_cpe22 (self , cpe22 ) -> (str | None , str | None , str | None ):
290+ """
291+ Decode a CPE 2.2 formatted string to extract vendor, product, and version information.
292+
293+ Args:
294+ - cpe22 (str): CPE 2.2 formatted string.
295+
296+ Returns:
297+ - Tuple[str | None, str | None, str | None]: A tuple containing the vendor, product, and version
298+ information extracted from the CPE 2.2 string, or None if the information is incomplete.
299+
300+ """
301+ cpe = cpe22 .split (":" )
302+ vendor , product , version = cpe [2 ], cpe [3 ], cpe [4 ]
303+ # Return available data, convert empty fields to None
304+ return [vendor or None , product or None , version or None ]
305+
306+ def decode_cpe23 (self , cpe23 ) -> (str | None , str | None , str | None ):
307+ """
308+ Decode a CPE 2.3 formatted string to extract vendor, product, and version information.
309+
310+ Args:
311+ - cpe23 (str): CPE 2.3 formatted string.
312+
313+ Returns:
314+ - Tuple[str | None, str | None, str | None]: A tuple containing the vendor, product, and version
315+ information extracted from the CPE 2.3 string, or None if the information is incomplete.
316+
317+ """
318+ cpe = cpe23 .split (":" )
319+ vendor , product , version = cpe [3 ], cpe [4 ], cpe [5 ]
320+ # Return available data, convert empty fields to None
321+ return [vendor or None , product or None , version or None ]
322+
def decode_purl(self, purl) -> list[str | None]:
    """
    Decode a Package URL (purl) to extract version information.

    Only the version is taken from the purl: the vendor and product
    identifiers in a purl don't always align with the CVE DB, so both are
    always reported as None.

    Args:
    - purl (str): Package URL (purl) string.

    Returns:
    - list[str | None]: ``[None, None, version]``; version is None when the
      purl fails validation, cannot be parsed, or carries no version.

    """
    version = None
    if self.is_valid_purl(purl):
        # is_valid_purl is only a loose shape check, so the real parser can
        # still reject the string (for example a scheme other than "pkg").
        # Treat that as "no version available" rather than aborting the
        # whole SBOM parse.
        try:
            purl_info = PackageURL.from_string(purl).to_dict()
        except ValueError as error:
            LOGGER.debug(f"Unable to parse purl {purl}: {error}")
        else:
            version = purl_info.get("version")

    return [None, None, version or None]
344+
198345
199346if __name__ == "__main__" :
200347 import sys
0 commit comments