9292from spdx_tools .spdx .parser .error import SPDXParsingError
9393from spdx_tools .spdx .parser .parse_anything import parse_file
9494
95+
# Parse an SPDX file into a Document.
#
# Inputs:
#   file - SPDX file to parse; handed straight to parse_file()
#
# Returns the parsed SPDX Document object on success.
# On SPDXParsingError the failure is logged (with traceback) and the process
# exits with status 1.
def spdx_parse(file):
    print("Parsing SPDX file...")
    began = time.process_time()
    try:
        parsed = parse_file(file)
    except SPDXParsingError:
        logging.exception("Failed to parse spdx file")
        sys.exit(1)
    else:
        # Only reached when parsing succeeded; report the process time spent
        print(f"SPDX parsing took {time.process_time() - began} seconds")
        return parsed
107+
# Run full SPDX validation on an already-parsed document.
# Nothing is returned and nothing is treated as fatal here: every validation
# message is emitted as a logging warning.
def spdx_validate(document):
    print("Validating SPDX file...")
    began = time.process_time()
    findings = validate_full_spdx_document(document)
    print(f"SPDX validation took {time.process_time() - began} seconds")

    # TODO is there a way to distinguish between something fatal and something
    # BD can deal with?
    # TODO - this can take forever, so add an optional --skip-validation flag
    #
    # Messages are only logged instead of causing an exit. The plan is to exit
    # later if importing the file to BD fails; the sample data produces lots
    # of validation errors.
    for finding in findings:
        logging.warning(finding.validation_message)
123+
# Look up a package URL (pURL) in the KB.
#
# Inputs:
#   matchname - component name as given in the SPDX file
#   extref    - pURL, sent as the 'packageUrl' search parameter
#
# Returns a (purlmatch, name) tuple:
#   purlmatch - True when the KB search returned a result, else False
#   name      - the KB 'componentName' on a match, otherwise matchname unchanged.
#               The name is forced to the one known to BD so the BOM can be
#               searched more accurately later.
#
# NOTE: uses the module-level `bd` client rather than taking it as an argument.
def find_comp_in_kb(matchname, extref):
    params = {'packageUrl': extref}
    # TODO any other action to take here?
    # We should probably track KB matches?
    for result in bd.get_items("/api/search/purl-components", params=params):
        # The first hit is treated as authoritative: return right away instead
        # of paging through further results, so a later result can never
        # override the name chosen here.
        kb_name = result['componentName']
        if matchname != kb_name:
            # Override the spdx name with the known KB name
            # (is version mangling possible??)
            print(f"updating {matchname} -> {kb_name}")
        return (True, kb_name)

    # The search produced nothing: keep the SPDX name
    return (False, matchname)
147+
95148# Locate component name + version in BOM
96149# Returns True on success, False on failure
97150def find_comp_in_bom (bd , compname , compver , projver ):
@@ -109,17 +162,89 @@ def find_comp_in_bom(bd, compname, compver, projver):
109162 for comp in comps :
110163 if comp ['componentName' ] != compname :
111164 # The BD API search is inexact. Force our match to be precise.
112- print (f"fuzzy match failed us: { comp ['componentName' ]} vs { compname } " )
165+ # print(f"fuzzy match failed us: {comp['componentName']} vs {compname}")
113166 continue
114167 # Check component name + version name
115168 if comp ['componentVersionName' ] == compver :
116169 return True
117170 return False
118171
119- logging .basicConfig (
120- level = logging .INFO ,
121- format = "[%(asctime)s] {%(module)s:%(lineno)d} %(levelname)s - %(message)s"
122- )
172+
# Search the components resource for a custom component by name and version.
# The API search is not exact, so names are compared again here.
#
# Returns a tuple:
#   CompMatch - Component object whose name matched exactly, None for no match
#   FoundVer  - Boolean: True if that component also has the requested version
#
# NOTE: uses the module-level `bd` client.
def find_cust_comp(cust_comp_name, cust_comp_version):
    # Relies on internal header
    accept = {'Accept': 'application/vnd.blackducksoftware.internal-1+json'}
    query = {'q': [f"name:{cust_comp_name}"]}

    found = None
    for candidate in bd.get_resource('components', params=query, headers=accept):
        print(f"{candidate['name']}")
        if cust_comp_name == candidate['name']:
            found = candidate
            # Stop as soon as one version of this component matches
            versions = bd.get_resource('versions', candidate)
            if any(cust_comp_version == ver['versionName'] for ver in versions):
                # Successfully matched both name and version
                return (found, True)
        # Anything else is skipped: matching stays precise despite the API

    # Either no component matched, or a name matched without the version
    return (found, False)
199+
200+
# Resolve a license name to its URL (the '_meta' 'href' of the license).
# Only an exact name match is accepted, to guard against a situation like
# "NOASSERTION" & "NOASSERTION2".
# Logs an error and exits with status 1 when no such license is found; the
# license is assumed to exist - TODO could probably just create this?
#
# NOTE: uses the module-level `bd` client.
def get_license_url(license_name):
    query = {'q': [f"name:{license_name}"]}
    candidates = bd.get_items("/api/licenses", params=query)
    exact = next(
        (lic for lic in candidates if lic['name'] == license_name),
        None,
    )
    if exact is None:
        logging.error(f"Failed to find license {license_name}")
        sys.exit(1)
    return exact['_meta']['href']
214+
# Create a custom component together with one version.
#
# Inputs:
#   name     - component name
#   version  - version name for the component's version
#   license  - license name; resolved to a URL via get_license_url(), which
#              exits the process if the license cannot be found
#   approval - value sent as the component's 'approvalStatus'
#
# Returns nothing. A failed request is logged but does not stop the run.
#
# NOTE: uses the module-level `bd` client.
def create_cust_comp(name, version, license, approval):
    print(f"Adding custom component: {name} {version}")
    license_url = get_license_url(license)
    data = {
        'name': name,
        'version': {
            'versionName': version,
            'license': {
                'license': license_url
            },
        },
        'approvalStatus': approval
    }
    response = bd.session.post("api/components", json=data)
    pprint(response)

    # NOTE(review): assumes bd.session.post returns a requests-style Response
    # (status_code / ok) - confirm against the client library.
    if response.status_code == 412:
        # Looks like a 412 is returned if the component already existed
        logging.warning("Custom component %s %s was not created (HTTP 412)",
                        name, version)
    elif not response.ok:
        logging.error("Failed to create custom component %s %s: HTTP %s",
                      name, version, response.status_code)
233+
# Create a version for a custom component that already exists.
#
# Inputs:
#   comp    - the component object from a previous lookup; its 'name' and
#             '_meta' 'href' are used
#   version - version name to create
#   license - license name; resolved to a URL via get_license_url(), which
#             exits the process if the license cannot be found
#
# Returns nothing. A failed request is logged but does not stop the run.
#
# NOTE: uses the module-level `bd` client.
def create_cust_comp_ver(comp, version, license):
    print(f"Adding version {version} to custom component {comp['name']}")
    license_url = get_license_url(license)
    data = {
        'versionName': version,
        'license': {
            'license': license_url
        },
    }
    # Versions are posted under the component's own URL
    response = bd.session.post(comp['_meta']['href'] + "/versions", json=data)
    pprint(response)

    # NOTE(review): assumes bd.session.post returns a requests-style Response
    # (status_code / ok) - confirm against the client library.
    if not response.ok:
        logging.error("Failed to add version %s to custom component %s: HTTP %s",
                      version, comp['name'], response.status_code)
123248
124249parser = argparse .ArgumentParser (description = "Parse SPDX file and verify if component names are in current SBOM for given project-version" )
125250parser .add_argument ("--base-url" , required = True , help = "Hub server URL e.g. https://your.blackduck.url" )
@@ -129,38 +254,25 @@ def find_comp_in_bom(bd, compname, compver, projver):
129254parser .add_argument ("--project" , dest = 'project_name' , required = True , help = "Project that contains the BOM components" )
130255parser .add_argument ("--version" , dest = 'version_name' , required = True , help = "Version that contains the BOM components" )
131256parser .add_argument ("--no-verify" , dest = 'verify' , action = 'store_false' , help = "Disable TLS certificate verification" )
257+ parser .add_argument ("--no-spdx-validate" , dest = 'spdx_validate' , action = 'store_false' , help = "Disable SPDX validation" )
132258args = parser .parse_args ()
133259
134- # Parse SPDX file. This can take a very long time, so do this first.
135- # Returns a Document object on success, otherwise raises an SPDXParsingError
136- try :
137- print ("Reading SPDX file..." )
138- start = time .process_time ()
139- document : Document = parse_file (args .spdx_file )
140- print (f"SPDX parsing took { time .process_time () - start } seconds" )
141- except SPDXParsingError :
142- logging .exception ("Failed to parse spdx file" )
143- sys .exit (1 )
144-
145- # TODO also validate the file, which is an extra step once you have a document?
146- print ("Validating SPDX file..." )
147- start = time .process_time ()
148- validation_messages = validate_full_spdx_document (document )
149- print (f"SPDX validation took { time .process_time () - start } seconds" )
260+ logging .basicConfig (
261+ level = logging .INFO ,
262+ format = "[%(asctime)s] {%(module)s:%(lineno)d} %(levelname)s - %(message)s"
263+ )
150264
151- # TODO is there a way to distinguish between something fatal and something
152- # BD can deal with?
153- # I guess we can just print all the msgs and then also exit when the import fails..
154- for validation_message in validation_messages :
155- logging .warning (validation_message .validation_message )
265+ document = spdx_parse (args .spdx_file )
266+ if (args .spdx_validate ):
267+ spdx_validate (document )
156268
157269with open (args .token_file , 'r' ) as tf :
158270 access_token = tf .readline ().strip ()
159271
160272bd = Client (base_url = args .base_url , token = access_token , verify = args .verify )
161273
162274# Open unmatched component file
163- # Will save name, spdxid, version, and origin/purl (if available) like so :
275+ # Will save name, spdxid, version, and origin/purl for later in json format :
164276# "name": "react-bootstrap",
165277# "spdx_id": "SPDXRef-Pkg-react-bootstrap-2.1.2-30223",
166278# "version": "2.1.2",
@@ -195,12 +307,6 @@ def find_comp_in_bom(bd, compname, compver, projver):
195307
196308logging .debug (f"Found { project ['name' ]} :{ version ['versionName' ]} " )
197309
198- # Can now access attributes from the parsed document
199- # Note: The SPDX module renames tags slightly from the original json format.
200-
201- matches = 0
202- nopurl = 0
203- nomatch = 0
204310
205311# situations to consider + actions
206312# 1) No purl available : check SBOM for comp+ver, then add cust comp + add to SBOM
@@ -212,41 +318,33 @@ def find_comp_in_bom(bd, compname, compver, projver):
212318# - In SBOM? (maybe already added or whatever?) -> done
213319# - Else -> add cust comp + add to SBOM (same as 1)
214320
215- # Walk through each component in the SPDX file
321+ # Stats to track
322+ bom_matches = 0
323+ kb_matches = 0
324+ nopurl = 0
325+ nomatch = 0
216326package_count = 0
327+ cust_comp_count = 0
328+ cust_ver_count = 0
329+ # Saving all encountered components by their name+version (watching for repeats)
217330packages = {}
331+
332+ # Walk through each component in the SPDX file
218333for package in document .packages :
219334 package_count += 1
220- # spdx-tools module says only name, spdx_id, download_location are required
221335 # We hope we'll have an external reference (pURL), but we might not.
222336 extref = None
223337 purlmatch = False
224338 matchname = package .name
225339 matchver = package .version
226- packages [ package . name + package . version ] = packages . get ( package . name + package . version , 0 ) + 1
227- #blah['zzz' ] = blah .get('zzz' , 0) + 1
340+ # Tracking unique package name + version from spdx file
341+ packages [ matchname + matchver ] = packages .get (matchname + matchver , 0 ) + 1
228342
229- # NOTE: BD can mangle the original component name
343+ # NOTE: BD can change the original component name
230344 # EX: "React" -> "React from Facebook"
231345 if package .external_references :
232- extref = package .external_references [0 ].locator
233-
234- # KB lookup to check for pURL match
235- params = {
236- 'packageUrl' : extref
237- }
238- for result in bd .get_items ("/api/search/purl-components" , params = params ):
239- # do we need to worry about more than 1 match?
240- print (f"Found KB match for { extref } " )
241- purlmatch = True
242- #pprint(result)
243- # in this event, override the spdx name and use the known KB name
244- # (any concern for version mangling??)
245- if matchname != result ['componentName' ]:
246- print (f"updating { matchname } -> { result ['componentName' ]} " )
247- matchname = result ['componentName' ]
248- # Any match means we should already have it
249- # But we will also check to see if the comp is in the BOM i guess
346+ inkb , matchname = find_comp_in_kb (matchname , package .external_references [0 ].locator )
347+ if inkb : kb_matches += 1
250348 else :
251349 nopurl += 1
252350 print ("No pURL found for component: " )
@@ -255,22 +353,38 @@ def find_comp_in_bom(bd, compname, compver, projver):
255353 print (" " + package .version )
256354
257355 if find_comp_in_bom (bd , matchname , matchver , version ):
258- matches += 1
259- print (" Found comp match in BOM: " + matchname + matchver )
356+ bom_matches += 1
357+ # print(" Found comp match in BOM: " + matchname + matchver)
260358 else :
261- # TODO:
262- # 1) check if in custom component list (system-wide)
263- # 2) add if not there
264- # 3) add to project BOM
265359 nomatch += 1
266- print (" Need to add custom comp: " + package .name )
267360 comp_data = {
268361 "name" : package .name ,
269362 "spdx_id" : package .spdx_id ,
270363 "version" : package .version ,
271364 "origin" : extref
272365 }
273366 comps_out .append (comp_data )
367+
368+ # Check if custom component already exists
369+ comp_match , found_ver = find_cust_comp (package .name , package .version )
370+
371+ # TODO make these optional args with defaults
372+ license = "NOASSERTION"
373+ approval = "UNREVIEWED"
374+ if not comp_match :
375+ cust_comp_count += 1
376+ create_cust_comp (package .name , package .version , license , approval )
377+ elif comp_match and not found_ver :
378+ cust_ver_count += 1
379+ print ("Adding custom component version..." )
380+ create_cust_comp_ver (comp_match , package .version , license )
381+ else :
382+ # nothing to do?
383+ print ("probably found name and ver" )
384+
385+ # TODO write sbom add code
386+ #add_to_sbom()
387+
274388
275389# Save unmatched components
276390json .dump (comps_out , outfile )
@@ -280,52 +394,11 @@ def find_comp_in_bom(bd, compname, compver, projver):
280394print ("------" )
281395print (f" SPDX packages processed: { package_count } " )
282396print (f" Non matches: { nomatch } " )
283- print (f" Matches: { matches } " )
397+ print (f" KB matches: { kb_matches } " )
398+ print (f" BOM matches: { bom_matches } " )
284399print (f" Packages missing purl: { nopurl } " )
400+ print (f" Custom components created: { cust_comp_count } " )
401+ print (f" Custom component versions created: { cust_ver_count } " )
285402
286- pprint (packages )
403+ # pprint(packages)
287404print (f" { len (packages )} unique packages processed" )
288- # Parsed SPDX package data looks like
289- # Package(spdx_id='SPDXRef-Pkg-micromatch-4.0.2-30343',
290- # name='micromatch',
291- # download_location=NOASSERTION,
292- # version='4.0.2',
293- # file_name=None,
294- # supplier=None,
295- # originator=None,
296- # files_analyzed=True,
297- # verification_code=PackageVerificationCode(value='600ce1a1b891b48a20a3d395e4714f854dc6ced4',
298- # excluded_files=[]),
299- # checksums=[],
300- # homepage='https://www.npmjs.com/package/micromatch',
301- # source_info=None,
302- # license_concluded=LicenseSymbol('MIT',
303- # is_exception=False),
304- # license_info_from_files=[LicenseSymbol('Apache-2.0',
305- # is_exception=False),
306- # LicenseSymbol('BSD-2-Clause',
307- # is_exception=False),
308- # LicenseSymbol('ISC',
309- # is_exception=False),
310- # LicenseSymbol('JSON',
311- # is_exception=False),
312- # LicenseSymbol('LicenseRef-Historical-Permission-Notice-and-Disclaimer---sell-variant',
313- # is_exception=False),
314- # LicenseSymbol('LicenseRef-MIT-Open-Group-variant',
315- # is_exception=False)],
316- # license_declared=LicenseSymbol('MIT',
317- # is_exception=False),
318- # license_comment=None,
319- # copyright_text=NOASSERTION,
320- # summary=None,
321- # description=None,
322- # comment=None,
323- # external_references=[ExternalPackageRef(category=<ExternalPackageRefCategory.PACKAGE_MANAGER: 2>,
324- # reference_type='purl',
325- # locator='pkg:npm/[email protected] ', 326- # comment=None)],
327- # attribution_texts=[],
328- # primary_package_purpose=None,
329- # release_date=None,
330- # built_date=None,
331- # valid_until_date=None)
0 commit comments