92
92
from spdx_tools .spdx .parser .error import SPDXParsingError
93
93
from spdx_tools .spdx .parser .parse_anything import parse_file
94
94
95
+
96
def spdx_parse(file):
    """Parse an SPDX file and return the resulting Document.

    Prints a progress message and the CPU time spent parsing (measured
    with time.process_time). If the parser raises SPDXParsingError, the
    exception is logged with its traceback and the process exits with
    status 1, so callers only ever see a successfully parsed document.

    Args:
        file: Passed straight through to parse_file.

    Returns:
        The Document produced by parse_file.
    """
    print("Parsing SPDX file...")
    began = time.process_time()
    try:
        parsed: Document = parse_file(file)
    except SPDXParsingError:
        # Nothing useful can be done without a document; bail out.
        logging.exception("Failed to parse spdx file")
        sys.exit(1)
    print(f"SPDX parsing took {time.process_time() - began} seconds")
    return parsed
107
+
108
def spdx_validate(document):
    """Run full SPDX validation on a parsed document and log the findings.

    Prints a progress message and the CPU time spent validating (measured
    with time.process_time). Every validation message is logged as a
    warning; nothing is returned and validation problems never stop the
    program.

    Args:
        document: Passed straight through to validate_full_spdx_document.
    """
    print("Validating SPDX file...")
    began = time.process_time()
    findings = validate_full_spdx_document(document)
    print(f"SPDX validation took {time.process_time() - began} seconds")

    # TODO is there a way to distinguish between something fatal and something
    # BD can deal with?
    # TODO - this can take forever, so add an optional --skip-validation flag
    for finding in findings:
        # Only report these messages instead of exiting. The plan is to exit
        # later if importing the file to BD fails; the sample data produces
        # lots of validation errors.
        logging.warning(finding.validation_message)
123
+
124
def find_comp_in_kb(matchname, extref):
    """Look up a package URL in the KB via the module-level `bd` client.

    Queries "/api/search/purl-components" with `extref` as the packageUrl.
    Only the first result is considered. When its componentName differs
    from `matchname`, an "updating" message is printed and the KB name is
    returned in place of the SPDX name, so that later BOM searches use the
    name BD knows the component by.

    Args:
        matchname: Component name taken from the SPDX package.
        extref: Locator (pURL) sent as the 'packageUrl' query parameter.

    Returns:
        A (purlmatch, name) tuple: (True, KB component name) when the
        search yields a result, otherwise (False, matchname).
    """
    query = {
        'packageUrl': extref
    }
    # TODO any other action to take here?
    # We should probably track KB matches?
    for hit in bd.get_items("/api/search/purl-components", params=query):
        # do we need to worry about more than 1 match?
        kb_name = hit['componentName']
        # Prefer the known KB name over the SPDX name
        # (is version mangling possible??)
        if matchname != kb_name:
            print(f"updating {matchname} -> {kb_name} ")
        return (True, kb_name)
    # The search produced no results: keep the caller's name.
    return (False, matchname)
147
+
95
148
# Locate component name + version in BOM
96
149
# Returns True on success, False on failure
97
150
def find_comp_in_bom (bd , compname , compver , projver ):
@@ -109,17 +162,89 @@ def find_comp_in_bom(bd, compname, compver, projver):
109
162
for comp in comps :
110
163
if comp ['componentName' ] != compname :
111
164
# The BD API search is inexact. Force our match to be precise.
112
- print (f"fuzzy match failed us: { comp ['componentName' ]} vs { compname } " )
165
+ # print(f"fuzzy match failed us: {comp['componentName']} vs {compname}")
113
166
continue
114
167
# Check component name + version name
115
168
if comp ['componentVersionName' ] == compver :
116
169
return True
117
170
return False
118
171
119
- logging .basicConfig (
120
- level = logging .INFO ,
121
- format = "[%(asctime)s] {%(module)s:%(lineno)d} %(levelname)s - %(message)s"
122
- )
172
+
173
def find_cust_comp(cust_comp_name, cust_comp_version):
    """Search for a component by exact name and check for a given version.

    Uses the module-level `bd` client to list 'components' with a
    "name:" query, prints the name of every candidate returned, and
    ignores candidates whose name is not exactly `cust_comp_name` (the
    search itself is not precise). For each exact-name candidate its
    'versions' are scanned for `cust_comp_version`.

    Args:
        cust_comp_name: Component name to match exactly.
        cust_comp_version: Version name to look for on a matched component.

    Returns:
        A (component, found_version) tuple:
        - (component, True) as soon as a candidate matches name and version
        - (last exact-name candidate, False) if no version matched
        - (None, False) if no candidate matched the name
    """
    query = {
        'q': [f"name:{cust_comp_name} "]
    }
    # Relies on internal header
    accept = {'Accept': 'application/vnd.blackducksoftware.internal-1+json'}

    name_hit = None
    for candidate in bd.get_resource('components', params=query, headers=accept):
        print(f"{candidate['name']} ")
        if cust_comp_name == candidate['name']:
            name_hit = candidate
            # Stop at the first version whose name matches.
            if any(cust_comp_version == ver['versionName']
                   for ver in bd.get_resource('versions', candidate)):
                # Successfully matched both name and version
                return (name_hit, True)
        # Otherwise skip it: we want precise matching despite the API.

    return (name_hit, False)
199
+
200
+
201
def get_license_url(license_name):
    """Return the URL of the license whose name is exactly `license_name`.

    Queries "/api/licenses" through the module-level `bd` client and
    returns the '_meta' href of the first result with an identical name.
    If no result matches, an error is logged and the process exits with
    status 1 -- the license is assumed to exist.
    TODO could probably just create the license instead of exiting?

    Args:
        license_name: License name to look up.

    Returns:
        The href string of the matching license.
    """
    query = {
        'q': [f"name:{license_name} "]
    }
    for candidate in bd.get_items("/api/licenses", params=query):
        # Insist on an exact name so that e.g. "NOASSERTION2" is not
        # accepted when "NOASSERTION" was requested.
        if candidate['name'] != license_name:
            continue
        return candidate['_meta']['href']

    logging.error(f"Failed to find license {license_name} ")
    sys.exit(1)
214
+
215
def create_cust_comp(name, version, license, approval):
    """Create a custom component together with its first version.

    Resolves `license` to a URL with get_license_url (which exits the
    process if the license cannot be found), then POSTs the new component
    to "api/components" through the module-level `bd` client. The response
    is pretty-printed as before; in addition, an unsuccessful response is
    now logged instead of passing silently.

    Args:
        name: Name of the custom component.
        version: Version name for the component's initial version.
        license: License name to attach to the version.
        approval: Value sent as the component's 'approvalStatus'.

    Returns:
        The response object from the POST, so callers can inspect it.
        Callers that ignore the return value are unaffected.
    """
    print(f"Adding custom component: {name} {version} ")
    license_url = get_license_url(license)
    data = {
        'name': name,
        'version': {
            'versionName': version,
            'license': {
                'license': license_url
            },
        },
        'approvalStatus': approval
    }
    response = bd.session.post("api/components", json=data)
    pprint(response)

    # NOTE(review): assumes bd.session.post returns a requests-style
    # response exposing status_code / ok / text -- confirm.
    if response.status_code == 412:
        # A 412 has been observed when the component already exists.
        logging.warning("Custom component %s may already exist (HTTP 412)", name)
    elif not response.ok:
        logging.error("Failed to create custom component %s %s: HTTP %s %s",
                      name, version, response.status_code, response.text)
    return response
233
+
234
def create_cust_comp_ver(comp, version, license):
    """Add a version to a custom component that already exists.

    Resolves `license` to a URL with get_license_url (which exits the
    process if the license cannot be found), then POSTs the new version to
    the component's own "/versions" endpoint through the module-level `bd`
    client. The response is pretty-printed as before; in addition, an
    unsuccessful response is now logged instead of passing silently.

    Args:
        comp: Component object from a previous lookup; its 'name' and
            '_meta' href are read here.
        version: Version name to create.
        license: License name to attach to the version.

    Returns:
        The response object from the POST, so callers can inspect it.
        Callers that ignore the return value are unaffected.
    """
    print(f"Adding version {version} to custom component {comp['name']} ")
    license_url = get_license_url(license)
    data = {
        'versionName': version,
        'license': {
            'license': license_url
        },
    }
    response = bd.session.post(comp['_meta']['href'] + "/versions", json=data)
    pprint(response)

    # NOTE(review): assumes bd.session.post returns a requests-style
    # response exposing status_code / ok / text -- confirm.
    if not response.ok:
        logging.error("Failed to add version %s to custom component %s: HTTP %s %s",
                      version, comp['name'], response.status_code, response.text)
    return response
123
248
124
249
parser = argparse .ArgumentParser (description = "Parse SPDX file and verify if component names are in current SBOM for given project-version" )
125
250
parser .add_argument ("--base-url" , required = True , help = "Hub server URL e.g. https://your.blackduck.url" )
@@ -129,38 +254,25 @@ def find_comp_in_bom(bd, compname, compver, projver):
129
254
parser .add_argument ("--project" , dest = 'project_name' , required = True , help = "Project that contains the BOM components" )
130
255
parser .add_argument ("--version" , dest = 'version_name' , required = True , help = "Version that contains the BOM components" )
131
256
parser .add_argument ("--no-verify" , dest = 'verify' , action = 'store_false' , help = "Disable TLS certificate verification" )
257
+ parser .add_argument ("--no-spdx-validate" , dest = 'spdx_validate' , action = 'store_false' , help = "Disable SPDX validation" )
132
258
args = parser .parse_args ()
133
259
134
- # Parse SPDX file. This can take a very long time, so do this first.
135
- # Returns a Document object on success, otherwise raises an SPDXParsingError
136
- try :
137
- print ("Reading SPDX file..." )
138
- start = time .process_time ()
139
- document : Document = parse_file (args .spdx_file )
140
- print (f"SPDX parsing took { time .process_time () - start } seconds" )
141
- except SPDXParsingError :
142
- logging .exception ("Failed to parse spdx file" )
143
- sys .exit (1 )
144
-
145
- # TODO also validate the file, which is an extra step once you have a document?
146
- print ("Validating SPDX file..." )
147
- start = time .process_time ()
148
- validation_messages = validate_full_spdx_document (document )
149
- print (f"SPDX validation took { time .process_time () - start } seconds" )
260
+ logging .basicConfig (
261
+ level = logging .INFO ,
262
+ format = "[%(asctime)s] {%(module)s:%(lineno)d} %(levelname)s - %(message)s"
263
+ )
150
264
151
- # TODO is there a way to distinguish between something fatal and something
152
- # BD can deal with?
153
- # I guess we can just print all the msgs and then also exit when the import fails..
154
- for validation_message in validation_messages :
155
- logging .warning (validation_message .validation_message )
265
+ document = spdx_parse (args .spdx_file )
266
+ if (args .spdx_validate ):
267
+ spdx_validate (document )
156
268
157
269
with open (args .token_file , 'r' ) as tf :
158
270
access_token = tf .readline ().strip ()
159
271
160
272
bd = Client (base_url = args .base_url , token = access_token , verify = args .verify )
161
273
162
274
# Open unmatched component file
163
- # Will save name, spdxid, version, and origin/purl (if available) like so :
275
+ # Will save name, spdxid, version, and origin/purl for later in json format :
164
276
# "name": "react-bootstrap",
165
277
# "spdx_id": "SPDXRef-Pkg-react-bootstrap-2.1.2-30223",
166
278
# "version": "2.1.2",
@@ -195,12 +307,6 @@ def find_comp_in_bom(bd, compname, compver, projver):
195
307
196
308
logging .debug (f"Found { project ['name' ]} :{ version ['versionName' ]} " )
197
309
198
- # Can now access attributes from the parsed document
199
- # Note: The SPDX module renames tags slightly from the original json format.
200
-
201
- matches = 0
202
- nopurl = 0
203
- nomatch = 0
204
310
205
311
# situations to consider + actions
206
312
# 1) No purl available : check SBOM for comp+ver, then add cust comp + add to SBOM
@@ -212,41 +318,33 @@ def find_comp_in_bom(bd, compname, compver, projver):
212
318
# - In SBOM? (maybe already added or whatever?) -> done
213
319
# - Else -> add cust comp + add to SBOM (same as 1)
214
320
215
- # Walk through each component in the SPDX file
321
+ # Stats to track
322
+ bom_matches = 0
323
+ kb_matches = 0
324
+ nopurl = 0
325
+ nomatch = 0
216
326
package_count = 0
327
+ cust_comp_count = 0
328
+ cust_ver_count = 0
329
+ # Saving all encountered components by their name+version (watching for repeats)
217
330
packages = {}
331
+
332
+ # Walk through each component in the SPDX file
218
333
for package in document .packages :
219
334
package_count += 1
220
- # spdx-tools module says only name, spdx_id, download_location are required
221
335
# We hope we'll have an external reference (pURL), but we might not.
222
336
extref = None
223
337
purlmatch = False
224
338
matchname = package .name
225
339
matchver = package .version
226
- packages [ package . name + package . version ] = packages . get ( package . name + package . version , 0 ) + 1
227
- #blah['zzz' ] = blah .get('zzz' , 0) + 1
340
+ # Tracking unique package name + version from spdx file
341
+ packages [ matchname + matchver ] = packages .get (matchname + matchver , 0 ) + 1
228
342
229
- # NOTE: BD can mangle the original component name
343
+ # NOTE: BD can change the original component name
230
344
# EX: "React" -> "React from Facebook"
231
345
if package .external_references :
232
- extref = package .external_references [0 ].locator
233
-
234
- # KB lookup to check for pURL match
235
- params = {
236
- 'packageUrl' : extref
237
- }
238
- for result in bd .get_items ("/api/search/purl-components" , params = params ):
239
- # do we need to worry about more than 1 match?
240
- print (f"Found KB match for { extref } " )
241
- purlmatch = True
242
- #pprint(result)
243
- # in this event, override the spdx name and use the known KB name
244
- # (any concern for version mangling??)
245
- if matchname != result ['componentName' ]:
246
- print (f"updating { matchname } -> { result ['componentName' ]} " )
247
- matchname = result ['componentName' ]
248
- # Any match means we should already have it
249
- # But we will also check to see if the comp is in the BOM i guess
346
+ inkb , matchname = find_comp_in_kb (matchname , package .external_references [0 ].locator )
347
+ if inkb : kb_matches += 1
250
348
else :
251
349
nopurl += 1
252
350
print ("No pURL found for component: " )
@@ -255,22 +353,38 @@ def find_comp_in_bom(bd, compname, compver, projver):
255
353
print (" " + package .version )
256
354
257
355
if find_comp_in_bom (bd , matchname , matchver , version ):
258
- matches += 1
259
- print (" Found comp match in BOM: " + matchname + matchver )
356
+ bom_matches += 1
357
+ # print(" Found comp match in BOM: " + matchname + matchver)
260
358
else :
261
- # TODO:
262
- # 1) check if in custom component list (system-wide)
263
- # 2) add if not there
264
- # 3) add to project BOM
265
359
nomatch += 1
266
- print (" Need to add custom comp: " + package .name )
267
360
comp_data = {
268
361
"name" : package .name ,
269
362
"spdx_id" : package .spdx_id ,
270
363
"version" : package .version ,
271
364
"origin" : extref
272
365
}
273
366
comps_out .append (comp_data )
367
+
368
+ # Check if custom component already exists
369
+ comp_match , found_ver = find_cust_comp (package .name , package .version )
370
+
371
+ # TODO make these optional args with defaults
372
+ license = "NOASSERTION"
373
+ approval = "UNREVIEWED"
374
+ if not comp_match :
375
+ cust_comp_count += 1
376
+ create_cust_comp (package .name , package .version , license , approval )
377
+ elif comp_match and not found_ver :
378
+ cust_ver_count += 1
379
+ print ("Adding custom component version..." )
380
+ create_cust_comp_ver (comp_match , package .version , license )
381
+ else :
382
+ # nothing to do?
383
+ print ("probably found name and ver" )
384
+
385
+ # TODO write sbom add code
386
+ #add_to_sbom()
387
+
274
388
275
389
# Save unmatched components
276
390
json .dump (comps_out , outfile )
@@ -280,52 +394,11 @@ def find_comp_in_bom(bd, compname, compver, projver):
280
394
print ("------" )
281
395
print (f" SPDX packages processed: { package_count } " )
282
396
print (f" Non matches: { nomatch } " )
283
- print (f" Matches: { matches } " )
397
+ print (f" KB matches: { kb_matches } " )
398
+ print (f" BOM matches: { bom_matches } " )
284
399
print (f" Packages missing purl: { nopurl } " )
400
+ print (f" Custom components created: { cust_comp_count } " )
401
+ print (f" Custom component versions created: { cust_ver_count } " )
285
402
286
- pprint (packages )
403
+ # pprint(packages)
287
404
print (f" { len (packages )} unique packages processed" )
288
- # Parsed SPDX package data looks like
289
- # Package(spdx_id='SPDXRef-Pkg-micromatch-4.0.2-30343',
290
- # name='micromatch',
291
- # download_location=NOASSERTION,
292
- # version='4.0.2',
293
- # file_name=None,
294
- # supplier=None,
295
- # originator=None,
296
- # files_analyzed=True,
297
- # verification_code=PackageVerificationCode(value='600ce1a1b891b48a20a3d395e4714f854dc6ced4',
298
- # excluded_files=[]),
299
- # checksums=[],
300
- # homepage='https://www.npmjs.com/package/micromatch',
301
- # source_info=None,
302
- # license_concluded=LicenseSymbol('MIT',
303
- # is_exception=False),
304
- # license_info_from_files=[LicenseSymbol('Apache-2.0',
305
- # is_exception=False),
306
- # LicenseSymbol('BSD-2-Clause',
307
- # is_exception=False),
308
- # LicenseSymbol('ISC',
309
- # is_exception=False),
310
- # LicenseSymbol('JSON',
311
- # is_exception=False),
312
- # LicenseSymbol('LicenseRef-Historical-Permission-Notice-and-Disclaimer---sell-variant',
313
- # is_exception=False),
314
- # LicenseSymbol('LicenseRef-MIT-Open-Group-variant',
315
- # is_exception=False)],
316
- # license_declared=LicenseSymbol('MIT',
317
- # is_exception=False),
318
- # license_comment=None,
319
- # copyright_text=NOASSERTION,
320
- # summary=None,
321
- # description=None,
322
- # comment=None,
323
- # external_references=[ExternalPackageRef(category=<ExternalPackageRefCategory.PACKAGE_MANAGER: 2>,
324
- # reference_type='purl',
325
- # locator='pkg:npm/[email protected] ',
326
- # comment=None)],
327
- # attribution_texts=[],
328
- # primary_package_purpose=None,
329
- # release_date=None,
330
- # built_date=None,
331
- # valid_until_date=None)
0 commit comments