Skip to content

Commit 77046c8

Browse files
author
Shane Wright
committed
Add SPDX upload functionality and poll for successful scan. Tons of cleanup, assorted bugfixes, and comments.
1 parent d832762 commit 77046c8

File tree

1 file changed

+196
-43
lines changed

1 file changed

+196
-43
lines changed

examples/client/parse_spdx.py

Lines changed: 196 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,6 @@
9494
from spdx_tools.spdx.parser.error import SPDXParsingError
9595
from spdx_tools.spdx.parser.parse_anything import parse_file
9696

97-
# TODO what happens if file doesn't exist?
9897
# Returns SPDX Document object on success, otherwise exits on parse failure
9998
# Input: file = Filename to process
10099
# Returns: SPDX document object
@@ -110,6 +109,7 @@ def spdx_parse(file):
110109
sys.exit(1)
111110

112111
# Validates the SPDX file. Logs all validation messages as warnings.
112+
# Input: SPDX document object
113113
def spdx_validate(document):
114114
print("Validating SPDX file...")
115115
start = time.process_time()
@@ -125,34 +125,153 @@ def spdx_validate(document):
125125
# sample data.
126126
logging.warning(validation_message.validation_message)
127127

128-
# TODO is it possible to make this a case-insensitive match?
129-
# Lookup the given matchname in the KB
130-
# Logs a successful match
131-
# Return the boolean purlmatch and matchname, which we might change from
132-
# its original value -- we will force it to be the same as the name in the KB
133-
# That way we can more accurately search the BOM later.
134-
def find_comp_in_kb(matchname, extref):
135-
# KB lookup to check for pURL match
128+
# Returns MIME type to provide to scan API
129+
# Input: filename to check
130+
def get_sbom_mime_type(filename):
    """Return the MIME type to provide to the scan API for an SBOM file.

    The file content is searched for the case-sensitive marker strings
    'CycloneDX' and 'SPDX'. CycloneDX is checked first, so a file that
    contains both markers is reported as CycloneDX.

    Input:
      filename - Path of the SBOM file to check

    Returns:
      'application/vnd.cyclonedx', 'application/spdx', or None when neither
      marker is present.
    """
    # Decode explicitly as UTF-8 and replace undecodable bytes: we only look
    # for ASCII markers, so an odd byte or a non-UTF-8 platform default
    # encoding must not abort detection with a UnicodeDecodeError.
    with open(filename, 'r', encoding='utf-8', errors='replace') as f:
        content = f.read()

    if 'CycloneDX' in content:
        return 'application/vnd.cyclonedx'
    if 'SPDX' in content:
        return 'application/spdx'
    return None
139+
140+
# Poll for successful scan of SBOM.
141+
# Input: Name of SBOM document (not the filename, the name defined inside the json body)
142+
# Returns on success. Errors will result in fatal exit.
143+
def poll_for_upload(sbom_name):
    """Poll Black Duck until the scan of the uploaded SBOM has succeeded.

    Input:
      sbom_name - Name of the SBOM document (the name defined inside the
                  document body, not the filename)

    Returns normally once a scan reports scanState "SUCCESS". Every other
    outcome (scan failure, no matching scan, missing scans link, retries
    exhausted) is logged and terminates the process via sys.exit(1).
    """
    max_retries = 30
    sleep_time = 10
    # Separate countdown so the timeout message can report the full budget.
    retries_left = max_retries
    matched_scan = False

    # Search for the latest scan matching our SBOM
    # This might be a risk for a race condition
    # NOTE(review): the code below treats the first returned item as the most
    # recent one, yet the sort is ascending -- confirm the intended order
    # against the Black Duck API before changing it.
    params = {
        'q': [f"name:{sbom_name}"],
        'sort': ["updatedAt: ASC"]
    }

    for cl in bd.get_resource('codeLocations', params=params):
        # Force exact match of: spdx_doc_name + " spdx/sbom"
        # BD appends the "spdx/sbom" string to the name.
        if cl['name'] != sbom_name + " spdx/sbom":
            continue

        matched_scan = True

        # Locate the scans URL to check for status. Reset per code location so
        # a missing link is detected instead of raising a NameError or reusing
        # a URL left over from an earlier iteration.
        summaries_url = None
        for link in cl['_meta']['links']:
            if link['rel'] == "scans":
                summaries_url = link['href']
                break

        if not summaries_url:
            logging.error(f"No scans link found for SBOM: {sbom_name}")
            sys.exit(1)

        scan_params = {
            'sort': ["updatedAt: ASC"]
        }

        while retries_left:
            retries_left -= 1
            # Only the first item is checked on each attempt
            item = next(iter(bd.get_items(summaries_url, params=scan_params)), None)

            if item is None:
                # Nothing listed yet: wait instead of burning every retry
                # in a tight loop.
                print("Waiting for scan status to become available")
            elif item['scanState'] == "SUCCESS":
                print("Scan complete")
                return
            elif item['scanState'] == "FAILURE":
                logging.error(f"SPDX Scan Failure: {item['statusMessage']}")
                sys.exit(1)
            else:
                # Only other state should be "STARTED" -- keep polling
                print(f"Waiting for status success, currently: {item['scanState']}")

            time.sleep(sleep_time)

    # Handle various errors that might happen
    if retries_left == 0:
        logging.error(f"Failed to verify successful SPDX Scan in {max_retries * sleep_time} seconds")
    elif not matched_scan:
        logging.error(f"No scan found for SBOM: {sbom_name}")
    else:
        logging.error(f"Unable to verify successful scan of SBOM: {sbom_name}")

    sys.exit(1)
200+
201+
# TODO do we care about project_groups?
202+
# Upload provided SBOM file to Black Duck
203+
# Inputs:
204+
# filename - Name of file to upload
205+
# project - Project name to map to
206+
# version - Version name to map to
207+
def upload_sbom_file(filename, project, version):
    """Upload the provided SBOM file to Black Duck.

    Inputs:
      filename - Name of file to upload
      project  - Project name to map to
      version  - Version name to map to

    Returns None after a 201 response. If the file type cannot be identified
    or the server answers with any other status, the problem is logged and
    the process terminates via sys.exit(1).
    """
    mime_type = get_sbom_mime_type(filename)
    if not mime_type:
        logging.error(f"Could not identify file content for {filename}")
        sys.exit(1)

    fields = {"projectName": project, "versionName": version}
    # The context manager guarantees the handle is closed once the POST has
    # finished, even if the request raises.
    with open(filename, "rb") as sbom_file:
        files = {"file": (filename, sbom_file, mime_type)}
        response = bd.session.post("/api/scan/data", files=files, data=fields)
    logging.info(response)

    if response.status_code == 409:
        logging.info(f"File {filename} is already mapped to a different project version")

    if response.status_code != 201:
        logging.error("Failed to upload SPDX file:")
        try:
            pprint(response.json()['errorMessage'])
        except (ValueError, KeyError, TypeError):
            # Body was not JSON, or carried no 'errorMessage' entry; fall back
            # to the bare status code. Narrow on purpose so SystemExit and
            # KeyboardInterrupt are never swallowed.
            logging.error(f"Status code {response.status_code}")
        sys.exit(1)
227+
228+
229+
# Lookup the given pURL in the BD KB.
230+
# If successfully matched, update the associated package name and version with the data from the KB.
231+
# This will improve the accuracy of later lookups. We are replacing the SPDX input data with the
232+
# data stored in the KB.
233+
#
234+
# Inputs:
235+
# matchname - Name of package from the SPDX input file
236+
# matchver - Version of package
237+
# extref - pURL to look up
238+
#
239+
# Returns:
240+
# purlmatch - boolean (True if successful KB lookup)
241+
# matchname - Original parameter OR updated to reflect KB lookup name
242+
# matchver - Original parameter OR updated to reflect KB lookup version
243+
def find_comp_in_kb(matchname, matchver, extref):
    """Look up a pURL in the BD KB and adopt the KB's name and version.

    Inputs:
      matchname - Name of package from the SPDX input file
      matchver  - Version of package
      extref    - pURL to look up

    Returns a (purlmatch, matchname, matchver) tuple. purlmatch is True when
    the lookup produced a result, in which case the name and version are the
    ones reported by the KB; otherwise the inputs are handed back untouched.
    """
    query = {'packageUrl': extref}

    # Only the first search result is ever consulted.
    for hit in bd.get_items("/api/search/purl-components", params=query):
        kb_name = hit['componentName']
        if kb_name != matchname:
            print(f"Renaming {matchname} -> {kb_name}")

        # The KB version string wins as well, for example v2.8.5 -> 2.8.5
        kb_version = hit['versionName']
        if kb_version != matchver:
            print(f"Renaming {matchver} -> {kb_version}")

        return (True, kb_name, kb_version)

    # Lookup produced nothing, so the caller keeps the original name/version
    return (False, matchname, matchver)
265+
151266

152-
# TODO is it possible to make this a case-insensitive match?
153267
# Locate component name + version in BOM
154-
# Returns True on success, False on failure
155-
def find_comp_in_bom(bd, compname, compver, projver):
268+
# Inputs:
269+
# compname - Component name to locate
270+
# compver - Component version to locate
271+
# projver - Project version to locate component in BOM
272+
#
273+
# Returns: True on success, False on failure
274+
def find_comp_in_bom(compname, compver, projver):
156275
have_match = False
157276
num_match = 0
158277

@@ -179,37 +298,49 @@ def find_comp_in_bom(bd, compname, compver, projver):
179298
return False
180299

181300

182-
# TODO is it possible to make this a case-insensitive match?
301+
# Verifies if a custom component and version already exist in the system
302+
#
303+
# Inputs:
304+
# compname - Component name to locate
305+
# compver - Component version to locate
183306
# Returns:
184307
# CompMatch - Contains matched component url, None for no match
185308
# VerMatch - Contains matched component version url, None for no match
186-
def find_cust_comp(cust_comp_name, cust_comp_version):
309+
def find_cust_comp(compname, compver):
    """Check whether a component and version already exist in the system.

    Inputs:
      compname - Component name to locate
      compver  - Component version to locate

    Returns a (component url, component version url) tuple. Either element is
    None when the corresponding item was not matched.
    """
    # Warning: Relies on internal header
    internal_headers = {'Accept': 'application/vnd.blackducksoftware.internal-1+json'}
    search = {'q': [f"name:{compname}"]}

    for candidate in bd.get_resource('components', params=search, headers=internal_headers):
        # Force exact name match; keep scanning the search results otherwise
        if candidate['name'] != compname:
            continue

        comp_href = candidate['_meta']['href']
        for ver in bd.get_resource('versions', candidate):
            if ver['versionName'] == compver:
                # Successfully matched both name and version
                return (comp_href, ver['_meta']['href'])

        # Component matched but the version did not; stop searching
        return (comp_href, None)

    return (None, None)
209338

210-
# Returns URL of matching license
211-
# Exits on failure, we assume it must pre-exist - TODO could probably just create this?
212-
# Note: License name search is case-sensitive
339+
# Find URL of license to use for custom component creation
340+
# Inputs:
341+
# license_name - Name of license to locate (case-sensitive)
342+
#
343+
# Returns: URL of license successfully matched. Failures are fatal.
213344
def get_license_url(license_name):
214345
params = {
215346
'q': [f"name:{license_name}"]
@@ -292,6 +423,7 @@ def create_cust_comp_ver(comp_url, version, license):
292423
# Inputs:
293424
# proj_version_url: API URL for a project+version to update
294425
# comp_ver_url: API URL of a component+version to add
426+
# Prints out any errors encountered. Errors are fatal.
295427
def add_to_sbom(proj_version_url, comp_ver_url):
296428
data = {
297429
'component': comp_ver_url
@@ -302,14 +434,15 @@ def add_to_sbom(proj_version_url, comp_ver_url):
302434
logging.error(f"Status code {response.status_code}")
303435
sys.exit(1)
304436

437+
305438
parser = argparse.ArgumentParser(description="Parse SPDX file and verify if component names are in current SBOM for given project-version")
306439
parser.add_argument("--base-url", required=True, help="Hub server URL e.g. https://your.blackduck.url")
307440
parser.add_argument("--token-file", dest='token_file', required=True,help="Access token file")
308441
parser.add_argument("--spdx-file", dest='spdx_file', required=True, help="SPDX input file")
309442
parser.add_argument("--out-file", dest='out_file', required=True, help="Unmatched components file")
310443
parser.add_argument("--project", dest='project_name', required=True, help="Project that contains the BOM components")
311444
parser.add_argument("--version", dest='version_name', required=True, help="Version that contains the BOM components")
312-
parser.add_argument("--license", dest='license_name', required=False, default="NOASSERTION", help="License name to use for custom components")
445+
parser.add_argument("--license", dest='license_name', required=False, default="NOASSERTION", help="License name to use for custom components (default: NOASSERTION)")
313446
parser.add_argument("--no-verify", dest='verify', action='store_false', help="Disable TLS certificate verification")
314447
parser.add_argument("--no-spdx-validate", dest='spdx_validate', action='store_false', help="Disable SPDX validation")
315448
args = parser.parse_args()
@@ -324,16 +457,32 @@ def add_to_sbom(proj_version_url, comp_ver_url):
324457
if (args.spdx_validate):
325458
spdx_validate(document)
326459
else:
327-
logging.error(f"Invalid SPDX file: {args.spdx_file}")
460+
logging.error(f"Could not open SPDX file: {args.spdx_file}")
328461
sys.exit(1)
329462

330463
with open(args.token_file, 'r') as tf:
331464
access_token = tf.readline().strip()
332465

466+
global bd
333467
bd = Client(base_url=args.base_url, token=access_token, verify=args.verify)
334468

469+
#pprint(bd.list_resources())
470+
471+
upload_sbom_file(args.spdx_file, args.project_name, args.version_name)
472+
# This will exit if it fails
473+
poll_for_upload(document.creation_info.name)
474+
335475
# some little debug/test stubs
336476
# TODO: delete these
477+
#matchcomp, matchver = find_cust_comp("ipaddress", "1.0.23")
478+
#if matchcomp:
479+
# print("matched comp")
480+
#else:
481+
# print("no comp match")
482+
#if matchver:
483+
# print("matched ver")
484+
#else:
485+
# print("no ver match")
337486
#comp_ver_url = create_cust_comp("MY COMPONENT z", "1", args.license_name)
338487
#
339488
#comp_url = "https://purl-validation.saas-staging.blackduck.com/api/components/886c04d4-28ce-4a27-be4c-f083e73a9f69"
@@ -350,7 +499,7 @@ def add_to_sbom(proj_version_url, comp_ver_url):
350499
# "spdx_id": "SPDXRef-Pkg-react-bootstrap-2.1.2-30223",
351500
# "version": "2.1.2",
352501
# "origin": null
353-
# TODO this try/except actually isn't right
502+
# TODO this try/except isn't quite right
354503
try: outfile = open(args.out_file, 'w')
355504
except:
356505
logging.exception("Failed to open file for writing: " + args.out_file)
@@ -368,6 +517,7 @@ def add_to_sbom(proj_version_url, comp_ver_url):
368517
assert len(projects) == 1, \
369518
f"There should one project named {args.project_name}. Found {len(projects)}"
370519
project = projects[0]
520+
371521
# Fetch Version (can only have 1)
372522
params = {
373523
'q': [f"versionName:{args.version_name}"]
@@ -381,7 +531,6 @@ def add_to_sbom(proj_version_url, comp_ver_url):
381531

382532
logging.debug(f"Found {project['name']}:{version['versionName']}")
383533

384-
385534
# situations to consider + actions
386535
# 1) No purl available : check SBOM for comp+ver, then add cust comp + add to SBOM
387536
# 2) Have purl + found in KB
@@ -417,7 +566,7 @@ def add_to_sbom(proj_version_url, comp_ver_url):
417566
# NOTE: BD can change the original component name
418567
# EX: "React" -> "React from Facebook"
419568
if package.external_references:
420-
inkb, matchname = find_comp_in_kb(matchname, package.external_references[0].locator)
569+
inkb, matchname, matchver = find_comp_in_kb(matchname, matchver, package.external_references[0].locator)
421570
if inkb: kb_matches += 1
422571
else:
423572
nopurl += 1
@@ -426,9 +575,9 @@ def add_to_sbom(proj_version_url, comp_ver_url):
426575
print(" " + package.spdx_id)
427576
print(" " + package.version)
428577

429-
if find_comp_in_bom(bd, matchname, matchver, version):
578+
if find_comp_in_bom(matchname, matchver, version):
430579
bom_matches += 1
431-
#print(" Found comp match in BOM: " + matchname + matchver)
580+
print(" Found comp match in BOM: " + matchname + matchver)
432581
else:
433582
nomatch += 1
434583
comp_data = {
@@ -439,27 +588,31 @@ def add_to_sbom(proj_version_url, comp_ver_url):
439588
}
440589
comps_out.append(comp_data)
441590

591+
# TODO what about: KB exists but not in BOM??
592+
# find_cust_comp is not generic enough for that situation
593+
#if inkb:
594+
# TODO handle add KB match to BOM here, short-circuit steps below
595+
442596
# Check if custom component already exists
443597
comp_url, comp_ver_url = find_cust_comp(package.name, package.version)
444598

445599
if not comp_url:
446600
# Custom component did not exist, so create it
447601
cust_comp_count += 1
448602
comp_ver_url = create_cust_comp(package.name, package.version,
449-
args.license_name, approval)
603+
args.license_name)
450604
elif comp_url and not comp_ver_url:
451605
# Custom component existed, but not the version we care about
452606
cust_ver_count += 1
453607
print(f"Adding version {package.version} to custom component {package.name}")
454608
comp_ver_url = create_cust_comp_ver(comp_url, package.version, args.license_name)
455-
# DEBUG
456-
quit()
457609
else:
458610
print("Custom component already exists, not in SBOM")
459611

460-
# is this possible? i don't think so
612+
# is this possible?
461613
assert(comp_ver_url), f"No comp_ver URL found for {package.name} {package.version}"
462-
print(f"Adding component to SBOM: {package.name} {package.version}")
614+
615+
print(f"Adding component to SBOM: {package.name} aka {matchname} {package.version}")
463616
add_to_sbom(proj_version_url, comp_ver_url)
464617

465618
# Save unmatched components

0 commit comments

Comments
 (0)