Skip to content

Commit 9507969

Browse files
author
Shane Wright
committed
Significant refactoring, plus adding custom component lookup and add functionality. WIP.
1 parent 4392280 commit 9507969

File tree

1 file changed

+182
-109
lines changed

1 file changed

+182
-109
lines changed

examples/client/parse_spdx.py

Lines changed: 182 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,59 @@
9292
from spdx_tools.spdx.parser.error import SPDXParsingError
9393
from spdx_tools.spdx.parser.parse_anything import parse_file
9494

95+
96+
# Returns SPDX Document object on success, otherwise exits on parse failure
97+
def spdx_parse(file):
    """Parse an SPDX file, exiting the process if it cannot be parsed.

    Args:
        file: The SPDX file to read; passed unchanged to ``parse_file``.

    Returns:
        The document object produced by ``parse_file``.

    On ``SPDXParsingError`` the exception is logged with its traceback and
    the script terminates with exit status 1.
    """
    print("Parsing SPDX file...")
    # process_time() reports CPU time used by this process.
    cpu_start = time.process_time()
    try:
        parsed: Document = parse_file(file)
    except SPDXParsingError:
        logging.exception("Failed to parse spdx file")
        sys.exit(1)
    else:
        print(f"SPDX parsing took {time.process_time() - cpu_start} seconds")
        return parsed
107+
108+
# Validates the SPDX file. Logs all validation messages as warnings.
109+
def spdx_validate(document):
    """Run full SPDX validation on a parsed document and log the findings.

    Args:
        document: The parsed SPDX document, passed to
            ``validate_full_spdx_document``.

    Every validation message is logged at WARNING level. Nothing is
    returned and no finding causes an exit.
    """
    print("Validating SPDX file...")
    cpu_start = time.process_time()
    findings = validate_full_spdx_document(document)
    print(f"SPDX validation took {time.process_time() - cpu_start} seconds")

    # TODO is there a way to distinguish between something fatal and something
    # BD can deal with?
    # TODO - this can take forever, so add an optional --skip-validation flag
    #
    # Findings are only printed instead of being treated as fatal. Later, when
    # the file is imported to BD, the plan is to exit if that import fails.
    # The sample data produces lots of errors.
    for finding in findings:
        logging.warning(finding.validation_message)
123+
124+
# Lookup the given matchname in the KB
125+
# Logs a successful match
126+
# Return the boolean purlmatch and matchname, which we might change from
127+
# its original value -- we will force it to be the same as the name in BD.
128+
# That way we can more accurately search the BOM later.
129+
def find_comp_in_kb(matchname, extref):
    """Look up a package URL in the KB and reconcile the component name.

    Args:
        matchname: Component name taken from the SPDX package.
        extref: Value sent as the ``packageUrl`` search parameter.

    Returns:
        A ``(found, name)`` tuple. ``found`` is True when the search yielded
        at least one result. ``name`` is the ``componentName`` of the first
        result whose name differs from ``matchname``; if no result differs
        (or there are no results) it is ``matchname`` unchanged.
    """
    found = False
    query = {'packageUrl': extref}
    # TODO any other action to take here?
    # We should probably track KB matches?
    for hit in bd.get_items("/api/search/purl-components", params=query):
        # do we need to worry about more than 1 match?
        found = True
        kb_name = hit['componentName']
        if kb_name == matchname:
            continue
        # Prefer the name the KB knows, so later BOM searches are accurate.
        # (is version mangling possible??)
        print(f"updating {matchname} -> {kb_name}")
        return found, kb_name
    return found, matchname
147+
95148
# Locate component name + version in BOM
96149
# Returns True on success, False on failure
97150
def find_comp_in_bom(bd, compname, compver, projver):
@@ -109,17 +162,89 @@ def find_comp_in_bom(bd, compname, compver, projver):
109162
for comp in comps:
110163
if comp['componentName'] != compname:
111164
# The BD API search is inexact. Force our match to be precise.
112-
print(f"fuzzy match failed us: {comp['componentName']} vs {compname}")
165+
#print(f"fuzzy match failed us: {comp['componentName']} vs {compname}")
113166
continue
114167
# Check component name + version name
115168
if comp['componentVersionName'] == compver:
116169
return True
117170
return False
118171

119-
logging.basicConfig(
120-
level=logging.INFO,
121-
format="[%(asctime)s] {%(module)s:%(lineno)d} %(levelname)s - %(message)s"
122-
)
172+
173+
# Returns:
174+
# CompMatch - Contains matched component object, None for no match
175+
# FoundVer - Boolean: True if matched the custom component version
176+
def find_cust_comp(cust_comp_name, cust_comp_version):
    """Search for a component by exact name and, within it, an exact version.

    Args:
        cust_comp_name: Component name to look for.
        cust_comp_version: Version name to look for under that component.

    Returns:
        A ``(component, version_found)`` tuple. ``component`` is the matching
        component object, or None when no component has exactly this name.
        ``version_found`` is True only when one of that component's versions
        has exactly the requested version name.
    """
    search = {'q': [f"name:{cust_comp_name}"]}
    # Relies on internal header
    internal_headers = {'Accept': 'application/vnd.blackducksoftware.internal-1+json'}

    name_match = None
    for candidate in bd.get_resource('components', params=search, headers=internal_headers):
        print(f"{candidate['name']}")
        if candidate['name'] != cust_comp_name:
            # Skip it. We want to be precise in our matching, despite the API.
            continue
        name_match = candidate
        for ver in bd.get_resource('versions', candidate):
            if ver['versionName'] == cust_comp_version:
                # Successfully matched both name and version
                return name_match, True

    return name_match, False
199+
200+
201+
# Returns URL of matching license
202+
# Exits on failure, we assume it must exist - TODO could probably just create this?
203+
def get_license_url(license_name):
    """Return the URL of the license whose name is exactly ``license_name``.

    Args:
        license_name: License name to search for.

    Returns:
        The ``_meta.href`` value of the first search result with an
        identical name.

    If no result has that exact name, an error is logged and the script
    terminates with exit status 1.
    """
    query = {'q': [f"name:{license_name}"]}
    for candidate in bd.get_items("/api/licenses", params=query):
        # Require an exact name so that e.g. "NOASSERTION2" is not accepted
        # when searching for "NOASSERTION".
        if candidate['name'] != license_name:
            continue
        return candidate['_meta']['href']

    logging.error(f"Failed to find license {license_name}")
    sys.exit(1)
214+
215+
def create_cust_comp(name, version, license, approval):
    """Create a custom component together with an initial version.

    Args:
        name: Name of the component to create.
        version: Version name for the component's initial version.
        license: License name; resolved to a URL with ``get_license_url``,
            which exits the script if the license cannot be found.
        approval: Value sent as the component's ``approvalStatus``.

    The response is printed, and a non-successful response is additionally
    logged as an error. Nothing is returned and no exception is raised for
    a failed request.
    """
    print(f"Adding custom component: {name} {version}")
    license_url = get_license_url(license)
    payload = {
        'name': name,
        'version': {
            'versionName': version,
            'license': {
                'license': license_url
            },
        },
        'approvalStatus': approval
    }
    response = bd.session.post("api/components", json=payload)
    pprint(response)

    # Report failures instead of letting them pass silently.
    # Looks like a 412 comes back if the component already existed.
    # TODO decide whether a failure here should stop the run
    if not response.ok:
        logging.error(
            f"Failed to create custom component {name} {version}: "
            f"HTTP {response.status_code} {response.text}"
        )
233+
234+
# Create a version for a custom component that already exists
235+
# The comp argument is the component object from previous lookup
236+
def create_cust_comp_ver(comp, version, license):
    """Add a version to a custom component that already exists.

    Args:
        comp: Component object from a previous lookup; its ``name`` and
            ``_meta.href`` entries are read.
        version: Version name to create.
        license: License name; resolved to a URL with ``get_license_url``,
            which exits the script if the license cannot be found.

    The response is printed, and a non-successful response is additionally
    logged as an error. Nothing is returned and no exception is raised for
    a failed request.
    """
    print(f"Adding version {version} to custom component {comp['name']}")
    license_url = get_license_url(license)
    payload = {
        'versionName': version,
        'license': {
            'license': license_url
        },
    }
    versions_url = comp['_meta']['href'] + "/versions"
    response = bd.session.post(versions_url, json=payload)
    pprint(response)

    # Report failures instead of letting them pass silently.
    # TODO decide whether a failure here should stop the run
    if not response.ok:
        logging.error(
            f"Failed to add version {version} to custom component {comp['name']}: "
            f"HTTP {response.status_code} {response.text}"
        )
123248

124249
parser = argparse.ArgumentParser(description="Parse SPDX file and verify if component names are in current SBOM for given project-version")
125250
parser.add_argument("--base-url", required=True, help="Hub server URL e.g. https://your.blackduck.url")
@@ -129,38 +254,25 @@ def find_comp_in_bom(bd, compname, compver, projver):
129254
parser.add_argument("--project", dest='project_name', required=True, help="Project that contains the BOM components")
130255
parser.add_argument("--version", dest='version_name', required=True, help="Version that contains the BOM components")
131256
parser.add_argument("--no-verify", dest='verify', action='store_false', help="Disable TLS certificate verification")
257+
parser.add_argument("--no-spdx-validate", dest='spdx_validate', action='store_false', help="Disable SPDX validation")
132258
args = parser.parse_args()
133259

134-
# Parse SPDX file. This can take a very long time, so do this first.
135-
# Returns a Document object on success, otherwise raises an SPDXParsingError
136-
try:
137-
print("Reading SPDX file...")
138-
start = time.process_time()
139-
document: Document = parse_file(args.spdx_file)
140-
print(f"SPDX parsing took {time.process_time() - start} seconds")
141-
except SPDXParsingError:
142-
logging.exception("Failed to parse spdx file")
143-
sys.exit(1)
144-
145-
# TODO also validate the file, which is an extra step once you have a document?
146-
print("Validating SPDX file...")
147-
start = time.process_time()
148-
validation_messages = validate_full_spdx_document(document)
149-
print(f"SPDX validation took {time.process_time() - start} seconds")
260+
logging.basicConfig(
261+
level=logging.INFO,
262+
format="[%(asctime)s] {%(module)s:%(lineno)d} %(levelname)s - %(message)s"
263+
)
150264

151-
# TODO is there a way to distinguish between something fatal and something
152-
# BD can deal with?
153-
# I guess we can just print all the msgs and then also exit when the import fails..
154-
for validation_message in validation_messages:
155-
logging.warning(validation_message.validation_message)
265+
document = spdx_parse(args.spdx_file)
266+
if (args.spdx_validate):
267+
spdx_validate(document)
156268

157269
with open(args.token_file, 'r') as tf:
158270
access_token = tf.readline().strip()
159271

160272
bd = Client(base_url=args.base_url, token=access_token, verify=args.verify)
161273

162274
# Open unmatched component file
163-
# Will save name, spdxid, version, and origin/purl (if available) like so:
275+
# Will save name, spdxid, version, and origin/purl for later in json format:
164276
# "name": "react-bootstrap",
165277
# "spdx_id": "SPDXRef-Pkg-react-bootstrap-2.1.2-30223",
166278
# "version": "2.1.2",
@@ -195,12 +307,6 @@ def find_comp_in_bom(bd, compname, compver, projver):
195307

196308
logging.debug(f"Found {project['name']}:{version['versionName']}")
197309

198-
# Can now access attributes from the parsed document
199-
# Note: The SPDX module renames tags slightly from the original json format.
200-
201-
matches = 0
202-
nopurl = 0
203-
nomatch = 0
204310

205311
# situations to consider + actions
206312
# 1) No purl available : check SBOM for comp+ver, then add cust comp + add to SBOM
@@ -212,41 +318,33 @@ def find_comp_in_bom(bd, compname, compver, projver):
212318
# - In SBOM? (maybe already added or whatever?) -> done
213319
# - Else -> add cust comp + add to SBOM (same as 1)
214320

215-
# Walk through each component in the SPDX file
321+
# Stats to track
322+
bom_matches = 0
323+
kb_matches = 0
324+
nopurl = 0
325+
nomatch = 0
216326
package_count = 0
327+
cust_comp_count = 0
328+
cust_ver_count = 0
329+
# Saving all encountered components by their name+version (watching for repeats)
217330
packages = {}
331+
332+
# Walk through each component in the SPDX file
218333
for package in document.packages:
219334
package_count += 1
220-
# spdx-tools module says only name, spdx_id, download_location are required
221335
# We hope we'll have an external reference (pURL), but we might not.
222336
extref = None
223337
purlmatch = False
224338
matchname = package.name
225339
matchver = package.version
226-
packages[package.name+package.version] = packages.get(package.name+package.version, 0) + 1
227-
#blah['zzz'] = blah.get('zzz', 0) + 1
340+
# Tracking unique package name + version from spdx file
341+
packages[matchname+matchver] = packages.get(matchname+matchver, 0) + 1
228342

229-
# NOTE: BD can mangle the original component name
343+
# NOTE: BD can change the original component name
230344
# EX: "React" -> "React from Facebook"
231345
if package.external_references:
232-
extref = package.external_references[0].locator
233-
234-
# KB lookup to check for pURL match
235-
params = {
236-
'packageUrl': extref
237-
}
238-
for result in bd.get_items("/api/search/purl-components", params=params):
239-
# do we need to worry about more than 1 match?
240-
print(f"Found KB match for {extref}")
241-
purlmatch = True
242-
#pprint(result)
243-
# in this event, override the spdx name and use the known KB name
244-
# (any concern for version mangling??)
245-
if matchname != result['componentName']:
246-
print(f"updating {matchname} -> {result['componentName']}")
247-
matchname = result['componentName']
248-
# Any match means we should already have it
249-
# But we will also check to see if the comp is in the BOM i guess
346+
inkb, matchname = find_comp_in_kb(matchname, package.external_references[0].locator)
347+
if inkb: kb_matches += 1
250348
else:
251349
nopurl += 1
252350
print("No pURL found for component: ")
@@ -255,22 +353,38 @@ def find_comp_in_bom(bd, compname, compver, projver):
255353
print(" " + package.version)
256354

257355
if find_comp_in_bom(bd, matchname, matchver, version):
258-
matches += 1
259-
print(" Found comp match in BOM: " + matchname + matchver)
356+
bom_matches += 1
357+
#print(" Found comp match in BOM: " + matchname + matchver)
260358
else:
261-
# TODO:
262-
# 1) check if in custom component list (system-wide)
263-
# 2) add if not there
264-
# 3) add to project BOM
265359
nomatch += 1
266-
print(" Need to add custom comp: " + package.name)
267360
comp_data = {
268361
"name": package.name,
269362
"spdx_id": package.spdx_id,
270363
"version": package.version,
271364
"origin": extref
272365
}
273366
comps_out.append(comp_data)
367+
368+
# Check if custom component already exists
369+
comp_match, found_ver = find_cust_comp(package.name, package.version)
370+
371+
# TODO make these optional args with defaults
372+
license = "NOASSERTION"
373+
approval = "UNREVIEWED"
374+
if not comp_match:
375+
cust_comp_count += 1
376+
create_cust_comp(package.name, package.version, license, approval)
377+
elif comp_match and not found_ver:
378+
cust_ver_count += 1
379+
print("Adding custom component version...")
380+
create_cust_comp_ver(comp_match, package.version, license)
381+
else:
382+
# nothing to do?
383+
print("probably found name and ver")
384+
385+
# TODO write sbom add code
386+
#add_to_sbom()
387+
274388

275389
# Save unmatched components
276390
json.dump(comps_out, outfile)
@@ -280,52 +394,11 @@ def find_comp_in_bom(bd, compname, compver, projver):
280394
print("------")
281395
print(f" SPDX packages processed: {package_count}")
282396
print(f" Non matches: {nomatch}")
283-
print(f" Matches: {matches}")
397+
print(f" KB matches: {kb_matches}")
398+
print(f" BOM matches: {bom_matches}")
284399
print(f" Packages missing purl: {nopurl}")
400+
print(f" Custom components created: {cust_comp_count}")
401+
print(f" Custom component versions created: {cust_ver_count}")
285402

286-
pprint(packages)
403+
#pprint(packages)
287404
print(f" {len(packages)} unique packages processed")
288-
# Parsed SPDX package data looks like
289-
# Package(spdx_id='SPDXRef-Pkg-micromatch-4.0.2-30343',
290-
# name='micromatch',
291-
# download_location=NOASSERTION,
292-
# version='4.0.2',
293-
# file_name=None,
294-
# supplier=None,
295-
# originator=None,
296-
# files_analyzed=True,
297-
# verification_code=PackageVerificationCode(value='600ce1a1b891b48a20a3d395e4714f854dc6ced4',
298-
# excluded_files=[]),
299-
# checksums=[],
300-
# homepage='https://www.npmjs.com/package/micromatch',
301-
# source_info=None,
302-
# license_concluded=LicenseSymbol('MIT',
303-
# is_exception=False),
304-
# license_info_from_files=[LicenseSymbol('Apache-2.0',
305-
# is_exception=False),
306-
# LicenseSymbol('BSD-2-Clause',
307-
# is_exception=False),
308-
# LicenseSymbol('ISC',
309-
# is_exception=False),
310-
# LicenseSymbol('JSON',
311-
# is_exception=False),
312-
# LicenseSymbol('LicenseRef-Historical-Permission-Notice-and-Disclaimer---sell-variant',
313-
# is_exception=False),
314-
# LicenseSymbol('LicenseRef-MIT-Open-Group-variant',
315-
# is_exception=False)],
316-
# license_declared=LicenseSymbol('MIT',
317-
# is_exception=False),
318-
# license_comment=None,
319-
# copyright_text=NOASSERTION,
320-
# summary=None,
321-
# description=None,
322-
# comment=None,
323-
# external_references=[ExternalPackageRef(category=<ExternalPackageRefCategory.PACKAGE_MANAGER: 2>,
324-
# reference_type='purl',
325-
# locator='pkg:npm/[email protected]',
326-
# comment=None)],
327-
# attribution_texts=[],
328-
# primary_package_purpose=None,
329-
# release_date=None,
330-
# built_date=None,
331-
# valid_until_date=None)

0 commit comments

Comments
 (0)