Skip to content

Commit d9e788c

Browse files
author
Shane Wright
committed
- Start making more modular - added find_comp_in_bom
- Add KB lookup using new API + improve comp name matching - Work on SPDX validation in addition to simple parsing - Misc cleanup - Still a WIP
1 parent 1f8c22d commit d9e788c

File tree

1 file changed

+88
-60
lines changed

1 file changed

+88
-60
lines changed

examples/client/parse_spdx.py

Lines changed: 88 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
json
5050
pprint
5151
spdx_tools
52+
re
5253
5354
- Blackduck instance
5455
- API token with sufficient privileges to perform project version phase
@@ -84,11 +85,37 @@
8485
import logging
8586
import time
8687
import json
88+
import re
8789
from pprint import pprint
8890
from spdx_tools.spdx.model.document import Document
91+
from spdx_tools.spdx.validation.document_validator import validate_full_spdx_document
8992
from spdx_tools.spdx.parser.error import SPDXParsingError
9093
from spdx_tools.spdx.parser.parse_anything import parse_file
9194

95+
# Locate component name + version in BOM
96+
# Returns True on success, False on failure
97+
def find_comp_in_bom(bd, compname, compver, projver):
    """Report whether the BOM of ``projver`` contains ``compname`` at ``compver``.

    Args:
        bd: Client object; only
            ``bd.get_resource('components', projver, params=...)`` is
            called on it.
        compname: Component name. A BOM entry only counts when its
            ``componentName`` is exactly equal to this string.
        compver: Version string compared for exact equality against the
            entry's ``componentVersionName``.
        projver: Project version resource handed through to
            ``get_resource``.

    Returns:
        True if some BOM entry matches both name and version exactly,
        False otherwise (including when the search yields nothing).
    """
    # Lookup existing SBOM for a match (just on name to start)
    # This is a fuzzy match (see "react" for an example)
    params = {
        'q': [f"componentOrVersionName:{compname}"]
    }

    # Search BOM for specific component name
    for comp in bd.get_resource('components', projver, params=params):
        if comp['componentName'] != compname:
            # The BD API search is inexact. Force our match to be precise.
            print(f"fuzzy match failed us: {comp['componentName']} vs {compname}")
            continue
        # Check component name + version name. An entry without a version
        # name cannot match; skip it instead of raising KeyError.
        if 'componentVersionName' in comp and comp['componentVersionName'] == compver:
            return True
    return False
118+
92119
logging.basicConfig(
93120
level=logging.INFO,
94121
format="[%(asctime)s] {%(module)s:%(lineno)d} %(levelname)s - %(message)s"
@@ -115,6 +142,23 @@
115142
logging.exception("Failed to parse spdx file")
116143
sys.exit(1)
117144

145+
# TODO also validate the file, which is an extra step once you have a document?
146+
print("Validating SPDX file...")
147+
start = time.process_time()
148+
validation_messages = validate_full_spdx_document(document)
149+
print(f"SPDX validation took {time.process_time() - start} seconds")
150+
fatal = False
151+
for validation_message in validation_messages:
152+
if re.match(r'.*WARNING.*', validation_message.validation_message):
153+
logging.warning(validation_message.validation_message)
154+
if re.match(r'.*ERROR.*', validation_message.validation_message):
155+
logging.error(validation_message.validation_message)
156+
fatal = True
157+
158+
if fatal:
159+
print("we are dead")
160+
quit()
161+
118162
with open(args.token_file, 'r') as tf:
119163
access_token = tf.readline().strip()
120164

@@ -158,93 +202,73 @@
158202

159203
# Can now access attributes from the parsed document
160204
# Note: The SPDX module renames tags slightly from the original json format.
161-
#print(f"Parsed document name: {document.creation_info.name}")
162-
#creators_as_str = ", ".join([creator.to_serialized_string() for creator in document.creation_info.creators])
163-
#print(f"Created on {document.creation_info.created} by {creators_as_str}")
164-
165-
# A test of multiple search params....
166-
# this works, but it's an OR, not AND
167-
# - we'll find everything with name OR version match
168-
#name = "micromatch"
169-
#ver = "4.0.2"
170-
#params = {
171-
# 'q': [f"componentOrVersionName:{name}"],
172-
# 'q': [f"componentOrVersionName:{ver}"],
173-
#}
174-
#comps = bd.get_resource('components', version, params=params)
175-
#for comp in comps:
176-
# print(comp['componentName'])
177-
178-
# A test of custom comp search params....
179-
#name = "swrightcomp"
180-
#params = {
181-
# 'q': [f"componentOrVersionName:{name}"],
182-
#}
183-
#comps = bd.get_resource('components', version, params=params)
184-
#for comp in comps:
185-
# print(comp['componentName'])
186-
#quit()
187-
# Some fun stats to track as we go
205+
188206
matches = 0
189207
nopurl = 0
190208
nomatch = 0
191209

210+
# situations to consider + actions
211+
# 1) No purl available : check SBOM for comp+ver, then add cust comp + add to SBOM
212+
# 2) Have purl + found in KB
213+
# - In SBOM? -> done
214+
# - Else -> add known KB comp to SBOM
215+
# *** this shouldn't happen in theory
216+
# 3) Have purl + not in KB (main case we are concerned with)
217+
# - In SBOM? (maybe already added or whatever?) -> done
218+
# - Else -> add cust comp + add to SBOM (same as 1)
219+
192220
# Walk through each component in the SPDX file
221+
package_count = 0
222+
packages = {}
193223
for package in document.packages:
224+
package_count += 1
194225
# spdx-tools module says only name, spdx_id, download_location are required
195226
# We hope we'll have an external reference (pURL), but we might not.
196227
extref = None
228+
purlmatch = False
229+
matchname = package.name
230+
matchver = package.version
231+
packages[package.name+package.version] = packages.get(package.name+package.version, 0) + 1
232+
#blah['zzz'] = blah.get('zzz', 0) + 1
197233

198234
# NOTE: BD can mangle the original component name
199235
# EX: "React" -> "React from Facebook"
200236
if package.external_references:
201-
print(" Found external reference: " + package.external_references[0].locator)
202237
extref = package.external_references[0].locator
203238

204-
# TODO lookup KB api here
205-
#/api/search/kb-components?filter=pURL:<name>
206-
207-
# TODO: if lookup successful, next in loop
208-
# if <lookup KB API true>:
209-
# continue
239+
# KB lookup to check for pURL match
240+
params = {
241+
'packageUrl': extref
242+
}
243+
for result in bd.get_items("/api/search/purl-components", params=params):
244+
# do we need to worry about more than 1 match?
245+
print(f"Found KB match for {extref}")
246+
purlmatch = True
247+
#pprint(result)
248+
# in this event, override the spdx name and use the known KB name
249+
# (any concern for version mangling??)
250+
if matchname != result['componentName']:
251+
print(f"updating {matchname} -> {result['componentName']}")
252+
matchname = result['componentName']
253+
# Any match means we should already have it
254+
# But we will also check to see if the comp is in the BOM i guess
210255
else:
211256
nopurl += 1
212-
print(" No pURL found for component: ")
257+
print("No pURL found for component: ")
213258
print(" " + package.name)
214259
print(" " + package.spdx_id)
215260
print(" " + package.version)
216261

217-
# Lookup existing SBOM for a match (just on name to start)
218-
# This is a fuzzy match (see "react" for an example)
219-
params = {
220-
'q': [f"componentOrVersionName:{package.name}"]
221-
}
222-
223-
# Search BOM for specific component name
224-
comps = bd.get_resource('components', version, params=params)
225-
# TODO investigate searching tag here
226-
have_match = False
227-
num_match = 0
228-
for comp in comps:
229-
#pprint(bd.list_resources(comp))
230-
#pprint(comp)
231-
# Check component name + version name
232-
if comp['componentVersionName'] == package.version:
233-
have_match = True
234-
num_match += 1
235-
# TODO need to worry about multiple matches?
236-
break
237-
238-
if have_match:
262+
if find_comp_in_bom(bd, matchname, matchver, version):
239263
matches += 1
240-
print("Found comp match in BOM: " + package.name)
264+
print(" Found comp match in BOM: " + matchname + matchver)
241265
else:
242266
# TODO:
243267
# 1) check if in custom component list (system-wide)
244268
# 2) add if not there
245269
# 3) add to project BOM
246270
nomatch += 1
247-
print("May need to add this custom comp: " + package.name)
271+
print(" Need to add custom comp: " + package.name)
248272
comp_data = {
249273
"name": package.name,
250274
"spdx_id": package.spdx_id,
@@ -257,11 +281,15 @@
257281
json.dump(comps_out, outfile)
258282
outfile.close()
259283

260-
print("Stats: ")
284+
print("\nStats: ")
285+
print("------")
286+
print(f" SPDX packages processed: {package_count}")
261287
print(f" Non matches: {nomatch}")
262288
print(f" Matches: {matches}")
263289
print(f" Packages missing purl: {nopurl}")
264290

291+
pprint(packages)
292+
print(f" {len(packages)} unique packages processed")
265293
# Parsed SPDX package data looks like
266294
# Package(spdx_id='SPDXRef-Pkg-micromatch-4.0.2-30343',
267295
# name='micromatch',

0 commit comments

Comments
 (0)