|
49 | 49 | json
|
50 | 50 | pprint
|
51 | 51 | spdx_tools
|
| 52 | + re |
52 | 53 |
|
53 | 54 | - Blackduck instance
|
54 | 55 | - API token with sufficient privileges to perform project version phase
|
|
84 | 85 | import logging
|
85 | 86 | import time
|
86 | 87 | import json
|
| 88 | +import re |
87 | 89 | from pprint import pprint
|
88 | 90 | from spdx_tools.spdx.model.document import Document
|
| 91 | +from spdx_tools.spdx.validation.document_validator import validate_full_spdx_document |
89 | 92 | from spdx_tools.spdx.parser.error import SPDXParsingError
|
90 | 93 | from spdx_tools.spdx.parser.parse_anything import parse_file
|
91 | 94 |
|
def find_comp_in_bom(bd, compname, compver, projver):
    """Report whether a component name + version is present in a BOM.

    Queries the 'components' resource of ``projver`` through
    ``bd.get_resource`` using a ``componentOrVersionName`` filter, then
    requires an exact name and version match on each returned entry.

    Args:
        bd: Client object exposing ``get_resource(name, parent, params=...)``.
        compname: Component name to look for (compared exactly).
        compver: Component version name to look for (compared exactly).
        projver: Project version whose BOM components are searched.

    Returns:
        True if a returned entry matches both name and version, else False.
    """
    # The server-side search is a fuzzy match (see "react" for an example),
    # so it only narrows the candidates; exactness is enforced below.
    params = {
        'q': [f"componentOrVersionName:{compname}"]
    }

    # Search BOM for specific component name
    for comp in bd.get_resource('components', projver, params=params):
        if comp['componentName'] != compname:
            # The BD API search is inexact. Force our match to be precise.
            print(f"fuzzy match failed us: {comp['componentName']} vs {compname}")
            continue
        # An entry that carries no version field cannot match a requested
        # version; treat it as a non-match instead of raising KeyError.
        if 'componentVersionName' in comp and comp['componentVersionName'] == compver:
            return True
    return False
| 118 | + |
92 | 119 | logging.basicConfig(
|
93 | 120 | level=logging.INFO,
|
94 | 121 | format="[%(asctime)s] {%(module)s:%(lineno)d} %(levelname)s - %(message)s"
|
|
115 | 142 | logging.exception("Failed to parse spdx file")
|
116 | 143 | sys.exit(1)
|
117 | 144 |
|
| 145 | +# Validate the parsed document (a separate step from parsing, done once we have a Document) |
| 146 | +print("Validating SPDX file...") |
| 147 | +start = time.process_time() |
| 148 | +validation_messages = validate_full_spdx_document(document) |
| 149 | +print(f"SPDX validation took {time.process_time() - start} seconds") |
| 150 | +fatal = False |
| 151 | +for validation_message in validation_messages: |
| 152 | + if re.match(r'.*WARNING.*', validation_message.validation_message): |
| 153 | + logging.warning(validation_message.validation_message) |
| 154 | + if re.match(r'.*ERROR.*', validation_message.validation_message): |
| 155 | + logging.error(validation_message.validation_message) |
| 156 | + fatal = True |
| 157 | + |
| 158 | +if fatal: |
| 159 | + print("we are dead") |
| 160 | +quit() |
| 161 | + |
118 | 162 | with open(args.token_file, 'r') as tf:
|
119 | 163 | access_token = tf.readline().strip()
|
120 | 164 |
|
|
158 | 202 |
|
159 | 203 | # Can now access attributes from the parsed document
|
160 | 204 | # Note: The SPDX module renames tags slightly from the original json format.
|
161 |
| -#print(f"Parsed document name: {document.creation_info.name}") |
162 |
| -#creators_as_str = ", ".join([creator.to_serialized_string() for creator in document.creation_info.creators]) |
163 |
| -#print(f"Created on {document.creation_info.created} by {creators_as_str}") |
164 |
| - |
165 |
| -# A test of multiple search params.... |
166 |
| -# this works, but it's an OR, not AND |
167 |
| -# - we'll find everything with name OR version match |
168 |
| -#name = "micromatch" |
169 |
| -#ver = "4.0.2" |
170 |
| -#params = { |
171 |
| -# 'q': [f"componentOrVersionName:{name}"], |
172 |
| -# 'q': [f"componentOrVersionName:{ver}"], |
173 |
| -#} |
174 |
| -#comps = bd.get_resource('components', version, params=params) |
175 |
| -#for comp in comps: |
176 |
| -# print(comp['componentName']) |
177 |
| - |
178 |
| -# A test of custom comp search params.... |
179 |
| -#name = "swrightcomp" |
180 |
| -#params = { |
181 |
| -# 'q': [f"componentOrVersionName:{name}"], |
182 |
| -#} |
183 |
| -#comps = bd.get_resource('components', version, params=params) |
184 |
| -#for comp in comps: |
185 |
| -# print(comp['componentName']) |
186 |
| -#quit() |
187 |
| -# Some fun stats to track as we go |
| 205 | + |
188 | 206 | matches = 0
|
189 | 207 | nopurl = 0
|
190 | 208 | nomatch = 0
|
191 | 209 |
|
| 210 | +# situations to consider + actions |
| 211 | +# 1) No purl available : check SBOM for comp+ver, then add cust comp + add to SBOM |
| 212 | +# 2) Have purl + found in KB |
| 213 | +# - In SBOM? -> done |
| 214 | +# - Else -> add known KB comp to SBOM |
| 215 | +# *** this shouldn't happen in theory |
| 216 | +# 3) Have purl + not in KB (main case we are concerned with) |
| 217 | +# - In SBOM? (maybe already added or whatever?) -> done |
| 218 | +# - Else -> add cust comp + add to SBOM (same as 1) |
| 219 | + |
192 | 220 | # Walk through each component in the SPDX file
|
| 221 | +package_count = 0 |
| 222 | +packages = {} |
193 | 223 | for package in document.packages:
|
| 224 | + package_count += 1 |
194 | 225 | # spdx-tools module says only name, spdx_id, download_location are required
|
195 | 226 | # We hope we'll have an external reference (pURL), but we might not.
|
196 | 227 | extref = None
|
| 228 | + purlmatch = False |
| 229 | + matchname = package.name |
| 230 | + matchver = package.version |
| 231 | + packages[package.name+package.version] = packages.get(package.name+package.version, 0) + 1 |
| 232 | + #blah['zzz'] = blah.get('zzz', 0) + 1 |
197 | 233 |
|
198 | 234 | # NOTE: BD can mangle the original component name
|
199 | 235 | # EX: "React" -> "React from Facebook"
|
200 | 236 | if package.external_references:
|
201 |
| - print(" Found external reference: " + package.external_references[0].locator) |
202 | 237 | extref = package.external_references[0].locator
|
203 | 238 |
|
204 |
| - # TODO lookup KB api here |
205 |
| - #/api/search/kb-components?filter=pURL:<name> |
206 |
| - |
207 |
| - # TODO: if lookup successful, next in loop |
208 |
| - # if <lookup KB API true>: |
209 |
| - # continue |
| 239 | + # KB lookup to check for pURL match |
| 240 | + params = { |
| 241 | + 'packageUrl': extref |
| 242 | + } |
| 243 | + for result in bd.get_items("/api/search/purl-components", params=params): |
| 244 | + # do we need to worry about more than 1 match? |
| 245 | + print(f"Found KB match for {extref}") |
| 246 | + purlmatch = True |
| 247 | + #pprint(result) |
| 248 | + # in this event, override the spdx name and use the known KB name |
| 249 | + # (any concern for version mangling??) |
| 250 | + if matchname != result['componentName']: |
| 251 | + print(f"updating {matchname} -> {result['componentName']}") |
| 252 | + matchname = result['componentName'] |
| 253 | + # Any match means we should already have it |
| 254 | + # But we will also check to see if the comp is in the BOM i guess |
210 | 255 | else:
|
211 | 256 | nopurl += 1
|
212 |
| - print(" No pURL found for component: ") |
| 257 | + print("No pURL found for component: ") |
213 | 258 | print(" " + package.name)
|
214 | 259 | print(" " + package.spdx_id)
|
215 | 260 | print(" " + package.version)
|
216 | 261 |
|
217 |
| - # Lookup existing SBOM for a match (just on name to start) |
218 |
| - # This is a fuzzy match (see "react" for an example) |
219 |
| - params = { |
220 |
| - 'q': [f"componentOrVersionName:{package.name}"] |
221 |
| - } |
222 |
| - |
223 |
| - # Search BOM for specific component name |
224 |
| - comps = bd.get_resource('components', version, params=params) |
225 |
| - # TODO investigate searching tag here |
226 |
| - have_match = False |
227 |
| - num_match = 0 |
228 |
| - for comp in comps: |
229 |
| - #pprint(bd.list_resources(comp)) |
230 |
| - #pprint(comp) |
231 |
| - # Check component name + version name |
232 |
| - if comp['componentVersionName'] == package.version: |
233 |
| - have_match = True |
234 |
| - num_match += 1 |
235 |
| - # TODO need to worry about multiple matches? |
236 |
| - break |
237 |
| - |
238 |
| - if have_match: |
| 262 | + if find_comp_in_bom(bd, matchname, matchver, version): |
239 | 263 | matches += 1
|
240 |
| - print("Found comp match in BOM: " + package.name) |
| 264 | + print(" Found comp match in BOM: " + matchname + matchver) |
241 | 265 | else:
|
242 | 266 | # TODO:
|
243 | 267 | # 1) check if in custom component list (system-wide)
|
244 | 268 | # 2) add if not there
|
245 | 269 | # 3) add to project BOM
|
246 | 270 | nomatch += 1
|
247 |
| - print("May need to add this custom comp: " + package.name) |
| 271 | + print(" Need to add custom comp: " + package.name) |
248 | 272 | comp_data = {
|
249 | 273 | "name": package.name,
|
250 | 274 | "spdx_id": package.spdx_id,
|
|
257 | 281 | json.dump(comps_out, outfile)
|
258 | 282 | outfile.close()
|
259 | 283 |
|
260 |
| -print("Stats: ") |
| 284 | +print("\nStats: ") |
| 285 | +print("------") |
| 286 | +print(f" SPDX packages processed: {package_count}") |
261 | 287 | print(f" Non matches: {nomatch}")
|
262 | 288 | print(f" Matches: {matches}")
|
263 | 289 | print(f" Packages missing purl: {nopurl}")
|
264 | 290 |
|
| 291 | +pprint(packages) |
| 292 | +print(f" {len(packages)} unique packages processed") |
265 | 293 | # Parsed SPDX package data looks like
|
266 | 294 | # Package(spdx_id='SPDXRef-Pkg-micromatch-4.0.2-30343',
|
267 | 295 | # name='micromatch',
|
|
0 commit comments