|
| 1 | +''' |
| 2 | +Created on August 15, 2023 |
| 3 | +@author: swright |
| 4 | +
|
| 5 | +##################### DISCLAIMER ########################## |
| 6 | +## This script was created for a specific purpose and ## |
| 7 | +## SHOULD NOT BE USED as a general purpose utility. ## |
| 8 | +## For general purpose utility use ## |
| 9 | +## /examples/client/generate_sbom.py ## |
| 10 | +########################################################### |
| 11 | +
|
| 12 | +Copyright (C) 2023 Synopsys, Inc. |
| 13 | +http://www.blackducksoftware.com/ |
| 14 | +
|
| 15 | +Licensed to the Apache Software Foundation (ASF) under one |
| 16 | +or more contributor license agreements. See the NOTICE file |
| 17 | +distributed with this work for additional information |
| 18 | +regarding copyright ownership. The ASF licenses this file |
| 19 | +to you under the Apache License, Version 2.0 (the |
| 20 | +"License"); you may not use this file except in compliance |
| 21 | +with the License. You may obtain a copy of the License at |
| 22 | +
|
| 23 | +http://www.apache.org/licenses/LICENSE-2.0 |
| 24 | +
|
| 25 | +Unless required by applicable law or agreed to in writing, |
| 26 | +software distributed under the License is distributed on an |
| 27 | +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 28 | +KIND, either express or implied. See the License for the |
| 29 | +specific language governing permissions and limitations |
| 30 | +under the License. |
| 31 | +
|
| 32 | +This script will parse a provided SPDX file and import the SBOM to the |
| 33 | +specified Project Name and Project Version. |
| 34 | +
|
| 35 | +Then it will search each component specified in the SPDX file to determine |
| 36 | +if the component was successfully imported. Any missing components will be |
| 37 | +added as a custom component and then added to the Project+Version's BOM. |
| 38 | +
|
| 39 | +Requirements |
| 40 | +
|
| 41 | +- python3 version 3.8 or newer recommended |
| 42 | +- the following packages are used by the script; blackduck and spdx_tools |
| 43 | + must be installed prior to use (the others ship with Python): |
| 44 | + argparse |
| 45 | + blackduck |
| 46 | + sys |
| 47 | + logging |
| 48 | + time |
| 49 | + json |
| 50 | + pprint |
| 51 | + spdx_tools |
| 52 | +
|
| 53 | +- Blackduck instance |
| 54 | +- API token with sufficient privileges to perform project version phase |
| 55 | + change. |
| 56 | +
|
| 57 | +Install python packages with the following command: |
| 58 | +
|
| 59 | + pip3 install blackduck spdx-tools |
| 60 | +
|
| 61 | +usage: parse_spdx.py [-h] --base-url BASE_URL --token-file TOKEN_FILE --spdx-file SPDX_FILE --out-file OUT_FILE --project PROJECT_NAME --version VERSION_NAME [--no-verify] |
| 62 | +
|
| 63 | +Parse SPDX file and verify if component names are in current SBOM for given project-version |
| 64 | +
|
| 65 | +optional arguments: |
| 66 | + -h, --help show this help message and exit |
| 67 | + --base-url BASE_URL Hub server URL e.g. https://your.blackduck.url |
| 68 | + --token-file TOKEN_FILE |
| 69 | + Access token file |
| 70 | + --spdx-file SPDX_FILE |
| 71 | + SPDX input file |
| 72 | + --out-file OUT_FILE Unmatched components file |
| 73 | + --project PROJECT_NAME |
| 74 | + Project that contains the BOM components |
| 75 | + --version VERSION_NAME |
| 76 | + Version that contains the BOM components |
| 77 | + --no-verify Disable TLS certificate verification |
| 78 | +
|
| 79 | +''' |
| 80 | + |
| 81 | +from blackduck import Client |
| 82 | +import argparse |
| 83 | +import sys |
| 84 | +import logging |
| 85 | +import time |
| 86 | +import json |
| 87 | +from pprint import pprint |
| 88 | +from spdx_tools.spdx.model.document import Document |
| 89 | +from spdx_tools.spdx.parser.error import SPDXParsingError |
| 90 | +from spdx_tools.spdx.parser.parse_anything import parse_file |
| 91 | + |
# Log INFO and above; each record carries a timestamp plus the module name
# and line number that emitted it.
logging.basicConfig(
    format="[%(asctime)s] {%(module)s:%(lineno)d} %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Command-line interface. Every option except --no-verify is mandatory.
parser = argparse.ArgumentParser(
    description="Parse SPDX file and verify if component names are in current SBOM for given project-version"
)

# (flag, attribute name on ``args``, help text) -- listed in --help order.
_REQUIRED_OPTIONS = (
    ("--base-url", "base_url", "Hub server URL e.g. https://your.blackduck.url"),
    ("--token-file", "token_file", "Access token file"),
    ("--spdx-file", "spdx_file", "SPDX input file"),
    ("--out-file", "out_file", "Unmatched components file"),
    ("--project", "project_name", "Project that contains the BOM components"),
    ("--version", "version_name", "Version that contains the BOM components"),
)
for _flag, _dest, _help in _REQUIRED_OPTIONS:
    parser.add_argument(_flag, dest=_dest, required=True, help=_help)

# store_false: args.verify is True unless --no-verify is passed.
parser.add_argument(
    "--no-verify",
    dest="verify",
    action="store_false",
    help="Disable TLS certificate verification",
)

args = parser.parse_args()
| 106 | + |
# Parse the SPDX file first: this can take a very long time, so a bad input
# is reported before the token file is read or the Black Duck client is built.
# parse_file() returns a Document on success and raises SPDXParsingError when
# the content cannot be parsed.
try:
    print("Reading SPDX file...")
    # perf_counter() measures elapsed wall-clock time, which is what the
    # message below reports; process_time() would only count CPU time.
    start = time.perf_counter()
    document: Document = parse_file(args.spdx_file)
    print(f"SPDX parsing took {time.perf_counter() - start} seconds")
except (SPDXParsingError, OSError):
    # OSError covers a missing or unreadable input file, so that case is
    # logged and exits with status 1 like a parse failure.
    logging.exception("Failed to parse spdx file")
    sys.exit(1)
| 117 | + |
# The access token is the first line of the token file, with surrounding
# whitespace (including the trailing newline) removed.
with open(args.token_file, 'r') as tf:
    access_token = tf.readline().strip()

bd = Client(base_url=args.base_url, token=access_token, verify=args.verify)

# Open unmatched component file
# Will save name, spdxid, version, and origin/purl (if available) like so:
#   "name": "react-bootstrap",
#   "spdx_id": "SPDXRef-Pkg-react-bootstrap-2.1.2-30223",
#   "version": "2.1.2",
#   "origin": null
# The file is opened before any BOM lookups, so an unwritable path aborts the
# run early. The handle stays open on purpose: it is written to and closed
# after all packages have been processed.
try:
    outfile = open(args.out_file, 'w')
except OSError:
    # Only file-system errors are handled here; a bare ``except`` would also
    # swallow KeyboardInterrupt and SystemExit.
    logging.exception("Failed to open file for writing: " + args.out_file)
    sys.exit(1)

# Saved component data to write to file
comps_out = []
| 136 | + |
# Fetch Project (can only have 1)
params = {
    'q': [f"name:{args.project_name}"]
}
# The results of the 'q' query are narrowed to exact name equality here
# (presumably because the server-side filter is not an exact match -- TODO confirm).
projects = [p for p in bd.get_resource('projects', params=params)
            if p['name'] == args.project_name]
# Explicit check instead of ``assert``: assertions are stripped under
# ``python -O``, which would let zero or multiple matches slip through.
if len(projects) != 1:
    logging.error(
        f"There should be one project named {args.project_name}. Found {len(projects)}"
    )
    sys.exit(1)
project = projects[0]

# Fetch Version (can only have 1)
params = {
    'q': [f"versionName:{args.version_name}"]
}
versions = [v for v in bd.get_resource('versions', project, params=params)
            if v['versionName'] == args.version_name]
if len(versions) != 1:
    logging.error(
        f"There should be 1 version named {args.version_name}. Found {len(versions)}"
    )
    sys.exit(1)
version = versions[0]

logging.debug(f"Found {project['name']}:{version['versionName']}")
| 158 | + |
| 159 | +# Can now access attributes from the parsed document |
| 160 | +# Note: The SPDX module renames tags slightly from the original json format. |
| 161 | +#print(f"Parsed document name: {document.creation_info.name}") |
| 162 | +#creators_as_str = ", ".join([creator.to_serialized_string() for creator in document.creation_info.creators]) |
| 163 | +#print(f"Created on {document.creation_info.created} by {creators_as_str}") |
| 164 | + |
| 165 | +# A test of multiple search params.... |
| 166 | +# this works, but it's an OR, not AND |
| 167 | +# - we'll find everything with name OR version match |
| 168 | +#name = "micromatch" |
| 169 | +#ver = "4.0.2" |
| 170 | +#params = { |
| 171 | +# 'q': [f"componentOrVersionName:{name}"], |
| 172 | +# 'q': [f"componentOrVersionName:{ver}"], |
| 173 | +#} |
| 174 | +#comps = bd.get_resource('components', version, params=params) |
| 175 | +#for comp in comps: |
| 176 | +# print(comp['componentName']) |
| 177 | + |
| 178 | +# A test of custom comp search params.... |
| 179 | +#name = "swrightcomp" |
| 180 | +#params = { |
| 181 | +# 'q': [f"componentOrVersionName:{name}"], |
| 182 | +#} |
| 183 | +#comps = bd.get_resource('components', version, params=params) |
| 184 | +#for comp in comps: |
| 185 | +# print(comp['componentName']) |
| 186 | +#quit() |
# Some fun stats to track as we go
matches = 0   # packages found in the BOM with the same version
nopurl = 0    # packages that carry no external reference at all
nomatch = 0   # packages not found in the BOM (written to the output file)

# Walk through each component in the SPDX file
for package in document.packages:
    # spdx-tools module says only name, spdx_id, download_location are required,
    # so package.version may be None and must never be concatenated to a str.
    # We hope we'll have an external reference (pURL), but we might not.
    extref = None

    # NOTE: BD can mangle the original component name
    # EX: "React" -> "React from Facebook"
    if package.external_references:
        # Only the first external reference is considered.
        extref = package.external_references[0].locator
        print(" Found external reference: " + extref)

        # TODO lookup KB api here
        #/api/search/kb-components?filter=pURL:<name>

        # TODO: if lookup successful, next in loop
        # if <lookup KB API true>:
        #     continue
    else:
        nopurl += 1
        print(" No pURL found for component: ")
        # f-strings so that a missing (None) version is printed, not a crash.
        print(f" {package.name}")
        print(f" {package.spdx_id}")
        print(f" {package.version}")

    # Lookup existing SBOM for a match (just on name to start)
    # This is a fuzzy match (see "react" for an example)
    params = {
        'q': [f"componentOrVersionName:{package.name}"]
    }

    # Search BOM for specific component name
    comps = bd.get_resource('components', version, params=params)
    # TODO investigate searching tag here
    # A hit requires the BOM entry's version to equal the SPDX version.
    # .get() tolerates BOM entries that have no 'componentVersionName' key;
    # such an entry only matches a package that has no version either.
    # any() stops at the first hit, like the explicit loop-and-break did.
    # TODO need to worry about multiple matches?
    have_match = any(
        comp.get('componentVersionName') == package.version
        for comp in comps
    )

    if have_match:
        matches += 1
        print("Found comp match in BOM: " + package.name)
    else:
        # TODO:
        # 1) check if in custom component list (system-wide)
        # 2) add if not there
        # 3) add to project BOM
        nomatch += 1
        print("May need to add this custom comp: " + package.name)
        comp_data = {
            "name": package.name,
            "spdx_id": package.spdx_id,
            "version": package.version,
            "origin": extref
        }
        comps_out.append(comp_data)
| 255 | + |
# Persist the unmatched components as a single JSON array, then release the
# output file handle that was opened earlier in the script.
outfile.write(json.dumps(comps_out))
outfile.close()

# Summary of the run: unmatched, matched, and reference-less package counts.
summary_lines = [
    "Stats: ",
    f" Non matches: {nomatch}",
    f" Matches: {matches}",
    f" Packages missing purl: {nopurl}",
]
print("\n".join(summary_lines))
| 264 | + |
| 265 | +# Parsed SPDX package data looks like |
| 266 | +# Package(spdx_id='SPDXRef-Pkg-micromatch-4.0.2-30343', |
| 267 | +# name='micromatch', |
| 268 | +# download_location=NOASSERTION, |
| 269 | +# version='4.0.2', |
| 270 | +# file_name=None, |
| 271 | +# supplier=None, |
| 272 | +# originator=None, |
| 273 | +# files_analyzed=True, |
| 274 | +# verification_code=PackageVerificationCode(value='600ce1a1b891b48a20a3d395e4714f854dc6ced4', |
| 275 | +# excluded_files=[]), |
| 276 | +# checksums=[], |
| 277 | +# homepage='https://www.npmjs.com/package/micromatch', |
| 278 | +# source_info=None, |
| 279 | +# license_concluded=LicenseSymbol('MIT', |
| 280 | +# is_exception=False), |
| 281 | +# license_info_from_files=[LicenseSymbol('Apache-2.0', |
| 282 | +# is_exception=False), |
| 283 | +# LicenseSymbol('BSD-2-Clause', |
| 284 | +# is_exception=False), |
| 285 | +# LicenseSymbol('ISC', |
| 286 | +# is_exception=False), |
| 287 | +# LicenseSymbol('JSON', |
| 288 | +# is_exception=False), |
| 289 | +# LicenseSymbol('LicenseRef-Historical-Permission-Notice-and-Disclaimer---sell-variant', |
| 290 | +# is_exception=False), |
| 291 | +# LicenseSymbol('LicenseRef-MIT-Open-Group-variant', |
| 292 | +# is_exception=False)], |
| 293 | +# license_declared=LicenseSymbol('MIT', |
| 294 | +# is_exception=False), |
| 295 | +# license_comment=None, |
| 296 | +# copyright_text=NOASSERTION, |
| 297 | +# summary=None, |
| 298 | +# description=None, |
| 299 | +# comment=None, |
| 300 | +# external_references=[ExternalPackageRef(category=<ExternalPackageRefCategory.PACKAGE_MANAGER: 2>, |
| 301 | +# reference_type='purl', |
| 302 | +# locator='pkg:npm/micromatch@4.0.2', |
| 303 | +# comment=None)], |
| 304 | +# attribution_texts=[], |
| 305 | +# primary_package_purpose=None, |
| 306 | +# release_date=None, |
| 307 | +# built_date=None, |
| 308 | +# valid_until_date=None) |
0 commit comments