Skip to content

Commit 1f8c22d

Browse files
author
Shane Wright
committed
Initial code for SPDX import/parse tool
1 parent aff6c6a commit 1f8c22d

File tree

1 file changed

+308
-0
lines changed

1 file changed

+308
-0
lines changed

examples/client/parse_spdx.py

Lines changed: 308 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,308 @@
1+
'''
2+
Created on August 15, 2023
3+
@author: swright
4+
5+
##################### DISCLAIMER ##########################
6+
## This script was created for a specific purpose and ##
7+
## SHOULD NOT BE USED as a general purpose utility. ##
8+
## For general purpose utility use ##
9+
## /examples/client/generate_sbom.py ##
10+
###########################################################
11+
12+
Copyright (C) 2023 Synopsys, Inc.
13+
http://www.blackducksoftware.com/
14+
15+
Licensed to the Apache Software Foundation (ASF) under one
16+
or more contributor license agreements. See the NOTICE file
17+
distributed with this work for additional information
18+
regarding copyright ownership. The ASF licenses this file
19+
to you under the Apache License, Version 2.0 (the
20+
"License"); you may not use this file except in compliance
21+
with the License. You may obtain a copy of the License at
22+
23+
http://www.apache.org/licenses/LICENSE-2.0
24+
25+
Unless required by applicable law or agreed to in writing,
26+
software distributed under the License is distributed on an
27+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
28+
KIND, either express or implied. See the License for the
29+
specific language governing permissions and limitations
30+
under the License.
31+
32+
This script will parse a provided SPDX file and import the SBOM to the
33+
specified Project Name and Project Version.
34+
35+
Then it will search each component specified in the SPDX file to determine
36+
if the component was successfully imported. Any missing components will be
37+
added as a custom component and then added to the Project+Version's BOM.
38+
39+
Requirements
40+
41+
- python3 version 3.8 or newer recommended
42+
- the following packages are used by the script and should be installed
43+
prior to use:
44+
argparse
45+
blackduck
46+
sys
47+
logging
48+
time
49+
json
50+
pprint
51+
spdx_tools
52+
53+
- Blackduck instance
54+
- API token with sufficient privileges to perform project version phase
55+
change.
56+
57+
Install python packages with the following command:
58+
59+
pip3 install blackduck spdx-tools
60+
61+
usage: parse_spdx.py [-h] --base-url BASE_URL --token-file TOKEN_FILE --spdx-file SPDX_FILE --out-file OUT_FILE --project PROJECT_NAME --version VERSION_NAME [--no-verify]
62+
63+
Parse SPDX file and verify if component names are in current SBOM for given project-version
64+
65+
optional arguments:
66+
-h, --help show this help message and exit
67+
--base-url BASE_URL Hub server URL e.g. https://your.blackduck.url
68+
--token-file TOKEN_FILE
69+
Access token file
70+
--spdx-file SPDX_FILE
71+
SPDX input file
72+
--out-file OUT_FILE Unmatched components file
73+
--project PROJECT_NAME
74+
Project that contains the BOM components
75+
--version VERSION_NAME
76+
Version that contains the BOM components
77+
--no-verify Disable TLS certificate verification
78+
79+
'''
80+
81+
from blackduck import Client
82+
import argparse
83+
import sys
84+
import logging
85+
import time
86+
import json
87+
from pprint import pprint
88+
from spdx_tools.spdx.model.document import Document
89+
from spdx_tools.spdx.parser.error import SPDXParsingError
90+
from spdx_tools.spdx.parser.parse_anything import parse_file
91+
92+
# Root logger: INFO and above, each record stamped with time, module and line.
logging.basicConfig(
    format="[%(asctime)s] {%(module)s:%(lineno)d} %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Command-line interface. Every value-taking option is mandatory; the only
# optional switch is --no-verify, which stores False into args.verify.
parser = argparse.ArgumentParser(
    description="Parse SPDX file and verify if component names are in current SBOM for given project-version"
)
for _flag, _dest, _help in (
    ("--base-url", "base_url", "Hub server URL e.g. https://your.blackduck.url"),
    ("--token-file", "token_file", "Access token file"),
    ("--spdx-file", "spdx_file", "SPDX input file"),
    ("--out-file", "out_file", "Unmatched components file"),
    ("--project", "project_name", "Project that contains the BOM components"),
    ("--version", "version_name", "Version that contains the BOM components"),
):
    parser.add_argument(_flag, dest=_dest, required=True, help=_help)
parser.add_argument(
    "--no-verify",
    dest="verify",
    action="store_false",
    help="Disable TLS certificate verification",
)
args = parser.parse_args()
106+
107+
# Parse SPDX file. This can take a very long time, so do this first.
# Returns a Document object on success, otherwise raises an SPDXParsingError
print("Reading SPDX file...")
# perf_counter() measures elapsed time; process_time() counts CPU time only
# and would leave out any time the parser spends waiting on file I/O.
start = time.perf_counter()
try:
    document: Document = parse_file(args.spdx_file)
except SPDXParsingError:
    logging.exception("Failed to parse spdx file")
    sys.exit(1)
print(f"SPDX parsing took {time.perf_counter() - start} seconds")
117+
118+
# The access token is taken from the first line of the token file, with
# surrounding whitespace (including the trailing newline) removed.
with open(args.token_file, 'r') as token_handle:
    first_line = token_handle.readline()
access_token = first_line.strip()

# Client used for every server request below.
bd = Client(
    base_url=args.base_url,
    token=access_token,
    verify=args.verify,
)
122+
123+
# Open unmatched component file
# Will save name, spdxid, version, and origin/purl (if available) like so:
#     "name": "react-bootstrap",
#     "spdx_id": "SPDXRef-Pkg-react-bootstrap-2.1.2-30223",
#     "version": "2.1.2",
#     "origin": null
try:
    outfile = open(args.out_file, 'w')
except OSError:
    # Catch only I/O failures: a bare except would also intercept
    # KeyboardInterrupt and SystemExit.
    logging.exception("Failed to open file for writing: %s", args.out_file)
    sys.exit(1)

# Saved component data to write to file
comps_out = []
136+
137+
def _require_single(candidates, kind, name):
    """Return the only element of *candidates*, or log an error and exit.

    candidates -- list of resources already filtered to an exact name match
    kind       -- label used in the error message ("project" or "version")
    name       -- the name that was searched for

    Exits the script with status 1 unless exactly one candidate is present.
    An explicit check is used instead of ``assert`` because assertions are
    removed when Python runs with -O, which would let an empty or ambiguous
    result slip through.
    """
    if len(candidates) != 1:
        logging.error("There should be 1 %s named %s. Found %d",
                      kind, name, len(candidates))
        sys.exit(1)
    return candidates[0]


# Fetch Project (can only have 1)
# The query results are re-filtered locally for an exact name match.
params = {
    'q': [f"name:{args.project_name}"]
}
projects = [p for p in bd.get_resource('projects', params=params)
            if p['name'] == args.project_name]
project = _require_single(projects, "project", args.project_name)

# Fetch Version (can only have 1)
# Same approach: query within the project, then keep exact matches only.
params = {
    'q': [f"versionName:{args.version_name}"]
}
versions = [v for v in bd.get_resource('versions', project, params=params)
            if v['versionName'] == args.version_name]
version = _require_single(versions, "version", args.version_name)

logging.debug("Found %s:%s", project['name'], version['versionName'])
158+
159+
# Can now access attributes from the parsed document
160+
# Note: The SPDX module renames tags slightly from the original json format.
161+
#print(f"Parsed document name: {document.creation_info.name}")
162+
#creators_as_str = ", ".join([creator.to_serialized_string() for creator in document.creation_info.creators])
163+
#print(f"Created on {document.creation_info.created} by {creators_as_str}")
164+
165+
# A test of multiple search params....
166+
# this works, but it's an OR, not AND
167+
# - we'll find everything with name OR version match
168+
#name = "micromatch"
169+
#ver = "4.0.2"
170+
#params = {
171+
# 'q': [f"componentOrVersionName:{name}"],
172+
# 'q': [f"componentOrVersionName:{ver}"],
173+
#}
174+
#comps = bd.get_resource('components', version, params=params)
175+
#for comp in comps:
176+
# print(comp['componentName'])
177+
178+
# A test of custom comp search params....
179+
#name = "swrightcomp"
180+
#params = {
181+
# 'q': [f"componentOrVersionName:{name}"],
182+
#}
183+
#comps = bd.get_resource('components', version, params=params)
184+
#for comp in comps:
185+
# print(comp['componentName'])
186+
#quit()
187+
# Some fun stats to track as we go
matches = 0   # packages found in the BOM with the same version
nopurl = 0    # packages that carry no external reference
nomatch = 0   # packages not found in the BOM

# Walk through each component in the SPDX file
for package in document.packages:
    # spdx-tools module says only name, spdx_id, download_location are required
    # We hope we'll have an external reference (pURL), but we might not.
    extref = None

    # NOTE: BD can mangle the original component name
    # EX: "React" -> "React from Facebook"
    if package.external_references:
        # Only the first external reference is considered.
        extref = package.external_references[0].locator
        print(" Found external reference: " + extref)

        # TODO lookup KB api here
        #/api/search/kb-components?filter=pURL:<name>

        # TODO: if lookup successful, next in loop
        # if <lookup KB API true>:
        #     continue
    else:
        nopurl += 1
        print(" No pURL found for component: ")
        print(" " + package.name)
        print(" " + package.spdx_id)
        # version is optional and may be None, so format it instead of
        # concatenating (str + None raises TypeError).
        print(f" {package.version}")

    # Lookup existing SBOM for a match (just on name to start)
    # This is a fuzzy match (see "react" for an example)
    params = {
        'q': [f"componentOrVersionName:{package.name}"]
    }

    # Search BOM for specific component name
    comps = bd.get_resource('components', version, params=params)
    # TODO investigate searching tag here
    # Check component name + version name. The name was matched by the query
    # above; here only the version is compared. any() stops at the first hit.
    # .get() is used so an entry without 'componentVersionName' is treated as
    # a non-match instead of raising KeyError.
    # TODO need to worry about multiple matches?
    have_match = any(
        comp.get('componentVersionName') == package.version
        for comp in comps
    )

    if have_match:
        matches += 1
        print("Found comp match in BOM: " + package.name)
    else:
        # TODO:
        # 1) check if in custom component list (system-wide)
        # 2) add if not there
        # 3) add to project BOM
        nomatch += 1
        print("May need to add this custom comp: " + package.name)
        comp_data = {
            "name": package.name,
            "spdx_id": package.spdx_id,
            "version": package.version,
            "origin": extref
        }
        comps_out.append(comp_data)
255+
256+
# Save unmatched components: serialize the collected list as JSON into the
# already-open output file, then release the handle.
outfile.write(json.dumps(comps_out))
outfile.close()

# Final tally of what was seen while walking the SPDX packages.
summary_lines = [
    "Stats: ",
    f" Non matches: {nomatch}",
    f" Matches: {matches}",
    f" Packages missing purl: {nopurl}",
]
print("\n".join(summary_lines))
264+
265+
# Parsed SPDX package data looks like
266+
# Package(spdx_id='SPDXRef-Pkg-micromatch-4.0.2-30343',
267+
# name='micromatch',
268+
# download_location=NOASSERTION,
269+
# version='4.0.2',
270+
# file_name=None,
271+
# supplier=None,
272+
# originator=None,
273+
# files_analyzed=True,
274+
# verification_code=PackageVerificationCode(value='600ce1a1b891b48a20a3d395e4714f854dc6ced4',
275+
# excluded_files=[]),
276+
# checksums=[],
277+
# homepage='https://www.npmjs.com/package/micromatch',
278+
# source_info=None,
279+
# license_concluded=LicenseSymbol('MIT',
280+
# is_exception=False),
281+
# license_info_from_files=[LicenseSymbol('Apache-2.0',
282+
# is_exception=False),
283+
# LicenseSymbol('BSD-2-Clause',
284+
# is_exception=False),
285+
# LicenseSymbol('ISC',
286+
# is_exception=False),
287+
# LicenseSymbol('JSON',
288+
# is_exception=False),
289+
# LicenseSymbol('LicenseRef-Historical-Permission-Notice-and-Disclaimer---sell-variant',
290+
# is_exception=False),
291+
# LicenseSymbol('LicenseRef-MIT-Open-Group-variant',
292+
# is_exception=False)],
293+
# license_declared=LicenseSymbol('MIT',
294+
# is_exception=False),
295+
# license_comment=None,
296+
# copyright_text=NOASSERTION,
297+
# summary=None,
298+
# description=None,
299+
# comment=None,
300+
# external_references=[ExternalPackageRef(category=<ExternalPackageRefCategory.PACKAGE_MANAGER: 2>,
301+
# reference_type='purl',
302+
# locator='pkg:npm/micromatch@4.0.2',
303+
# comment=None)],
304+
# attribution_texts=[],
305+
# primary_package_purpose=None,
306+
# release_date=None,
307+
# built_date=None,
308+
# valid_until_date=None)

0 commit comments

Comments
 (0)