Skip to content

Commit 63a24ea

Browse files
author
Glenn Snyder
committed
adding license string search results, if present
1 parent b36d96e commit 63a24ea

File tree

3 files changed

+213
-16
lines changed

3 files changed

+213
-16
lines changed

examples/convert_bom_component_origin_info_to_csv.py

Lines changed: 64 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,17 @@
99
parser = argparse.ArgumentParser("Process the JSON output from get_bom_component_origin_info.py to create CSV output format")
1010
parser.add_argument("-f", "--origin_info", help="By default, program reads JSON doc from stdin, but you can alternatively give a file name")
1111
parser.add_argument("-u", "--un_matched_files", action="store_true", help="Include un-matched files in the output")
12-
parser.add_argument("-l", "--file_level_license", action="store_true", help="Include file level license data, if present")
13-
parser.add_argument("-c", "--file_level_copyright", action="store_true", help="Include file level copyright data, if present")
12+
parser.add_argument("-l", "--file_level_license", action="store_true", help="Include file level license data (aka deep license data from the Black Duck KB), if present")
13+
parser.add_argument("-c", "--file_level_copyright", action="store_true", help="Include file level copyright data (aka copyright data from the Black Duck KB), if present")
14+
parser.add_argument("-s", "--string_search", action="store_true", help="Include any licenses found via string search (i.e. --detect.blackduck.signature.scanner.license.search==true")
15+
parser.add_argument("-a", "--all", action="store_true", help="Shortcut for including everything (i.e. all of it)")
1416
parser.add_argument("output_file")
1517

1618
args = parser.parse_args()
1719

20+
if args.all:
21+
args.un_matched_files = args.file_level_license = args.file_level_copyright = args.string_search = True
22+
1823
logging.basicConfig(format='%(asctime)s%(levelname)s:%(message)s', stream=sys.stderr, level=logging.DEBUG)
1924
logging.getLogger("requests").setLevel(logging.WARNING)
2025
logging.getLogger("urllib3").setLevel(logging.WARNING)
@@ -35,17 +40,22 @@
3540
'source',
3641
'origin(s)',
3742
'origin_id(s)',
38-
'copyright'
43+
'copyright',
44+
'match type(s)',
45+
'codelocation'
3946
]
4047
writer = csv.DictWriter(csv_file, fieldnames=columns)
4148
writer.writeheader()
4249

4350
for component, component_info in origin_info.items():
44-
if component == 'un_matched_files':
45-
# ignore, skip un_matched_files
51+
if component in ['un_matched_files', 'license_search_results']:
52+
# ignore, skip un_matched_files and license search results
53+
# since they are not components but other sections of the JSON doc
54+
#
4655
continue
4756
logging.debug(f"Writing info for {component}")
4857
for matched_file_info in component_info.get('matched_files', []):
58+
# import pdb; pdb.set_trace()
4959
row = {
5060
'component': component,
5161
'file path': matched_file_info['filePath']['path'],
@@ -57,6 +67,8 @@
5767
'origin(s)': ",".join([o['externalNamespace'] for o in component_info['bom_component_info']['origins']]),
5868
'origin_id(s)': ",".join([o.get('externalId', "") for o in component_info['bom_component_info']['origins']]),
5969
'copyright': None,
70+
'match type(s)': ",".join(component_info['bom_component_info'].get('matchTypes', [])),
71+
'codelocation': matched_file_info['scan']['name'],
6072
}
6173
writer.writerow(row)
6274

@@ -75,7 +87,9 @@
7587
'source': 'KB',
7688
'origin(s)': origin.get('originName'),
7789
'origin_id(s)': origin.get('originId'),
78-
'copyright': None
90+
'copyright': None,
91+
'match type(s)': 'From KB',
92+
'codelocation': None,
7993
}
8094
writer.writerow(row)
8195

@@ -93,11 +107,13 @@
93107
'origin(s)': origin.get('originName'),
94108
'origin_id(s)': origin.get('originId'),
95109
'copyright': copyright['matchData'].replace('\n', ''),
110+
'match type(s)': 'From KB',
111+
'codelocation': None
96112
}
97113
writer.writerow(row)
98114

99115
if args.un_matched_files:
100-
for un_matched_file in origin_info.get('un_matched_files'):
116+
for un_matched_file in origin_info.get('un_matched_files', []):
101117
uri = urllib.parse.unquote(un_matched_file['uri'])
102118
parsed = urllib.parse.urlparse(uri)
103119
if parsed.scheme == 'zip':
@@ -115,13 +131,50 @@
115131

116132
row = {
117133
'component': None,
118-
'component modified': None,
119134
'file path': file_path,
120135
'file name': file_name,
121136
'archive context': archive_context,
122137
'usage(s)': None,
123138
'license(s)': None,
124-
'match type(s)': "Un-matched/Un-identified",
125-
'scan (code location)': un_matched_file.get('scan', {}).get('name', 'unknown')
139+
'source': 'customers source',
140+
'origin(s)': None,
141+
'origin_id(s)': None,
142+
'copyright': None,
143+
'match type(s)': 'Not matched (un-identified)',
144+
'codelocation': None
126145
}
127-
writer.writerow(row)
146+
writer.writerow(row)
147+
148+
if args.string_search:
149+
for codelocation, codelocation_info in origin_info.get("license_search_results", {}).items():
150+
for scan in codelocation_info.get("scans", []):
151+
for file_bom_entry in scan.get("file_bom_entries", []):
152+
row = {
153+
'component': None,
154+
'file path': file_bom_entry.get('uri'),
155+
'file name': file_bom_entry.get('name'),
156+
'archive context': file_bom_entry.get('compositePath', {}).get('archiveContext'),
157+
'usage(s)': None,
158+
'license(s)': None,
159+
'source': 'customers source',
160+
'origin(s)': None,
161+
'origin_id(s)': None,
162+
'copyright': None,
163+
'match type(s)': 'License Search',
164+
'codelocation': codelocation
165+
}
166+
writer.writerow(row)
167+
168+
169+
170+
171+
172+
173+
174+
175+
176+
177+
178+
179+
180+

examples/get_bom_component_origin_info.py

Lines changed: 68 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,21 @@
88
from blackduck.HubRestApi import HubInstance
99

1010

11-
parser = argparse.ArgumentParser("Retreive BOM component license information for the given project and version")
11+
parser = argparse.ArgumentParser("Retreive BOM component origin information, and additional information, for the given project and version")
1212
parser.add_argument("project_name")
1313
parser.add_argument("version")
14-
parser.add_argument("-l", "--deep_license_info", action="store_true")
15-
parser.add_argument("-c", "--copyright_info", action="store_true")
16-
parser.add_argument("-m", "--matched_files", action="store_true")
17-
parser.add_argument("-u", "--un_matched_files", action="store_true")
14+
parser.add_argument("-l", "--deep_license_info", action="store_true", help="Include deep license (aka embedded license) information from the Black Duck KB for (KB) components in the BOM")
15+
parser.add_argument("-c", "--copyright_info", action="store_true", help="Include copyright info from the Black Duck KB for (KB) components in the BOM")
16+
parser.add_argument("-m", "--matched_files", action="store_true", help="Include a list of the matched (aka identified) files and the components they belong to.")
17+
parser.add_argument("-u", "--un_matched_files", action="store_true", help="Include a list of un-matched (un-identified) files")
18+
parser.add_argument("-s", "--string_search", action="store_true", help="Include any licenses found via string search (i.e. --detect.blackduck.signature.scanner.license.search==true")
19+
parser.add_argument("-a", "--all", action="store_true", help="Shortcut for including everything (i.e. all of it)")
1820

1921

2022
args = parser.parse_args()
2123

24+
if args.all:
25+
args.deep_license_info = args.copyright_info = args.matched_files = args.un_matched_files = args.string_search = True
2226

2327
hub = HubInstance()
2428

@@ -120,4 +124,63 @@
120124
'un_matched_files': un_matched_files
121125
})
122126

127+
if args.string_search:
    #
    # Gathering the information on additional licenses/files identified using the
    # string search features requires we iterate over all the file system scans
    # to retrieve any additional licenses/files "discovered" (i.e. in GUI they are
    # displayed as "discoveries")
    #
    version_id = version['_meta']['href'].split("/")[-1]
    codelocations_url = hub.get_link(version, "codelocations")
    codelocations = hub.execute_get(codelocations_url).json().get('items', [])

    # Number of file-bom-entries requested per call; results are paged so that
    # licenses referenced by more files than this are not silently truncated.
    page_size = 100

    # All the results are stored here, keyed by code location name. Each value
    # holds the code location document plus, per file system scan, the license
    # search summary and the files found to reference those licenses.
    license_search_results = {}

    for codeloc in codelocations:
        scans_url = hub.get_link(codeloc, "scans")
        scans = hub.execute_get(scans_url).json().get('items', [])

        # TODO: Do I need to trim to the latest FS scan? Leaving it as list for now
        fs_scans = [s for s in scans if s.get('scanType') == "FS"]

        all_scans = []
        for fs_scan in fs_scans:
            scan_id = fs_scan['_meta']['href'].split("/")[-1]
            lic_summary_url = version['_meta']['href'] + f"/scans/{scan_id}/license-search-summary"
            # The summary request is sent with a wildcard Accept header.
            custom_headers = {'Accept': '*/*'}
            lic_search_summary = hub.execute_get(
                lic_summary_url, custom_headers=custom_headers).json().get('items', [])

            file_bom_entries = []
            for license_d in lic_search_summary:
                logging.debug(
                    "Getting %s files where %s was referenced.",
                    license_d['fileCount'], license_d['licenseName'])

                # Page through the entries for this license.
                # NOTE(review): assumes this internal endpoint honours offset/limit
                # and may report 'totalCount' like other list endpoints - confirm.
                offset = 0
                while True:
                    file_bom_entries_url = hub.get_apibase() + (
                        f"/internal/releases/{version_id}/scans/{scan_id}/nodes/0/file-bom-entries"
                        f"?offset={offset}&limit={page_size}&sort=&allDescendants=true"
                        f"&filter=stringSearchLicense:{license_d['vsl']}"
                    )
                    response = hub.execute_get(file_bom_entries_url).json()
                    page = response.get('items', [])
                    file_bom_entries.extend(page)
                    offset += len(page)

                    # Stop on a short (or empty) page, or once the reported total
                    # has been read; fall back to the summary's fileCount so the
                    # loop always terminates.
                    total = response.get('totalCount')
                    upper_bound = total if total is not None else license_d['fileCount']
                    if len(page) < page_size or offset >= upper_bound:
                        break

            all_scans.append({
                'scan_info': fs_scan,
                'lic_search_summary': lic_search_summary,
                'file_bom_entries': file_bom_entries
            })

        license_search_results[codeloc['name']] = {
            'codeloc_info': codeloc,
            'scans': all_scans
        }

    all_origin_info.update({
        'license_search_results': license_search_results
    })
185+
123186
print(json.dumps(all_origin_info))
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#!/usr/bin/env python
2+
3+
import argparse
4+
import json
5+
import logging
6+
import sys
7+
8+
from blackduck.HubRestApi import HubInstance
9+
10+
11+
# The first positional parameter of ArgumentParser is `prog`, so the help
# sentence has to be passed explicitly as `description`; otherwise it replaces
# the program name in the usage line.
parser = argparse.ArgumentParser(
    description="Retrieve license search results, i.e. --detect.blackduck.signature.scanner.license.search=true")
parser.add_argument("project_name")
parser.add_argument("version")

args = parser.parse_args()

logging.basicConfig(format='%(asctime)s:%(levelname)s:%(message)s', stream=sys.stderr, level=logging.DEBUG)
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)

# Number of file-bom-entries requested per call.
FILE_BOM_ENTRIES_PAGE_SIZE = 100


def _get_file_bom_entries(hub, version_id, scan_id, license_d, page_size=FILE_BOM_ENTRIES_PAGE_SIZE):
    """Return every file-bom-entry of a scan that references one license.

    Results are requested page by page so that licenses referenced by more
    than ``page_size`` files are not silently truncated.

    NOTE(review): assumes this internal endpoint honours ``offset``/``limit``
    and may report ``totalCount`` like other list endpoints - confirm.

    :param hub: object providing ``get_apibase()`` and ``execute_get(url)``
    :param version_id: id taken from the end of the project version href
    :param scan_id: id taken from the end of the file system scan href
    :param license_d: one item of the license search summary; its ``vsl`` and
        ``fileCount`` values are read
    :param page_size: number of entries requested per call
    :return: list of the ``items`` collected from all pages
    """
    entries = []
    offset = 0
    while True:
        url = hub.get_apibase() + (
            f"/internal/releases/{version_id}/scans/{scan_id}/nodes/0/file-bom-entries"
            f"?offset={offset}&limit={page_size}&sort=&allDescendants=true"
            f"&filter=stringSearchLicense:{license_d['vsl']}"
        )
        response = hub.execute_get(url).json()
        page = response.get('items', [])
        entries.extend(page)
        offset += len(page)

        # Stop on a short (or empty) page, or once the reported total has been
        # read; fall back to the summary's fileCount so the loop always ends.
        total = response.get('totalCount')
        upper_bound = total if total is not None else license_d['fileCount']
        if len(page) < page_size or offset >= upper_bound:
            return entries


hub = HubInstance()

project = hub.get_project_by_name(args.project_name)
version = hub.get_version_by_name(project, args.version)

version_id = version['_meta']['href'].split("/")[-1]

codelocations_url = hub.get_link(version, "codelocations")
codelocations = hub.execute_get(codelocations_url).json().get('items', [])

# All the results are stored here, keyed by code location name. Each value
# holds the code location document plus, per file system scan, the license
# search summary and the files found to reference those licenses.
license_search_results = {}

for codeloc in codelocations:
    scans_url = hub.get_link(codeloc, "scans")
    scans = hub.execute_get(scans_url).json().get('items', [])

    # TODO: Do I need to trim to the latest FS scan? Leaving it as list for now
    fs_scans = [s for s in scans if s.get('scanType') == "FS"]

    all_scans = []
    for fs_scan in fs_scans:
        scan_id = fs_scan['_meta']['href'].split("/")[-1]
        lic_summary_url = version['_meta']['href'] + f"/scans/{scan_id}/license-search-summary"
        # The summary request is sent with a wildcard Accept header.
        custom_headers = {'Accept': '*/*'}
        lic_search_summary = hub.execute_get(
            lic_summary_url, custom_headers=custom_headers).json().get('items', [])

        file_bom_entries = []
        for license_d in lic_search_summary:
            logging.debug(
                "Getting %s files where %s was referenced.",
                license_d['fileCount'], license_d['licenseName'])
            file_bom_entries.extend(
                _get_file_bom_entries(hub, version_id, scan_id, license_d))

        all_scans.append({
            'scan_info': fs_scan,
            'lic_search_summary': lic_search_summary,
            'file_bom_entries': file_bom_entries
        })

    license_search_results[codeloc['name']] = {
        'codeloc_info': codeloc,
        'scans': all_scans
    }

print(json.dumps(license_search_results))
76+
77+
78+
79+
80+
81+

0 commit comments

Comments
 (0)