Skip to content

Commit c807197

Browse files
author
Glenn Snyder
committed
adding code to demonstrate how to retrieve file-level license data, copyright data, and convert the info into CSV format
1 parent 8743370 commit c807197

File tree

2 files changed

+171
-22
lines changed

2 files changed

+171
-22
lines changed
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import argparse
2+
import csv
3+
import logging
4+
import json
5+
import os.path
6+
import sys
7+
import urllib.parse
8+
9+
# Column order of the generated CSV; also used as the DictWriter field names.
CSV_COLUMNS = [
    'component',
    'component modified',
    'file path',
    'file name',
    'archive context',
    'usage(s)',
    'license(s)',
    'match type(s)',
    'scan (code location)',
]


def parse_args(argv=None):
    """Parse the command line.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        The populated ``argparse.Namespace`` with ``origin_info``,
        ``un_matched_files`` and ``output_file`` attributes.
    """
    # The text is a description, not a program name: passing it positionally
    # would set ``prog`` and garble the usage line.
    parser = argparse.ArgumentParser(
        description="Process the JSON output from get_bom_component_origin_info.py to create CSV output format")
    parser.add_argument("-f", "--origin_info", help="By default, program reads JSON doc from stdin, but you can alternatively give a file name")
    parser.add_argument("-u", "--un_matched_files", action="store_true", help="Include un-matched files in the output")
    parser.add_argument("output_file")
    return parser.parse_args(argv)


def load_origin_info(path=None):
    """Load the origin-info JSON document.

    Args:
        path: File to read; when falsy the document is read from stdin.

    Returns:
        The decoded JSON document.
    """
    if path:
        # Context manager so the input handle is closed deterministically.
        with open(path, 'r') as json_file:
            return json.load(json_file)
    return json.load(sys.stdin)


def matched_file_rows(component, component_info):
    """Build one CSV row per matched file of a single BOM component.

    Args:
        component: Component name, i.e. the key in the origin-info document.
        component_info: The value stored under that key.

    Returns:
        A list of dicts keyed by ``CSV_COLUMNS`` (empty when the component
        has no ``matched_files`` entry).

    Raises:
        KeyError: If a matched file or the BOM component info lacks one of
            the fields that are read unconditionally below.
    """
    matched_files = component_info.get('matched_files', [])
    if not matched_files:
        # Nothing to report; do not touch 'bom_component_info' at all.
        return []

    # These values are identical for every file of the component, so they
    # are computed once instead of once per row.
    bom_info = component_info['bom_component_info']
    component_modified = bom_info.get('componentModified', None)
    licenses = ",".join(lic['licenseDisplay'] for lic in bom_info['licenses'])
    match_types = ",".join(bom_info['matchTypes'])

    rows = []
    for matched_file_info in matched_files:
        file_path_info = matched_file_info['filePath']
        rows.append({
            'component': component,
            'component modified': component_modified,
            'file path': file_path_info['path'],
            'file name': file_path_info['fileName'],
            'archive context': file_path_info['archiveContext'],
            'usage(s)': ",".join(matched_file_info['usages']),
            'license(s)': licenses,
            'match type(s)': match_types,
            'scan (code location)': matched_file_info.get('scan', {}).get('name', 'unknown'),
        })
    return rows


def split_unmatched_uri(uri):
    """Split an un-matched file URI into its CSV location fields.

    The URI is parsed *before* percent-decoding, so an encoded ``#`` or ``?``
    inside a file name is not mistaken for a fragment or query delimiter.

    Args:
        uri: The ``uri`` value of an un-matched file entry.

    Returns:
        A ``(file_path, file_name, archive_context)`` tuple. For ``zip`` URIs
        the fragment is the file path and the URI path is the archive
        context; for ``file`` URIs there is no archive context; any other
        scheme yields placeholder strings.
    """
    parsed = urllib.parse.urlparse(uri)
    if parsed.scheme == 'zip':
        file_path = urllib.parse.unquote(parsed.fragment)
        return file_path, os.path.basename(file_path), urllib.parse.unquote(parsed.path)
    if parsed.scheme == 'file':
        file_path = urllib.parse.unquote(parsed.path)
        return file_path, os.path.basename(file_path), None
    return "unrecognized", "unrecognized", "unrecognized scheme"


def unmatched_file_row(un_matched_file):
    """Build the CSV row for one un-matched file entry.

    Args:
        un_matched_file: Dict with a ``uri`` key and an optional ``scan`` dict.

    Returns:
        A dict keyed by ``CSV_COLUMNS``.
    """
    file_path, file_name, archive_context = split_unmatched_uri(un_matched_file['uri'])
    return {
        'component': None,
        'component modified': None,
        'file path': file_path,
        'file name': file_name,
        'archive context': archive_context,
        'usage(s)': None,
        'license(s)': None,
        'match type(s)': "Un-matched/Un-identified",
        'scan (code location)': un_matched_file.get('scan', {}).get('name', 'unknown'),
    }


def write_csv(origin_info, csv_file, include_un_matched_files=False):
    """Write the origin info as CSV to an open text file.

    Args:
        origin_info: Decoded JSON document mapping component names to their
            info, plus an optional ``un_matched_files`` list.
        csv_file: Writable text file object (open real files with
            ``newline=''``).
        include_un_matched_files: Also emit one row per un-matched file.
    """
    writer = csv.DictWriter(csv_file, fieldnames=CSV_COLUMNS)
    writer.writeheader()

    for component, component_info in origin_info.items():
        if component == 'un_matched_files':
            # Not a component; handled separately below.
            continue
        logging.debug(f"Writing info for {component}")
        writer.writerows(matched_file_rows(component, component_info))

    if include_un_matched_files:
        # 'or []' covers both a missing key and an explicit null, so asking
        # for un-matched files on a document without them is not an error.
        for un_matched_file in origin_info.get('un_matched_files') or []:
            writer.writerow(unmatched_file_row(un_matched_file))


def main(argv=None):
    """Command-line entry point: read the JSON document, write the CSV file."""
    args = parse_args(argv)

    logging.basicConfig(format='%(asctime)s%(levelname)s:%(message)s', stream=sys.stderr, level=logging.DEBUG)
    logging.getLogger("requests").setLevel(logging.WARNING)
    logging.getLogger("urllib3").setLevel(logging.WARNING)

    origin_info = load_origin_info(args.origin_info)

    # newline='' is required by the csv module; without it Windows output
    # gets an extra blank line after every row.
    with open(args.output_file, 'w', newline='') as csv_file:
        write_csv(origin_info, csv_file, args.un_matched_files)


if __name__ == "__main__":
    main()

examples/get_bom_component_origin_info.py

Lines changed: 84 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@
1111
parser = argparse.ArgumentParser("Retreive BOM component license information for the given project and version")
1212
parser.add_argument("project_name")
1313
parser.add_argument("version")
14+
parser.add_argument("-l", "--deep_license_info", action="store_true")
15+
parser.add_argument("-c", "--copyright_info", action="store_true")
16+
parser.add_argument("-m", "--matched_files", action="store_true")
17+
parser.add_argument("-u", "--un_matched_files", action="store_true")
18+
1419

1520
args = parser.parse_args()
1621

@@ -20,7 +25,7 @@
2025
project = hub.get_project_by_name(args.project_name)
2126
version = hub.get_version_by_name(project, args.version)
2227

23-
bom_components = hub.get_version_components(version)
28+
bom_components = hub.get_version_components(version).get('items', [])
2429

2530
all_origins = dict()
2631

@@ -30,32 +35,89 @@
3035

3136
all_origin_info = {}
3237

33-
for bom_component in bom_components['items']:
34-
component_url = bom_component['component']
35-
response = hub.execute_get(component_url)
38+
scan_cache = {}
39+
40+
for bom_component in bom_components:
41+
if 'componentVersionName' in bom_component:
42+
bom_component_name = f"{bom_component['componentName']}:{bom_component['componentVersionName']}"
43+
else:
44+
bom_component_name = f"{bom_component['componentName']}"
3645

3746
# Component details include the home page url and additional home pages
38-
logging.debug("Retrieving component home page info for {}:{}".format(
39-
bom_component['componentName'], bom_component['componentVersionName']))
40-
component_details = None
41-
if response.status_code == 200:
42-
component_details = response.json()
47+
component_url = bom_component['component']
48+
component_details = hub.execute_get(component_url).json()
4349

50+
#
51+
# Grab origin info, file-level license info, and file-level copyright info
52+
#
53+
all_origin_details = list()
4454
for origin in bom_component.get('origins', []):
45-
logging.debug("Retrieving origin details for origin {}".format(origin['name']))
55+
logging.debug(f"Retrieving origin details for {bom_component_name} and origin {origin['name']}")
4656
origin_url = hub.get_link(origin, 'origin')
47-
response = hub.execute_get(origin_url)
48-
origin_details = None
49-
if response.status_code == 200:
50-
origin_details = response.json()
51-
52-
all_origin_info.update({
53-
"{}:{}".format(bom_component['componentName'], bom_component['componentVersionName']): {
54-
"component_details": component_details,
55-
"component_home_page": component_details.get("url"),
56-
"additional_home_pages": component_details.get("additionalHomepages"),
57-
"origin_details": origin_details,
58-
}
57+
origin_details = hub.execute_get(origin_url).json()
58+
59+
#
60+
# Add deep license info and copyright info, as appropriate
61+
#
62+
info_to_get = []
63+
if args.deep_license_info:
64+
info_to_get.extend([
65+
("file-licenses", "file_licenses"),
66+
("file-licenses-fuzzy", "file_licenses_fuzzy")
67+
])
68+
69+
if args.copyright_info:
70+
info_to_get.extend([
71+
("file-copyrights", "file_copyrights"),
72+
("component-origin-copyrights", "component_origin_copyrights")
73+
])
74+
for link_t in info_to_get:
75+
link_name = link_t[0]
76+
k = link_t[1]
77+
logging.debug(f"Retrieving {link_name} for {bom_component_name}")
78+
url = hub.get_link(origin_details, link_name)
79+
info = hub.execute_get(url).json().get('items', [])
80+
origin_details[k] = info
81+
82+
all_origin_details.append(origin_details)
83+
84+
all_origin_info.update({
85+
bom_component_name: {
86+
"bom_component_info": bom_component,
87+
"component_details": component_details,
88+
"component_home_page": component_details.get("url"),
89+
"additional_home_pages": component_details.get("additionalHomepages"),
90+
"all_origin_details": all_origin_details,
91+
}
92+
})
93+
94+
if args.matched_files:
95+
logging.debug(f"Retrieving matched files for {bom_component_name}")
96+
matched_files_url = hub.get_link(bom_component, "matched-files") + "?limit=99999"
97+
matched_files = hub.execute_get(matched_files_url).json().get('items', [])
98+
# Get scan info
99+
for matched_file in matched_files:
100+
scan_url = hub.get_link(matched_file, "codelocations")
101+
if scan_url in scan_cache:
102+
scan = scan_cache[scan_url]
103+
else:
104+
scan = hub.execute_get(scan_url).json()
105+
scan_cache[scan_url] = scan
106+
matched_file['scan'] = scan
107+
all_origin_info[bom_component_name].update({
108+
'matched_files': matched_files
59109
})
60110

111+
if args.un_matched_files:
112+
# TODO: Probably need to loop on this with smaller page sizes to handle very large
113+
# project-versions with many (signature) scans mapped to it
114+
#
115+
logging.debug(f"Retrieving un-matched files for project {project['name']}, version {version['versionName']}")
116+
un_matched_files_url = f"{version['_meta']['href']}/matched-files?limit=99999&filter=bomMatchType:unmatched"
117+
un_matched_files = hub.execute_get(un_matched_files_url).json().get('items', [])
118+
logging.debug(f"Adding {len(un_matched_files)} un-matched files to the output")
119+
all_origin_info.update({
120+
'un_matched_files': un_matched_files
121+
})
122+
61123
print(json.dumps(all_origin_info))

0 commit comments

Comments
 (0)