
Commit 770cc97

Author: Murat Kumykov
Merge branch 'master' into custom_signatures
2 parents: bd3489b + fe02bc7

File tree: 5 files changed (+309, -8 lines)


blackduck/HubRestApi.py

Lines changed: 9 additions & 2 deletions
@@ -1391,10 +1391,17 @@ def get_components(self, limit=100, parameters={}):
         response = self.execute_get(url, custom_headers=custom_headers)
         return response.json()
 
-    def search_components(self, search_str, limit=100, parameters={}):
+    def search_components(self, search_str_or_query, limit=100, parameters={}):
         if limit:
             parameters.update({'limit':limit})
-        url = self.get_apibase() + "/search/components?q=name:{}".format(urllib.parse.quote(search_str))
+        if search_str_or_query.startswith("q="):
+            # allow caller to override original behavior with their own query
+            query = search_str_or_query
+        else:
+            # maintain original, somewhat flawed behavior
+            query = "q=name:{}".format(search_str_or_query)
+        parm_str = self._get_parameter_string(parameters)
+        url = self.get_apibase() + "/search/components{}&{}".format(parm_str, query)
         response = self.execute_get(url)
         return response.json()
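For reference, a minimal usage sketch of the updated search_components (not part of this commit). It assumes HubInstance() can authenticate from a local .restconfig.json and that the Hub responds with the usual 'items'/'searchResultStatistics' structure seen in the example scripts below; the custom query string mirrors the one used in the commercial-components example.

from blackduck.HubRestApi import HubInstance

hub = HubInstance()  # assumes credentials are picked up from a local .restconfig.json

# Original behavior, unchanged: a plain string is wrapped as a name query (q=name:<string>)
by_name = hub.search_components("django")

# New behavior: a string starting with "q=" is passed through verbatim as the search query,
# so callers can supply their own query terms and filters
by_query = hub.search_components("q=has_commercial:true&filter=has_cves:true",
                                 parameters={'offset': 0})

print(by_name['items'][0]['searchResultStatistics']['numResultsFound'])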

blackduck/__version__.py

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
-VERSION = (0, 0, 46)
+VERSION = (0, 0, 49)
 
 __version__ = '.'.join(map(str, VERSION))
Lines changed: 140 additions & 0 deletions
@@ -0,0 +1,140 @@
#!/usr/bin/env python

'''
Created on Mar 29, 2019

@author: gsnyder

Retrieve components marked as commercial and having CVE's

Warning: This program is single-threaded to minimize the load on the system so it can take
a very long time to run.

'''

import argparse
import csv
from datetime import datetime
import json
import logging
import sys

from blackduck.HubRestApi import HubInstance

parser = argparse.ArgumentParser("Find components marked as commercial, and with vulnerabilities, in the Black Duck KB and write them to an Excel file, one row per vulnerability")
parser.add_argument("-f", "--file", default="has_commercial_components.csv", help="The output file name (default: has_commercial_components.csv) to use when capturing all the components marked commercial from the Black Duck KB that have vulnerabilities")
parser.add_argument("-l", "--limit", type=int, default=100, help="The number of components to return with each call to the REST API (default: 100)")
parser.add_argument("-t", "--total", type=int, default=99999, help="The total number of components to retrieve")
args = parser.parse_args()

logging.basicConfig(format='%(asctime)s:%(levelname)s:%(message)s', stream=sys.stderr, level=logging.DEBUG)
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)

hub = HubInstance()

components_url = hub.get_apibase() + "/search/components"

offset = 0
total_hits = 0

# loop to page through the results from the KB until there are none left
while total_hits < args.total:
    logging.debug("Retrieving components with has_commercial=true AND has_cves=true from offset {}, limit {}".format(
        offset, args.limit))
    find_commercial_url = components_url + "?q=has_commercial:true&filter=has_cves:true&limit={}&offset={}".format(
        args.limit, offset)

    logging.debug("Executing GET on {}".format(find_commercial_url))
    results = hub.execute_get(find_commercial_url).json().get('items', [])

    if results:
        offset += args.limit
        hits = results[0]['hits']
        total_hits += len(hits)
        logging.debug("Found {} hits, total hits now {}".format(len(hits), total_hits))

        rows = []
        for hit in hits:
            number_versions = int(hit['fields']['release_count'][0])
            component_name = hit['fields']['name'][0]
            component_url = hit['component']
            component_description = hit['fields']['description'][0]
            if number_versions < 1000:
                component = hub.execute_get(hit['component']).json()
                versions_url = hub.get_link(component, "versions")
                versions = hub.execute_get(versions_url).json().get('items', [])
                logging.debug("Found {} versions for component {}".format(len(versions), component['name']))
                for version in versions:
                    version_name = version['versionName']
                    version_url = version['_meta']['href']
                    vuln_url = hub.get_link(version, "vulnerabilities")
                    vulns = hub.execute_get(vuln_url).json().get('items', [])
                    logging.debug("Found {} vulnerabilities for version {}".format(
                        len(vulns), version_name))
                    for vuln in vulns:
                        logging.debug("Adding {}".format(vuln['name']))
                        row_data = {
                            "Component Name": component_name,
                            "Component URL": component_url,
                            "Description": component_description,
                            "Version": version_name,
                            "Version URL": version_url,
                            "Vuln": vuln['name'],
                            "Vulnerability URL": vuln['_meta']['href'],
                            "Vuln Description": vuln['description'],
                            "Vuln Severity": vuln['severity'],
                            "Vuln CWE URL": hub.get_link(vuln, "cwes"),
                            "Vuln Published Date": vuln['publishedDate'],
                            "Vuln Updated Date": vuln['updatedDate'],
                        }

                        #
                        # Expand CVSS2 and CVSS3 data into separate columns so they can be used (in Excel)
                        # to filter, sort, etc
                        #
                        cvss2 = {}
                        if 'temporalMetrics' in vuln['cvss2']:
                            # expand temporal metrics
                            cvss2_temporal_metrics = {"cvss2_temporal_"+k: v for (k, v) in vuln['cvss2']['temporalMetrics'].items()}
                            cvss2.update(cvss2_temporal_metrics)
                            # remove the redundant info
                            del vuln['cvss2']['temporalMetrics']
                        cvss2.update({"cvss2_"+k: str(v) for (k, v) in vuln['cvss2'].items()})
                        row_data.update(cvss2)

                        cvss3 = {}
                        if 'cvss3' in vuln:
                            if 'temporalMetrics' in vuln['cvss3']:
                                # expand temporal metrics
                                cvss3_temporal_metrics = {"cvss3_temporal_"+k: v for (k, v) in vuln['cvss3']['temporalMetrics'].items()}
                                cvss3.update(cvss3_temporal_metrics)
                                # remove the redundant info
                                del vuln['cvss3']['temporalMetrics']
                            cvss3 = {"cvss3_"+k: str(v) for (k, v) in vuln['cvss3'].items()}
                            row_data.update(cvss3)
                        rows.append(row_data)

        if len(hits) < args.limit:
            # at the end?
            logging.debug("Looks like we are at the end, breaking loop")
            break
    else:
        logging.debug("No results, exiting loop")
        break

logging.debug("Saving {} hits to has_commercial_components.csv".format(total_hits))
all_columns = set()
for row in rows:
    all_columns = all_columns.union(row.keys())

# Relying on spelling of keys/column names to put them into a 'nice' order
# when they are written out to CSV using DictWriter
all_columns = sorted(all_columns)

with open(args.file, "w", newline="") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=all_columns)
    writer.writeheader()
    for row in rows:
        writer.writerow(row)
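The CVSS flattening above is the piece most likely to be reused elsewhere, so here is a standalone sketch of the same idea (not part of the commit). The field names and values (baseScore, exploitability) are made up purely for illustration; real KB responses may use different keys.

# Standalone sketch of the CVSS column-expansion idea: temporal metrics are flattened
# into prefixed keys so each one becomes its own CSV column.
vuln = {
    'cvss2': {'baseScore': 7.5, 'temporalMetrics': {'exploitability': 'HIGH'}},  # made-up sample data
}

row_data = {"Component Name": "example-component"}   # illustrative row

cvss2 = {}
if 'temporalMetrics' in vuln['cvss2']:
    cvss2.update({"cvss2_temporal_" + k: v for k, v in vuln['cvss2']['temporalMetrics'].items()})
    del vuln['cvss2']['temporalMetrics']   # remove the redundant nested info
cvss2.update({"cvss2_" + k: str(v) for k, v in vuln['cvss2'].items()})
row_data.update(cvss2)

print(row_data)
# {'Component Name': 'example-component', 'cvss2_temporal_exploitability': 'HIGH', 'cvss2_baseScore': '7.5'}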
Lines changed: 128 additions & 0 deletions
@@ -0,0 +1,128 @@
#!/usr/bin/env python

import argparse
import json
import logging
import math
import sys
from pathlib import Path
from urllib.parse import urlparse

from blackduck.HubRestApi import HubInstance


parser = argparse.ArgumentParser("Given the URL to a Github repository, find it in the Black Duck KB and return any info available")
parser.add_argument("github_url")
parser.add_argument("-l", "--limit", type=int, default=100, help="The number of components to return with each call to the REST API (default: 100)")
args = parser.parse_args()

logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', stream=sys.stderr, level=logging.DEBUG)
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)

def match_urls(input_url, url_from_kb):
    '''Try matching variations of the input URL against the url from the Black Duck KB,
    e.g. add a '/' or add '.git' or check http versus https
    '''
    if input_url.startswith("https:"):
        check_urls = [input_url, input_url.replace("https:", "http:")]
    elif input_url.startswith("http:"):
        check_urls = [input_url, input_url.replace("http:", "https:")]
    else:
        raise Exception("Unsupported scheme {}".format(urlparse(input_url).scheme))

    all_checks = []
    for check_url in check_urls:
        with_slash_added = check_url + "/" == url_from_kb
        with_dot_git_added = check_url + ".git" == url_from_kb
        exact = check_url == url_from_kb
        all_checks.extend([with_slash_added, with_dot_git_added, exact])

    return any(all_checks)

def has_github_link(links):
    http = any(["http://github.com" in l for l in links])
    https = any(["https://github.com" in l for l in links])
    return (http or https)

hub = HubInstance()

#
# Get the repo name which, for now, is defined as the last "part" in the pathname to the repo
# The repo name will be used to search the Black Duck KB, e.g.
#   https://github.com/django/django --> django becomes the search keyword
#
repo_name = Path(urlparse(args.github_url).path).parts[-1]

possible_matches = []
exact_match = None

first = True
num_results_found = math.inf
offset = 0
total_hits = 0

#
# Loop on the search results (aka hits) accumulating any possible matches
# and stop if/when you find an exact match.
#
# An exact match is a component from the initial search that has the exact URL provided
#
while offset < num_results_found:
    logging.debug(f"Searching for {repo_name}, offset {offset}, limit {args.limit}")
    parameters = {'limit': args.limit, 'offset': offset}

    search_results = hub.search_components(
        search_str_or_query=f"q={repo_name}",
        parameters=parameters)

    if first:
        first = False
        num_results_found = search_results['items'][0]['searchResultStatistics']['numResultsFound']
        logging.debug(f"numResultsFound: {num_results_found}")

    for hit in search_results['items'][0].get('hits', []):
        number_versions = int(hit['fields']['release_count'][0])
        component_name = hit['fields']['name'][0]
        component_url = hit['component']
        component_description = hit['fields']['description'][0]
        links = hit['fields'].get('links', [])

        total_hits += 1

        if component_name.lower() == repo_name.lower() and has_github_link(links):
            component_info = {
                'component_name': component_name,
                'component_url': component_url,
                'component_description': component_description,
                'links': links,
                'number_versions': number_versions
            }
            logging.info(f"Found one possible match in {component_name}")
            possible_matches.append(component_info)
            matched_urls = [l for l in links if match_urls(args.github_url, l)]

            if matched_urls:
                logging.debug(f"Found the following matched URLS: {matched_urls}")
                exact_match = component_info
                logging.debug("Found an exact match, breaking loop")
                break  # breaks from for-loop

    if exact_match:
        break  # breaks from while-loop

    offset += args.limit

logging.debug(f"Found {len(possible_matches)} components that could be matches after looking at {total_hits} components found in the search results")

if exact_match:
    logging.info("Found an exact match")
else:
    logging.warning("Did not find any exact match")

summary = {
    'possible_matches': possible_matches,
    'exact_match': exact_match
}
print(json.dumps(summary))
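A small standalone sketch (not from the commit) of the two ideas this script relies on: deriving the search keyword from the GitHub URL and tolerating trailing-slash, ".git", and http/https variations when comparing against a KB link. The url_variants helper and the sample KB link are hypothetical, written only for this illustration.

from pathlib import Path
from urllib.parse import urlparse

github_url = "https://github.com/django/django"   # illustrative input only
repo_name = Path(urlparse(github_url).path).parts[-1]
print(repo_name)   # "django" becomes the search keyword

def url_variants(url):
    # hypothetical helper: mirror match_urls() by trying the http/https swap plus
    # the bare URL, a trailing "/", and a trailing ".git"
    other = url.replace("https:", "http:") if url.startswith("https:") else url.replace("http:", "https:")
    return {u + suffix for u in (url, other) for suffix in ("", "/", ".git")}

kb_link = "http://github.com/django/django.git"   # made-up KB link for the example
print(kb_link in url_variants(github_url))         # True -> would count as an exact match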

examples/search_components.py

Lines changed: 31 additions & 5 deletions
@@ -3,14 +3,15 @@
 import argparse
 import json
 import logging
+import math
 import sys
 
 from blackduck.HubRestApi import HubInstance
 
 parser = argparse.ArgumentParser("A program that uses the Black Duck search API to find components...and other things")
 parser.add_argument("search_str")
-
-parser.add_argument("-l", "--loglevel", choices=["CRITICAL", "DEBUG", "ERROR", "INFO", "WARNING"], default="DEBUG", help="Choose the desired logging level - CRITICAL, DEBUG, ERROR, INFO, or WARNING. (default: DEBUG)")
+parser.add_argument("-l", "--limit", type=int, default=100, help="Set the per call limit on number of results")
+parser.add_argument("--loglevel", choices=["CRITICAL", "DEBUG", "ERROR", "INFO", "WARNING"], default="DEBUG", help="Choose the desired logging level - CRITICAL, DEBUG, ERROR, INFO, or WARNING. (default: DEBUG)")
 
 args = parser.parse_args()
 
@@ -21,12 +22,37 @@
     'INFO': logging.INFO,
     'WARNING': logging.WARNING,
 }
-logging.basicConfig(stream=sys.stdout, format='%(threadName)s: %(asctime)s: %(levelname)s: %(message)s', level=logging_levels[args.loglevel])
+logging.basicConfig(stream=sys.stderr, format='%(threadName)s: %(asctime)s: %(levelname)s: %(message)s', level=logging_levels[args.loglevel])
 logging.getLogger("requests").setLevel(logging.WARNING)
 logging.getLogger("urllib3").setLevel(logging.WARNING)
 
 hub = HubInstance()
 
+offset = 0
+
+hits = []
+num_results_found = math.inf
+first = True
+
+while offset < num_results_found:
+    parameters = {
+        'limit': args.limit,
+        'offset': offset,
+    }
+    search_results = hub.search_components(args.search_str, parameters=parameters)
+    if first:
+        first = False
+        num_results_found = search_results['items'][0]['searchResultStatistics']['numResultsFound']
+        logging.debug(f"Search ({args.search_str}) numResultsFound: {num_results_found}")
+
+    hits.extend(search_results['items'][0]['hits'])
+    num_results_this_page = search_results['items'][0]['searchResultStatistics']['numResultsInThisPage']
+
+    offset += args.limit
+    logging.debug(f"Retrieved {num_results_this_page} out of {num_results_found}, setting offset to {offset}")
+
+logging.debug(f"Retrieved a total of {len(hits)} hits")
+print(json.dumps(hits))
+
+
 
-search_results = hub.search_components(args.search_str)
-print(json.dumps(search_results))
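A possible follow-on sketch (not part of the commit) showing one way to consume the hits the script now prints. It assumes stdout was redirected to a file named hits.json; both the filename and the invocation in the comment are assumptions, while the hit field layout matches what the script itself reads.

import json
from collections import Counter

# hits.json is an assumed filename, e.g. produced by:
#   python examples/search_components.py curl > hits.json
with open("hits.json") as f:
    hits = json.load(f)

# tally hits per component name using the same 'fields'/'name' layout the script reads
names = Counter(hit['fields']['name'][0] for hit in hits)
print(names.most_common(10))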
