
Commit 770cc97

Author: Murat Kumykov
Merge branch 'master' into custom_signatures
2 parents: bd3489b + fe02bc7

File tree: 5 files changed (+309, -8 lines)


blackduck/HubRestApi.py

Lines changed: 9 additions & 2 deletions
@@ -1391,10 +1391,17 @@ def get_components(self, limit=100, parameters={}):
         response = self.execute_get(url, custom_headers=custom_headers)
         return response.json()
 
-    def search_components(self, search_str, limit=100, parameters={}):
+    def search_components(self, search_str_or_query, limit=100, parameters={}):
         if limit:
             parameters.update({'limit':limit})
-        url = self.get_apibase() + "/search/components?q=name:{}".format(urllib.parse.quote(search_str))
+        if search_str_or_query.startswith("q="):
+            # allow caller to override original behavior with their own query
+            query = search_str_or_query
+        else:
+            # maintain original, somewhat flawed behavior
+            query = "q=name:{}".format(search_str_or_query)
+        parm_str = self._get_parameter_string(parameters)
+        url = self.get_apibase() + "/search/components{}&{}".format(parm_str, query)
         response = self.execute_get(url)
         return response.json()
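For reference, a minimal usage sketch of the updated search_components (not part of this commit). It assumes HubInstance() can authenticate from a local .restconfig.json and that the Hub responds with the usual 'items'/'searchResultStatistics' structure seen in the example scripts below; the custom query string mirrors the one used in the commercial-components example.

from blackduck.HubRestApi import HubInstance

hub = HubInstance()  # assumes credentials are picked up from a local .restconfig.json

# Original behavior, unchanged: a plain string is wrapped as a name query (q=name:<string>)
by_name = hub.search_components("django")

# New behavior: a string starting with "q=" is passed through verbatim as the search query,
# so callers can supply their own query terms and filters
by_query = hub.search_components("q=has_commercial:true&filter=has_cves:true",
                                 parameters={'offset': 0})

print(by_name['items'][0]['searchResultStatistics']['numResultsFound'])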

blackduck/__version__.py

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
-VERSION = (0, 0, 46)
+VERSION = (0, 0, 49)
 
 __version__ = '.'.join(map(str, VERSION))
Lines changed: 140 additions & 0 deletions
@@ -0,0 +1,140 @@
#!/usr/bin/env python

'''
Created on Mar 29, 2019

@author: gsnyder

Retrieve components marked as commercial and having CVE's

Warning: This program is single-threaded to minimize the load on the system so it can take
a very long time to run.

'''

import argparse
import csv
from datetime import datetime
import json
import logging
import sys

from blackduck.HubRestApi import HubInstance

parser = argparse.ArgumentParser("Find components marked as commercial, and with vulnerabilities, in the Black Duck KB and write them to an Excel file, one row per vulnerability")
parser.add_argument("-f", "--file", default="has_commercial_components.csv", help="The output file name (default: has_commercial_components.csv) to use when capturing all the components marked commercial from the Black Duck KB that have vulnerabilities")
parser.add_argument("-l", "--limit", type=int, default=100, help="The number of components to return with each call to the REST API (default: 100)")
parser.add_argument("-t", "--total", type=int, default=99999, help="The total number of components to retrieve")
args = parser.parse_args()

logging.basicConfig(format='%(asctime)s:%(levelname)s:%(message)s', stream=sys.stderr, level=logging.DEBUG)
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)

hub = HubInstance()

components_url = hub.get_apibase() + "/search/components"

offset = 0
total_hits = 0

# loop to page through the results from the KB until there are none left
while total_hits < args.total:
    logging.debug("Retrieving components with has_commercial=true AND has_cves=true from offset {}, limit {}".format(
        offset, args.limit))
    find_commercial_url = components_url + "?q=has_commercial:true&filter=has_cves:true&limit={}&offset={}".format(
        args.limit, offset)

    logging.debug("Executing GET on {}".format(find_commercial_url))
    results = hub.execute_get(find_commercial_url).json().get('items', [])

    if results:
        offset += args.limit
        hits = results[0]['hits']
        total_hits += len(hits)
        logging.debug("Found {} hits, total hits now {}".format(len(hits), total_hits))

        rows = []
        for hit in hits:
            number_versions = int(hit['fields']['release_count'][0])
            component_name = hit['fields']['name'][0]
            component_url = hit['component']
            component_description = hit['fields']['description'][0]
            if number_versions < 1000:
                component = hub.execute_get(hit['component']).json()
                versions_url = hub.get_link(component, "versions")
                versions = hub.execute_get(versions_url).json().get('items', [])
                logging.debug("Found {} versions for component {}".format(len(versions), component['name']))
                for version in versions:
                    version_name = version['versionName']
                    version_url = version['_meta']['href']
                    vuln_url = hub.get_link(version, "vulnerabilities")
                    vulns = hub.execute_get(vuln_url).json().get('items', [])
                    logging.debug("Found {} vulnerabilities for version {}".format(
                        len(vulns), version_name))
                    for vuln in vulns:
                        logging.debug("Adding {}".format(vuln['name']))
                        row_data = {
                            "Component Name": component_name,
                            "Component URL": component_url,
                            "Description": component_description,
                            "Version": version_name,
                            "Version URL": version_url,
                            "Vuln": vuln['name'],
                            "Vulnerability URL": vuln['_meta']['href'],
                            "Vuln Description": vuln['description'],
                            "Vuln Severity": vuln['severity'],
                            "Vuln CWE URL": hub.get_link(vuln, "cwes"),
                            "Vuln Published Date": vuln['publishedDate'],
                            "Vuln Updated Date": vuln['updatedDate'],
                        }

                        #
                        # Expand CVSS2 and CVSS3 data into separate columns so they can be used (in Excel)
                        # to filter, sort, etc
                        #
                        cvss2 = {}
                        if 'temporalMetrics' in vuln['cvss2']:
                            # expand temporal metrics
                            cvss2_temporal_metrics = {"cvss2_temporal_"+k: v for (k, v) in vuln['cvss2']['temporalMetrics'].items()}
                            cvss2.update(cvss2_temporal_metrics)
                            # remove the redundant info
                            del vuln['cvss2']['temporalMetrics']
                        cvss2.update({"cvss2_"+k: str(v) for (k, v) in vuln['cvss2'].items()})
                        row_data.update(cvss2)

                        cvss3 = {}
                        if 'cvss3' in vuln:
                            if 'temporalMetrics' in vuln['cvss3']:
                                # expand temporal metrics
                                cvss3_temporal_metrics = {"cvss3_temporal_"+k: v for (k, v) in vuln['cvss3']['temporalMetrics'].items()}
                                cvss3.update(cvss3_temporal_metrics)
                                # remove the redundant info
                                del vuln['cvss3']['temporalMetrics']
                            cvss3 = {"cvss3_"+k: str(v) for (k, v) in vuln['cvss3'].items()}
                            row_data.update(cvss3)
                        rows.append(row_data)

        if len(hits) < args.limit:
            # at the end?
            logging.debug("Looks like we are at the end, breaking loop")
            break
    else:
        logging.debug("No results, exiting loop")
        break

logging.debug("Saving {} hits to has_commercial_components.csv".format(total_hits))
all_columns = set()
for row in rows:
    all_columns = all_columns.union(row.keys())

# Relying on spelling of keys/column names to put them into a 'nice' order
# when they are written out to CSV using DictWriter
all_columns = sorted(all_columns)

with open(args.file, "w", newline="") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=all_columns)
    writer.writeheader()
    for row in rows:
        writer.writerow(row)
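The CVSS flattening above is the piece most likely to be reused elsewhere, so here is a standalone sketch of the same idea (not part of the commit). The field names and values (baseScore, exploitability) are made up purely for illustration; real KB responses may use different keys.

# Standalone sketch of the CVSS column-expansion idea: temporal metrics are flattened
# into prefixed keys so each one becomes its own CSV column.
vuln = {
    'cvss2': {'baseScore': 7.5, 'temporalMetrics': {'exploitability': 'HIGH'}},  # made-up sample data
}

row_data = {"Component Name": "example-component"}   # illustrative row

cvss2 = {}
if 'temporalMetrics' in vuln['cvss2']:
    cvss2.update({"cvss2_temporal_" + k: v for k, v in vuln['cvss2']['temporalMetrics'].items()})
    del vuln['cvss2']['temporalMetrics']   # remove the redundant nested info
cvss2.update({"cvss2_" + k: str(v) for k, v in vuln['cvss2'].items()})
row_data.update(cvss2)

print(row_data)
# {'Component Name': 'example-component', 'cvss2_temporal_exploitability': 'HIGH', 'cvss2_baseScore': '7.5'}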
Lines changed: 128 additions & 0 deletions
@@ -0,0 +1,128 @@
#!/usr/bin/env python

import argparse
import json
import logging
import math
import sys
from pathlib import Path
from urllib.parse import urlparse

from blackduck.HubRestApi import HubInstance


parser = argparse.ArgumentParser("Given the URL to a Github repository, find it in the Black Duck KB and return any info available")
parser.add_argument("github_url")
parser.add_argument("-l", "--limit", type=int, default=100, help="The number of components to return with each call to the REST API (default: 100)")
args = parser.parse_args()

logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', stream=sys.stderr, level=logging.DEBUG)
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)

def match_urls(input_url, url_from_kb):
    '''Try matching variations of the input URL against the url from the Black Duck KB,
    e.g. add a '/' or add '.git' or check http versus https
    '''
    if input_url.startswith("https:"):
        check_urls = [input_url, input_url.replace("https:", "http:")]
    elif input_url.startswith("http:"):
        check_urls = [input_url, input_url.replace("http:", "https:")]
    else:
        raise Exception("Unsupported scheme {}".format(urlparse(input_url).scheme))

    all_checks = []
    for check_url in check_urls:
        with_slash_added = check_url + "/" == url_from_kb
        with_dot_git_added = check_url + ".git" == url_from_kb
        exact = check_url == url_from_kb
        all_checks.extend([with_slash_added, with_dot_git_added, exact])

    return any(all_checks)

def has_github_link(links):
    http = any(["http://github.com" in l for l in links])
    https = any(["https://github.com" in l for l in links])
    return (http or https)

hub = HubInstance()

#
# Get the repo name which, for now, is defined as the last "part" in the pathname to the repo
# The repo name will be used to search the Black Duck KB, e.g.
#   https://github.com/django/django --> django becomes the search keyword
#
repo_name = Path(urlparse(args.github_url).path).parts[-1]

possible_matches = []
exact_match = None

first = True
num_results_found = math.inf
offset = 0
total_hits = 0

#
# Loop on the search results (aka hits) accumulating any possible matches
# and stop if/when you find an exact match.
#
# An exact match is a component from the initial search that has the exact URL provided
#
while offset < num_results_found:
    logging.debug(f"Searching for {repo_name}, offset {offset}, limit {args.limit}")
    parameters = {'limit': args.limit, 'offset': offset}

    search_results = hub.search_components(
        search_str_or_query=f"q={repo_name}",
        parameters=parameters)

    if first:
        first = False
        num_results_found = search_results['items'][0]['searchResultStatistics']['numResultsFound']
        logging.debug(f"numResultsFound: {num_results_found}")

    for hit in search_results['items'][0].get('hits', []):
        number_versions = int(hit['fields']['release_count'][0])
        component_name = hit['fields']['name'][0]
        component_url = hit['component']
        component_description = hit['fields']['description'][0]
        links = hit['fields'].get('links', [])

        total_hits += 1

        if component_name.lower() == repo_name.lower() and has_github_link(links):
            component_info = {
                'component_name': component_name,
                'component_url': component_url,
                'component_description': component_description,
                'links': links,
                'number_versions': number_versions
            }
            logging.info(f"Found one possible match in {component_name}")
            possible_matches.append(component_info)
            matched_urls = [l for l in links if match_urls(args.github_url, l)]

            if matched_urls:
                logging.debug(f"Found the following matched URLS: {matched_urls}")
                exact_match = component_info
                logging.debug("Found an exact match, breaking loop")
                break  # breaks from for-loop

    if exact_match:
        break  # breaks from while-loop

    offset += args.limit

logging.debug(f"Found {len(possible_matches)} components that could be matches after looking at {total_hits} components found in the search results")

if exact_match:
    logging.info("Found an exact match")
else:
    logging.warning("Did not find any exact match")

summary = {
    'possible_matches': possible_matches,
    'exact_match': exact_match
}
print(json.dumps(summary))
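A small standalone sketch (not from the commit) of the two ideas this script relies on: deriving the search keyword from the GitHub URL and tolerating trailing-slash, ".git", and http/https variations when comparing against a KB link. The url_variants helper and the sample KB link are hypothetical, written only for this illustration.

from pathlib import Path
from urllib.parse import urlparse

github_url = "https://github.com/django/django"   # illustrative input only
repo_name = Path(urlparse(github_url).path).parts[-1]
print(repo_name)   # "django" becomes the search keyword

def url_variants(url):
    # hypothetical helper: mirror match_urls() by trying the http/https swap plus
    # the bare URL, a trailing "/", and a trailing ".git"
    other = url.replace("https:", "http:") if url.startswith("https:") else url.replace("http:", "https:")
    return {u + suffix for u in (url, other) for suffix in ("", "/", ".git")}

kb_link = "http://github.com/django/django.git"   # made-up KB link for the example
print(kb_link in url_variants(github_url))         # True -> would count as an exact match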

examples/search_components.py

Lines changed: 31 additions & 5 deletions
@@ -3,14 +3,15 @@
 import argparse
 import json
 import logging
+import math
 import sys
 
 from blackduck.HubRestApi import HubInstance
 
 parser = argparse.ArgumentParser("A program that uses the Black Duck search API to find components...and other things")
 parser.add_argument("search_str")
-
-parser.add_argument("-l", "--loglevel", choices=["CRITICAL", "DEBUG", "ERROR", "INFO", "WARNING"], default="DEBUG", help="Choose the desired logging level - CRITICAL, DEBUG, ERROR, INFO, or WARNING. (default: DEBUG)")
+parser.add_argument("-l", "--limit", type=int, default=100, help="Set the per call limit on number of results")
+parser.add_argument("--loglevel", choices=["CRITICAL", "DEBUG", "ERROR", "INFO", "WARNING"], default="DEBUG", help="Choose the desired logging level - CRITICAL, DEBUG, ERROR, INFO, or WARNING. (default: DEBUG)")
 
 args = parser.parse_args()
 
@@ -21,12 +22,37 @@
     'INFO': logging.INFO,
     'WARNING': logging.WARNING,
 }
-logging.basicConfig(stream=sys.stdout, format='%(threadName)s: %(asctime)s: %(levelname)s: %(message)s', level=logging_levels[args.loglevel])
+logging.basicConfig(stream=sys.stderr, format='%(threadName)s: %(asctime)s: %(levelname)s: %(message)s', level=logging_levels[args.loglevel])
 logging.getLogger("requests").setLevel(logging.WARNING)
 logging.getLogger("urllib3").setLevel(logging.WARNING)
 
 hub = HubInstance()
 
+offset = 0
+
+hits = []
+num_results_found = math.inf
+first = True
+
+while offset < num_results_found:
+    parameters = {
+        'limit': args.limit,
+        'offset': offset,
+    }
+    search_results = hub.search_components(args.search_str, parameters=parameters)
+    if first:
+        first = False
+        num_results_found = search_results['items'][0]['searchResultStatistics']['numResultsFound']
+        logging.debug(f"Search ({args.search_str}) numResultsFound: {num_results_found}")
+
+    hits.extend(search_results['items'][0]['hits'])
+    num_results_this_page = search_results['items'][0]['searchResultStatistics']['numResultsInThisPage']
+
+    offset += args.limit
+    logging.debug(f"Retrieved {num_results_this_page} out of {num_results_found}, setting offset to {offset}")
+
+logging.debug(f"Retrieved a total of {len(hits)} hits")
+print(json.dumps(hits))
+
+
 
-search_results = hub.search_components(args.search_str)
-print(json.dumps(search_results))
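A possible follow-on sketch (not part of the commit) showing one way to consume the hits the script now prints. It assumes stdout was redirected to a file named hits.json; both the filename and the invocation in the comment are assumptions, while the hit field layout matches what the script itself reads.

import json
from collections import Counter

# hits.json is an assumed filename, e.g. produced by:
#   python examples/search_components.py curl > hits.json
with open("hits.json") as f:
    hits = json.load(f)

# tally hits per component name using the same 'fields'/'name' layout the script reads
names = Counter(hit['fields']['name'][0] for hit in hits)
print(names.most_common(10))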
